import requests
import json
import os
import fitz  # PyMuPDF
from PIL import Image, ImageDraw
ti_app_id = "your_app_id"
ti_secret_code = "your_app_secret"
workspace_id = "your_workspace_id"
file_id = "your_file_id"
host = "https://docflow.textin.com"
url = "/api/app-api/sip/platform/v2/file/fetch"
params = {
    "workspace_id":workspace_id, 
    "with_document": "true", 
    "file_id":file_id
    }
resp = requests.get(url=f"{host}{url}", 
                    params=params,
                    headers={"x-ti-app-id": ti_app_id,
                             "x-ti-secret-code": ti_secret_code,
                             })
resp_json = json.loads(resp.text)
def extract_pages_from_docflow(document):
    pages_out = []
    for page in document.get("pages", []):
        pages_out.append({
            "width": page.get("width", 0),
            "height": page.get("height", 0),
            "angle": page.get("angle", 0),
            "lines": page.get("lines", [])
        })
    return pages_out
def pdf_to_images(pdf_path, output_dir="./docflow_pages", dpi=144):
    os.makedirs(output_dir, exist_ok=True)
    doc = fitz.open(pdf_path)
    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)
    image_paths = []
    for i, page in enumerate(doc):
        pix = page.get_pixmap(matrix=mat)
        img_path = os.path.join(output_dir, f"page_{i+1}.png")
        pix.save(img_path)
        image_paths.append(img_path)
    doc.close()
    return image_paths
def draw_quads_on_image(image_path, quads, page_width, page_height, color=(26,102,255), line_width=2):
    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)
    img_w, img_h = image.size
    scale_x = img_w / page_width if page_width else 1
    scale_y = img_h / page_height if page_height else 1
    for q in quads:
        pos = q.get("position")
        if pos and len(pos) == 8:
            points = [
                (pos[0]*scale_x, pos[1]*scale_y),
                (pos[2]*scale_x, pos[3]*scale_y),
                (pos[4]*scale_x, pos[5]*scale_y),
                (pos[6]*scale_x, pos[7]*scale_y),
                (pos[0]*scale_x, pos[1]*scale_y)
            ]
            draw.line(points, fill=color, width=line_width)
    out_path = image_path.replace('.png', '_boxed.png')
    image.save(out_path)
    return out_path
# 选择要可视化的原始PDF(与解析的同一文件)
pdf_path = "./simple.pdf"  # 请替换为你的原始PDF路径
# 提取坐标数据
files = resp_json.get("result", {}).get("files", [])
if not files:
    raise RuntimeError("未获取到文件解析结果")
document = files[0].get("document", {})
pages = extract_pages_from_docflow(document)
# 转图片并绘制坐标
image_paths = pdf_to_images(pdf_path, output_dir="./docflow_pages")
annotated = []
for i, page in enumerate(pages):
    if i >= len(image_paths):
        break
    out_path = draw_quads_on_image(
        image_paths[i],
        page["lines"],
        page["width"],
        page["height"]
    )
    annotated.append(out_path)
print("标注图片输出:", annotated)