import requests
import json
def _print_fields(fields):
    """Print the key, value and positions of each extracted field."""
    print("\n=== Field Information ===")
    for field in fields:
        key = field.get("key", "")
        value = field.get("value", "")
        print(f"Field: {key}")
        print(f"Value: {value}")
        # `or []` / `or 0` also cover an explicit JSON null, which the
        # default argument of dict.get() would not replace.
        for i, pos in enumerate(field.get("position") or []):
            page = pos.get("page") or 0
            vertices = pos.get("vertices", [])
            # The page number is shown as page + 1
            # (presumably the API reports 0-based pages -- TODO confirm).
            print(f" Position {i+1} (Page {page+1}): {vertices}")
        print("-" * 30)


def _print_tables(tables):
    """Print every row of each extracted table as key/value cells."""
    print("\n=== Table Information ===")
    for table in tables:
        table_name = table.get("tableName", "")
        print(f"Table name: {table_name}")
        for row_idx, row in enumerate(table.get("items") or []):
            print(f" Row {row_idx + 1}:")
            for cell in row or []:
                print(f" {cell.get('key')}: {cell.get('value')}")


def extract_specific_fields(workspace_id, task_id, app_id, secret_code):
    """Request extraction of a fixed set of fields and print the results.

    Sends a POST request to the ``extract_fields`` endpoint asking for two
    standalone fields ("Invoice Code", "Invoice Date") and one table
    ("Table1" with "Goods Name" and "Unit Price"), then prints the fields
    and tables found in every file of the response.

    Args:
        workspace_id: Sent as ``workspace_id`` in the request body.
        task_id: Sent as ``task_id`` in the request body.
        app_id: Sent in the ``x-ti-app-id`` request header.
        secret_code: Sent in the ``x-ti-secret-code`` request header.

    Returns:
        The decoded JSON response (a dict) on success. ``None`` when the
        request cannot be completed, the HTTP status is not 200, the body
        is not a JSON object, or the response's ``code`` field is not 200;
        a diagnostic message is printed in each of those cases.
    """
    host = "https://docflow.textin.com"
    url = "/api/app-api/sip/platform/v2/file/extract_fields"
    # Request body
    payload = {
        "workspace_id": workspace_id,
        "task_id": task_id,
        "fields": [
            {
                "key": "Invoice Code",
                "prompt": "Extract complete invoice code",
            },
            {
                "key": "Invoice Date",
                "prompt": "Keep only the year part",
            },
        ],
        "tables": [
            {
                "name": "Table1",
                "fields": [
                    {
                        "key": "Goods Name",
                        "prompt": "Extract full product name",
                    },
                    {
                        "key": "Unit Price",
                    },
                ],
            },
        ],
    }
    # Transport failures (timeout, DNS, refused connection) are reported the
    # same way as HTTP/API errors below: print a message and return None.
    try:
        resp = requests.post(
            f"{host}{url}",
            json=payload,
            headers={
                "x-ti-app-id": app_id,
                "x-ti-secret-code": secret_code,
                "Content-Type": "application/json",
            },
            timeout=60,
        )
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return None

    if resp.status_code != 200:
        print(f"Request failed: {resp.status_code}")
        print(f"Error message: {resp.text}")
        return None

    # A 200 response is not guaranteed to carry JSON (e.g. a proxy error
    # page); Response.json() raises a ValueError subclass in that case.
    try:
        data = resp.json()
    except ValueError as exc:
        print(f"Failed to parse response as JSON: {exc}")
        return None
    if not isinstance(data, dict):
        print(f"Unexpected response format: {type(data).__name__}")
        return None

    if data.get("code") != 200:
        print(f"API returned error: {data.get('message')}")
        return None

    # Process returned results. `or {}` / `or []` guard against keys that are
    # present but null, which would otherwise raise on the next access.
    result = data.get("result") or {}
    for file in result.get("files") or []:
        print(f"File name: {file.get('name')}")
        print(f"Task ID: {file.get('task_id')}")

        file_data = file.get("data") or {}

        # Extract field information
        fields = file_data.get("fields") or []
        if fields:
            _print_fields(fields)

        # Extract table information
        tables = file_data.get("tables") or []
        if tables:
            _print_tables(tables)

    return data
# Usage example: replace the placeholder strings with real values before running.
if __name__ == "__main__":
    result = extract_specific_fields(
        workspace_id="<your-workspace-id>",
        task_id="<your-task-id>",
        app_id="<your-app-id>",
        secret_code="<your-secret-code>",
    )