#!/usr/bin/env python3
import base64
import json
import mimetypes
import os
from pathlib import Path

import gradio as gr
from openai import OpenAI
# SECURITY: the API key used to be embedded here as a string literal. Any key
# that has been committed to source control must be treated as compromised and
# revoked. The key is now read from the OPENAI_API_KEY environment variable;
# API_KEY is None when the variable is unset.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Model name passed to every chat-completion request made by this script.
MODEL = "gpt-5.1"

# Shared client used by the upload and extraction helpers below.
# NOTE(review): the OpenAI client is expected to reject a missing key at
# construction time — confirm against the installed openai version.
client = OpenAI(api_key=API_KEY)
def upload_pdf(path):
    """Upload the file at *path* through the shared client and return its id.

    Args:
        path: Filesystem path (str or Path) of the document to upload.

    Returns:
        The ``id`` attribute of the object returned by ``client.files.create``.
    """
    # Open inside a context manager so the handle is closed even when the
    # upload raises; the previous bare open() never closed the file.
    with open(path, "rb") as handle:
        uploaded = client.files.create(file=handle, purpose="assistants")
    return uploaded.id
def prompt():
    """Return the fixed instruction text sent to the model with each document.

    The text is assembled from named sections: an opening instruction, the
    JSON schema to fill in, five rule blocks, and a closing reminder. The
    function takes no arguments and always returns the same string.
    """
    opening = (
        "Extract structured JSON from the attached logistics document. "
        "Return ONLY valid JSON.\n"
    )

    # Schema lines use single-quoted literals so the JSON keys need no escaping.
    schema = (
        '{\n'
        ' "po_number": string|null,\n'
        ' "ship_from_name": string|null,\n'
        ' "ship_from_email": string|null,\n'
        ' "carrier_type": string|null,\n'
        ' "rail_car_number": string|null,\n'
        ' "total_quantity": number|null,\n'
        ' "inventories": [\n'
        ' {\n'
        ' "productName": string|null,\n'
        ' "productCode": string|null,\n'
        ' "variants": [\n'
        ' {\n'
        ' "dimensions": string|null,\n'
        ' "pcs_per_pkg": number|null,\n'
        ' "length_ft": number|null,\n'
        ' "width": number|null,\n'
        ' "packages": number|null,\n'
        ' "pieces": number|null,\n'
        ' "fbm": number|string|null\n'
        ' }\n'
        ' ],\n'
        ' "total_pcs": number|null,\n'
        ' "total_fbm": number|string|null\n'
        ' }\n'
        ' ],\n'
        ' "custom_fields": {}\n'
        '}\n\n'
    )

    ship_from_rules = (
        "SHIP FROM RULES:\n"
        "- If explicit fields like 'Origin', 'Ship From' exist, extract that value.\n"
        "- If the document is an email-style inbound notice (header block) and shows:\n"
        " From: Name <email>\n"
        " then ship_from_name = Name, ship_from_email = email.\n"
        "- If only an email exists and no human name, set both fields to that email.\n"
        "- If both Origin and an email sender exist, use Origin for ship_from_name and still capture the email under ship_from_email.\n"
        "- Priority: Origin → Email Name → Mill → Sender block → null.\n\n"
    )

    carrier_rules = (
        "CARRIER / EQUIPMENT RULE:\n"
        "- If the table contains:\n"
        " Equipment id = <value>\n"
        " Mark = <value>\n"
        " then ALWAYS treat 'Equipment id' as the railcar number.\n"
        "- NEVER use 'Mark' as railcar number.\n"
        "- Carrier type must match the carrier text exactly (e.g., CHICAGO RAIL LINK).\n\n"
    )

    inventory_rules = (
        "INVENTORY RULES:\n"
        "- Do not merge length groups. Each unique length or dimension is its own variant.\n"
        "- Extract pcs_per_pkg, packages, pieces, fbm exactly as written.\n"
        "- total_pcs = sum of pieces.\n"
        "- total_fbm = sum of fbm.\n\n"
    )

    total_quantity_rules = (
        "TOTAL QUANTITY RULE:\n"
        "- Use explicit totals if they appear.\n"
        "- If no explicit total quantity appears, leave null.\n\n"
    )

    custom_field_rules = (
        "CUSTOM FIELDS RULE:\n"
        "- Capture all meaningful leftover fields not part of main schema.\n\n"
    )

    closing = "Return ONLY the JSON."

    sections = (
        opening,
        schema,
        ship_from_rules,
        carrier_rules,
        inventory_rules,
        total_quantity_rules,
        custom_field_rules,
        closing,
    )
    return "".join(sections)
def _resolve_upload_path(file):
    """Return a Path for an upload given as a path or an object with ``.name``."""
    if file is None:
        raise ValueError("No file was provided.")
    # Plain paths are used as-is; Path.name would drop the directory part.
    if isinstance(file, (str, Path)):
        return Path(file)
    return Path(file.name)


def _image_data_url(path):
    """Encode the image at *path* as a base64 ``data:`` URL."""
    mime, _ = mimetypes.guess_type(path.name)
    if mime is None or not mime.startswith("image/"):
        # Unknown or non-image extension: fall back to the suffix-derived
        # subtype, which is what this code sent before.
        mime = f"image/{path.suffix.lower()[1:]}"
    encoded = base64.b64encode(path.read_bytes()).decode()
    return f"data:{mime};base64,{encoded}"


def _slice_json_object(text):
    """Return the span of *text* from its first ``{`` to its last ``}``."""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Model response did not contain a JSON object.")
    return text[start:end + 1]


def extract(file):
    """Send an uploaded document to the model and return the JSON text it produced.

    A ``.pdf`` file is uploaded via ``upload_pdf`` and referenced by file id;
    any other file is inlined as a base64 image data URL. The reply is trimmed
    to the span between its first ``{`` and last ``}``.

    Args:
        file: A filesystem path (str or Path) or an object whose ``name``
            attribute holds the path of the uploaded file.

    Returns:
        The JSON object portion of the model's reply, as a string. The text
        is not parsed or validated here.

    Raises:
        ValueError: If *file* is None, or the reply has no ``{...}`` span
            (including an empty reply).
    """
    path = _resolve_upload_path(file)

    if path.suffix.lower() == ".pdf":
        attachment = {"type": "file", "file": {"file_id": upload_pdf(path)}}
    else:
        attachment = {
            "type": "image_url",
            "image_url": {"url": _image_data_url(path)},
        }

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt()}, attachment],
            }
        ],
    )

    # content can be None; treat that the same as an empty reply so the
    # failure surfaces as the ValueError documented above.
    reply = response.choices[0].message.content or ""
    return _slice_json_object(reply)
def ui(file):
    """Gradio callback: run ``extract`` on the uploaded file and return its result."""
    extracted = extract(file)
    return extracted
# Sample inputs offered in the interface; paths are relative to the working directory.
examples = ["IMG_0001.jpg", "IMG_0002.jpg"]

# Build the single-input / single-output interface and start serving it.
# The positional True previously passed to launch() is its first parameter,
# ``inline``; it is spelled out here by keyword.
gr.Interface(
    fn=ui,
    inputs=gr.File(label="Upload PDF or Image"),
    outputs=gr.JSON(label="Extracted JSON"),
    title="Logistics OCR Data Extractor (GPT-5.1)",
    examples=examples,
).launch(inline=True)
|