Spaces:

MLBench
/

Logistics-OCR-Text-Extractor

Sleeping

App Files Files Community

mlbench123 committed on Nov 20, 2025

Commit

1be0b12

verified ·

1 Parent(s): b0f731d

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -41

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import base64
 import json
 from pathlib import Path
@@ -10,10 +13,21 @@ MODEL = "gpt-5.1"
 client = OpenAI(api_key=API_KEY)
 def build_prompt():
     return (
-        "You are an extraction system. Extract ONLY explicit data. No guessing.\n\n"
-        "Return JSON strictly:\n"
         "{\n"
         "  \"po_number\": string|null,\n"
         "  \"ship_from\": string|null,\n"
@@ -21,76 +35,98 @@ def build_prompt():
         "  \"rail_car_number\": string|null,\n"
         "  \"total_quantity\": number|null,\n"
         "  \"inventories\": [\n"
-        "    {\n"
-        "      \"productName\": string,\n"
-        "      \"productCode\": string|null,\n"
-        "      \"variants\": [\n"
-        "        {\n"
-        "          \"dimensions\": string|null,\n"
-        "          \"pcs_per_pkg\": number|null,\n"
-        "          \"length_ft\": number|null,\n"
-        "          \"width\": number|null,\n"
-        "          \"packages\": number|null,\n"
-        "          \"pieces\": number|null,\n"
-        "          \"fbm\": number|null\n"
-        "        }\n"
-        "      ],\n"
-        "      \"total_pcs\": number|null,\n"
-        "      \"total_fbm\": number|null\n"
-        "    }\n"
         "  ],\n"
         "  \"custom_fields\": {}\n"
-        "}\n"
-    )
-def upload_pdf(p):
-    f = client.files.create(file=open(p, "rb"), purpose="assistants")
-    return f.id
 def extract(file):
     path = Path(file.name)
-    prompt = build_prompt()
-    ext = path.suffix.lower()
-    if ext == ".pdf":
         fid = upload_pdf(path)
         msg = [
-            {"type": "text", "text": prompt},
             {"type": "file", "file": {"file_id": fid}}
         ]
     else:
         b64 = base64.b64encode(path.read_bytes()).decode()
-        mime = f"image/{ext[1:]}"
         msg = [
-            {"type": "text", "text": prompt},
-            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}}
         ]
     r = client.chat.completions.create(
         model=MODEL,
         messages=[{"role": "user", "content": msg}]
     )
-    raw = r.choices[0].message.content
-    s = raw.find("{")
-    e = raw.rfind("}")
-    return json.loads(raw[s:e+1])
-sample_files = [
-    ("IMG_0001.jpg", "IMG_0001.jpg"),
-    ("IMG_0002.jpg", "IMG_0002.jpg")
-]
 def ui(file):
     return extract(file)
 gr.Interface(
     fn=ui,
     inputs=gr.File(label="Upload PDF or Image"),
     outputs=gr.JSON(label="Extracted JSON"),
-    title="Logistics OCR Extraction",
-    examples=[f for _, f in sample_files]
 ).launch()

+#!/usr/bin/env python3
+# app.py — Logistics OCR Extractor (PDF + Images) with strict ship_from rules
 import base64
 import json
 from pathlib import Path
 client = OpenAI(api_key=API_KEY)
+# ----------------------- PDF Upload -----------------------
+def upload_pdf(path):
+    f = client.files.create(
+        file=open(path, "rb"),
+        purpose="assistants"
+    )
+    return f.id
+# ----------------------- Prompt Builder -----------------------
 def build_prompt():
     return (
+        "Extract structured JSON from this logistics shipping document. "
+        "Use only what appears in the PDF/image, never hallucinate. "
+        "Return strictly valid JSON in this schema:\n\n"
         "{\n"
         "  \"po_number\": string|null,\n"
         "  \"ship_from\": string|null,\n"
         "  \"rail_car_number\": string|null,\n"
         "  \"total_quantity\": number|null,\n"
         "  \"inventories\": [\n"
+        "     {\n"
+        "       \"productName\": string,\n"
+        "       \"productCode\": string|null,\n"
+        "       \"variants\": [\n"
+        "         {\n"
+        "           \"dimensions\": string|null,\n"
+        "           \"pcs_per_pkg\": number|null,\n"
+        "           \"length_ft\": number|null,\n"
+        "           \"width\": number|null,\n"
+        "           \"packages\": number|null,\n"
+        "           \"pieces\": number|null,\n"
+        "           \"fbm\": number|null\n"
+        "         }\n"
+        "       ],\n"
+        "       \"total_pcs\": number|null,\n"
+        "       \"total_fbm\": number|null\n"
+        "     }\n"
         "  ],\n"
         "  \"custom_fields\": {}\n"
+        "}\n\n"
+        "SHIP_FROM EXTRACTION RULES (MANDATORY):\n"
+        "1. If document contains explicit Origin/Ship From labels, extract that value.\n"
+        "2. If document is an email-based inbound notice and no explicit origin exists, "
+        "set ship_from = the email 'From:' field.\n"
+        "3. If both Origin and Mill exist, use Origin.\n"
+        "4. If only Mill exists AND it is clearly the shipping location, use Mill.\n"
+        "5. Priority order: Origin → Email From → Mill → Sender company block.\n"
+        "6. If none apply, ship_from = null.\n\n"
+        "Rules for inventories:\n"
+        "- Do NOT merge different lengths; create a separate variant per length.\n"
+        "- Extract EXACT numbers shown: packages, pcs_per_pkg, pieces, fbm.\n"
+        "- total_pcs = sum of all variant pieces.\n"
+        "- total_fbm = sum of all variant fbm.\n\n"
+        "Rules for total_quantity:\n"
+        "- If the document shows a total PCS value explicitly, use it.\n"
+        "- If only variants exist, do not compute total_quantity unless the document explicitly states it.\n\n"
+        "Parse tables carefully. If a dimension group (like 2x6) appears, use that.\n"
+        "Return only JSON. No explanations."
+    )
+# ----------------------- Extraction Logic -----------------------
 def extract(file):
     path = Path(file.name)
+    suffix = path.suffix.lower()
+    if suffix == ".pdf":
         fid = upload_pdf(path)
         msg = [
+            {"type": "text", "text": build_prompt()},
             {"type": "file", "file": {"file_id": fid}}
         ]
     else:
         b64 = base64.b64encode(path.read_bytes()).decode()
         msg = [
+            {"type": "text", "text": build_prompt()},
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/{suffix[1:]};base64,{b64}"}
+            }
         ]
     r = client.chat.completions.create(
         model=MODEL,
         messages=[{"role": "user", "content": msg}]
     )
+    txt = r.choices[0].message.content
+    s = txt.find("{")
+    e = txt.rfind("}")
+    return txt[s:e+1]
+# ----------------------- Gradio UI -----------------------
 def ui(file):
     return extract(file)
+# Sample images (optional)
+sample_files = [
+    ("IMG_0001.jpg", "samples/IMG_0001.jpg"),
+    ("IMG_0002.jpg", "samples/IMG_0002.jpg")
+]
 gr.Interface(
     fn=ui,
     inputs=gr.File(label="Upload PDF or Image"),
     outputs=gr.JSON(label="Extracted JSON"),
+    title="Logistics OCR Data Extractor (GPT-5.1)",
+    examples=sample_files
 ).launch()