Spaces:

MLBench
/

Logistics-OCR-Text-Extractor

Sleeping

App Files Community

mlbench123 committed on Nov 20, 2025

Commit

963f6dd

verified ·

1 Parent(s): 0fe1fcf

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -96

app.py CHANGED Viewed

@@ -9,31 +9,11 @@ MODEL = "gpt-5.1"
 client = OpenAI(api_key=API_KEY)
-SAMPLE_DIR = Path("samples")
-SAMPLES = {
-    "None": None,
-    "Sample 1 - IMG_0001.jpg": SAMPLE_DIR / "IMG_0001.jpg",
-    "Sample 2 - IMG_0002.jpg": SAMPLE_DIR / "IMG_0002.jpg"
-}
-# ------------------ Upload PDF ------------------
-def upload_pdf(path):
-    f = client.files.create(
-        file=open(path, "rb"),
-        purpose="assistants"
-    )
-    return f.id
-# ------------------ Prompt ---------------------
 def build_prompt():
     return (
-        "You are an advanced extraction system for logistics, rail, lumber, shipping, trucking, "
-        "inventory, and packing documents. Your task is to read the attached PDF or image and "
-        "extract ONLY the information that is explicitly present.\n\n"
-        "You must return STRICT JSON in the EXACT structure below:\n"
         "{\n"
         "  \"po_number\": string|null,\n"
         "  \"ship_from\": string|null,\n"
@@ -41,98 +21,76 @@ def build_prompt():
         "  \"rail_car_number\": string|null,\n"
         "  \"total_quantity\": number|null,\n"
         "  \"inventories\": [\n"
-        "     {\n"
-        "       \"productName\": string,\n"
-        "       \"productCode\": string|null,\n"
-        "       \"pcs\": number|null,\n"
-        "       \"dimensions\": string|null\n"
-        "     }\n"
         "  ],\n"
         "  \"custom_fields\": {}\n"
-        "}\n\n"
-        "YOUR RULES (MUST FOLLOW EXACTLY):\n"
-        "1. Do NOT guess or hallucinate. Only extract values explicitly shown in the document.\n"
-        "2. If a field is not present or cannot be confirmed → output null.\n"
-        "3. PO Number may appear under wording like 'PO', 'Purchase Order', 'P.O.', 'Customer PO', etc.\n"
-        "4. Ship From may appear as 'Origin', 'From', 'Exporter', 'Ship From', 'Supplier', etc.\n"
-        "5. Carrier Type may appear as 'Carrier', 'Carrier Type', 'Routing', 'Mode', 'Transport Type', "
-        "'RAIL', 'TRUCK', 'CN', 'BNSF', 'CP', 'Truckload', etc.\n"
-        "6. Rail Car Number may appear as 'Railcar', 'Rail Car #', 'Car Number', 'Car #', etc.\n"
-        "7. Total Quantity must be ONLY the explicit total PCS/pieces count if it appears. "
-        "If the only total shown is FBM/weight/volume → DO NOT treat that as quantity.\n"
-        "8. Inventories must capture every unique product line that appears. Extract product name, "
-        "item description, dimensions like '2x4', '23/32', and PCS when available.\n"
-        "9. Dimensions may appear as '2 X 4', '2x6', '48x96', '23/32', etc. Normalize to a single "
-        "string representation.\n"
-        "10. custom_fields must contain ANY additional fields not part of the main schema (dates, mills, "
-        "FBM, weights, routing codes, package counts, etc.). Key names must be lower_snake_case.\n"
-        "11. JSON MUST be valid, must not include comments, and must not include text outside the JSON object.\n\n"
-        "ADDITIONAL RULES FOR COMPLEX TABLES:\n"
-        "- If multiple product variants exist, create multiple inventory objects.\n"
-        "- If tables list PCS per package × number of packages, you MAY compute total PCS.\n"
-        "- Never compute derived values unless the math is explicitly possible.\n"
-        "- If a value is ambiguous, set it to null.\n\n"
-        "Final requirement: Return ONLY the JSON object. No explanation, no markdown.\n"
     )
-# ------------------ Extract Core Logic ------------------
-def extract_from_path(path: Path):
-    suffix = path.suffix.lower()
-    if suffix == ".pdf":
-        file_id = upload_pdf(path)
-        content = [
-            {"type": "text", "text": build_prompt()},
-            {"type": "file", "file": {"file_id": file_id}}
         ]
     else:
-        # image handling
-        raw = path.read_bytes()
-        b64 = base64.b64encode(raw).decode()
-        mime = suffix.replace(".", "")
-        content = [
-            {"type": "text", "text": build_prompt()},
-            {"type": "image_url", "image_url": {"url": f"data:image/{mime};base64,{b64}"}}
         ]
-    response = client.chat.completions.create(
         model=MODEL,
-        messages=[{"role": "user", "content": content}]
     )
-    out = response.choices[0].message.content
-    s = out.find("{")
-    e = out.rfind("}")
-    return out[s:e+1]
-# ------------------ UI Logic ------------------
-def run_extraction(uploaded_file, sample_name):
-    if uploaded_file:
-        return extract_from_path(Path(uploaded_file.name))
-    if sample_name != "None":
-        return extract_from_path(SAMPLES[sample_name])
-    return "Upload a file or choose a sample image."
-# ------------------ Gradio Interface ------------------
 gr.Interface(
-    fn=run_extraction,
-    inputs=[
-        gr.File(label="Upload PDF or Image"),
-        gr.Dropdown(list(SAMPLES.keys()), value="None", label="Or choose a sample")
-    ],
     outputs=gr.JSON(label="Extracted JSON"),
-    title="Logistics OCR Text Extraction (GPT-5.1 LLM)",
-    description="Upload a PDF/image or select a built-in sample to extract structured logistics data."
 ).launch()

 client = OpenAI(api_key=API_KEY)
 def build_prompt():
     return (
+        "You are an extraction system. Extract ONLY explicit data. No guessing.\n\n"
+        "Return JSON strictly:\n"
         "{\n"
         "  \"po_number\": string|null,\n"
         "  \"ship_from\": string|null,\n"
         "  \"rail_car_number\": string|null,\n"
         "  \"total_quantity\": number|null,\n"
         "  \"inventories\": [\n"
+        "    {\n"
+        "      \"productName\": string,\n"
+        "      \"productCode\": string|null,\n"
+        "      \"variants\": [\n"
+        "        {\n"
+        "          \"dimensions\": string|null,\n"
+        "          \"pcs_per_pkg\": number|null,\n"
+        "          \"length_ft\": number|null,\n"
+        "          \"width\": number|null,\n"
+        "          \"packages\": number|null,\n"
+        "          \"pieces\": number|null,\n"
+        "          \"fbm\": number|null\n"
+        "        }\n"
+        "      ],\n"
+        "      \"total_pcs\": number|null,\n"
+        "      \"total_fbm\": number|null\n"
+        "    }\n"
         "  ],\n"
         "  \"custom_fields\": {}\n"
+        "}\n"
     )
+def upload_pdf(p):
+    """Upload the file at path *p* via ``client.files.create`` and return its id.
+
+    The file is opened in binary mode inside a ``with`` block so the handle
+    is closed as soon as the upload call returns, even when it raises.
+    """
+    with open(p, "rb") as fh:
+        f = client.files.create(file=fh, purpose="assistants")
+    return f.id
+def extract(file):
+    """Send an uploaded PDF or image to the model and return the parsed JSON.
+
+    *file* is the upload value handed over by the UI: either an object with
+    a ``.name`` path attribute or a plain path string. A ``.pdf`` is uploaded
+    through ``upload_pdf`` and referenced by file id; anything else is
+    inlined as a base64 data URL.
+
+    Raises ``ValueError`` when no file was provided, and
+    ``json.JSONDecodeError`` when the reply holds no parseable JSON object.
+    """
+    import mimetypes  # function-scope import keeps this change self-contained
+    if file is None:
+        raise ValueError("No file provided.")
+    # Accept both tempfile-like objects (``.name``) and plain path strings.
+    path = Path(getattr(file, "name", file))
+    prompt = build_prompt()
+    ext = path.suffix.lower()
+    if ext == ".pdf":
+        fid = upload_pdf(path)
+        msg = [
+            {"type": "text", "text": prompt},
+            {"type": "file", "file": {"file_id": fid}}
         ]
     else:
+        b64 = base64.b64encode(path.read_bytes()).decode()
+        # ``image/jpg`` is not a registered MIME type, so map the extension
+        # properly (``.jpg`` -> ``image/jpeg``); keep the old suffix-based
+        # value as a fallback for extensions ``mimetypes`` does not know.
+        mime = mimetypes.guess_type(path.name)[0] or f"image/{ext[1:]}"
+        msg = [
+            {"type": "text", "text": prompt},
+            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}}
         ]
+    r = client.chat.completions.create(
         model=MODEL,
+        messages=[{"role": "user", "content": msg}]
     )
+    # The message content may be None (e.g. a refusal); treat it as empty.
+    raw = r.choices[0].message.content or ""
+    s = raw.find("{")
+    e = raw.rfind("}")
+    # ``find``/``rfind`` return -1 on a miss; slicing with those would hand
+    # ``json.loads`` a meaningless fragment, so fail with a clear message
+    # while keeping the exception type the old code raised for bad replies.
+    if s == -1 or e < s:
+        raise json.JSONDecodeError(
+            "Model response did not contain a JSON object", raw, 0
+        )
+    return json.loads(raw[s:e+1])
+# (label, path) pairs for the bundled sample images offered as UI examples.
+sample_files = [
+    (name, f"samples/{name}")
+    for name in ("IMG_0001.jpg", "IMG_0002.jpg")
+]
+def ui(file):
+    """UI callback: pass the uploaded file to ``extract`` and return its result."""
+    result = extract(file)
+    return result
 gr.Interface(
+    fn=ui,
+    inputs=gr.File(label="Upload PDF or Image"),
     outputs=gr.JSON(label="Extracted JSON"),
+    title="Logistics OCR Extraction",
+    examples=[f for _, f in sample_files]
 ).launch()