Spaces:

MLBench
/

Logistics-OCR-Text-Extractor

Sleeping

App Files Files Community

mlbench123 committed on Nov 20, 2025

Commit

075a1f5

verified ·

1 Parent(s): b98f926

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -87

app.py CHANGED Viewed

@@ -1,87 +1,105 @@
-import base64
-import json
-from pathlib import Path
-import gradio as gr
-from openai import OpenAI
-API_KEY = "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"
-MODEL = "gpt-5.1"
-client = OpenAI(api_key=API_KEY)
-SAMPLE_DIR = Path("samples")
-SAMPLES = {
-    "Sample 1 (IMG_0001.jpg)": SAMPLE_DIR / "IMG_0001.jpg",
-    "Sample 2 (IMG_0002.jpg)": SAMPLE_DIR / "IMG_0002.jpg",
-    "None": None
-}
-def upload_pdf(p):
-    f = client.files.create(file=open(p, "rb"), purpose="assistants")
-    return f.id
-def prompt():
-    return (
-        "Extract structured JSON:\n"
-        "{"
-        "\"po_number\":string|null,"
-        "\"ship_from\":string|null,"
-        "\"carrier_type\":string|null,"
-        "\"rail_car_number\":string|null,"
-        "\"total_quantity\":number|null,"
-        "\"inventories\":[{\"productName\":string,\"productCode\":string|null,"
-        "\"pcs\":number|null,\"dimensions\":string|null}],"
-        "\"custom_fields\":{}"
-        "}\n"
-        "Use only PDF/image content."
-    )
-def extract_from_path(path: Path):
-    suffix = path.suffix.lower()
-    if suffix == ".pdf":
-        fid = upload_pdf(path)
-        msg = [
-            {"type": "text", "text": prompt()},
-            {"type": "file", "file": {"file_id": fid}}
-        ]
-    else:
-        b64 = base64.b64encode(path.read_bytes()).decode()
-        msg = [
-            {"type": "text", "text": prompt()},
-            {
-                "type": "image_url",
-                "image_url": {"url": f"data:image/{suffix[1:]};base64,{b64}"}
-            }
-        ]
-    response = client.chat.completions.create(
-        model=MODEL,
-        messages=[{"role": "user", "content": msg}]
-    )
-    raw = response.choices[0].message.content
-    start = raw.find("{")
-    end = raw.rfind("}")
-    return raw[start:end+1]
-def run_extraction(uploaded_file, sample_name):
-    if uploaded_file is not None:
-        return extract_from_path(Path(uploaded_file.name))
-    if sample_name != "None":
-        sample_path = SAMPLES[sample_name]
-        return extract_from_path(sample_path)
-    return "Upload a file or select a sample."
-gr.Interface(
-    fn=run_extraction,
-    inputs=[
-        gr.File(label="Upload PDF or Image (optional)"),
-        gr.Dropdown(list(SAMPLES.keys()), value="None", label="Or choose a sample image")
-    ],
-    outputs=gr.JSON(label="Extracted JSON"),
-    title="Logistics OCR Text Extraction (OpenAI GPT-5.1)",
-    description="Upload your own PDF/image or choose a sample to test the extraction."
-).launch()

import base64
import json
import mimetypes
import os
from pathlib import Path

import gradio as gr
from openai import OpenAI
# SECURITY: the OpenAI secret must never live in source control. It is read
# from the OPENAI_API_KEY environment variable instead (on a hosted Space this
# is configured as a repository secret). Any key that was ever committed to
# this file has to be treated as compromised and revoked.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Chat model used for every extraction request.
MODEL = "gpt-5.1"

# Shared API client; created once at import time and reused by all requests.
client = OpenAI(api_key=API_KEY)

# Bundled example documents, keyed by the label shown in the sample dropdown.
# The "None" entry is the sentinel meaning "no sample selected".
SAMPLE_DIR = Path("samples")
SAMPLES = {
    "None": None,
    "Sample 1 - IMG_0001.jpg": SAMPLE_DIR / "IMG_0001.jpg",
    "Sample 2 - IMG_0002.jpg": SAMPLE_DIR / "IMG_0002.jpg",
}
+# ------------------ Upload PDF ------------------
def upload_pdf(path):
    """Upload the PDF at *path* through the shared client and return its file id.

    Args:
        path: Location of the PDF on disk (anything ``open`` accepts).

    Returns:
        The ``id`` attribute of the object returned by ``client.files.create``.
    """
    # The context manager closes the handle even when the upload raises; the
    # bare ``open(...)`` used previously left the descriptor open.
    with open(path, "rb") as pdf_file:
        uploaded = client.files.create(file=pdf_file, purpose="assistants")
    return uploaded.id
+# ------------------ Prompt ---------------------
def build_prompt():
    """Return the instruction text sent with every document.

    The prompt states the task, spells out the JSON shape the reply must
    follow, and tells the model to rely only on text visible in the document.
    """
    # One entry per output line; single-quoted literals avoid escaping the
    # double quotes that belong to the JSON schema itself.
    prompt_lines = (
        'Extract structured JSON from this shipping document. Return ONLY valid JSON:',
        '{',
        '  "po_number": string|null,',
        '  "ship_from": string|null,',
        '  "carrier_type": string|null,',
        '  "rail_car_number": string|null,',
        '  "total_quantity": number|null,',
        '  "inventories": [',
        '     {"productName":string,"productCode":string|null,"pcs":number|null,"dimensions":string|null}',
        '  ],',
        '  "custom_fields": {}',
        '}',
        'Use ONLY the text visible in the document. Do NOT hallucinate values.',
    )
    return "\n".join(prompt_lines)
+# ------------------ Extract Core Logic ------------------
def _image_data_url(path: Path) -> str:
    """Return the bytes of *path* as a base64 ``data:`` URL with an image MIME type."""
    mime, _ = mimetypes.guess_type(path.name)
    if mime is None or not mime.startswith("image/"):
        # Unknown extension: fall back to trusting the suffix, as before.
        mime = f"image/{path.suffix.lower().lstrip('.')}"
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    return f"data:{mime};base64,{encoded}"


def _extract_json_object(text):
    """Return the outermost ``{...}`` span of *text* after checking it parses as JSON."""
    if not text:
        raise ValueError("Model returned an empty response.")
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Model response did not contain a JSON object.")
    candidate = text[start:end + 1]
    try:
        json.loads(candidate)
    except json.JSONDecodeError as err:
        raise ValueError("Model response contained malformed JSON.") from err
    return candidate


def extract_from_path(path: Path):
    """Send the document at *path* to the chat model and return its JSON reply.

    A ``.pdf`` file is uploaded via ``upload_pdf`` and referenced by file id;
    any other file is treated as an image and embedded inline as a base64
    data URL.

    Args:
        path: Location of the PDF or image to process.

    Returns:
        The JSON object found in the model's reply, as a string.

    Raises:
        ValueError: If the reply is empty, contains no ``{...}`` span, or the
            span is not valid JSON.
    """
    path = Path(path)
    prompt_part = {"type": "text", "text": build_prompt()}

    if path.suffix.lower() == ".pdf":
        document_part = {"type": "file", "file": {"file_id": upload_pdf(path)}}
    else:
        # Derive the MIME type properly: ".jpg" must become "image/jpeg",
        # not the non-standard "image/jpg" produced by reusing the suffix.
        document_part = {
            "type": "image_url",
            "image_url": {"url": _image_data_url(path)},
        }

    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": [prompt_part, document_part]}],
    )
    # The model may wrap the JSON in prose or code fences, so keep only the
    # outermost brace-delimited span and fail loudly when there is none.
    return _extract_json_object(response.choices[0].message.content)
+# ------------------ UI Logic ------------------
def run_extraction(uploaded_file, sample_name):
    """Gradio callback: extract from the upload if present, else from a sample.

    Args:
        uploaded_file: Value of the file input. Either an object exposing the
            temp-file path as ``.name`` or a plain path string (which of the
            two depends on the Gradio version/configuration); falsy when
            nothing was uploaded.
        sample_name: Selected dropdown label, a key of ``SAMPLES``. ``"None"``
            or an empty/cleared selection means no sample was chosen.

    Returns:
        The string returned by ``extract_from_path``, or a short hint when
        neither an upload nor a sample was provided.
    """
    if uploaded_file:
        # Accept both the file-wrapper object and a bare path string.
        upload_path = getattr(uploaded_file, "name", uploaded_file)
        return extract_from_path(Path(upload_path))

    # Guard against a cleared dropdown (None / "") before indexing SAMPLES,
    # which would otherwise raise KeyError.
    if sample_name and sample_name != "None":
        return extract_from_path(SAMPLES[sample_name])

    # NOTE(review): this plain sentence is routed to a JSON output component;
    # confirm the UI renders it as intended rather than rejecting it.
    return "Upload a file or choose a sample image."
+# ------------------ Gradio Interface ------------------
# Assemble the web UI: an optional file upload plus a dropdown of bundled
# samples feed run_extraction, whose result is shown in a JSON panel.
demo_inputs = [
    gr.File(label="Upload PDF or Image"),
    gr.Dropdown(list(SAMPLES), value="None", label="Or choose a sample"),
]
demo = gr.Interface(
    fn=run_extraction,
    inputs=demo_inputs,
    outputs=gr.JSON(label="Extracted JSON"),
    title="Logistics OCR Text Extraction (GPT-5.1 LLM)",
    description="Upload a PDF/image or select a built-in sample to extract structured logistics data.",
)

# Start serving immediately when the module is executed.
demo.launch()