# Hugging Face Space: MLBench / Logistics-OCR-Text-Extractor (status: Sleeping)
# File size: 4,818 Bytes

#!/usr/bin/env python3

import base64
import json
import mimetypes
import os
from pathlib import Path

import gradio as gr
from openai import OpenAI

# The OpenAI API key is read from the OPENAI_API_KEY environment variable
# instead of being hard-coded, so the secret never lives in the source file.
# NOTE(review): the key that was previously committed on this line must be
# treated as leaked and revoked.
API_KEY = os.environ.get("OPENAI_API_KEY")
MODEL = "gpt-5.1"

# Shared client used by upload_pdf() and extract().
# NOTE(review): the OpenAI client is expected to raise at construction time
# when no key is available -- confirm against the installed library version.
client = OpenAI(api_key=API_KEY)


def upload_pdf(path):
    """Upload the file at *path* through the module-level client.

    The file is opened in binary mode and sent with ``purpose="assistants"``.
    A context manager closes the handle once the upload call returns, so
    repeated uploads do not leak open file descriptors.

    Args:
        path: Filesystem path of the document to upload.

    Returns:
        The ``id`` attribute of the object returned by ``client.files.create``.
    """
    with open(path, "rb") as pdf_file:
        uploaded = client.files.create(file=pdf_file, purpose="assistants")
    return uploaded.id


# ---------------- Prompt (unchanged) ----------------
def prompt():
    """Return the instruction text sent to the model with every document.

    The text consists of the target JSON schema followed by rule sections
    (ship-from, carrier/equipment, inventory, total quantity, custom fields)
    and a closing reminder. Lines within a section are joined by a single
    newline; sections are separated by one blank line.
    """
    schema = (
        'Extract structured JSON from the attached logistics document. Return ONLY valid JSON.',
        '{',
        '  "po_number": string|null,',
        '  "ship_from_name": string|null,',
        '  "ship_from_email": string|null,',
        '  "carrier_type": string|null,',
        '  "rail_car_number": string|null,',
        '  "total_quantity": number|null,',
        '  "inventories": [',
        '    {',
        '      "productName": string|null,',
        '      "productCode": string|null,',
        '      "variants": [',
        '        {',
        '          "dimensions": string|null,',
        '          "pcs_per_pkg": number|null,',
        '          "length_ft": number|null,',
        '          "width": number|null,',
        '          "packages": number|null,',
        '          "pieces": number|null,',
        '          "fbm": number|string|null',
        '        }',
        '      ],',
        '      "total_pcs": number|null,',
        '      "total_fbm": number|string|null',
        '    }',
        '  ],',
        '  "custom_fields": {}',
        '}',
    )
    ship_from_rules = (
        "SHIP FROM RULES:",
        "- If explicit fields like 'Origin', 'Ship From' exist, extract that value.",
        "- If the document is an email-style inbound notice (header block) and shows:",
        "    From: Name <email>",
        "  then ship_from_name = Name, ship_from_email = email.",
        "- If only an email exists and no human name, set both fields to that email.",
        "- If both Origin and an email sender exist, use Origin for ship_from_name and still capture the email under ship_from_email.",
        "- Priority: Origin → Email Name → Mill → Sender block → null.",
    )
    carrier_rules = (
        "CARRIER / EQUIPMENT RULE:",
        "- If the table contains:",
        "      Equipment id = <value>",
        "      Mark = <value>",
        "  then ALWAYS treat 'Equipment id' as the railcar number.",
        "- NEVER use 'Mark' as railcar number.",
        "- Carrier type must match the carrier text exactly (e.g., CHICAGO RAIL LINK).",
    )
    inventory_rules = (
        "INVENTORY RULES:",
        "- Do not merge length groups. Each unique length or dimension is its own variant.",
        "- Extract pcs_per_pkg, packages, pieces, fbm exactly as written.",
        "- total_pcs = sum of pieces.",
        "- total_fbm = sum of fbm.",
    )
    total_quantity_rules = (
        "TOTAL QUANTITY RULE:",
        "- Use explicit totals if they appear.",
        "- If no explicit total quantity appears, leave null.",
    )
    custom_field_rules = (
        "CUSTOM FIELDS RULE:",
        "- Capture all meaningful leftover fields not part of main schema.",
    )
    closing = ("Return ONLY the JSON.",)

    sections = (
        schema,
        ship_from_rules,
        carrier_rules,
        inventory_rules,
        total_quantity_rules,
        custom_field_rules,
        closing,
    )
    # One newline between lines of a section, one blank line between sections.
    return "\n\n".join("\n".join(section) for section in sections)



# ---------------- Extraction ----------------
def extract(path):
    """Send the document at *path* to the model and return its JSON reply text.

    A ``.pdf`` file (suffix compared case-insensitively) is uploaded via
    ``upload_pdf`` and attached by file id. Any other file is treated as an
    image and embedded inline as a base64 data URL.

    Args:
        path: Filesystem path of the PDF or image to process.

    Returns:
        The substring of the model reply running from the first ``{`` to the
        last ``}`` inclusive. The text is not parsed or validated here.

    Raises:
        ValueError: If the reply is empty or contains no ``{...}`` span.
    """
    suffix = Path(path).suffix.lower()

    if suffix == ".pdf":
        attachment = {"type": "file", "file": {"file_id": upload_pdf(path)}}
    else:
        # Resolve the real MIME type (".jpg" -> "image/jpeg") instead of
        # pasting the raw extension; fall back to the extension-based form
        # when the suffix is unknown to the mimetypes registry.
        mime_type, _ = mimetypes.guess_type("file" + suffix)
        if not mime_type:
            mime_type = f"image/{suffix[1:]}"
        b64 = base64.b64encode(Path(path).read_bytes()).decode()
        attachment = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{b64}"},
        }

    r = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt()}, attachment],
            }
        ],
    )

    # The message content can be None; normalise so the search below is safe.
    text = r.choices[0].message.content or ""

    # Keep only the outermost {...} span, dropping any prose or code fences
    # the model may have wrapped around the JSON.
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Model response did not contain a JSON object.")
    return text[start:end + 1]


def ui(image_input, pdf_input):
    """Gradio click handler: run extraction on whichever input was supplied.

    The image takes precedence when both inputs are given. For the PDF input
    the path is read from its ``name`` attribute.

    Args:
        image_input: Value of the image component, or a falsy value if unset.
        pdf_input: Value of the file component, or a falsy value if unset.

    Returns:
        The text returned by ``extract`` for the chosen input, or the literal
        string ``"{}"`` when neither input is provided.
    """
    if not image_input and not pdf_input:
        return "{}"

    source_path = image_input if image_input else pdf_input.name
    return extract(source_path)


# ---------------- UI ----------------

# Page layout: two alternative inputs (image or PDF), a submit button, and a
# JSON viewer for the result returned by ui().
with gr.Blocks() as demo:
    gr.Markdown("# **Logistics OCR Data Extractor (GPT-5.1)**")

    with gr.Row():
        # type="filepath" makes the handler receive a path string for images.
        img = gr.Image(label="Upload Image", type="filepath")
        # Extension filters need the leading dot (".pdf"); a bare "pdf" is not
        # one of Gradio's named file categories.
        pdf = gr.File(label="Upload PDF", file_types=[".pdf"])

    out = gr.JSON(label="Extracted JSON")
    btn = gr.Button("Submit")

    btn.click(fn=ui, inputs=[img, pdf], outputs=out)

    # Each example row fills (img, pdf); the sample rows set only the image.
    # NOTE(review): the example image files are referenced by relative path --
    # confirm they are shipped next to this script.
    gr.Examples(
        examples=[
            ["IMG_0001.jpg", None],
            ["IMG_0002.jpg", None],
        ],
        inputs=[img, pdf],
        label="Sample Images",
    )

demo.launch(share=True)