PDF_Upload_Vision

Sleeping

App Files Files Community

Seth0330 committed on Jun 6, 2025

Commit

77a95c0

verified ·

1 Parent(s): 7b9561b

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -130

app.py CHANGED Viewed

@@ -1,151 +1,89 @@
 import streamlit as st
 import requests
-import json
-import io
 import os
-st.set_page_config(page_title="PDF Invoice Extractor (GPT-4o Vision)", layout="wide")
-def get_api_key():
-    key = os.getenv("OPENAI_API_KEY")
-    if not key:
-        st.error("❌ OPENAI_API_KEY not set in your environment")
-        st.stop()
-    return key
-def upload_file_to_openai(pdf_file, api_key):
-    files_url = "https://api.openai.com/v1/files"
     headers = {
-        "Authorization": f"Bearer {api_key}"
     }
     files = {
-        "file": (pdf_file.name, pdf_file, "application/pdf")
-    }
-    data = {
-        "purpose": "vision"
     }
-    with st.spinner("⬆️ Uploading PDF to OpenAI..."):
-        response = requests.post(files_url, headers=headers, files=files, data=data)
-    if response.status_code != 200:
-        st.error(f"File upload failed: {response.text}")
         return None
-    return response.json().get("id")
-def query_gpt4o_vision_with_file_id(file_id, prompt, api_key):
-    api_url = "https://api.openai.com/v1/chat/completions"
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": prompt},
-                {
-                    "type": "file",
-                    "file": {
-                        "file_id": file_id
-                    }
-                }
-            ]
-        }
-    ]
     headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json"
     }
-    payload = {
-        "model": "gpt-4o",
-        "messages": messages,
-        "max_tokens": 2000
-    }
-    with st.spinner("🔍 Querying GPT-4o Vision..."):
-        r = requests.post(api_url, headers=headers, json=payload, timeout=120)
-    if r.status_code != 200:
-        st.error(f"🚨 API Error {r.status_code}: {r.text}")
-        return None
-    return r.json()["choices"][0]["message"]["content"]
-def clean_json_response(text):
-    if not text:
-        return None
-    # Strip ``` fences and whitespace
-    text = text.strip()
-    if text.startswith("```json"):
-        text = text[7:]
-    if text.startswith("```"):
-        text = text[3:]
-    if text.endswith("```"):
-        text = text[:-3]
-    text = text.strip()
-    # Find the JSON object
-    start, end = text.find('{'), text.rfind('}') + 1
-    if start < 0 or end < 1:
-        return None
-    frag = text[start:end]
-    # Remove stray trailing commas
-    frag = frag.replace(',\n}', '\n}')
-    try:
-        return json.loads(frag)
-    except Exception:
         return None
-st.title("PDF Invoice Extraction with GPT-4o Vision")
-tab1, tab2 = st.tabs(["Extract Invoice (Vision)", "Custom Prompt (Vision)"])
-api_key = get_api_key()
-with tab1:
-    st.header("Extract Invoice Metadata from PDF (GPT-4o Vision)")
-    pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
-    if st.button("Extract Invoice") and pdf:
-        prompt = (
-            "You are an expert invoice parser. Extract the invoice header fields and all line items from the PDF invoice. "
-            "Return the result as a single JSON object with 'invoice_header' and 'line_items' keys, "
-            "matching this schema:\n"
-            "{\n"
-            '  "invoice_header": {...},\n'
-            '  "line_items": [ {...}, {...} ]\n'
-            "}\n"
-            "If a field is missing, use null. Do not invent fields. Do not add explanations—return JSON only."
-        )
-        pdf.seek(0)  # Reset file pointer
-        # Step 1: Upload file and get file_id
-        file_id = upload_file_to_openai(pdf, api_key)
-        if not file_id:
-            st.stop()
-        # Step 2: Pass file_id to Vision API
-        content = query_gpt4o_vision_with_file_id(file_id, prompt, api_key)
-        st.subheader("Raw Model Output")
-        st.code(content)
-        result = clean_json_response(content)
-        if result:
-            st.success("Extraction Complete")
-            st.subheader("Invoice Metadata")
-            st.json(result.get("invoice_header", {}))
-            st.subheader("Line Items")
-            st.json(result.get("line_items", []))
         else:
-            st.error("Could not parse JSON from the output.")
-with tab2:
-    st.header("Send a Custom Prompt with PDF (GPT-4o Vision)")
-    pdf2 = st.file_uploader("Upload PDF", type="pdf", key="custom_pdf")
-    user_prompt = st.text_area(
-        "Enter your own prompt (for example: 'Summarize this invoice in bullet points' or 'Extract only supplier and total amount')",
-        height=100
-    )
-    if st.button("Send Custom Prompt") and pdf2 and user_prompt:
-        pdf2.seek(0)
-        # Step 1: Upload file and get file_id
-        file_id = upload_file_to_openai(pdf2, api_key)
-        if not file_id:
-            st.stop()
-        # Step 2: Pass file_id to Vision API with your prompt
-        content = query_gpt4o_vision_with_file_id(file_id, user_prompt, api_key)
-        st.subheader("Raw Model Output")
-        st.code(content)
-        result = clean_json_response(content)
-        if result:
-            st.subheader("Parsed JSON Output")
-            st.json(result)
-st.caption("Powered by OpenAI GPT-4o Vision API. Set your OPENAI_API_KEY in your environment to use this app.")

 import streamlit as st
 import requests
+import time
 import os
+# CONFIG — Edit if you want to move to environment variables instead of hard-coding
+UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY", "pktmL5lfqlVv7IWW_MYhdXRl399GA1n8vaLktHefxVY")
+BASE_URL = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
+def upload_pdf_to_unstract(pdf_file):
+    url = f"{BASE_URL}/whisper"
     headers = {
+        "unstract-key": UNSTRACT_API_KEY,
     }
     files = {
+        "file": (pdf_file.name, pdf_file, "application/pdf"),
     }
+    # 'mode': 'form' is not required, default is 'native_text'
+    with st.spinner("Uploading and starting OCR..."):
+        resp = requests.post(url, headers=headers, files=files)
+    if resp.status_code not in (200, 202):
+        st.error(f"Upload failed: {resp.status_code}: {resp.text}")
         return None
+    data = resp.json()
+    whisper_hash = data.get("whisper_hash")
+    if not whisper_hash:
+        st.error(f"No whisper_hash in response: {data}")
+    return whisper_hash
+def poll_until_processed(whisper_hash, poll_interval=3, max_attempts=30):
+    status_url = f"{BASE_URL}/whisper-status?whisper_hash={whisper_hash}"
     headers = {
+        "unstract-key": UNSTRACT_API_KEY,
     }
+    with st.spinner("Processing PDF (OCR in progress)..."):
+        for i in range(max_attempts):
+            resp = requests.get(status_url, headers=headers)
+            if resp.status_code != 200:
+                st.error(f"Status check failed: {resp.status_code}: {resp.text}")
+                return False
+            status = resp.json().get("status")
+            if status == "processed":
+                return True
+            elif status in ("failed", "error"):
+                st.error(f"Processing failed: {resp.text}")
+                return False
+            time.sleep(poll_interval)
+    st.error("Timed out waiting for OCR to complete.")
+    return False
+def retrieve_text(whisper_hash):
+    retrieve_url = f"{BASE_URL}/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
+    headers = {
+        "unstract-key": UNSTRACT_API_KEY,
+    }
+    with st.spinner("Retrieving extracted text..."):
+        resp = requests.get(retrieve_url, headers=headers)
+    if resp.status_code != 200:
+        st.error(f"Retrieve failed: {resp.status_code}: {resp.text}")
         return None
+    data = resp.json()
+    result_text = data.get("result_text", "")
+    return result_text
+st.title("Unstract OCR: PDF Invoice Text Extraction")
+uploaded_pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
+if st.button("Extract Text from PDF") and uploaded_pdf:
+    # Step 1: Upload PDF and get whisper_hash
+    whisper_hash = upload_pdf_to_unstract(uploaded_pdf)
+    if not whisper_hash:
+        st.stop()
+    st.success(f"File accepted. Tracking hash: {whisper_hash}")
+    # Step 2: Poll until processed
+    if poll_until_processed(whisper_hash):
+        # Step 3: Retrieve text
+        text = retrieve_text(whisper_hash)
+        if text:
+            st.success("Text extraction complete!")
+            st.subheader("Extracted Text:")
+            st.text_area("Extracted Text", text, height=400)
         else:
+            st.error("Extraction failed at retrieve step.")
+    else:
+        st.error("OCR did not complete successfully.")
+st.caption("Powered by Unstract LLMWhisperer OCR API.")