PDF_Upload_Vision

Sleeping

App Files Files Community

Seth0330 committed on Jun 6, 2025

Commit

7b9561b

verified ·

1 Parent(s): 918613a

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -12

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import requests
 import json
 import io
 import os
-import base64
 st.set_page_config(page_title="PDF Invoice Extractor (GPT-4o Vision)", layout="wide")
@@ -14,10 +13,26 @@ def get_api_key():
         st.stop()
     return key
-def query_gpt4o_vision(pdf_file, prompt):
-    # Read and encode PDF to base64
-    encoded_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')
-    # Compose the prompt for GPT-4o Vision
     messages = [
         {
             "role": "user",
@@ -26,15 +41,14 @@ def query_gpt4o_vision(pdf_file, prompt):
                 {
                     "type": "file",
                     "file": {
-                        "mime_type": "application/pdf",
-                        "data": encoded_pdf
                     }
                 }
             ]
         }
     ]
     headers = {
-        "Authorization": f"Bearer {get_api_key()}",
         "Content-Type": "application/json"
     }
     payload = {
@@ -43,7 +57,7 @@ def query_gpt4o_vision(pdf_file, prompt):
         "max_tokens": 2000
     }
     with st.spinner("🔍 Querying GPT-4o Vision..."):
-        r = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=120)
     if r.status_code != 200:
         st.error(f"🚨 API Error {r.status_code}: {r.text}")
         return None
@@ -77,6 +91,8 @@ st.title("PDF Invoice Extraction with GPT-4o Vision")
 tab1, tab2 = st.tabs(["Extract Invoice (Vision)", "Custom Prompt (Vision)"])
 with tab1:
     st.header("Extract Invoice Metadata from PDF (GPT-4o Vision)")
     pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
@@ -92,7 +108,12 @@ with tab1:
             "If a field is missing, use null. Do not invent fields. Do not add explanations—return JSON only."
         )
         pdf.seek(0)  # Reset file pointer
-        content = query_gpt4o_vision(pdf, prompt)
         st.subheader("Raw Model Output")
         st.code(content)
         result = clean_json_response(content)
@@ -114,10 +135,14 @@ with tab2:
     )
     if st.button("Send Custom Prompt") and pdf2 and user_prompt:
         pdf2.seek(0)
-        content = query_gpt4o_vision(pdf2, user_prompt)
         st.subheader("Raw Model Output")
         st.code(content)
-        # Optionally try to parse JSON if present
         result = clean_json_response(content)
         if result:
             st.subheader("Parsed JSON Output")

 import json
 import io
 import os
 st.set_page_config(page_title="PDF Invoice Extractor (GPT-4o Vision)", layout="wide")
         st.stop()
     return key
+def upload_file_to_openai(pdf_file, api_key):
+    files_url = "https://api.openai.com/v1/files"
+    headers = {
+        "Authorization": f"Bearer {api_key}"
+    }
+    files = {
+        "file": (pdf_file.name, pdf_file, "application/pdf")
+    }
+    data = {
+        "purpose": "vision"
+    }
+    with st.spinner("⬆️ Uploading PDF to OpenAI..."):
+        response = requests.post(files_url, headers=headers, files=files, data=data)
+    if response.status_code != 200:
+        st.error(f"File upload failed: {response.text}")
+        return None
+    return response.json().get("id")
+def query_gpt4o_vision_with_file_id(file_id, prompt, api_key):
+    api_url = "https://api.openai.com/v1/chat/completions"
     messages = [
         {
             "role": "user",
                 {
                     "type": "file",
                     "file": {
+                        "file_id": file_id
                     }
                 }
             ]
         }
     ]
     headers = {
+        "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     payload = {
         "max_tokens": 2000
     }
     with st.spinner("🔍 Querying GPT-4o Vision..."):
+        r = requests.post(api_url, headers=headers, json=payload, timeout=120)
     if r.status_code != 200:
         st.error(f"🚨 API Error {r.status_code}: {r.text}")
         return None
 tab1, tab2 = st.tabs(["Extract Invoice (Vision)", "Custom Prompt (Vision)"])
+api_key = get_api_key()
 with tab1:
     st.header("Extract Invoice Metadata from PDF (GPT-4o Vision)")
     pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
             "If a field is missing, use null. Do not invent fields. Do not add explanations—return JSON only."
         )
         pdf.seek(0)  # Reset file pointer
+        # Step 1: Upload file and get file_id
+        file_id = upload_file_to_openai(pdf, api_key)
+        if not file_id:
+            st.stop()
+        # Step 2: Pass file_id to Vision API
+        content = query_gpt4o_vision_with_file_id(file_id, prompt, api_key)
         st.subheader("Raw Model Output")
         st.code(content)
         result = clean_json_response(content)
     )
     if st.button("Send Custom Prompt") and pdf2 and user_prompt:
         pdf2.seek(0)
+        # Step 1: Upload file and get file_id
+        file_id = upload_file_to_openai(pdf2, api_key)
+        if not file_id:
+            st.stop()
+        # Step 2: Pass file_id to Vision API with your prompt
+        content = query_gpt4o_vision_with_file_id(file_id, user_prompt, api_key)
         st.subheader("Raw Model Output")
         st.code(content)
         result = clean_json_response(content)
         if result:
             st.subheader("Parsed JSON Output")