Seth0330 committed on
Commit
7b9561b
·
verified ·
1 Parent(s): 918613a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -12
app.py CHANGED
@@ -3,7 +3,6 @@ import requests
3
  import json
4
  import io
5
  import os
6
- import base64
7
 
8
  st.set_page_config(page_title="PDF Invoice Extractor (GPT-4o Vision)", layout="wide")
9
 
@@ -14,10 +13,26 @@ def get_api_key():
14
  st.stop()
15
  return key
16
 
17
- def query_gpt4o_vision(pdf_file, prompt):
18
- # Read and encode PDF to base64
19
- encoded_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')
20
- # Compose the prompt for GPT-4o Vision
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  messages = [
22
  {
23
  "role": "user",
@@ -26,15 +41,14 @@ def query_gpt4o_vision(pdf_file, prompt):
26
  {
27
  "type": "file",
28
  "file": {
29
- "mime_type": "application/pdf",
30
- "data": encoded_pdf
31
  }
32
  }
33
  ]
34
  }
35
  ]
36
  headers = {
37
- "Authorization": f"Bearer {get_api_key()}",
38
  "Content-Type": "application/json"
39
  }
40
  payload = {
@@ -43,7 +57,7 @@ def query_gpt4o_vision(pdf_file, prompt):
43
  "max_tokens": 2000
44
  }
45
  with st.spinner("🔍 Querying GPT-4o Vision..."):
46
- r = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=120)
47
  if r.status_code != 200:
48
  st.error(f"🚨 API Error {r.status_code}: {r.text}")
49
  return None
@@ -77,6 +91,8 @@ st.title("PDF Invoice Extraction with GPT-4o Vision")
77
 
78
  tab1, tab2 = st.tabs(["Extract Invoice (Vision)", "Custom Prompt (Vision)"])
79
 
 
 
80
  with tab1:
81
  st.header("Extract Invoice Metadata from PDF (GPT-4o Vision)")
82
  pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
@@ -92,7 +108,12 @@ with tab1:
92
  "If a field is missing, use null. Do not invent fields. Do not add explanations—return JSON only."
93
  )
94
  pdf.seek(0) # Reset file pointer
95
- content = query_gpt4o_vision(pdf, prompt)
 
 
 
 
 
96
  st.subheader("Raw Model Output")
97
  st.code(content)
98
  result = clean_json_response(content)
@@ -114,10 +135,14 @@ with tab2:
114
  )
115
  if st.button("Send Custom Prompt") and pdf2 and user_prompt:
116
  pdf2.seek(0)
117
- content = query_gpt4o_vision(pdf2, user_prompt)
 
 
 
 
 
118
  st.subheader("Raw Model Output")
119
  st.code(content)
120
- # Optionally try to parse JSON if present
121
  result = clean_json_response(content)
122
  if result:
123
  st.subheader("Parsed JSON Output")
 
3
  import json
4
  import io
5
  import os
 
6
 
7
  st.set_page_config(page_title="PDF Invoice Extractor (GPT-4o Vision)", layout="wide")
8
 
 
13
  st.stop()
14
  return key
15
 
16
+ def upload_file_to_openai(pdf_file, api_key):
17
+ files_url = "https://api.openai.com/v1/files"
18
+ headers = {
19
+ "Authorization": f"Bearer {api_key}"
20
+ }
21
+ files = {
22
+ "file": (pdf_file.name, pdf_file, "application/pdf")
23
+ }
24
+ data = {
25
+ "purpose": "vision"
26
+ }
27
+ with st.spinner("⬆️ Uploading PDF to OpenAI..."):
28
+ response = requests.post(files_url, headers=headers, files=files, data=data)
29
+ if response.status_code != 200:
30
+ st.error(f"File upload failed: {response.text}")
31
+ return None
32
+ return response.json().get("id")
33
+
34
+ def query_gpt4o_vision_with_file_id(file_id, prompt, api_key):
35
+ api_url = "https://api.openai.com/v1/chat/completions"
36
  messages = [
37
  {
38
  "role": "user",
 
41
  {
42
  "type": "file",
43
  "file": {
44
+ "file_id": file_id
 
45
  }
46
  }
47
  ]
48
  }
49
  ]
50
  headers = {
51
+ "Authorization": f"Bearer {api_key}",
52
  "Content-Type": "application/json"
53
  }
54
  payload = {
 
57
  "max_tokens": 2000
58
  }
59
  with st.spinner("🔍 Querying GPT-4o Vision..."):
60
+ r = requests.post(api_url, headers=headers, json=payload, timeout=120)
61
  if r.status_code != 200:
62
  st.error(f"🚨 API Error {r.status_code}: {r.text}")
63
  return None
 
91
 
92
  tab1, tab2 = st.tabs(["Extract Invoice (Vision)", "Custom Prompt (Vision)"])
93
 
94
+ api_key = get_api_key()
95
+
96
  with tab1:
97
  st.header("Extract Invoice Metadata from PDF (GPT-4o Vision)")
98
  pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
 
108
  "If a field is missing, use null. Do not invent fields. Do not add explanations—return JSON only."
109
  )
110
  pdf.seek(0) # Reset file pointer
111
+ # Step 1: Upload file and get file_id
112
+ file_id = upload_file_to_openai(pdf, api_key)
113
+ if not file_id:
114
+ st.stop()
115
+ # Step 2: Pass file_id to Vision API
116
+ content = query_gpt4o_vision_with_file_id(file_id, prompt, api_key)
117
  st.subheader("Raw Model Output")
118
  st.code(content)
119
  result = clean_json_response(content)
 
135
  )
136
  if st.button("Send Custom Prompt") and pdf2 and user_prompt:
137
  pdf2.seek(0)
138
+ # Step 1: Upload file and get file_id
139
+ file_id = upload_file_to_openai(pdf2, api_key)
140
+ if not file_id:
141
+ st.stop()
142
+ # Step 2: Pass file_id to Vision API with your prompt
143
+ content = query_gpt4o_vision_with_file_id(file_id, user_prompt, api_key)
144
  st.subheader("Raw Model Output")
145
  st.code(content)
 
146
  result = clean_json_response(content)
147
  if result:
148
  st.subheader("Parsed JSON Output")