Seth0330 committed on
Commit
77a95c0
·
verified ·
1 Parent(s): 7b9561b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -130
app.py CHANGED
@@ -1,151 +1,89 @@
1
  import streamlit as st
2
  import requests
3
- import json
4
- import io
5
  import os
6
 
7
# NOTE: pre-commit side of the diff (removed by this commit).
# Name the browser tab and let the app use the wide page layout.
st.set_page_config(
    page_title="PDF Invoice Extractor (GPT-4o Vision)",
    layout="wide",
)
 
 
8
 
9
# NOTE: pre-commit side of the diff (removed by this commit).
def get_api_key():
    """Return the OpenAI API key read from ``OPENAI_API_KEY``.

    Surrounding whitespace is stripped: a key pasted with a trailing newline
    would otherwise be placed verbatim in the ``Authorization`` header, where
    it is not a valid header value.  When the variable is unset or blank, an
    error is shown on the page and the script run is halted via ``st.stop()``.

    Returns:
        The non-empty API key string.
    """
    key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if not key:
        st.error("❌ OPENAI_API_KEY not set in your environment")
        st.stop()
    return key
15
-
16
# NOTE: pre-commit side of the diff (removed by this commit).
def upload_file_to_openai(pdf_file, api_key):
    """Upload a PDF to the OpenAI Files endpoint and return its file id.

    Args:
        pdf_file: Uploaded file object.  ``pdf_file.name`` is sent as the
            filename and the object itself as the multipart file body.
        api_key: Bearer token placed in the ``Authorization`` header.

    Returns:
        The ``id`` field of the JSON response, or ``None`` when the request
        raises, the status code is not 200, or the body is not a JSON object.
        Every failure is reported on the page with ``st.error``.
    """
    files_url = "https://api.openai.com/v1/files"
    headers = {"Authorization": f"Bearer {api_key}"}
    files = {"file": (pdf_file.name, pdf_file, "application/pdf")}
    # NOTE(review): confirm "vision" is the purpose the API expects for PDFs.
    data = {"purpose": "vision"}

    try:
        with st.spinner("⬆️ Uploading PDF to OpenAI..."):
            # Without a timeout a stalled connection would hang the app forever.
            response = requests.post(
                files_url,
                headers=headers,
                files=files,
                data=data,
                timeout=120,
            )
    except requests.RequestException as exc:
        st.error(f"File upload failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"File upload failed: {response.text}")
        return None

    try:
        return response.json().get("id")
    except (ValueError, AttributeError):
        # Body was not JSON, or not a JSON object.
        st.error(f"File upload failed: {response.text}")
        return None
 
 
 
 
33
 
34
# NOTE: pre-commit side of the diff (removed by this commit).
def query_gpt4o_vision_with_file_id(file_id, prompt, api_key):
    """Send a prompt plus a previously uploaded file to the chat completions API.

    Args:
        file_id: Identifier of the uploaded file, attached as a ``file``
            content part of the user message.
        prompt: Text sent as the ``text`` content part of the same message.
        api_key: Bearer token placed in the ``Authorization`` header.

    Returns:
        The ``content`` string of the first choice, or ``None`` when the
        request raises (including the 120 s timeout), the status code is not
        200, or the response body does not have the expected shape.  Every
        failure is reported on the page with ``st.error``.
    """
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "file", "file": {"file_id": file_id}},
                ],
            }
        ],
        "max_tokens": 2000,
    }

    try:
        with st.spinner("🔍 Querying GPT-4o Vision..."):
            r = requests.post(api_url, headers=headers, json=payload, timeout=120)
    except requests.RequestException as exc:
        st.error(f"🚨 API Error: {exc}")
        return None

    if r.status_code != 200:
        st.error(f"🚨 API Error {r.status_code}: {r.text}")
        return None

    try:
        return r.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 response whose body is not the expected JSON structure.
        st.error(f"🚨 Unexpected API response: {r.text}")
        return None
 
 
 
 
65
 
66
# NOTE: pre-commit side of the diff (removed by this commit).
def clean_json_response(text):
    """Extract and parse the JSON object embedded in a model reply.

    The text between the first ``{`` and the last ``}`` is parsed; anything
    around it (Markdown code fences, explanatory prose) is ignored because it
    falls outside that slice.  If the slice is not valid JSON, trailing commas
    before ``}`` or ``]`` are removed and parsing is retried once.

    Args:
        text: Raw model output; may be ``None`` or empty.

    Returns:
        The parsed JSON value, or ``None`` when ``text`` is empty, contains no
        ``{...}`` span, or cannot be parsed even after the comma repair.
    """
    # Local imports keep the function self-contained.
    import json
    import re

    if not text:
        return None

    start = text.find("{")
    end = text.rfind("}")
    if start < 0 or end < start:
        return None
    fragment = text[start:end + 1]

    # Try the fragment untouched first, so valid JSON whose string values
    # happen to contain ", }" is never rewritten.
    try:
        return json.loads(fragment)
    except ValueError:
        pass

    # Repair the common model mistake of a trailing comma before a closer.
    repaired = re.sub(r",(\s*[}\]])", r"\1", fragment)
    try:
        return json.loads(repaired)
    except ValueError:
        return None
 
 
 
89
 
90
# NOTE: pre-commit side of the diff (removed by this commit).
# Page body: two tabs that both upload a PDF, query the model and show the reply.
st.title("PDF Invoice Extraction with GPT-4o Vision")

extract_tab, custom_tab = st.tabs(["Extract Invoice (Vision)", "Custom Prompt (Vision)"])

api_key = get_api_key()

# Fixed prompt used by the "Extract Invoice" tab.
INVOICE_PROMPT = (
    "You are an expert invoice parser. Extract the invoice header fields and all line items from the PDF invoice. "
    "Return the result as a single JSON object with 'invoice_header' and 'line_items' keys, "
    "matching this schema:\n"
    "{\n"
    ' "invoice_header": {...},\n'
    ' "line_items": [ {...}, {...} ]\n'
    "}\n"
    "If a field is missing, use null. Do not invent fields. Do not add explanations—return JSON only."
)


def _run_vision_query(pdf_file, prompt_text):
    """Upload the PDF, query the model, show the raw reply and return parsed JSON."""
    pdf_file.seek(0)  # Rewind so the whole file is uploaded.
    uploaded_id = upload_file_to_openai(pdf_file, api_key)
    if not uploaded_id:
        st.stop()
    raw_reply = query_gpt4o_vision_with_file_id(uploaded_id, prompt_text, api_key)
    st.subheader("Raw Model Output")
    st.code(raw_reply)
    return clean_json_response(raw_reply)


with extract_tab:
    st.header("Extract Invoice Metadata from PDF (GPT-4o Vision)")
    pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
    extract_clicked = st.button("Extract Invoice")
    if extract_clicked and pdf:
        result = _run_vision_query(pdf, INVOICE_PROMPT)
        if not result:
            st.error("Could not parse JSON from the output.")
        else:
            st.success("Extraction Complete")
            st.subheader("Invoice Metadata")
            st.json(result.get("invoice_header", {}))
            st.subheader("Line Items")
            st.json(result.get("line_items", []))

with custom_tab:
    st.header("Send a Custom Prompt with PDF (GPT-4o Vision)")
    pdf2 = st.file_uploader("Upload PDF", type="pdf", key="custom_pdf")
    user_prompt = st.text_area(
        "Enter your own prompt (for example: 'Summarize this invoice in bullet points' or 'Extract only supplier and total amount')",
        height=100,
    )
    custom_clicked = st.button("Send Custom Prompt")
    if custom_clicked and pdf2 and user_prompt:
        result = _run_vision_query(pdf2, user_prompt)
        # A free-form prompt need not yield JSON, so a failed parse is not an error here.
        if result:
            st.subheader("Parsed JSON Output")
            st.json(result)

st.caption("Powered by OpenAI GPT-4o Vision API. Set your OPENAI_API_KEY in your environment to use this app.")
 
1
  import streamlit as st
2
  import requests
3
+ import time
 
4
  import os
5
 
6
# CONFIG
# The API key is read only from the UNSTRACT_API_KEY environment variable.
# Never place a real key in this file: anything committed to the repository is
# public.  A key that was committed earlier should be treated as leaked and
# rotated.  With the variable unset the key is empty and API calls are
# expected to be rejected by the service.
UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY", "")
BASE_URL = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
9
 
10
def upload_pdf_to_unstract(pdf_file):
    """Submit a PDF to the ``/whisper`` endpoint and return its tracking hash.

    Args:
        pdf_file: Uploaded file object.  ``pdf_file.name`` is sent as the
            filename and the object itself as the multipart file body.

    Returns:
        The ``whisper_hash`` from the JSON response, or ``None`` when the
        request raises, the status code is neither 200 nor 202, the body is
        not a JSON object, or the hash is missing.  Every failure is reported
        on the page with ``st.error``.
    """
    url = f"{BASE_URL}/whisper"
    headers = {"unstract-key": UNSTRACT_API_KEY}
    # NOTE(review): confirm the endpoint accepts a multipart upload rather
    # than the document as the raw request body.
    files = {"file": (pdf_file.name, pdf_file, "application/pdf")}
    # 'mode': 'form' is not required, default is 'native_text'

    try:
        with st.spinner("Uploading and starting OCR..."):
            # Without a timeout a stalled connection would hang the app forever.
            resp = requests.post(url, headers=headers, files=files, timeout=120)
    except requests.RequestException as exc:
        st.error(f"Upload failed: {exc}")
        return None

    if resp.status_code not in (200, 202):
        st.error(f"Upload failed: {resp.status_code}: {resp.text}")
        return None

    try:
        data = resp.json()
        whisper_hash = data.get("whisper_hash")
    except (ValueError, AttributeError):
        # Body was not JSON, or not a JSON object.
        st.error(f"Upload failed: {resp.status_code}: {resp.text}")
        return None

    if not whisper_hash:
        st.error(f"No whisper_hash in response: {data}")
        return None
    return whisper_hash
29
 
30
def poll_until_processed(whisper_hash, poll_interval=3, max_attempts=30):
    """Poll ``/whisper-status`` until the job reports ``processed``.

    Args:
        whisper_hash: Tracking hash returned when the document was submitted.
        poll_interval: Seconds to wait between consecutive status requests.
        max_attempts: Maximum number of status requests before giving up.

    Returns:
        ``True`` once the reported status is ``"processed"``.  ``False`` when
        a request raises, the status code is not 200, the body is not a JSON
        object, the status is ``"failed"`` or ``"error"``, or all attempts are
        used up.  Every failure is reported on the page with ``st.error``.
    """
    status_url = f"{BASE_URL}/whisper-status"
    # Let requests encode the query string instead of interpolating the hash.
    params = {"whisper_hash": whisper_hash}
    headers = {"unstract-key": UNSTRACT_API_KEY}

    with st.spinner("Processing PDF (OCR in progress)..."):
        for attempt in range(max_attempts):
            # Wait between attempts only, so nothing is slept after the last one.
            if attempt:
                time.sleep(poll_interval)

            try:
                resp = requests.get(
                    status_url, headers=headers, params=params, timeout=30
                )
            except requests.RequestException as exc:
                st.error(f"Status check failed: {exc}")
                return False

            if resp.status_code != 200:
                st.error(f"Status check failed: {resp.status_code}: {resp.text}")
                return False

            try:
                status = resp.json().get("status")
            except (ValueError, AttributeError):
                # Body was not JSON, or not a JSON object.
                st.error(f"Status check failed: {resp.status_code}: {resp.text}")
                return False

            if status == "processed":
                return True
            if status in ("failed", "error"):
                st.error(f"Processing failed: {resp.text}")
                return False

    st.error("Timed out waiting for OCR to complete.")
    return False
50
 
51
def retrieve_text(whisper_hash):
    """Fetch the extracted text for a processed document from ``/whisper-retrieve``.

    The request asks for ``text_only=true``.

    Args:
        whisper_hash: Tracking hash returned when the document was submitted.

    Returns:
        The ``result_text`` field of the JSON response (``""`` when absent).
        If the body is not a JSON object, the raw response text is returned
        instead.  ``None`` is returned when the request raises or the status
        code is not 200; both are reported on the page with ``st.error``.
    """
    retrieve_url = f"{BASE_URL}/whisper-retrieve"
    # Let requests encode the query string instead of interpolating the hash.
    params = {"whisper_hash": whisper_hash, "text_only": "true"}
    headers = {"unstract-key": UNSTRACT_API_KEY}

    try:
        with st.spinner("Retrieving extracted text..."):
            # Without a timeout a stalled connection would hang the app forever.
            resp = requests.get(
                retrieve_url, headers=headers, params=params, timeout=60
            )
    except requests.RequestException as exc:
        st.error(f"Retrieve failed: {exc}")
        return None

    if resp.status_code != 200:
        st.error(f"Retrieve failed: {resp.status_code}: {resp.text}")
        return None

    try:
        data = resp.json()
    except ValueError:
        # NOTE(review): presumably a plain-text body when text_only is set;
        # confirm against the API reference.
        return resp.text
    if not isinstance(data, dict):
        return resp.text
    return data.get("result_text", "")
64
 
65
# Page body: upload a PDF, wait for OCR to finish, then show the extracted text.
st.title("Unstract OCR: PDF Invoice Text Extraction")

uploaded_pdf = st.file_uploader("Upload Invoice PDF", type="pdf")

extract_clicked = st.button("Extract Text from PDF")
if extract_clicked and uploaded_pdf:
    # Step 1: submit the document; stop this run if no tracking hash came back.
    whisper_hash = upload_pdf_to_unstract(uploaded_pdf)
    if not whisper_hash:
        st.stop()
    st.success(f"File accepted. Tracking hash: {whisper_hash}")

    # Step 2: wait for processing to finish.
    if not poll_until_processed(whisper_hash):
        st.error("OCR did not complete successfully.")
    else:
        # Step 3: fetch and display the text.
        text = retrieve_text(whisper_hash)
        if not text:
            st.error("Extraction failed at retrieve step.")
        else:
            st.success("Text extraction complete!")
            st.subheader("Extracted Text:")
            st.text_area("Extracted Text", text, height=400)

st.caption("Powered by Unstract LLMWhisperer OCR API.")