Seth0330 committed on
Commit
796b6f2
·
verified ·
1 Parent(s): ee0b32d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -12,16 +12,22 @@ def upload_pdf_to_unstract(pdf_file):
12
  headers = {
13
  "unstract-key": UNSTRACT_API_KEY,
14
  }
15
- # Always reset file pointer and use "application/pdf"
16
  pdf_file.seek(0)
17
  file_bytes = pdf_file.read()
 
 
 
 
 
 
18
  files = {
19
- "file": (
20
- pdf_file.name if pdf_file.name.lower().endswith(".pdf") else "invoice.pdf",
21
- file_bytes,
22
- "application/pdf"
23
- ),
24
  }
 
 
 
 
25
  with st.spinner("Uploading and starting OCR..."):
26
  resp = requests.post(url, headers=headers, files=files)
27
  if resp.status_code not in (200, 202):
@@ -73,15 +79,12 @@ st.title("Unstract OCR: PDF Invoice Text Extraction")
73
  uploaded_pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
74
 
75
  if st.button("Extract Text from PDF") and uploaded_pdf:
76
- # Step 1: Upload PDF and get whisper_hash
77
  whisper_hash = upload_pdf_to_unstract(uploaded_pdf)
78
  if not whisper_hash:
79
  st.stop()
80
  st.success(f"File accepted. Tracking hash: {whisper_hash}")
81
 
82
- # Step 2: Poll until processed
83
  if poll_until_processed(whisper_hash):
84
- # Step 3: Retrieve text
85
  text = retrieve_text(whisper_hash)
86
  if text:
87
  st.success("Text extraction complete!")
 
12
  headers = {
13
  "unstract-key": UNSTRACT_API_KEY,
14
  }
15
+ # Always reset file pointer
16
  pdf_file.seek(0)
17
  file_bytes = pdf_file.read()
18
+
19
+ # Force correct .pdf extension and type
20
+ file_name = getattr(pdf_file, "name", None)
21
+ if not file_name or not file_name.lower().endswith(".pdf"):
22
+ file_name = "invoice.pdf"
23
+
24
  files = {
25
+ "file": (file_name, file_bytes, "application/pdf"),
 
 
 
 
26
  }
27
+
28
+ # Debug print for troubleshooting
29
+ # st.write("Uploading file with name:", file_name)
30
+
31
  with st.spinner("Uploading and starting OCR..."):
32
  resp = requests.post(url, headers=headers, files=files)
33
  if resp.status_code not in (200, 202):
 
79
  uploaded_pdf = st.file_uploader("Upload Invoice PDF", type="pdf")
80
 
81
  if st.button("Extract Text from PDF") and uploaded_pdf:
 
82
  whisper_hash = upload_pdf_to_unstract(uploaded_pdf)
83
  if not whisper_hash:
84
  st.stop()
85
  st.success(f"File accepted. Tracking hash: {whisper_hash}")
86
 
 
87
  if poll_until_processed(whisper_hash):
 
88
  text = retrieve_text(whisper_hash)
89
  if text:
90
  st.success("Text extraction complete!")