Seth0330 committed on
Commit
3a1cba3
·
verified ·
1 Parent(s): bcd91a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -5,8 +5,9 @@ import json
5
  import re
6
  import os
7
  import time
 
8
 
9
- from main import extract_key_phrases, score_sentences, summarize_text # Optional, keep if used elsewhere
10
 
11
  st.set_page_config(page_title="PDF Tools", layout="wide")
12
 
@@ -239,22 +240,28 @@ UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY") # Set this in your environment
239
  def extract_text_from_pdf_unstract(pdf_file):
240
  headers = {"unstract-key": UNSTRACT_API_KEY}
241
  pdf_bytes = pdf_file.read()
242
- filename = getattr(pdf_file, "name", "uploaded.pdf")
243
- files = {
244
- "file": (filename, io.BytesIO(pdf_bytes), "application/pdf"),
245
- }
246
- whisper_url = f"{UNSTRACT_BASE}/whisper"
247
- with st.spinner("Uploading and processing PDF with Unstract..."):
248
- # For debugging, uncomment next line:
249
- # st.write("DEBUG UPLOAD:", files)
250
- r = requests.post(whisper_url, files=files, headers=headers)
251
- if r.status_code != 202:
252
- st.error(f"Unstract: Error uploading PDF: {r.status_code} - {r.text}")
253
- return None
254
- whisper_hash = r.json().get("whisper_hash")
255
- if not whisper_hash:
256
- st.error("Unstract: No whisper_hash received.")
257
- return None
 
 
 
 
 
 
258
 
259
  # Step 2: Poll /whisper-status until processed
260
  status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
 
5
  import re
6
  import os
7
  import time
8
+ import tempfile
9
 
10
+ from main import extract_key_phrases, score_sentences, summarize_text # Optional
11
 
12
  st.set_page_config(page_title="PDF Tools", layout="wide")
13
 
 
240
  def extract_text_from_pdf_unstract(pdf_file):
241
  headers = {"unstract-key": UNSTRACT_API_KEY}
242
  pdf_bytes = pdf_file.read()
243
+ original_name = getattr(pdf_file, "name", "uploaded.pdf")
244
+ if not original_name.lower().endswith(".pdf"):
245
+ original_name = "uploaded.pdf"
246
+
247
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
248
+ tmp.write(pdf_bytes)
249
+ tmp.flush()
250
+ tmp.seek(0)
251
+ with open(tmp.name, "rb") as f:
252
+ files = {
253
+ "file": (original_name, f, "application/pdf"),
254
+ }
255
+ whisper_url = f"{UNSTRACT_BASE}/whisper"
256
+ with st.spinner("Uploading and processing PDF with Unstract..."):
257
+ r = requests.post(whisper_url, files=files, headers=headers)
258
+ if r.status_code != 202:
259
+ st.error(f"Unstract: Error uploading PDF: {r.status_code} - {r.text}")
260
+ return None
261
+ whisper_hash = r.json().get("whisper_hash")
262
+ if not whisper_hash:
263
+ st.error("Unstract: No whisper_hash received.")
264
+ return None
265
 
266
  # Step 2: Poll /whisper-status until processed
267
  status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"