Seth0330 committed on
Commit
2a34e42
·
verified ·
1 Parent(s): e6cd773

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -6,7 +6,7 @@ import re
6
  import os
7
  import time
8
 
9
- from main import extract_key_phrases, score_sentences, summarize_text # Only if still needed for later
10
 
11
  st.set_page_config(page_title="PDF Tools", layout="wide")
12
 
@@ -238,10 +238,10 @@ UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY") # Set this in your environment
238
 
239
  def extract_text_from_pdf_unstract(pdf_file):
240
  headers = {"unstract-key": UNSTRACT_API_KEY}
241
- pdf_bytes = pdf_file.read()
242
- filename = pdf_file.name if hasattr(pdf_file, "name") else "uploaded.pdf"
243
  files = {
244
- "file": (filename, io.BytesIO(pdf_bytes), "application/pdf")
245
  }
246
  whisper_url = f"{UNSTRACT_BASE}/whisper"
247
  with st.spinner("Uploading and processing PDF with Unstract..."):
@@ -276,7 +276,6 @@ def extract_text_from_pdf_unstract(pdf_file):
276
  if r.status_code != 200:
277
  st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
278
  return None
279
- # Unstract sometimes returns JSON, sometimes raw text
280
  try:
281
  data = r.json()
282
  return data.get("result_text") or r.text
 
6
  import os
7
  import time
8
 
9
+ from main import extract_key_phrases, score_sentences, summarize_text # Optional, keep if you use them elsewhere
10
 
11
  st.set_page_config(page_title="PDF Tools", layout="wide")
12
 
 
238
 
239
  def extract_text_from_pdf_unstract(pdf_file):
240
  headers = {"unstract-key": UNSTRACT_API_KEY}
241
+ pdf_file.seek(0) # Make sure pointer is at start!
242
+ filename = getattr(pdf_file, "name", "uploaded.pdf")
243
  files = {
244
+ "file": (filename, pdf_file, "application/pdf"),
245
  }
246
  whisper_url = f"{UNSTRACT_BASE}/whisper"
247
  with st.spinner("Uploading and processing PDF with Unstract..."):
 
276
  if r.status_code != 200:
277
  st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
278
  return None
 
279
  try:
280
  data = r.json()
281
  return data.get("result_text") or r.text