Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,8 +5,9 @@ import json
|
|
| 5 |
import re
|
| 6 |
import os
|
| 7 |
import time
|
|
|
|
| 8 |
|
| 9 |
-
from main import extract_key_phrases, score_sentences, summarize_text # Optional
|
| 10 |
|
| 11 |
st.set_page_config(page_title="PDF Tools", layout="wide")
|
| 12 |
|
|
@@ -239,22 +240,28 @@ UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY") # Set this in your environment
|
|
| 239 |
def extract_text_from_pdf_unstract(pdf_file):
|
| 240 |
headers = {"unstract-key": UNSTRACT_API_KEY}
|
| 241 |
pdf_bytes = pdf_file.read()
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
# Step 2: Poll /whisper-status until processed
|
| 260 |
status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
|
|
|
|
| 5 |
import re
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
+
import tempfile
|
| 9 |
|
| 10 |
+
from main import extract_key_phrases, score_sentences, summarize_text # Optional
|
| 11 |
|
| 12 |
st.set_page_config(page_title="PDF Tools", layout="wide")
|
| 13 |
|
|
|
|
| 240 |
def extract_text_from_pdf_unstract(pdf_file):
|
| 241 |
headers = {"unstract-key": UNSTRACT_API_KEY}
|
| 242 |
pdf_bytes = pdf_file.read()
|
| 243 |
+
original_name = getattr(pdf_file, "name", "uploaded.pdf")
|
| 244 |
+
if not original_name.lower().endswith(".pdf"):
|
| 245 |
+
original_name = "uploaded.pdf"
|
| 246 |
+
|
| 247 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 248 |
+
tmp.write(pdf_bytes)
|
| 249 |
+
tmp.flush()
|
| 250 |
+
tmp.seek(0)
|
| 251 |
+
with open(tmp.name, "rb") as f:
|
| 252 |
+
files = {
|
| 253 |
+
"file": (original_name, f, "application/pdf"),
|
| 254 |
+
}
|
| 255 |
+
whisper_url = f"{UNSTRACT_BASE}/whisper"
|
| 256 |
+
with st.spinner("Uploading and processing PDF with Unstract..."):
|
| 257 |
+
r = requests.post(whisper_url, files=files, headers=headers)
|
| 258 |
+
if r.status_code != 202:
|
| 259 |
+
st.error(f"Unstract: Error uploading PDF: {r.status_code} - {r.text}")
|
| 260 |
+
return None
|
| 261 |
+
whisper_hash = r.json().get("whisper_hash")
|
| 262 |
+
if not whisper_hash:
|
| 263 |
+
st.error("Unstract: No whisper_hash received.")
|
| 264 |
+
return None
|
| 265 |
|
| 266 |
# Step 2: Poll /whisper-status until processed
|
| 267 |
status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
|