Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import re
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
|
| 9 |
-
from main import extract_key_phrases, score_sentences, summarize_text #
|
| 10 |
|
| 11 |
st.set_page_config(page_title="PDF Tools", layout="wide")
|
| 12 |
|
|
@@ -238,10 +238,10 @@ UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY") # Set this in your environment
|
|
| 238 |
|
| 239 |
def extract_text_from_pdf_unstract(pdf_file):
|
| 240 |
headers = {"unstract-key": UNSTRACT_API_KEY}
|
| 241 |
-
|
| 242 |
-
filename =
|
| 243 |
files = {
|
| 244 |
-
"file": (filename,
|
| 245 |
}
|
| 246 |
whisper_url = f"{UNSTRACT_BASE}/whisper"
|
| 247 |
with st.spinner("Uploading and processing PDF with Unstract..."):
|
|
@@ -276,7 +276,6 @@ def extract_text_from_pdf_unstract(pdf_file):
|
|
| 276 |
if r.status_code != 200:
|
| 277 |
st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
|
| 278 |
return None
|
| 279 |
-
# Unstract sometimes returns JSON, sometimes raw text
|
| 280 |
try:
|
| 281 |
data = r.json()
|
| 282 |
return data.get("result_text") or r.text
|
|
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
|
| 9 |
+
from main import extract_key_phrases, score_sentences, summarize_text  # Optional: keep this import only if these helpers are used elsewhere in the app
|
| 10 |
|
| 11 |
st.set_page_config(page_title="PDF Tools", layout="wide")
|
| 12 |
|
|
|
|
| 238 |
|
| 239 |
def extract_text_from_pdf_unstract(pdf_file):
|
| 240 |
headers = {"unstract-key": UNSTRACT_API_KEY}
|
| 241 |
+
pdf_file.seek(0)  # Rewind the stream so the upload below starts from the first byte of the file
|
| 242 |
+
filename = getattr(pdf_file, "name", "uploaded.pdf")
|
| 243 |
files = {
|
| 244 |
+
"file": (filename, pdf_file, "application/pdf"),
|
| 245 |
}
|
| 246 |
whisper_url = f"{UNSTRACT_BASE}/whisper"
|
| 247 |
with st.spinner("Uploading and processing PDF with Unstract..."):
|
|
|
|
| 276 |
if r.status_code != 200:
|
| 277 |
st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
|
| 278 |
return None
|
|
|
|
| 279 |
try:
|
| 280 |
data = r.json()
|
| 281 |
return data.get("result_text") or r.text
|