Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from transformers import pipeline
|
|
| 5 |
import time, logging
|
| 6 |
|
| 7 |
logging.basicConfig(level=logging.ERROR)
|
| 8 |
-
device = -1 #
|
| 9 |
print("⚠️ CPU-only. Expect ~15–25s for 300,000 chars.")
|
| 10 |
|
| 11 |
try:
|
|
@@ -14,16 +14,18 @@ except Exception as e:
|
|
| 14 |
print(f"❌ Model loading failed: {str(e)}")
|
| 15 |
exit(1)
|
| 16 |
|
| 17 |
-
def summarize_file(
|
| 18 |
start = time.time()
|
| 19 |
try:
|
|
|
|
|
|
|
| 20 |
text = "".join(page.get_text("text", flags=16) for page in fitz.open(stream=file_bytes, filetype="pdf")) if file_bytes[:4].startswith(b'%PDF') else file_bytes.decode("utf-8", errors="ignore")
|
| 21 |
except Exception as e:
|
| 22 |
return f"❌ Text extraction failed: {str(e)}"
|
| 23 |
if not text.strip(): return "❌ No text found"
|
| 24 |
text = text[:300000]
|
| 25 |
chunks = [text[i:i+10000] for i in range(0, len(text), 10000)]
|
| 26 |
-
if not chunks: return "❌ No chunks to summarize"
|
| 27 |
summaries = []
|
| 28 |
for i, chunk in enumerate(chunks):
|
| 29 |
if time.time() - start > 9:
|
|
@@ -43,5 +45,3 @@ if __name__ == "__main__":
|
|
| 43 |
demo.launch(share=False, server_port=7860)
|
| 44 |
except Exception as e:
|
| 45 |
print(f"❌ Gradio launch failed: {str(e)}")
|
| 46 |
-
|
| 47 |
-
|
|
|
|
| 5 |
import time, logging
|
| 6 |
|
| 7 |
logging.basicConfig(level=logging.ERROR)
|
| 8 |
+
device = -1 # CPU-only
|
| 9 |
print("⚠️ CPU-only. Expect ~15–25s for 300,000 chars.")
|
| 10 |
|
| 11 |
try:
|
|
|
|
| 14 |
print(f"❌ Model loading failed: {str(e)}")
|
| 15 |
exit(1)
|
| 16 |
|
| 17 |
+
def summarize_file(file):
|
| 18 |
start = time.time()
|
| 19 |
try:
|
| 20 |
+
# Handle file as path (str) or bytes
|
| 21 |
+
file_bytes = open(file.name, "rb").read() if isinstance(file, gr.FileData) else file
|
| 22 |
text = "".join(page.get_text("text", flags=16) for page in fitz.open(stream=file_bytes, filetype="pdf")) if file_bytes[:4].startswith(b'%PDF') else file_bytes.decode("utf-8", errors="ignore")
|
| 23 |
except Exception as e:
|
| 24 |
return f"❌ Text extraction failed: {str(e)}"
|
| 25 |
if not text.strip(): return "❌ No text found"
|
| 26 |
text = text[:300000]
|
| 27 |
chunks = [text[i:i+10000] for i in range(0, len(text), 10000)]
|
| 28 |
+
if not chunks: return "❌ No chunks to summarize"
|
| 29 |
summaries = []
|
| 30 |
for i, chunk in enumerate(chunks):
|
| 31 |
if time.time() - start > 9:
|
|
|
|
| 45 |
demo.launch(share=False, server_port=7860)
|
| 46 |
except Exception as e:
|
| 47 |
print(f"❌ Gradio launch failed: {str(e)}")
|
|
|
|
|
|