exampletwo / app.py
tejovanth's picture
Update app.py
1a918fe verified
raw
history blame
1.91 kB
import gradio as gr
import fitz
import torch
from transformers import pipeline
import time, logging
# Root logger emits ERROR and above only.
logging.basicConfig(level=logging.ERROR)

device = -1  # Force CPU (no GPU detected)
print("⚠️ CPU-only. Expect ~15–25s for 300,000 chars.")

# Build the summarization pipeline once at import time; `summarize_file`
# reuses this module-level object for every request.
try:
    summarizer = pipeline(
        "summarization",
        model="t5-small",
        device=device,
        torch_dtype=torch.float32,
    )
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    # `exit()` is a helper injected by the `site` module for interactive
    # sessions and is not guaranteed to exist (e.g. `python -S`); raising
    # SystemExit directly terminates with the same status code.
    raise SystemExit(1) from e
def _read_upload(file_bytes):
    """Return the raw bytes of an upload given as bytes, a path, or a file wrapper."""
    if isinstance(file_bytes, (bytes, bytearray)):
        return bytes(file_bytes)
    # Gradio's File component passes a filesystem path (or, in older
    # releases, an object exposing the path as ``.name``) unless it is
    # configured with type="binary" -- accept those shapes as well.
    path = file_bytes if isinstance(file_bytes, str) else getattr(file_bytes, "name", None)
    if not isinstance(path, str):
        raise TypeError(f"unsupported upload type: {type(file_bytes).__name__}")
    with open(path, "rb") as handle:
        return handle.read()


def _extract_text(data):
    """Extract text from PDF bytes (detected by the ``%PDF`` magic) or decode as UTF-8."""
    if data.startswith(b"%PDF"):
        # The context manager closes the document once all pages are read.
        # NOTE(review): flags=16 looks like PyMuPDF's TEXT_DEHYPHENATE -- confirm.
        with fitz.open(stream=data, filetype="pdf") as doc:
            return "".join(page.get_text("text", flags=16) for page in doc)
    # Undecodable bytes are dropped rather than failing the whole upload.
    return data.decode("utf-8", errors="ignore")


def summarize_file(file_bytes, *, max_chars=300000, chunk_size=10000, time_budget_s=9):
    """Summarize an uploaded PDF or text file chunk by chunk.

    Args:
        file_bytes: The upload, either as raw ``bytes``, as a filesystem
            path string, or as an object whose ``name`` attribute is the
            path. ``None`` means no file was provided.
        max_chars: Only the first ``max_chars`` characters of the extracted
            text are summarized.
        chunk_size: Number of characters handed to the summarizer per call.
        time_budget_s: Seconds after which no further chunk is started. The
            check runs before each chunk, so the total time can exceed the
            budget by the duration of one summarizer call.

    Returns:
        A Markdown-formatted string with the character count, elapsed time
        and one summary (or error note) per processed chunk, or a single
        line starting with "❌" when no text could be obtained.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if file_bytes is None:
        return "❌ No file provided"

    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    start = time.monotonic()

    try:
        text = _extract_text(_read_upload(file_bytes))
    except Exception as e:
        logging.exception("Text extraction failed")
        return f"❌ Text extraction failed: {e}"

    if not text.strip():
        return "❌ No text found"

    text = text[:max_chars]
    # NOTE(review): the default chunk size likely exceeds t5-small's nominal
    # input length -- confirm whether truncation or smaller chunks are wanted.
    chunks = [text[pos:pos + chunk_size] for pos in range(0, len(text), chunk_size)]
    if not chunks:
        return "❌ No chunks to summarize"

    summaries = []
    for number, chunk in enumerate(chunks, start=1):
        if time.monotonic() - start > time_budget_s:
            summaries.append("⚠️ Stopped early")
            break
        try:
            result = summarizer(chunk, max_length=40, min_length=10, do_sample=False)
            summary = result[0]["summary_text"]
        except Exception as e:
            # One failing chunk is reported inline; the remaining chunks still run.
            logging.exception("Summarization failed for chunk %d", number)
            summaries.append(f"**Chunk {number}**: ❌ Error: {e}")
        else:
            summaries.append(f"**Chunk {number}**:\n{summary}")

    elapsed = time.monotonic() - start
    header = f"**Chars**: {len(text)}\n**Time**: {elapsed:.2f}s\n\n"
    return header + "\n\n".join(summaries)
# Single-input, single-output UI around `summarize_file`.
demo = gr.Interface(
    fn=summarize_file,
    # type="binary" makes Gradio pass the upload's raw bytes, which is what
    # the handler's `%PDF` magic-number check operates on.
    inputs=gr.File(label="📄 PDF/TXT Notes", type="binary"),
    outputs=gr.Textbox(label="📝 Summary"),
    title="Fast Summarizer",
    description="300,000+ chars in ~15–25s (CPU)",
)

if __name__ == "__main__":
    try:
        demo.launch(share=False, server_port=7860)
    except Exception as e:
        # Best-effort: report the failure instead of dumping a traceback.
        print(f"❌ Gradio launch failed: {e}")