# Hugging Face Spaces page residue ("Spaces: Sleeping") preserved as a comment
# so the file is valid Python.
import gradio as gr
import fitz  # PyMuPDF, used for PDF text extraction
import torch
from transformers import pipeline
import time, logging

# Only surface errors; transformers is otherwise very chatty.
logging.basicConfig(level=logging.ERROR)

device = -1  # Force CPU (no GPU detected); -1 is the transformers CPU sentinel
print("⚠️ CPU-only. Expect ~15–25s for 300,000 chars.")

try:
    # t5-small keeps the CPU latency tolerable; float32 is the safe CPU dtype.
    summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
except Exception as e:
    print(f"❌ Model loading failed: {str(e)}")
    # raise SystemExit instead of bare exit(): exit() comes from the `site`
    # module and is not guaranteed to exist in all runtime environments.
    raise SystemExit(1)
def summarize_file(file_bytes):
    """Summarize an uploaded PDF or plain-text file chunk by chunk.

    Args:
        file_bytes: Raw file content as ``bytes``. A filesystem path
            (``str``) or an object exposing a ``.name`` path attribute —
            what Gradio's File component delivers with its default
            ``type="filepath"`` — is also accepted and read from disk,
            which fixes a ``TypeError`` the original code raised when
            slicing a ``str`` and comparing against ``b'%PDF'``.

    Returns:
        A Markdown string: a char-count/timing header followed by one
        summary per 10,000-char chunk, or an ``❌``-prefixed error message.
    """
    start = time.time()

    # Normalize Gradio's possible payloads (filepath str / tempfile wrapper)
    # down to raw bytes so the PDF sniffing below works uniformly.
    if not isinstance(file_bytes, (bytes, bytearray)):
        path = getattr(file_bytes, "name", file_bytes)
        try:
            with open(path, "rb") as fh:
                file_bytes = fh.read()
        except OSError as e:
            return f"❌ Could not read file: {str(e)}"

    try:
        if file_bytes[:4].startswith(b"%PDF"):
            # flags=16 — presumably fitz.TEXT_DEHYPHENATE; TODO confirm
            # against the installed PyMuPDF version's flag constants.
            doc = fitz.open(stream=file_bytes, filetype="pdf")
            try:
                text = "".join(page.get_text("text", flags=16) for page in doc)
            finally:
                doc.close()  # release the document even if extraction fails
        else:
            text = file_bytes.decode("utf-8", errors="ignore")
    except Exception as e:
        return f"❌ Text extraction failed: {str(e)}"

    if not text.strip():
        return "❌ No text found"

    # Hard cap input size, then split into fixed-size chunks the model
    # can handle; 10,000 chars comfortably fits t5-small's context.
    text = text[:300000]
    chunks = [text[i:i + 10000] for i in range(0, len(text), 10000)]
    if not chunks:
        return "❌ No chunks to summarize"

    summaries = []
    for i, chunk in enumerate(chunks):
        # Soft ~9s time budget so a huge upload cannot hang the request.
        if time.time() - start > 9:
            summaries.append("⚠️ Stopped early")
            break
        try:
            summary = summarizer(chunk, max_length=40, min_length=10, do_sample=False)[0]['summary_text']
            summaries.append(f"**Chunk {i+1}**:\n{summary}")
        except Exception as e:
            # Keep going: one bad chunk should not lose the others.
            summaries.append(f"**Chunk {i+1}**: ❌ Error: {str(e)}")

    header = f"**Chars**: {len(text)}\n**Time**: {time.time()-start:.2f}s\n\n"
    return header + "\n\n".join(summaries)
| demo = gr.Interface(fn=summarize_file, inputs=gr.File(label="π PDF/TXT Notes"), outputs=gr.Textbox(label="π Summary"), title="Fast Summarizer", description="300,000+ chars in ~15β25s (CPU)") | |
if __name__ == "__main__":
    try:
        # share=False keeps the app local-only; 7860 is Gradio's default port.
        demo.launch(share=False, server_port=7860)
    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(f"❌ Gradio launch failed: {str(e)}")