Spaces:
Runtime error
Runtime error
import asyncio
import io
from pathlib import Path

import pdfplumber
import torch
from docx import Document
from fastapi import FastAPI, File, Form, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from transformers import pipeline
# Resolve resource directories relative to this file rather than the process
# working directory, so the app starts correctly no matter where the server
# is launched from.
BASE_DIR = Path(__file__).resolve().parent

app = FastAPI(title="Lecture Summarizer")
# Static assets are served under /static from the "static" directory that
# sits next to this module.
app.mount("/static", StaticFiles(directory=str(BASE_DIR / "static")), name="static")
# HTML templates are loaded from the "templates" directory next to this module.
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
# Build the summarization pipeline a single time at import so every request
# reuses it. The pipeline is placed on CUDA device 0 when torch reports a GPU
# (HF Spaces often provide one) and on the CPU (-1) otherwise.
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Swap the checkpoint to "facebook/bart-large-cnn" for better quality if you
# upgrade hardware.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
| def extract_text(content: bytes, filename: str) -> str | None: | |
| filename = filename.lower() | |
| try: | |
| if filename.endswith('.pdf'): | |
| with pdfplumber.open(io.BytesIO(content)) as pdf: | |
| return "\n".join(page.extract_text() or "" for page in pdf.pages) | |
| elif filename.endswith('.docx'): | |
| doc = Document(io.BytesIO(content)) | |
| return "\n".join(para.text for para in doc.paragraphs if para.text.strip()) | |
| elif filename.endswith('.txt'): | |
| return content.decode('utf-8', errors='replace') | |
| return None | |
| except: | |
| return None | |
# Position limit of the BART-family checkpoints used here (distilbart-cnn and
# bart-large-cnn); generation cannot run past it. Update if the model is
# swapped for one with a different limit.
_MODEL_MAX_POSITIONS = 1024


def _summary_length_bounds(word_count: int, target_ratio: float) -> tuple[int, int]:
    """Return feasible ``(min_length, max_length)`` generation bounds."""
    target_length = int(word_count * target_ratio)
    preferred_max = max(500, min(1300, target_length + 200))
    preferred_min = max(300, int(target_length * 0.7))
    # Never ask for more tokens than the model can position, nor for a
    # "summary" longer than the input itself.
    max_l = max(2, min(preferred_max, _MODEL_MAX_POSITIONS, word_count))
    # Keep min_length strictly below max_length so generation can terminate.
    min_l = max(1, min(preferred_min, int(max_l * 0.7)))
    return min_l, max_l


def summarize_text(text: str, target_ratio: float = 0.32) -> str:
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        target_ratio: Desired summary length as a fraction of the input's
            word count; used to derive the generation length bounds.

    Returns:
        ``text`` unchanged when it has fewer than 100 whitespace-separated
        words; otherwise the generated summary. If the pipeline raises, a
        fixed error message is returned instead of propagating the exception.

    Note:
        The pipeline is called with ``truncation=True``, so input beyond the
        model's input limit is dropped before summarizing.
    """
    words = len(text.split())
    if words < 100:
        # Too short to be worth summarizing.
        return text

    min_l, max_l = _summary_length_bounds(words, target_ratio)
    try:
        result = summarizer(
            text,
            max_length=max_l,
            min_length=min_l,
            length_penalty=1.8,
            num_beams=4,
            early_stopping=True,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']
    except Exception as e:
        print(f"Summarization error: {e}")
        return "Error generating summary — text may be too long or empty."
async def home(request: Request):
    """Render the ``index.html`` template for the incoming request."""
    # NOTE(review): no route decorator is visible on this handler in this view
    # of the file; presumably it is registered as the landing page
    # (e.g. GET "/") — confirm against the deployed source.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
# Serializes model use: inference runs in a worker thread so the event loop
# stays responsive, while this lock keeps at most one summarization running
# against the shared pipeline at a time.
_inference_lock = asyncio.Lock()


async def summarize(
    file: UploadFile | None = File(None),
    text: str | None = Form(None),
):
    """Summarize an uploaded document or pasted text.

    Args:
        file: Optional uploaded file (pdf, docx, or txt). A file part without
            a filename is treated as "no file" so that pasted text submitted
            in the same form is still used.
        text: Optional pasted text, used when no file was uploaded.

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` when neither
        input is usable or no text could be extracted from the file.
    """
    # NOTE(review): no route decorator is visible on this handler in this view
    # of the file; presumably it is registered as a POST endpoint — confirm
    # the path against the deployed source and the front-end.
    if file is not None and file.filename:
        content = await file.read()
        # Parsing PDFs/DOCX is blocking work; keep it off the event loop.
        extracted = await asyncio.to_thread(extract_text, content, file.filename)
        # Whitespace-only output (e.g. a scanned PDF with no text layer)
        # counts as a failed extraction.
        extracted = extracted.strip() if extracted else ""
        if not extracted:
            return {"error": "Could not extract text from file (supported: pdf, docx, txt)"}
    elif text and text.strip():
        extracted = text.strip()
    else:
        return {"error": "Provide either text or a file upload"}

    async with _inference_lock:
        summary = await asyncio.to_thread(summarize_text, extracted)
    return {"summary": summary}