import io

import pdfplumber
import torch
from docx import Document
from fastapi import FastAPI, Request, UploadFile, File, Form
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from transformers import pipeline

app = FastAPI(title="Lecture Summarizer")
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# Load model once (HF Spaces often have GPU — it will use it automatically if available)
device = 0 if torch.cuda.is_available() else -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",  # swap to "facebook/bart-large-cnn" for better quality if you upgrade hardware
    device=device,
)


def extract_text(content: bytes, filename: str | None) -> str | None:
    """Extract plain text from an uploaded document.

    The format is chosen from the (case-insensitive) file extension:
    ``.pdf`` via pdfplumber, ``.docx`` via python-docx, ``.txt`` decoded as
    UTF-8 with undecodable bytes replaced.

    Args:
        content: Raw bytes of the uploaded file.
        filename: Original file name; only its extension is inspected.
            ``None`` or an empty name is treated as an unsupported type.

    Returns:
        The extracted text, or ``None`` when the extension is unsupported or
        the parser fails on the content.
    """
    # A missing filename must not crash the request; it simply matches no
    # supported extension below.
    name = (filename or "").lower()
    try:
        if name.endswith(".pdf"):
            with pdfplumber.open(io.BytesIO(content)) as pdf:
                # extract_text() may yield None for a page; use "" instead.
                return "\n".join(page.extract_text() or "" for page in pdf.pages)
        if name.endswith(".docx"):
            doc = Document(io.BytesIO(content))
            return "\n".join(
                para.text for para in doc.paragraphs if para.text.strip()
            )
        if name.endswith(".txt"):
            return content.decode("utf-8", errors="replace")
    except Exception as e:
        # Best-effort extraction: report the failure instead of hiding it, but
        # let KeyboardInterrupt/SystemExit propagate (a bare except would not).
        print(f"Text extraction error for {name!r}: {e}")
        return None
    return None


def summarize_text(text: str, target_ratio: float = 0.32) -> str:
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Inputs shorter than 100 whitespace-separated words are returned
    unchanged. Otherwise the length bounds passed to the pipeline are derived
    from ``target_ratio`` times the word count.

    Args:
        text: The text to summarize.
        target_ratio: Desired summary size as a fraction of the input word
            count.

    Returns:
        The generated summary, the original text when it is too short to
        summarize, or a fixed error message if the pipeline raises.
    """
    words = len(text.split())
    if words < 100:
        return text

    target_length = int(words * target_ratio)
    max_l = max(500, min(1300, target_length + 200))
    min_l = max(300, int(target_length * 0.7))
    # max_l is capped at 1300 but min_l was not, so very long inputs produced
    # min_l >= max_l (an infeasible constraint). Keep min strictly below max.
    min_l = min(min_l, max_l - 1)
    # NOTE(review): these bounds are passed as generation lengths and may
    # exceed what the chosen checkpoint supports — confirm against the model
    # config.

    try:
        result = summarizer(
            text,
            max_length=max_l,
            min_length=min_l,
            length_penalty=1.8,
            num_beams=4,
            early_stopping=True,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']
    except Exception as e:
        print(f"Summarization error: {e}")
        return "Error generating summary — text may be too long or empty."
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the ``index.html`` template for the landing page."""
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/summarize")
async def summarize(
    file: UploadFile | None = File(None),
    text: str | None = Form(None),
):
    """Summarize an uploaded document or pasted text.

    A file upload takes precedence over ``text`` when both are supplied.

    Args:
        file: Optional uploaded document (pdf, docx or txt).
        text: Optional raw text to summarize.

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` when no usable
        input was provided or no text could be extracted from the file.
    """
    # A form posted without choosing a file may still carry a file part with
    # a blank filename; treat that as "no upload" so the text field is used
    # rather than failing extraction on an empty, nameless file.
    has_upload = file is not None and bool(file.filename)

    if has_upload:
        content = await file.read()
        extracted = extract_text(content, file.filename)
        # Whitespace-only output (e.g. a PDF with no text layer) counts as a
        # failed extraction, not as something to summarize.
        if not extracted or not extracted.strip():
            return {"error": "Could not extract text from file (supported: pdf, docx, txt)"}
    elif text and text.strip():
        extracted = text.strip()
    else:
        return {"error": "Provide either text or a file upload"}

    summary = summarize_text(extracted)
    return {"summary": summary}