# Azidan's picture
# Create app.py
# 9add39c verified
from fastapi import FastAPI, Request, UploadFile, File, Form
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from transformers import pipeline
import torch
import io
import pdfplumber
from docx import Document
# Application object; the title is passed straight to FastAPI.
app = FastAPI(
    title="Lecture Summarizer",
)

# Serve the contents of the local "static" directory under /static.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)

# Jinja2 templates are looked up in the local "templates" directory.
templates = Jinja2Templates(
    directory="templates",
)
# The summarization pipeline is built once, at import time, and shared by
# every request. Device index 0 is used when CUDA is available (HF Spaces
# often provide a GPU); otherwise -1 is passed to run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Swap the model to "facebook/bart-large-cnn" for better quality if you
# upgrade hardware.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
def extract_text(content: bytes, filename: str) -> str | None:
filename = filename.lower()
try:
if filename.endswith('.pdf'):
with pdfplumber.open(io.BytesIO(content)) as pdf:
return "\n".join(page.extract_text() or "" for page in pdf.pages)
elif filename.endswith('.docx'):
doc = Document(io.BytesIO(content))
return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
elif filename.endswith('.txt'):
return content.decode('utf-8', errors='replace')
return None
except:
return None
def summarize_text(text: str, target_ratio: float = 0.32) -> str:
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Texts with fewer than 100 whitespace-separated words are returned
    unchanged. For longer texts a target length of
    ``word_count * target_ratio`` is turned into generation bounds:
    ``max_length`` is ``target + 200`` kept within 500..1300 and
    ``min_length`` is 70% of the target but at least 300. ``min_length`` is
    then clamped so it never exceeds ``max_length``.

    Args:
        text: The text to summarize.
        target_ratio: Desired summary size as a fraction of the input word
            count.

    Returns:
        The generated summary, the original text when it is short, or a
        fixed error message when the pipeline raises.
    """
    word_count = len(text.split())
    if word_count < 100:
        return text

    target_length = int(word_count * target_ratio)
    max_l = max(500, min(1300, target_length + 200))
    min_l = max(300, int(target_length * 0.7))

    # NOTE(review): assumes `max_position_embeddings`, when the model config
    # exposes it, bounds how many tokens the model can generate -- confirm
    # for the configured model. When the attribute is absent nothing changes.
    model_config = getattr(getattr(summarizer, "model", None), "config", None)
    position_limit = getattr(model_config, "max_position_embeddings", None)
    if isinstance(position_limit, int) and position_limit > 0:
        max_l = min(max_l, position_limit)

    # min_l grows with the input while max_l is capped, so without this clamp
    # long inputs would request min_length > max_length.
    min_l = min(min_l, max_l)

    try:
        result = summarizer(
            text,
            max_length=max_l,
            min_length=min_l,
            length_penalty=1.8,
            num_beams=4,
            early_stopping=True,
            do_sample=False,
            truncation=True,
        )
        return result[0]["summary_text"]
    except Exception as e:
        print(f"Summarization error: {e}")
        return "Error generating summary — text may be too long or empty."
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the ``index.html`` template for the root URL."""
    # The incoming request is handed to the template as its only context value.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.post("/summarize")
async def summarize(
    file: UploadFile | None = File(None),
    text: str | None = Form(None),
):
    """Summarize an uploaded document or a pasted text.

    A file upload takes precedence over the ``text`` field. An upload is only
    considered present when it carries a filename; otherwise the ``text``
    field is used.

    Args:
        file: Optional uploaded file (pdf, docx or txt).
        text: Optional raw text from the form.

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` when no usable
        input was provided or no text could be extracted from the file.
    """
    # A file part without a name (e.g. a form posted with no file selected --
    # presumably; verify against the front end) must not shadow the text
    # field, and a missing name cannot be passed on for extension matching.
    has_upload = file is not None and bool(file.filename)

    if has_upload:
        content = await file.read()
        extracted = extract_text(content, file.filename)
        if not extracted:
            return {"error": "Could not extract text from file (supported: pdf, docx, txt)"}
    elif text and text.strip():
        extracted = text.strip()
    else:
        return {"error": "Provide either text or a file upload"}

    summary = summarize_text(extracted)
    return {"summary": summary}