Azidan committed on
Commit
9add39c
·
verified ·
1 Parent(s): 96640d6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, UploadFile, File, Form
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.templating import Jinja2Templates
5
+ from transformers import pipeline
6
+ import torch
7
+ import io
8
+ import pdfplumber
9
+ from docx import Document
10
+
11
# Web application object, its static-asset mount, and the template loader.
app = FastAPI(title="Lecture Summarizer")
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# Choose the inference device once at import time: index 0 when torch reports
# CUDA as available, otherwise -1 (reported as "CPU" in the log line below).
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Build the summarization pipeline a single time at module load so every
# request reuses it. Swap the model to "facebook/bart-large-cnn" for better
# quality if you upgrade hardware.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
25
+
26
+ def extract_text(content: bytes, filename: str) -> str | None:
27
+ filename = filename.lower()
28
+ try:
29
+ if filename.endswith('.pdf'):
30
+ with pdfplumber.open(io.BytesIO(content)) as pdf:
31
+ return "\n".join(page.extract_text() or "" for page in pdf.pages)
32
+ elif filename.endswith('.docx'):
33
+ doc = Document(io.BytesIO(content))
34
+ return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
35
+ elif filename.endswith('.txt'):
36
+ return content.decode('utf-8', errors='replace')
37
+ return None
38
+ except:
39
+ return None
40
+
41
def summarize_text(text: str, target_ratio: float = 0.32) -> str:
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Inputs shorter than 100 whitespace-separated words are returned
    unchanged. Otherwise a target length is derived from the word count and
    ``target_ratio`` and converted into ``max_length`` / ``min_length``
    limits for the pipeline call.

    Args:
        text: The text to summarize.
        target_ratio: Fraction of the input word count to aim for.

    Returns:
        The generated summary, the original text when it is under 100
        words, or a fixed error message if the pipeline call raises.
    """
    word_count = len(text.split())
    if word_count < 100:
        return text

    target_length = int(word_count * target_ratio)
    max_l = max(500, min(1300, target_length + 200))
    # max_l is capped at 1300 while the raw minimum grows without bound, so
    # for very long inputs the minimum would exceed the maximum. Clamp it so
    # the two limits never contradict each other.
    min_l = min(max(300, int(target_length * 0.7)), max_l)

    try:
        result = summarizer(
            text,
            max_length=max_l,
            min_length=min_l,
            length_penalty=1.8,
            num_beams=4,
            early_stopping=True,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']
    except Exception as e:
        # Log the cause server-side; the caller receives a generic message.
        print(f"Summarization error: {e}")
        return "Error generating summary — text may be too long or empty."
63
+
64
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render ``index.html`` for the root URL, passing the request to the template."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
67
+
68
@app.post("/summarize")
async def summarize(
    file: UploadFile | None = File(None),
    text: str | None = Form(None)
):
    """Summarize an uploaded file or pasted text.

    A file with a non-empty filename takes precedence; otherwise the
    ``text`` form field is used after stripping surrounding whitespace.

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` when no
        usable input was supplied or no text could be extracted from the file.
    """
    # An UploadFile object is always truthy, so test the filename explicitly.
    # NOTE(review): an empty file field may arrive as an upload with a blank
    # filename — confirm against the FastAPI/Starlette version in use. Such a
    # part is ignored here so that pasted text is still honoured.
    has_file = file is not None and bool(file.filename)

    if has_file:
        content = await file.read()
        extracted = extract_text(content, file.filename)
        if not extracted:
            return {"error": "Could not extract text from file (supported: pdf, docx, txt)"}
    elif text and text.strip():
        extracted = text.strip()
    else:
        return {"error": "Provide either text or a file upload"}

    summary = summarize_text(extracted)
    return {"summary": summary}