oluinioluwa814 committed on
Commit
0284dfd
·
verified ·
1 Parent(s): cd6811c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -49
app.py DELETED
@@ -1,49 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from transformers import pipeline
4
- from PyPDF2 import PdfReader
5
- from pathlib import Path
6
-
7
- # 1. Pick the best small Google model for abstractive summarisation
8
- SUMMARISER = pipeline(
9
- "summarization",
10
- model="google/pegasus-xsum",
11
- device=0 if torch.cuda.is_available() else -1,
12
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
13
- )
14
-
15
- # 2. Helper: extract raw text from uploaded PDF
16
- def pdf_to_text(pdf_file):
17
- reader = PdfReader(pdf_file.name)
18
- text = "".join(page.extract_text() or "" for page in reader.pages)
19
- return text.strip()
20
-
21
- # 3. Core summarisation logic
22
- def summarise_pdf(pdf_file):
23
- if pdf_file is None:
24
- return "❗ Please upload a PDF file."
25
- text = pdf_to_text(pdf_file)
26
- if not text:
27
- return "❗ Could not extract text from this PDF."
28
-
29
- # Pegasus-XSUM works best with ≤512 tokens; chunk if needed
30
- max_chunk = 450 # tokens ≈ 1800 chars
31
- chunks = [text[i : i + max_chunk] for i in range(0, len(text), max_chunk)]
32
- summaries = [SUMMARISER(chunk, max_length=64, min_length=16, do_sample=False)[0]["summary_text"] for chunk in chunks]
33
-
34
- return "\n".join(summaries)
35
-
36
- # 4. Gradio UI
37
- iface = gr.Interface(
38
- fn=summarise_pdf,
39
- inputs=gr.Textbox(label="Upload PDF", lines=3),
40
- outputs=gr.Textbox(label="Summary", lines=10),
41
- title="PDF Summariser – Google Pegasus-XSUM",
42
- description="Upload any PDF and get a concise abstractive summary in seconds.",
43
- # allow_flagging="never",
44
- )
45
-
46
- # 5. Launch (use queue for HF Spaces)
47
- if __name__ == "__main__":
48
- iface.launch(debug=True)
49
-