Yatheshr committed on
Commit
1d2babb
·
verified ·
1 Parent(s): 61dd806

Delete Test/app.py

Browse files
Files changed (1) hide show
  1. Test/app.py +0 -46
Test/app.py DELETED
@@ -1,46 +0,0 @@
1
- import gradio as gr
2
- import fitz # PyMuPDF
3
- from transformers import pipeline
4
- import textwrap
5
-
6
- # Load summarization model
7
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
8
-
9
- # Extract text from PDF
10
- def extract_text_from_pdf(file_obj):
11
- text = ""
12
- with fitz.open(stream=file_obj.read(), filetype="pdf") as doc:
13
- for page in doc:
14
- text += page.get_text()
15
- return text.strip().replace("\n", " ")
16
-
17
- # Chunk long text into manageable sizes
18
- def chunk_text(text, max_chunk_len=1000):
19
- return textwrap.wrap(text, max_chunk_len)
20
-
21
- # Summarize long PDFs by chunking
22
- def summarize_long_pdf(file_obj):
23
- full_text = extract_text_from_pdf(file_obj)
24
- if not full_text:
25
- return "❌ No readable text extracted from the PDF."
26
-
27
- chunks = chunk_text(full_text, max_chunk_len=1000)
28
- summaries = []
29
-
30
- for i, chunk in enumerate(chunks):
31
- try:
32
- summary = summarizer(chunk, max_length=120, min_length=30, do_sample=False)[0]['summary_text']
33
- summaries.append(f"🔹 Part {i+1}: {summary}")
34
- except Exception as e:
35
- summaries.append(f"⚠️ Error summarizing part {i+1}: {e}")
36
-
37
- return "\n\n".join(summaries)
38
-
39
- # Gradio UI
40
- gr.Interface(
41
- fn=summarize_long_pdf,
42
- inputs=gr.File(label="📥 Upload Multi-page PDF"),
43
- outputs=gr.Textbox(label="📝 Full Summary"),
44
- title="📘 Multi-Page PDF Summarizer",
45
- description="Upload long PDFs (e.g., Morningstar reports). Summarized in chunks using BART."
46
- ).launch()