Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,48 +1,28 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
import docx
|
| 5 |
-
import os
|
| 6 |
|
| 7 |
-
# Load a strong summarization model
|
| 8 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
if len(text) > 2000:
|
| 33 |
-
text = text[:2000]
|
| 34 |
-
|
| 35 |
-
summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
|
| 36 |
-
return summary[0]["summary_text"]
|
| 37 |
-
|
| 38 |
-
with gr.Blocks() as demo:
|
| 39 |
-
gr.Markdown("## 🧠 Smart Summarizer")
|
| 40 |
-
with gr.Row():
|
| 41 |
-
pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"])
|
| 42 |
-
docx_in = gr.File(label="Upload Word (.docx)", file_types=[".docx"])
|
| 43 |
-
txt_in = gr.Textbox(lines=8, label="Or Paste Text")
|
| 44 |
-
btn = gr.Button("Summarize")
|
| 45 |
-
out = gr.Textbox(lines=8, label="Summary")
|
| 46 |
-
btn.click(fn=summarize_input, inputs=[pdf_in, docx_in, txt_in], outputs=out)
|
| 47 |
-
|
| 48 |
-
demo.launch()
|
|
|
|
| 1 |
+
import os
import textwrap

import docx2txt
import gradio as gr
import PyPDF2
from transformers import pipeline
|
| 2 |
+
|
| 3 |
+
# Load the summarizer model
|
|
|
|
|
|
|
| 4 |
|
|
|
|
| 5 |
# Built once at module import so that every call to summarize_input reuses
# the same pipeline object instead of constructing a new one per request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 6 |
|
| 7 |
+
# Extract text based on file type
|
| 8 |
+
|
| 9 |
+
def extract_text(file):
    """Return the plain text contained in an uploaded .txt, .pdf or .docx file.

    Args:
        file: The upload to read. May be ``None`` (nothing uploaded), a
            filesystem path (``str`` / ``os.PathLike``), or a file-like
            object exposing a ``.name`` attribute. NOTE(review): which of
            these Gradio's ``File`` component delivers depends on the
            installed Gradio version and its ``type`` setting; confirm
            against the deployed version.

    Returns:
        The extracted text; an empty string when ``file`` is ``None``; or a
        human-readable "Unsupported file type" message when the extension is
        not one of txt / pdf / docx.
    """
    # No upload at all: report "no text" instead of failing on `None.name`.
    if file is None:
        return ""

    path = file if isinstance(file, (str, os.PathLike)) else file.name
    ext = str(path).rsplit(".", 1)[-1].lower()

    # Prefer the copy on disk when there is one; otherwise fall back to the
    # object itself so in-memory streams that merely carry a name still work.
    on_disk = os.path.isfile(path)
    source = path if on_disk else file

    if ext == "txt":
        if on_disk or not hasattr(file, "read"):
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = file.read()
        # Streams opened in text mode already yield str. For bytes, replace
        # undecodable sequences rather than aborting the whole request.
        if isinstance(raw, bytes):
            return raw.decode("utf-8", errors="replace")
        return raw

    if ext == "pdf":
        reader = PyPDF2.PdfReader(source)
        # Extract each page exactly once; pages with no text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

    if ext == "docx":
        return docx2txt.process(source)

    return "Unsupported file type. Please upload a .pdf, .docx, or .txt file."
|
| 10 |
+
|
| 11 |
+
# Chunk long text for full-document summarization
|
| 12 |
+
|
| 13 |
+
def chunk_text(text, max_length=1000):
    """Split ``text`` into non-empty chunks of at most ``max_length`` characters.

    Newline-separated paragraphs are packed greedily into a chunk for as long
    as the running length stays below ``max_length``. A single paragraph that
    is longer than ``max_length`` is wrapped at whitespace (and hard-split if
    it has none) so that no chunk exceeds the limit.

    Args:
        text: The text to split. ``None`` or an empty string yields ``[]``.
        max_length: Upper bound on the size of each chunk, in characters.

    Returns:
        A list of stripped, non-empty chunk strings in document order.
    """
    if not text:
        return []

    chunks = []
    current = ""
    for para in text.split("\n"):
        if max_length > 0 and len(para) > max_length:
            # The paragraph alone is over the limit: close the chunk being
            # built and emit the paragraph as several bounded pieces.
            chunks.append(current)
            chunks.extend(
                textwrap.wrap(para, width=max_length, break_on_hyphens=False)
            )
            current = ""
        elif len(current) + len(para) < max_length:
            current += para + "\n"
        else:
            chunks.append(current)
            current = para + "\n"
    chunks.append(current)

    # Blank or whitespace-only chunks carry nothing to summarize; drop them.
    stripped = (chunk.strip() for chunk in chunks)
    return [chunk for chunk in stripped if chunk]
|
| 14 |
+
|
| 15 |
+
# Full summarization function
|
| 16 |
+
|
| 17 |
+
def summarize_input(text, file):
    """Summarize pasted text or, when none is given, an uploaded document.

    Pasted text takes priority over the upload. The source text is split with
    ``chunk_text`` and every chunk is summarized independently by the
    module-level ``summarizer`` pipeline.

    Args:
        text: Text typed or pasted by the user; may be ``None`` or blank.
        file: Uploaded file as accepted by ``extract_text``; may be ``None``.

    Returns:
        The per-chunk summaries joined by blank lines, or a short
        user-facing message when there is nothing usable to summarize.
    """
    no_input_message = "Please enter text or upload a valid file."

    pasted = (text or "").strip()
    if pasted:
        source_text = pasted
    elif file is not None:
        source_text = extract_text(file) or ""
        # extract_text reports an unsupported extension as ordinary text.
        # Show that message to the user instead of summarizing it.
        if source_text.startswith("Unsupported file type"):
            return source_text
    else:
        source_text = ""

    if not source_text.strip():
        return no_input_message

    summaries = []
    for chunk in chunk_text(source_text):
        # Never hand the model a blank chunk.
        if not chunk.strip():
            continue
        result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
        summaries.append(result[0]["summary_text"])

    if not summaries:
        return no_input_message
    return "\n\n".join(summaries)
|
| 22 |
+
|
| 23 |
+
# Gradio interface
|
| 24 |
+
|
| 25 |
+
# UI wiring: one optional free-text box and one upload slot are both passed
# to summarize_input, whose return value is shown in a single textbox.
_text_input = gr.Textbox(lines=8, label="Enter text (optional)")
_file_input = gr.File(
    label="Upload file (.txt, .pdf, .docx)",
    file_types=[".pdf", ".docx", ".txt"],
)
_summary_output = gr.Textbox(label="Summary")

iface = gr.Interface(
    fn=summarize_input,
    inputs=[_text_input, _file_input],
    outputs=_summary_output,
    title="📄 Smart Document Summarizer",
    description="Paste text or upload a document to get a full summary using a Hugging Face transformer.",
)

# Start the web app.
iface.launch()
|
| 28 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|