Spaces:

aspendse
/

Text-summarizer

Sleeping

App Files Files Community

aspendse committed on Jul 15, 2025

Commit

a16a237

verified ·

1 Parent(s): c7a2237

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -45

app.py CHANGED Viewed

@@ -1,48 +1,28 @@
-import gradio as gr
-from transformers import pipeline
-import PyPDF2
-import docx
-import os
-# Load a strong summarization model
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-def extract_text_from_pdf(file):
-    reader = PyPDF2.PdfReader(file.name)
-    return "\n".join(page.extract_text() or "" for page in reader.pages)
-def extract_text_from_docx(file):
-    doc = docx.Document(file.name)
-    return "\n".join(para.text for para in doc.paragraphs)
-def summarize_input(pdf_file, docx_file, text_input):
-    if pdf_file:
-        text = extract_text_from_pdf(pdf_file)
-    elif docx_file:
-        text = extract_text_from_docx(docx_file)
-    elif text_input:
-        text = text_input
-    else:
-        return "❌ Please upload a file or paste text."
-    if not text.strip():
-        return "❌ Input could not be read or is empty."
-    # Limit to 2000 chars to avoid model truncation
-    if len(text) > 2000:
-        text = text[:2000]
-    summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
-    return summary[0]["summary_text"]
-with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Smart Summarizer")
-    with gr.Row():
-        pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"])
-        docx_in = gr.File(label="Upload Word (.docx)", file_types=[".docx"])
-    txt_in = gr.Textbox(lines=8, label="Or Paste Text")
-    btn = gr.Button("Summarize")
-    out = gr.Textbox(lines=8, label="Summary")
-    btn.click(fn=summarize_input, inputs=[pdf_in, docx_in, txt_in], outputs=out)
-demo.launch()

import os
import textwrap

import docx2txt
import gradio as gr
import PyPDF2
from transformers import pipeline
# Load the summarizer model.
# Built at module level so the same pipeline object is reused by every call.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Extract text based on file type
def extract_text(file):
    """Return the plain text of an uploaded ``.txt``, ``.pdf`` or ``.docx`` file.

    Args:
        file: The upload to read. Either an object exposing ``name`` (and
            optionally ``read``), or a plain path string. ``None`` means
            "nothing was uploaded".

    Returns:
        The extracted text. ``""`` when ``file`` is ``None``. For any other
        extension a human-readable "Unsupported file type" message is
        returned (not raised), so the caller can show it to the user.

    Raises:
        UnicodeDecodeError: If a ``.txt`` upload is not valid UTF-8.
    """
    if file is None:
        return ""

    # NOTE(review): depending on the Gradio version the upload may arrive as
    # an open temp-file object or as a path-like string -- confirm against the
    # deployed version. Both shapes are handled here.
    path = getattr(file, "name", file)
    # Text after the last dot, lower-cased; a name with no dot yields the
    # whole name, which then falls through to "unsupported".
    ext = str(path).rpartition(".")[2].lower()
    source = file if hasattr(file, "read") else path

    if ext == "txt":
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(path, "rb") as handle:
                data = handle.read()
        # A text-mode handle already yields str; only bytes need decoding.
        return data.decode("utf-8") if isinstance(data, bytes) else data

    if ext == "pdf":
        reader = PyPDF2.PdfReader(source)
        # Extract each page once, then drop pages that produced no text.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)

    if ext == "docx":
        return docx2txt.process(source)

    return "Unsupported file type. Please upload a .pdf, .docx, or .txt file."
# Chunk long text for full-document summarization
def chunk_text(text, max_length=1000):
    """Split ``text`` into newline-aligned chunks of at most ``max_length`` chars.

    Consecutive lines are packed into one chunk while the running size
    (each line plus its newline) stays below ``max_length``. A single line
    longer than ``max_length`` is wrapped on whitespace (hard-splitting
    over-long words) so that no chunk exceeds the limit.

    Args:
        text: The full document text.
        max_length: Upper bound on the length of each chunk, in characters.

    Returns:
        A list of non-empty, stripped chunks in document order. Empty or
        whitespace-only input yields ``[]``.

    Raises:
        ValueError: If ``max_length`` is less than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    chunks = []
    pending = []      # lines collected for the chunk being built
    pending_len = 0   # sum of len(line) + 1 for every pending line

    for para in text.split("\n"):
        if len(para) > max_length:
            # An oversized line would otherwise become one oversized chunk.
            pieces = textwrap.wrap(para, width=max_length)
        else:
            pieces = [para]

        for piece in pieces:
            if pending_len + len(piece) >= max_length:
                chunk = "\n".join(pending).strip()
                if chunk:  # never emit an empty chunk
                    chunks.append(chunk)
                pending, pending_len = [], 0
            pending.append(piece)
            pending_len += len(piece) + 1

    tail = "\n".join(pending).strip()
    if tail:
        chunks.append(tail)
    return chunks
# Full summarization function
def summarize_input(text, file):
    """Summarize pasted text or, when none is given, an uploaded file.

    Pasted text takes priority; the file is only read when the text box is
    empty or whitespace. The source text is split with ``chunk_text``, each
    chunk is summarized on its own, and the partial summaries are joined
    with blank lines.

    Args:
        text: Contents of the text box; ``None`` is treated as empty.
        file: The uploaded file, or ``None`` when nothing was uploaded.

    Returns:
        The combined summary, or a short user-facing message when there is
        nothing to summarize or the file type is unsupported.
    """
    source_text = (text or "").strip()
    if not source_text and file is not None:
        source_text = (extract_text(file) or "").strip()
    if not source_text:
        return "Please enter text or upload a valid file."

    # extract_text reports an unsupported extension by returning a message;
    # show it as-is instead of feeding it to the model.
    if source_text.startswith("Unsupported file type."):
        return source_text

    summaries = [
        summarizer(chunk, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunk_text(source_text)
        if chunk  # skip empty chunks rather than calling the model on ""
    ]
    return "\n\n".join(summaries)
# Gradio interface
# Two inputs, passed to summarize_input in this order: the text box, then the
# file upload. The single output box shows the returned summary or message.
iface = gr.Interface(
    fn=summarize_input,
    inputs=[
        gr.Textbox(lines=8, label="Enter text (optional)"),
        gr.File(
            label="Upload file (.txt, .pdf, .docx)",
            file_types=[".pdf", ".docx", ".txt"],
        ),
    ],
    outputs=gr.Textbox(label="Summary"),
    title="📄 Smart Document Summarizer",
    description="Paste text or upload a document to get a full summary using a Hugging Face transformer.",
)
iface.launch()