Spaces:

aspendse
/

Text-summarizer

Sleeping

App Files Community

aspendse committed on Jul 15, 2025

Commit

d9fdae3

verified ·

1 Parent(s): 0d08870

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -42

app.py CHANGED Viewed

@@ -2,63 +2,47 @@ import gradio as gr
 from transformers import pipeline
 import PyPDF2
 import docx
-# Load the summarization pipeline
-summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
-def read_pdf(file):
-    try:
-        reader = PyPDF2.PdfReader(file.name)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text() or ""
-        return text
-    except Exception:
-        return "Failed to read PDF."
-def read_docx(file):
-    try:
-        doc = docx.Document(file.name)
-        return "\n".join([para.text for para in doc.paragraphs])
-    except Exception:
-        return "Failed to read Word document."
-def summarize_file(pdf_file, docx_file, text_input):
     if pdf_file:
-        text = read_pdf(pdf_file)
     elif docx_file:
-        text = read_docx(docx_file)
     elif text_input:
         text = text_input
     else:
-        return "Please upload a file or enter some text."
     if not text.strip():
-        return "No valid text found to summarize."
-    if len(text) > 3000:
-        text = text[:3000]
-    summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
-    return summary
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Smart Text Summarizer")
     with gr.Row():
-        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-        docx_input = gr.File(label="Upload Word File", file_types=[".docx"])
-    text_input = gr.Textbox(lines=8, label="Or Paste Text Here")
-    summarize_button = gr.Button("Summarize")
-    output = gr.Textbox(lines=8, label="Summary")
-    summarize_button.click(
-        summarize_file,
-        inputs=[pdf_input, docx_input, text_input],
-        outputs=output
-    )
 demo.launch()

 from transformers import pipeline
 import PyPDF2
 import docx
+import os
+# Load a strong summarization model
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+def extract_text_from_pdf(file):
+    reader = PyPDF2.PdfReader(file.name)
+    return "\n".join(page.extract_text() or "" for page in reader.pages)
+def extract_text_from_docx(file):
+    doc = docx.Document(file.name)
+    return "\n".join(para.text for para in doc.paragraphs)
+def summarize_input(pdf_file, docx_file, text_input):
     if pdf_file:
+        text = extract_text_from_pdf(pdf_file)
     elif docx_file:
+        text = extract_text_from_docx(docx_file)
     elif text_input:
         text = text_input
     else:
+        return "❌ Please upload a file or paste text."
     if not text.strip():
+        return "❌ Input could not be read or is empty."
+    # Limit to 2000 chars to avoid model truncation
+    if len(text) > 2000:
+        text = text[:2000]
+    summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
+    return summary[0]["summary_text"]
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 Smart Summarizer")
     with gr.Row():
+        pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"])
+        docx_in = gr.File(label="Upload Word (.docx)", file_types=[".docx"])
+    txt_in = gr.Textbox(lines=8, label="Or Paste Text")
+    btn = gr.Button("Summarize")
+    out = gr.Textbox(lines=8, label="Summary")
+    btn.click(fn=summarize_input, inputs=[pdf_in, docx_in, txt_in], outputs=out)
 demo.launch()