Spaces:

aspendse
/

Text-summarizer

Sleeping

App Files Files Community

aspendse committed on Jul 15, 2025

Commit

d6464e5

verified ·

1 Parent(s): 4ae9d44

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -40

app.py CHANGED Viewed

@@ -1,45 +1,64 @@
 import gradio as gr
 from transformers import pipeline
-import docx2txt
 import PyPDF2
-# Load the summarizer model
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-# Function to extract text from supported file types
-def extract_text(file):
-    if file is None:
-        return ""
-    file_path = file.name
-    if file_path.endswith(".txt"):
-        with open(file_path, "r", encoding="utf-8") as f:
-            return f.read()
-    elif file_path.endswith(".pdf"):
-        reader = PyPDF2.PdfReader(file)
-        return "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
-    elif file_path.endswith(".docx"):
-        return docx2txt.process(file)
     else:
-        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
-# Summarize either text input or uploaded file
-def summarize(text, file):
-    extracted_text = text.strip() or extract_text(file)
-    if not extracted_text:
-        return "Please enter text or upload a file."
-    summary = summarizer(extracted_text, max_length=130, min_length=30, do_sample=False)
-    return summary[0]['summary_text']
-# Gradio interface
-iface = gr.Interface(
-    fn=summarize,
-    inputs=[
-        gr.Textbox(lines=10, label="Enter text (optional)"),
-        gr.File(label="Upload a file (.txt, .pdf, .docx)")
-    ],
-    outputs=gr.Textbox(label="Summary"),
-    title="Text Summarizer",
-    description="Paste text or upload a file to summarize using a BART model."
-)
-iface.launch()

 import gradio as gr
 from transformers import pipeline
 import PyPDF2
+import docx
# Build the summarization pipeline at module level so the model is loaded
# when the app starts and the same object is used for each request.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def read_pdf(file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        file: The upload to read, either a string path or an object that
            carries the path in its ``.name`` attribute.

    Returns:
        The concatenated text of all pages, or the literal message
        ``"Failed to read PDF."`` when the document cannot be read. The
        function never raises; callers receive the message instead.
    """
    import logging

    try:
        # Accept a bare string path as well as an upload wrapper with ``.name``.
        path = file if isinstance(file, str) else file.name
        reader = PyPDF2.PdfReader(path)
        # extract_text() may produce nothing for a page; treat that as "".
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception:
        # Deliberately best-effort: the UI shows a message rather than a
        # traceback, but the cause is logged instead of being discarded.
        logging.getLogger(__name__).exception("Could not read PDF upload")
        return "Failed to read PDF."
def read_docx(file):
    """Extract the paragraph text of an uploaded Word (.docx) document.

    Args:
        file: The upload to read, either a string path or an object that
            carries the path in its ``.name`` attribute.

    Returns:
        The document's paragraphs joined with newlines, or the literal
        message ``"Failed to read Word document."`` when the document cannot
        be read. The function never raises; callers receive the message
        instead.
    """
    import logging

    try:
        # Accept a bare string path as well as an upload wrapper with ``.name``.
        path = file if isinstance(file, str) else file.name
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
    except Exception:
        # Deliberately best-effort: the UI shows a message rather than a
        # traceback, but the cause is logged instead of being discarded.
        logging.getLogger(__name__).exception("Could not read Word upload")
        return "Failed to read Word document."
# Messages the file readers return in place of text when a document cannot be
# read. They must be shown to the user as-is, never summarized.
_READ_FAILURES = ("Failed to read PDF.", "Failed to read Word document.")

# Only the first this-many characters of the input are summarized.
_MAX_INPUT_CHARS = 3000


def summarize_file(pdf_file, docx_file, text_input):
    """Summarize the first available input: PDF, then Word file, then text.

    Args:
        pdf_file: Uploaded PDF, or a falsy value when none was provided.
        docx_file: Uploaded Word document, or a falsy value when none was
            provided. Ignored when ``pdf_file`` is given.
        text_input: Pasted text. Used only when no file was uploaded.

    Returns:
        The generated summary, or a human-readable message when there is
        nothing to summarize or the uploaded file could not be read.
    """
    if pdf_file or docx_file:
        text = read_pdf(pdf_file) if pdf_file else read_docx(docx_file)
        # A reader failure comes back as a message, not as document text.
        # Surface it directly instead of feeding it to the model.
        if text in _READ_FAILURES:
            return text
    elif text_input:
        text = text_input
    else:
        return "Please upload a file or enter some text."

    if not text.strip():
        return "No valid text found to summarize."

    # Slicing is a no-op for inputs already within the cap.
    text = text[:_MAX_INPUT_CHARS]

    # truncation=True guards against inputs whose token count still exceeds
    # the model's limit after the character cap.
    result = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]
# Page layout: a heading, two file uploaders side by side, a text box for
# pasted input, a button, and a text box that receives the summary.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🧠 Smart Text Summarizer")

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF")
        docx_input = gr.File(file_types=[".docx"], label="Upload Word File")

    text_input = gr.Textbox(label="Or Paste Text Here", lines=8)
    summarize_button = gr.Button(value="Summarize")
    output = gr.Textbox(label="Summary", lines=8)

    # Clicking the button passes the three inputs to summarize_file, in this
    # order, and writes its return value into the summary box.
    summarize_button.click(
        fn=summarize_file,
        inputs=[pdf_input, docx_input, text_input],
        outputs=output,
    )

demo.launch()