Spaces:

aspendse
/

Text-summarizer

Sleeping

App Files Community

aspendse committed on Jul 15, 2025

Commit

274f18d

verified ·

1 Parent(s): b76110c

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -55

app.py CHANGED Viewed

@@ -1,68 +1,80 @@
-import gradio as gr
 from transformers import pipeline
 from PyPDF2 import PdfReader
-from docx import Document
 from fpdf import FPDF
-import os
-# Load summarization pipeline
-summarizer = pipeline("summarization")
-# Function to read text from different file types
-def extract_text(file):
     text = ""
-    if file.name.endswith(".pdf"):
-        reader = PdfReader(file.name)
-        for page in reader.pages:
-            text += page.extract_text()
-    elif file.name.endswith(".docx"):
-        doc = Document(file.name)
-        for para in doc.paragraphs:
-            text += para.text + "\n"
-    else:
-        text = file.read().decode("utf-8")
     return text
-# Function to summarize and return as PDF or Word
-def summarize_file(file, output_format):
-    text = extract_text(file)
-    if not text.strip():
-        return None, "File is empty or could not be read."
-    summarized = summarizer(text, max_length=150, min_length=40, do_sample=False)[0]["summary_text"]
-    output_path = "/tmp/summary_output"
-    if output_format == "PDF":
-        pdf = FPDF()
-        pdf.add_page()
-        pdf.set_auto_page_break(auto=True, margin=15)
-        pdf.set_font("Arial", size=12)
-        for line in summarized.split("\n"):
-            pdf.multi_cell(0, 10, line)
-        output_file = f"{output_path}.pdf"
-        pdf.output(output_file)
     else:
-        doc = Document()
-        doc.add_heading("Summary", 0)
-        doc.add_paragraph(summarized)
-        output_file = f"{output_path}.docx"
-        doc.save(output_file)
-    return output_file, "Success"
-# Gradio interface
-iface = gr.Interface(
-    fn=summarize_file,
-    inputs=[
-        gr.File(label="Upload a text, Word, or PDF file"),
-        gr.Radio(["PDF", "Word"], label="Choose output format")
-    ],
-    outputs=[
-        gr.File(label="Download Summary"),
-        gr.Textbox(label="Status")
-    ],
-    title="Smart Text Summarizer",
-    description="Upload a .txt, .docx, or .pdf file and get the summary back as a Word or PDF file."
-)
-iface.launch()

+import streamlit as st
 from transformers import pipeline
 from PyPDF2 import PdfReader
+import docx2txt
 from fpdf import FPDF
+from io import BytesIO
+from docx import Document
+# Load summarizer
+summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
+# Helper: Extract text from PDF
+def extract_text_from_pdf(file):
+    pdf = PdfReader(file)
     text = ""
+    for page in pdf.pages:
+        text += page.extract_text()
     return text
+# Helper: Save summary to DOCX
+def generate_docx(text):
+    doc = Document()
+    doc.add_heading("Summary", 0)
+    doc.add_paragraph(text)
+    buffer = BytesIO()
+    doc.save(buffer)
+    buffer.seek(0)
+    return buffer
+# Helper: Save summary to PDF
+def generate_pdf(text):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", size=12)
+    for line in text.split("\n"):
+        pdf.multi_cell(0, 10, line)
+    buffer = BytesIO()
+    pdf.output(buffer)
+    buffer.seek(0)
+    return buffer
+# App UI
+st.set_page_config(page_title="Smart Text Summarizer", layout="centered")
+st.title("📄 Smart Text Summarizer")
+uploaded_file = st.file_uploader("Upload a PDF or Word file", type=["pdf", "docx"])
+if uploaded_file:
+    file_type = uploaded_file.name.split(".")[-1].lower()
+    # Extract text
+    if file_type == "pdf":
+        text = extract_text_from_pdf(uploaded_file)
+    elif file_type == "docx":
+        text = docx2txt.process(uploaded_file)
     else:
+        st.error("Unsupported file format.")
+        st.stop()
+    if not text:
+        st.error("No readable text found in the file.")
+        st.stop()
+    st.subheader("🔍 Extracted Text")
+    st.text_area("Preview", text[:1000] + "...", height=200)
+    # Summarize
+    if st.button("Summarize"):
+        with st.spinner("Summarizing..."):
+            summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
+            st.success("Summary Generated ✅")
+            st.subheader("📝 Summary")
+            st.write(summary)
+            # Download options
+            docx_file = generate_docx(summary)
+            pdf_file = generate_pdf(summary)
+            st.download_button("📥 Download as DOCX", docx_file, file_name="summary.docx")
+            st.download_button("📥 Download as PDF", pdf_file, file_name="summary.pdf")