Text-summarizer / app.py
aspendse's picture
Upload app.py
76e4ee3 verified
raw
history blame
2.03 kB
import os
import tempfile

import gradio as gr
from docx import Document
from fpdf import FPDF
from PyPDF2 import PdfReader
from transformers import pipeline
# Load the summarization pipeline once, at import time, so every call to
# summarize_file reuses the same pipeline object instead of rebuilding it.
# NOTE(review): no model name is passed, so transformers presumably falls back
# to its default checkpoint for the "summarization" task -- pin a model here
# if reproducible output matters.
summarizer = pipeline("summarization")
# Function to read text from different file types
def extract_text(file):
    """Return the text content of an uploaded PDF, Word, or plain-text file.

    Args:
        file: The upload to read. Either an object that exposes its on-disk
            location as ``.name`` (the form this app receives from its
            Gradio ``File`` input) or a plain path string / ``os.PathLike``.

    Returns:
        The extracted text as a single string:

        * ``.pdf``  -- the text of every page, pages separated by a newline.
        * ``.docx`` -- every paragraph followed by a newline.
        * anything else -- the raw file bytes decoded as UTF-8; undecodable
          bytes become U+FFFD instead of raising.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Always work from the path on disk. The uploaded object is not
    # guaranteed to be an open, readable handle (it may be a bare path-like
    # value), so calling ``file.read()`` on it is not reliable.
    path = os.fspath(getattr(file, "name", file))

    # Compare extensions case-insensitively so "REPORT.PDF" is still a PDF.
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        reader = PdfReader(path)
        # A page without a text layer may yield nothing; treat it as empty.
        # Joining with a newline keeps the last word of one page from being
        # glued to the first word of the next.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if lowered.endswith(".docx"):
        doc = Document(path)
        return "".join(f"{para.text}\n" for para in doc.paragraphs)

    # Plain text: read bytes and decode once, tolerating stray non-UTF-8
    # bytes rather than failing the whole request.
    with open(path, "rb") as handle:
        return handle.read().decode("utf-8", errors="replace")
# Function to summarize and return as PDF or Word
def summarize_file(file, output_format):
    """Summarize an uploaded document and write the summary to a new file.

    Args:
        file: The uploaded file, in whatever form ``extract_text`` accepts,
            or ``None`` when nothing was uploaded.
        output_format: ``"PDF"`` to produce a PDF; any other value produces
            a Word (``.docx``) document.

    Returns:
        A ``(path, status)`` tuple. On success ``path`` is the generated
        file and ``status`` is ``"Success"``. When there is nothing to
        summarize, ``path`` is ``None`` and ``status`` explains why.
    """
    # Submitting the form without an upload passes None; report it instead
    # of crashing inside extract_text.
    if file is None:
        return None, "No file uploaded."

    text = extract_text(file)
    if not text.strip():
        return None, "File is empty or could not be read."

    # truncation=True clips input that exceeds the model's maximum input
    # length, so long documents are summarized from their leading portion
    # instead of making the model call fail.
    summarized = summarizer(
        text,
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    # Each request writes into its own fresh directory so simultaneous
    # users cannot overwrite one another's output; the file name itself
    # stays "summary_output.<ext>".
    output_path = os.path.join(tempfile.mkdtemp(prefix="summary_"), "summary_output")

    if output_format == "PDF":
        pdf = FPDF()
        pdf.add_page()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.set_font("Arial", size=12)
        for line in summarized.split("\n"):
            # The built-in Arial font only covers Latin-1; swap anything
            # outside it for "?" so writing the PDF cannot fail on, e.g.,
            # typographic quotes in the summary.
            printable = line.encode("latin-1", "replace").decode("latin-1")
            pdf.multi_cell(0, 10, printable)
        output_file = f"{output_path}.pdf"
        pdf.output(output_file)
    else:
        doc = Document()
        doc.add_heading("Summary", 0)
        doc.add_paragraph(summarized)
        output_file = f"{output_path}.docx"
        doc.save(output_file)

    return output_file, "Success"
# Gradio interface: one file upload plus an output-format selector go in;
# the generated summary file and a status message come out.
iface = gr.Interface(
    title="Smart Text Summarizer",
    description="Upload a .txt, .docx, or .pdf file and get the summary back as a Word or PDF file.",
    fn=summarize_file,
    # Inputs are passed to summarize_file positionally: (file, output_format).
    inputs=[
        gr.File(label="Upload a text, Word, or PDF file"),
        gr.Radio(choices=["PDF", "Word"], label="Choose output format"),
    ],
    # Outputs mirror summarize_file's (path, status) return tuple.
    outputs=[
        gr.File(label="Download Summary"),
        gr.Textbox(label="Status"),
    ],
)

# Start the web server for the app.
iface.launch()