# Text_Summarizer / app.py
# Sneha-Kaurav's picture
# Update app.py
# 68b7fb5 verified
import torch
import gradio as gr
import textwrap
from transformers import pipeline
import nltk
from newspaper import Article
from url_input import fetch_text_from_url
# Download NLTK tokenizer data at startup. Presumably this is what
# sent_tokenize (imported below) relies on; which of the two resources is
# actually needed depends on the installed NLTK version -- TODO confirm.
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize
## Summarizer pipeline
# Built once at module level and shared by every request; the model weights
# are requested in bfloat16.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
## Callback function passed to the Gradio interface
def summarizer_text(text, summary_type):
    """Summarize raw text or the content of an article URL.

    Args:
        text: Text to summarize, or a single http(s) URL whose content is
            fetched with ``fetch_text_from_url`` and summarized instead.
        summary_type: ``"Bullet Points"`` returns one bullet per sentence of
            the summary; any other value (including ``None``) returns the
            summary as a single paragraph.

    Returns:
        The summary string, or a short user-facing message when the input is
        empty or the URL content could not be fetched.
    """
    max_chunk_length = 1000  # characters per chunk handed to the model

    # Guard against None / blank input before touching the model.
    text = (text or "").strip()
    if not text:
        return "Please enter some text or a URL to summarize."

    # Treat the input as a URL only when it is a single http(s):// token, so
    # ordinary prose that merely starts with "http" is summarized as text.
    is_url = (
        text.lower().startswith(("http://", "https://"))
        and len(text.split()) == 1
    )
    if is_url:
        fetched = fetch_text_from_url(text)
        if not fetched or not fetched.strip():
            return "Could not fetch or process the URL content."
        text = fetched

    # Summarize each chunk independently, then stitch the pieces together.
    chunks = textwrap.wrap(text, max_chunk_length)
    summaries = [
        summarizer(
            chunk,
            max_length=300,
            min_length=50,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
        for chunk in chunks
    ]
    combined_summary = " ".join(summaries)

    # Only re-summarize when the input was long enough to need several chunks.
    # truncation=True mirrors the per-chunk call so that a long combined
    # summary is cut to the model's input limit instead of failing.
    if len(summaries) > 1:
        combined_summary = summarizer(
            combined_summary,
            max_length=400,
            min_length=50,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']

    if summary_type == "Bullet Points":
        bullet_lines = sent_tokenize(combined_summary)
        # One bullet per sentence, without a leading blank line.
        return "\n".join("• " + line for line in bullet_lines)
    return combined_summary
# UI using gradio: a text box for the text/URL and a radio for the output
# format, wired to summarizer_text; the result is shown in a text box.
sum_text = gr.Interface(
    fn=summarizer_text,
    inputs=[
        gr.Textbox(lines=12, label="Input Text", placeholder="Paste your text or URL(full address) here..."),
        # Preselect "Narrative": summarizer_text returns the narrative form
        # for any value other than "Bullet Points", so an unselected radio
        # would produce it anyway; make that visible in the UI.
        gr.Radio(["Narrative", "Bullet Points"], value="Narrative", label="Summary Type"),
    ],
    outputs=gr.Textbox(lines=10, label="Summarized Output"),
    title="Text Summarizer",
    description="Paste a long text or an article URL. Choose summary type: narrative or bullet points.",
)
# Start the web server when the module is executed.
sum_text.launch()