Spaces:
Sleeping
Sleeping
| import torch | |
| import gradio as gr | |
| import textwrap | |
| from transformers import pipeline | |
| import nltk | |
| from newspaper import Article | |
| from url_input import fetch_text_from_url | |
# Fetch the NLTK tokenizer resources at startup, in this order, before the
# sentence tokenizer is imported for use in bullet-point formatting.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

from nltk.tokenize import sent_tokenize
# Shared summarization pipeline, built once at import time from the
# "sshleifer/distilbart-cnn-12-6" checkpoint with bfloat16 weights.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
# Gradio callback: summarizes pasted text or the content behind a URL.
def summarizer_text(text, summary_type):
    """Summarize raw text, or the content fetched from a URL.

    Long inputs are split into chunks of at most 1000 characters, each chunk
    is summarized on its own, and when there is more than one chunk the
    joined partial summaries are summarized once more.

    Args:
        text: The text to summarize, or a single http(s) URL whose content is
            retrieved with ``fetch_text_from_url``.
        summary_type: ``"Bullet Points"`` returns one bullet per sentence of
            the summary; any other value (including ``None``) returns the
            summary as plain prose.

    Returns:
        The summary string, or a short user-facing message when the input is
        empty or the URL content could not be fetched.
    """
    max_chunk_length = 1000  # characters per chunk handed to the model

    # Guard first: empty input would otherwise produce "" (or a lone bullet),
    # and None would raise on .strip().
    source = (text or "").strip()
    if not source:
        return "Please enter some text or a URL to summarize."

    # Only treat the input as a URL when it is a single http(s) token, so that
    # ordinary prose starting with "http..." is summarized rather than fetched.
    is_url = (
        source.lower().startswith(("http://", "https://"))
        and len(source.split()) == 1
    )
    if is_url:
        fetched = fetch_text_from_url(source)
        if not fetched or not fetched.strip():
            return "Could not fetch or process the URL content."
        source = fetched

    chunks = textwrap.wrap(source, max_chunk_length)
    summaries = [
        summarizer(
            chunk,
            max_length=300,
            min_length=50,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    combined_summary = " ".join(summaries)

    # Only re-summarize if the input was long enough to need several chunks.
    if len(summaries) > 1:
        # truncation=True: the joined partial summaries of a long document may
        # be longer than the model accepts; truncating keeps the call from
        # failing at the cost of dropping the tail of very long inputs.
        combined_summary = summarizer(
            combined_summary,
            max_length=400,
            min_length=50,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]

    if summary_type == "Bullet Points":
        bullet_lines = sent_tokenize(combined_summary)
        return "\n• " + "\n• ".join(bullet_lines)
    return combined_summary
# Gradio UI: a text/URL box and a summary-style selector wired to
# summarizer_text, with the result shown in a read-out textbox.
sum_text = gr.Interface(
    fn=summarizer_text,
    inputs=[
        gr.Textbox(
            lines=12,
            label="Input Text",
            placeholder="Paste your text or URL(full address) here...",
        ),
        # Preselect "Narrative" so the control shows the style the callback
        # already falls back to when nothing is chosen.
        gr.Radio(
            ["Narrative", "Bullet Points"],
            label="Summary Type",
            value="Narrative",
        ),
    ],
    outputs=gr.Textbox(lines=10, label="Summarized Output"),
    title="Text Summarizer",
    description="Paste a long text or an article URL. Choose summary type: narrative or bullet points.",
)

sum_text.launch()