| | import gradio as gr |
| | import requests |
| | from newspaper import Article |
| | from transformers import pipeline |
| | import nltk |
| | import os |
| | import PyPDF2 |
| |
|
| | |
# Shared summarization pipeline, built once at import time and reused by
# every request handler in this module.
summarizer = pipeline(
    task="summarization",
    model="harao-ml/flant5-finetuned-summarize",
)
| |
|
| | |
def split_text(text, max_tokens=512):
    """Yield consecutive pieces of *text* holding at most *max_tokens* words.

    Despite the parameter name, the unit is whitespace-separated words (as
    produced by ``str.split``), not model tokens. Each piece is re-joined with
    single spaces, so the original whitespace is not preserved. Empty or
    whitespace-only text yields nothing.
    """
    tokens = text.split()
    offsets = range(0, len(tokens), max_tokens)
    yield from (" ".join(tokens[start:start + max_tokens]) for start in offsets)
| |
|
| | |
def clean_text(text):
    """Return *text* with whitespace collapsed and over-long words removed.

    Runs of whitespace become single spaces, leading/trailing whitespace is
    dropped, and any word of 100 characters or more is discarded.
    """
    kept = [token for token in text.split() if len(token) < 100]
    return " ".join(kept)
| |
|
| |
|
| | |
def fetch_article_details(url):
    """Download and parse the article at *url*.

    Returns a 4-tuple ``(title, author, pub_date, text)``. Missing metadata
    falls back to "Untitled" for the title and "Unknown" for the author and
    the publication date. If anything raises, the tuple is
    ``(None, None, None, "Error fetching article: <message>")``.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()

        title = page.title or "Untitled"

        if page.authors:
            author = ", ".join(page.authors)
        else:
            author = "Unknown"

        if page.publish_date:
            pub_date = page.publish_date.strftime('%B %d, %Y')
        else:
            pub_date = "Unknown"

        return title, author, pub_date, page.text
    except Exception as exc:
        return None, None, None, f"Error fetching article: {exc}"
| |
|
| | |
def generate_summary(content):
    """Summarize *content* with the module-level ``summarizer`` pipeline.

    The text is normalized with ``clean_text`` and cut into pieces with
    ``split_text``. Each piece is summarized on its own and the partial
    summaries are joined with single spaces.

    Returns the combined summary, or "No input provided." when *content* is
    ``None``, blank, or empty after cleaning.
    """
    if content is None or not content.strip():
        return "No input provided."

    chunks = [
        chunk for chunk in split_text(clean_text(content)) if chunk.strip()
    ]
    if not chunks:
        # Cleaning removed every word, so there is nothing left to summarize.
        return "No input provided."

    # Summarize each chunk exactly once and return that result; feeding the
    # whole uncleaned text to the model in a single call would bypass the
    # chunking entirely.
    partial_summaries = [
        summarizer(chunk, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]
    # Join with a space so adjacent partial summaries do not run together.
    return " ".join(partial_summaries)
| |
|
| | |
def summarize_input(mixed_input):
    """Summarize either a URL or a block of plain text.

    Input that starts with ``http://`` or ``https://`` (ignoring surrounding
    whitespace) is treated as an article URL: the article is fetched with
    ``fetch_article_details`` and its text is summarized. Anything else is
    summarized as-is.

    Returns a Markdown string: the article metadata plus summary for a URL,
    an "### Error" section when the fetch failed, or the summary followed by
    the original text for plain input.
    """
    # A pasted URL often carries stray spaces or a trailing newline.
    candidate_url = mixed_input.strip()
    if candidate_url.startswith(("http://", "https://")):
        title, author, pub_date, content = fetch_article_details(candidate_url)
        # fetch_article_details returns a None title only when it failed (a
        # successful fetch always yields at least "Untitled"). Checking the
        # title avoids misreporting an article whose text begins with "Error".
        if title is None:
            return f"### Error\n\n{content}"
        summary = generate_summary(content)
        return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**π Summary** \n\n{summary}\n\n[π Read more]({candidate_url})\n\n---"

    summary = generate_summary(mixed_input)
    return f"## π Summary \n\n{summary}\n\nπ **Original Text:**\n\n{mixed_input}\n\n---"
| | |
| | |
def summarize_file(file):
    """Summarize an uploaded ``.pdf`` or ``.txt`` file.

    *file* is the uploaded-file object handed over by the file component; only
    its ``name`` attribute (the path on disk) is used. ``None`` means nothing
    is uploaded and yields an empty string.

    Returns a Markdown string with the summary followed by the extracted
    text, or a short message when the file type is unsupported, the file
    holds no text, or processing raised an exception.
    """
    try:
        if file is None:
            return ""

        # Compare the extension case-insensitively so "REPORT.PDF" or
        # "notes.TXT" are accepted as well.
        lowered_name = file.name.lower()

        if lowered_name.endswith(".pdf"):
            with open(file.name, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may yield nothing for a page; treat that
                # page as empty.
                text = "".join(
                    page.extract_text() or "" for page in reader.pages
                )
        elif lowered_name.endswith(".txt"):
            with open(file.name, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            return "β Unsupported file type."

        if not text.strip():
            return "β No text found in file."

        summary = generate_summary(text)
        return (
            f"### π Summary\n\n"
            f"{summary}\n\n"
            f"---\n\n"
            f"π **Original Extracted Text:**\n\n{text}"
        )
    except Exception as e:
        # Report the failure in the output pane instead of raising.
        return f"β Error processing file: {str(e)}"
| |
|
| |
|
| |
|
| | |
def fetch_news():
    """Fetch top headlines from NewsAPI and summarize each article.

    The API key is read from the ``api_key`` environment variable. For every
    returned article the full text is fetched with ``extract_full_content``,
    falling back to the ``content`` and then ``description`` fields of the
    API response, and summarized with ``generate_summary``.

    Returns a Markdown string: a header followed by one section per article,
    or an error / "no articles" message when the request fails, returns a
    non-200 status, or contains no articles.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    api_key = os.environ.get("api_key")
    params = {
        'apiKey': api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10,
    }
    try:
        # Without a timeout a stalled connection would block the handler
        # indefinitely; a timeout error is reported by the except below.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles") or []
        # Check the article list itself: the summaries list always starts
        # with the header, so it can never be empty.
        if not articles:
            return "### No articles could be summarized."

        summaries = ['## π° Top Stories - Instant Insights\n\n']
        for article in articles:
            # Use `or` instead of a .get() default so a key that is present
            # with a null value also falls back, rather than showing "None".
            title = article.get("title") or "No title"
            article_url = article.get("url") or "#"
            author = article.get("author") or "Unknown"
            pub_date = article.get("publishedAt") or "Unknown"
            content = (
                extract_full_content(article_url)
                or article.get("content")
                or article.get("description")
                or ""
            )
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**π Summary:** {summary}\n\n [π Read more]({article_url})\n\n---")

        return "\n\n".join(summaries)
    except Exception as e:
        return f"### Error fetching news\n\n{str(e)}"
| |
|
| | |
def extract_full_content(url):
    """Return the parsed article text for *url*, or ``None`` on any failure.

    This is a best-effort helper: every exception raised while downloading
    or parsing is swallowed so the caller can fall back to other content.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        body = page.text
    except Exception:
        body = None
    return body
| |
|
| | |
# Page layout and event wiring for the Gradio app.
# NOTE(review): several labels contain glyphs such as "π°", "β‘" and "β" that
# look like mis-decoded emoji or dashes; they are kept verbatim here. Confirm
# the intended characters.
# NOTE(review): the nesting below is inferred from the order of the
# statements; confirm which column the Clear button and file upload belong to.
with gr.Blocks(theme=gr.themes.Default(font="Arial", font_mono="Courier New")) as demo:
    # Header: title, subtitle and a short description.
    gr.Markdown("# π° Sum Up! Stay Informed, Instantly")
    gr.Markdown("### FLAN-T5-Driven Summarizer for Multi-Format Content")
    gr.Markdown("Sum Up! effectively distills lengthy content into clear, concise summaries with just a text input, file upload, or URL. Stay informed with instant access to auto-summarized top news headlinesβall in just one click.")

    gr.Markdown("---")
    with gr.Row():
        # Left column: one-click news digest.
        with gr.Column(scale=1, min_width=300):
            with gr.Accordion("π’ News at a Glance", open=False):
                gr.Markdown("**Source: Associated Press**")
                gr.Markdown(
                    "Click to get today's top news from the Associated Press, simplified and ready to read.")
                news_btn = gr.Button("β‘ News Now", variant="primary", elem_id="news-now-btn")

        # Right column: free text / URL input and file upload.
        with gr.Column(scale=2, min_width=400):
            gr.Markdown("### Provide content to summarize")
            gr.Markdown("#### Enter Text or URL")
            input_box = gr.Textbox(
                label="Enter URL or Text",
                placeholder="Paste a URL or text here...",
                lines=5,
            )
            summarize_btn = gr.Button("π Summarize", variant="primary", elem_id="summarize-btn")

            clear_btn = gr.Button("Clear", variant="secondary", elem_id="clear-btn")

            gr.Markdown("#### Upload a File")
            file_input = gr.File(
                label="Upload a .pdf or .txt file", file_types=[".pdf", ".txt"]
            )
            gr.Markdown("**Note:** Only PDF and TXT files are supported.")

    # Output area shared by all three actions.
    gr.Markdown("---")
    gr.Markdown("### π‘ Key Takeaways")
    with gr.Row():
        with gr.Column(scale=1):
            gen_output = gr.Markdown()

    # Event wiring: every action writes its Markdown result to gen_output.
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=gen_output)
    file_input.change(fn=summarize_file, inputs=file_input, outputs=gen_output)
    news_btn.click(fn=fetch_news, inputs=[], outputs=gen_output)

    # Clear resets the text box, the uploaded file and the output pane.
    clear_btn.click(
        fn=lambda: ("", None, ""),
        inputs=[],
        outputs=[input_box, file_input, gen_output],
    )

    # gen_output is intentionally not re-created after the wiring above: a
    # second gr.Markdown would be a separate component that no handler
    # targets, and rebinding the name would hide the one that is wired.
| |
|
| | |
# Styling for the three action buttons, matched through the elem_id values
# "summarize-btn", "news-now-btn" and "clear-btn".
css = """
#summarize-btn {
    background-color: #4CAF50 !important; /* Green for Summarize */
    color: white !important;
    font-size: 16px !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    margin-top: 20px !important;
    width: 100%;
}

#news-now-btn {
    background-color: #0078D7 !important; /* Blue for News Now */
    color: white !important;
    font-size: 16px !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    margin-top: 20px !important;
    width: 100%;
}

#clear-btn {
    background-color: #d6d8db !important; /* Lighter Gray for Clear */
    color: black !important;
    font-size: 16px !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    margin-top: 20px !important;
    width: 100%;
}
"""
| |
|
| | |
# Attach the button styling to the already-built Blocks app.
demo.css = css

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()