Spaces:
Paused
Paused
| from transformers import pipeline, AutoTokenizer | |
| import gradio as gr | |
# Single source of truth for the checkpoint, so the tokenizer used for
# truncation and the model used for generation cannot drift apart.
MODEL_NAME = "sshleifer/distilbart-cnn-6-6"

# Load tokenizer for truncation
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load summarization pipeline, reusing the tokenizer loaded above so the
# pipeline does not load a second copy of it.
summarizer = pipeline(
    "summarization",
    model=MODEL_NAME,
    tokenizer=tokenizer,
)
# Truncation helper 🫶
def safe_truncate(text, max_tokens=1024):
    """Clip ``text`` to at most ``max_tokens`` tokenizer tokens.

    The text is encoded with the module-level ``tokenizer`` with truncation
    enabled, then decoded back to a string with special tokens removed.

    Args:
        text: The raw text to clip.
        max_tokens: Value passed to the tokenizer as ``max_length``.

    Returns:
        The decoded, possibly shortened, text.
    """
    clipped_ids = tokenizer.encode(
        text,
        max_length=max_tokens,
        truncation=True,
    )
    clipped_text = tokenizer.decode(clipped_ids, skip_special_tokens=True)
    return clipped_text
# Summary function with truncation applied
def summarize_article(text):
    """Return a generated summary of ``text``.

    The article is first clipped with ``safe_truncate`` and then passed to the
    module-level ``summarizer`` pipeline with deterministic decoding
    (``do_sample=False``), ``min_length=100`` and ``max_length=250``.

    Args:
        text: Article text to summarize.

    Returns:
        The summary string produced by the pipeline, or ``""`` when ``text``
        is ``None``, empty, or contains only whitespace.
    """
    # With no source content, min_length=100 would still force the model to
    # emit at least 100 tokens, so skip the model entirely for blank input.
    if not text or not text.strip():
        return ""

    short_text = safe_truncate(text)  # prevent model from breaking!
    summary = summarizer(
        short_text,
        max_length=250,
        min_length=100,
        do_sample=False,
        # Safety net: decoding and re-tokenizing the clipped text is not
        # guaranteed to give the same token count, so let the pipeline
        # truncate as well.
        truncation=True,
    )
    return summary[0]["summary_text"]
# Default example
default_article = """New York (CNN)When Liana Barrientos was 23 years old, she got married..."""  # [shortened for demo]

# Summarize the default article once at start-up; the result pre-fills the
# output box below.
default_summary = summarize_article(default_article)

# Gradio Interface
# Both text boxes are created non-interactive and pre-filled with the default
# article and its summary.
_article_box = gr.Textbox(
    lines=20,
    label="Article (Read Only)",
    value=default_article,
    interactive=False,
)
_summary_box = gr.Textbox(
    label="Summary (Read Only)",
    value=default_summary,
    interactive=False,
)

iface = gr.Interface(
    fn=summarize_article,
    inputs=_article_box,
    outputs=_summary_box,
    title="⚡ Fast Article Summarizer (CPU Optimized)",
    description="Fast summarization with longer output using CPU only. Inputs and outputs are read-only.",
)

iface.launch()