Spaces:

ar08
/

news-summary-backend

Paused

Update app.py

d712281 verified 12 months ago

1.45 kB

	from transformers import pipeline, AutoTokenizer
	import gradio as gr

	# Single source of truth for the checkpoint, so the tokenizer and the
	# pipeline cannot drift apart.
	MODEL_NAME = "sshleifer/distilbart-cnn-6-6"

	# Load tokenizer for truncation
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

	# Load summarization pipeline, reusing the tokenizer loaded above instead
	# of having the pipeline load its own copy.
	summarizer = pipeline(
	    "summarization",
	    model=MODEL_NAME,
	    tokenizer=tokenizer,
	)

	# Truncation helper 🫶
	def safe_truncate(text, max_tokens=1024):
	    """Cut ``text`` down to at most ``max_tokens`` tokenizer tokens.

	    The text is encoded with the module-level ``tokenizer`` with truncation
	    enabled, and the resulting ids are decoded back into a plain string
	    with special tokens skipped.

	    Args:
	        text: The text to shorten.
	        max_tokens: Value passed to the tokenizer as ``max_length``.

	    Returns:
	        The decoded (possibly truncated) text.
	    """
	    token_ids = tokenizer.encode(
	        text,
	        max_length=max_tokens,
	        truncation=True,
	    )
	    shortened = tokenizer.decode(token_ids, skip_special_tokens=True)
	    return shortened

	# Summary function with truncation applied
	def summarize_article(text):
	    """Return a summary of ``text``.

	    The article is first shortened with ``safe_truncate`` and then passed to
	    the module-level ``summarizer`` pipeline with ``do_sample=False``,
	    ``min_length=100`` and ``max_length=250``.

	    Args:
	        text: The article to summarize. ``None``, an empty string, or a
	            whitespace-only string yields an empty summary without running
	            the model.

	    Returns:
	        The ``summary_text`` of the first pipeline result, or ``""`` when
	        there is nothing to summarize.
	    """
	    # Nothing to summarize: skip the model call rather than asking it to
	    # generate a summary from an empty article.
	    if not text or not text.strip():
	        return ""
	    short_text = safe_truncate(text)  # prevent model from breaking!
	    summary = summarizer(
	        short_text,
	        max_length=250,
	        min_length=100,
	        do_sample=False,
	        # Second guard: also let the pipeline truncate, in case the decoded
	        # text re-tokenizes to more tokens than safe_truncate allowed.
	        truncation=True,
	    )
	    return summary[0]["summary_text"]

	# Default example: the article shown in the input box. [shortened for demo]
	default_article = (
	    """New York (CNN)When Liana Barrientos was 23 years old, she got married..."""
	)
	# Summarize the default article once at start-up; the result is used as the
	# initial value of the output box.
	default_summary = summarize_article(default_article)

	# Gradio Interface: both text boxes are created with interactive=False and
	# are pre-filled with the default article and its summary.
	iface = gr.Interface(
	    fn=summarize_article,
	    inputs=gr.Textbox(
	        label="Article (Read Only)",
	        value=default_article,
	        lines=20,
	        interactive=False,
	    ),
	    outputs=gr.Textbox(
	        label="Summary (Read Only)",
	        value=default_summary,
	        interactive=False,
	    ),
	    title="⚡ Fast Article Summarizer (CPU Optimized)",
	    description="Fast summarization with longer output using CPU only. Inputs and outputs are read-only.",
	)

	# Launch the interface.
	iface.launch()