Spaces:

profplate
/

youtube-comments

Paused

App Files Files Community

youtube-comments / app.py

profplate

Create app.py

12496be verified about 1 month ago

raw

history blame contribute delete

9.35 kB

	"""
	Text Sentiment Analyzer
	-----------------------
	A Gradio Space that analyzes the sentiment of any block of text
	(book review, student essay, social media post, etc.) and surfaces
	the five most emotionally charged sentences.

	Designed for a free CPU Hugging Face Space.
	"""

	import re
	import logging
	from collections import Counter

	import gradio as gr
	import pandas as pd
	import matplotlib.pyplot as plt
	from transformers import pipeline

	# --- Logging: timestamped, level-tagged records at INFO and above ---
	logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)

	# --- Sentiment model: constructed once at import time, shared by all requests ---
	# The checkpoint is DistilBERT fine-tuned on SST-2: small (~250MB) and quick
	# on CPU. Each prediction is a POSITIVE / NEGATIVE label plus a confidence
	# score, which the rest of this file treats as "emotional intensity".
	# truncation=True is passed so over-long inputs are truncated by the pipeline.
	MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
	logging.info(f"Loading sentiment model: {MODEL_NAME}")
	sentiment_pipe = pipeline("sentiment-analysis", model=MODEL_NAME, truncation=True)
	logging.info("Model loaded.")


	# ---------------------------------------------------------------------------
	# Core helpers
	# ---------------------------------------------------------------------------

	def split_sentences(text: str):
	    """Split *text* into sentences using a dependency-free regex.

	    A boundary is any run of whitespace that directly follows ``.``, ``!``
	    or ``?``. The punctuation stays attached to the sentence it ends.

	    Returns a list of non-empty, whitespace-trimmed sentence strings; an
	    empty or whitespace-only input yields ``[]``.
	    """
	    stripped = text.strip()
	    if stripped == "":
	        return []

	    sentences = []
	    # The look-behind keeps the terminator on the left-hand piece.
	    for piece in re.split(r"(?<=[.!?])\s+", stripped):
	        piece = piece.strip()
	        if piece:
	            sentences.append(piece)
	    return sentences


	def analyze_sentences(sentences):
	    """Score every sentence with the module-level sentiment pipeline.

	    Returns one dict per input sentence, in input order, with the keys:

	    * ``sentence``     - the original sentence text
	    * ``label``        - the pipeline's label, upper-cased
	    * ``confidence``   - the pipeline's score as a ``float``
	    * ``signed_score`` - ``+confidence`` when the label is ``"POSITIVE"``,
	      ``-confidence`` for any other label

	    A falsy *sentences* (empty list, ``None``) returns ``[]`` without
	    touching the model.
	    """
	    if not sentences:
	        return []

	    def _to_record(sentence, prediction):
	        # Fold one pipeline prediction into the record shape used downstream.
	        polarity = prediction["label"].upper()
	        confidence = float(prediction["score"])
	        return {
	            "sentence": sentence,
	            "label": polarity,
	            "confidence": confidence,
	            "signed_score": confidence if polarity == "POSITIVE" else -confidence,
	        }

	    # The whole list goes to the pipeline in a single call.
	    predictions = sentiment_pipe(sentences)
	    return [_to_record(s, p) for s, p in zip(sentences, predictions)]


	def overall_summary(sentence_results):
	    """Build a plain-language summary of the document's overall sentiment.

	    *sentence_results* is a sequence of dicts that each carry a ``"label"``
	    (``"POSITIVE"`` / ``"NEGATIVE"``) and a numeric ``"signed_score"``.

	    Returns a four-line string: the verdict, the number of sentences, the
	    positive/negative counts, and the mean signed score. The verdict is
	    POSITIVE when the mean is above +0.25, NEGATIVE when it is below -0.25,
	    and MIXED / NEUTRAL otherwise. An empty input returns
	    ``"No text to analyze."``.
	    """
	    if not sentence_results:
	        return "No text to analyze."

	    counts = Counter(r["label"] for r in sentence_results)
	    total = len(sentence_results)
	    # A Counter reports 0 for labels that never occurred.
	    pos = counts["POSITIVE"]
	    neg = counts["NEGATIVE"]

	    avg_signed = sum(r["signed_score"] for r in sentence_results) / total
	    if avg_signed > 0.25:
	        verdict = "Overall tone: POSITIVE"
	    elif avg_signed < -0.25:
	        verdict = "Overall tone: NEGATIVE"
	    else:
	        verdict = "Overall tone: MIXED / NEUTRAL"

	    # The separator is a plain "|": the previous "\|" was an invalid escape
	    # sequence that put a literal backslash into the user-facing text.
	    return (
	        f"{verdict}\n"
	        f"Sentences analyzed: {total}\n"
	        f"Positive: {pos} | Negative: {neg}\n"
	        f"Average signed sentiment: {avg_signed:+.2f} (range -1.0 to +1.0)"
	    )


	def plot_pie_chart(sentence_results):
	    """Draw a pie chart of positive vs. negative sentence counts.

	    *sentence_results* is a sequence of dicts with a ``"label"`` key. Only
	    the ``"POSITIVE"`` and ``"NEGATIVE"`` labels are counted; a label with a
	    zero count gets no wedge. When both counts are zero the figure shows a
	    centered "No data" message instead of a pie.

	    Returns the matplotlib ``Figure``. The figure is removed from pyplot's
	    global registry before returning: pyplot otherwise keeps every figure it
	    creates alive, so a long-running app would accumulate one per call. The
	    returned ``Figure`` object itself remains valid for the caller to render.
	    """
	    counts = Counter(r["label"] for r in sentence_results)
	    pos = counts.get("POSITIVE", 0)
	    neg = counts.get("NEGATIVE", 0)

	    fig, ax = plt.subplots(figsize=(4, 4))
	    try:
	        if pos == 0 and neg == 0:
	            ax.text(0.5, 0.5, "No data", ha="center", va="center")
	            ax.axis("off")
	            return fig

	        # (label, count, color) per wedge; empty categories are dropped so
	        # the pie never receives a zero-sized slice.
	        wedges = [
	            ("Positive", pos, "#4CAF50"),
	            ("Negative", neg, "#E53935"),
	        ]
	        wedges = [w for w in wedges if w[1]]

	        ax.pie(
	            [count for _, count, _ in wedges],
	            labels=[label for label, _, _ in wedges],
	            colors=[color for _, _, color in wedges],
	            autopct="%1.1f%%",
	            startangle=90,
	            wedgeprops={"edgecolor": "white", "linewidth": 2},
	        )
	        ax.set_title("Sentence-Level Sentiment Distribution")
	        return fig
	    finally:
	        # Runs on every exit path (including errors) so no figure is left
	        # registered with pyplot.
	        plt.close(fig)


	def top_charged_sentences(sentence_results, k: int = 5):
	    """Return the *k* sentences with the highest model confidence as a table.

	    *sentence_results* is a sequence of dicts with ``"sentence"``,
	    ``"label"`` and ``"confidence"`` keys. Rows are ordered by descending
	    confidence; ties keep their original relative order because the sort is
	    stable.

	    Returns a ``pandas.DataFrame`` with the columns ``Rank`` (1-based),
	    ``Polarity`` (emoji-tagged label), ``Confidence`` (formatted to three
	    decimals, as a string) and ``Sentence``. The column set is fixed
	    explicitly so an empty input still yields a frame with these headers
	    rather than a frame with no columns at all.
	    """
	    columns = ["Rank", "Polarity", "Confidence", "Sentence"]

	    ranked = sorted(
	        sentence_results,
	        key=lambda r: r["confidence"],
	        reverse=True,
	    )[:k]

	    rows = [
	        {
	            "Rank": rank,
	            # Any label other than "POSITIVE" is shown as negative.
	            "Polarity": "🟢 POSITIVE" if r["label"] == "POSITIVE" else "🔴 NEGATIVE",
	            "Confidence": f"{r['confidence']:.3f}",
	            "Sentence": r["sentence"],
	        }
	        for rank, r in enumerate(ranked, start=1)
	    ]
	    return pd.DataFrame(rows, columns=columns)


	def render_highlighted(sentence_results, k: int = 5):
	    """Return HTML where the top-*k* charged sentences are color-highlighted.

	    *sentence_results* is a sequence of dicts with ``"sentence"``,
	    ``"label"`` and ``"confidence"`` keys. Every sentence is emitted in its
	    original order inside one ``<div>``; the *k* sentences with the highest
	    confidence get a green (``"POSITIVE"``) or red (any other label)
	    background, the rest are wrapped in a plain ``<span>``.

	    Sentence text comes from the user, so ``&``, ``<`` and ``>`` are escaped
	    before the text is placed into the markup. Quotes are left as-is because
	    the text is only ever inserted as element content, never inside an
	    attribute.

	    Returns the HTML string; an empty input returns a short placeholder
	    paragraph.
	    """
	    # Imported here so this function does not rely on the file's import block.
	    from html import escape

	    if not sentence_results:
	        return "<p><em>No text to display.</em></p>"

	    # Positions of the k highest-confidence sentences (stable on ties).
	    ranked = sorted(
	        enumerate(sentence_results),
	        key=lambda pair: pair[1]["confidence"],
	        reverse=True,
	    )
	    top_indices = {idx for idx, _ in ranked[:k]}

	    parts = ["<div style='line-height:1.7; font-size:1rem;'>"]
	    for idx, r in enumerate(sentence_results):
	        # The earlier hand-rolled chain replaced each character with itself
	        # and so escaped nothing; escape() does the real substitution.
	        text = escape(r["sentence"], quote=False)
	        if idx in top_indices:
	            is_positive = r["label"] == "POSITIVE"
	            color = "#C8E6C9" if is_positive else "#FFCDD2"
	            border = "#2E7D32" if is_positive else "#B71C1C"
	            parts.append(
	                f"<span style='background:{color}; "
	                f"border-bottom:2px solid {border}; padding:2px 4px; "
	                f"border-radius:3px; margin-right:2px;'>{text}</span> "
	            )
	        else:
	            parts.append(f"<span>{text}</span> ")
	    parts.append("</div>")
	    return "".join(parts)


	# ---------------------------------------------------------------------------
	# Gradio entry point
	# ---------------------------------------------------------------------------

	def analyze_text(text: str):
	    """Gradio callback: run the full analysis on *text*.

	    Always returns a 4-tuple ``(summary, chart, table, highlighted_html)``.
	    When the input is empty, no sentences are found, or anything raises, the
	    first element carries a human-readable message, the chart and table are
	    ``None`` and the HTML is an empty string. Unexpected exceptions are
	    logged with their traceback rather than propagated.
	    """
	    try:
	        if not (text and text.strip()):
	            return "Please paste some text to analyze.", None, None, ""

	        sentence_list = split_sentences(text)
	        if not sentence_list:
	            return "No sentences detected.", None, None, ""

	        scored = analyze_sentences(sentence_list)
	        # Tuple elements are evaluated left to right: summary, chart, table,
	        # then the highlighted HTML.
	        return (
	            overall_summary(scored),
	            plot_pie_chart(scored),
	            top_charged_sentences(scored, k=5),
	            render_highlighted(scored, k=5),
	        )

	    except Exception as exc:
	        logging.exception(f"Unexpected error: {exc}")
	        return f"Unexpected error: {exc}", None, None, ""


	# Sample inputs offered in the UI. Each passage is wrapped in its own
	# one-element list, giving a list of rows with a single value per row.
	EXAMPLE_TEXTS = [
	    [passage]
	    for passage in (
	        # Book review: mostly glowing, with a couple of reservations.
	        "I picked up this novel expecting another forgettable thriller, "
	        "but I was completely wrong. The prose is luminous and the "
	        "characters feel painfully real. By the final chapter I was in "
	        "tears. There are a few slow stretches in the middle, and one "
	        "subplot never quite pays off, but those are minor complaints. "
	        "This is easily the best book I have read all year.",
	        # Essay feedback: praise followed by criticism.
	        "The student demonstrates a solid grasp of the source material "
	        "and writes with genuine enthusiasm. However, the argument loses "
	        "focus in the third section, and several claims go unsupported. "
	        "The conclusion is rushed and underwhelming. With more careful "
	        "revision, this could become a strong essay.",
	        # Product complaint: strongly negative with one bright spot.
	        "Honestly, the new update is a disaster. Everything that used to "
	        "work is now broken, the interface is hideous, and customer "
	        "support has been useless. I cannot believe they shipped this. "
	        "On the bright side, the dark mode looks nice.",
	    )
	]


	# Gradio UI definition. Components are created inside the `gr.Blocks`
	# context, so the order and nesting of the statements below is the layout.
	# NOTE(review): the source this was recovered from had lost its indentation;
	# the nesting below (two columns in one row, then full-width table and
	# highlighted text) is the most natural reading -- confirm against the
	# deployed layout.
	with gr.Blocks(title="Text Sentiment Analyzer") as demo:
	    # Page title and one-paragraph description.
	    gr.HTML(
	        "<h1 style='text-align:center;'>📝 Text Sentiment Analyzer</h1>"
	        "<p style='text-align:center;'>Paste any block of text — a book "
	        "review, a student essay, a social media post — and get an overall "
	        "sentiment read plus the five most emotionally charged sentences.</p>"
	    )

	    with gr.Row():
	        # Left column: input box, submit button, and clickable examples.
	        with gr.Column():
	            text_in = gr.Textbox(
	                label="Paste your text here",
	                lines=12,
	                placeholder="Paste a review, essay, post, or any prose…",
	            )
	            submit_btn = gr.Button("Analyze", variant="primary")
	            # Each example fills `text_in` with one of the EXAMPLE_TEXTS rows.
	            gr.Examples(
	                examples=EXAMPLE_TEXTS,
	                inputs=text_in,
	                label="Try an example",
	            )

	        # Right column: textual summary and the pie chart.
	        with gr.Column():
	            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
	            chart_out = gr.Plot(label="Sentiment Distribution")

	    # Table of the highest-confidence sentences.
	    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
	    table_out = gr.Dataframe(
	        label="Top Charged Sentences",
	        wrap=True,
	    )

	    # Full text with the top sentences highlighted.
	    gr.HTML("<h3>🖍 Highlighted Text</h3>")
	    highlighted_out = gr.HTML()

	    # `analyze_text` returns a 4-tuple; its elements map positionally onto
	    # the four output components listed here.
	    submit_btn.click(
	        analyze_text,
	        inputs=[text_in],
	        outputs=[summary_out, chart_out, table_out, highlighted_out],
	    )


	# Launch the Gradio app only when this file is run as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    demo.launch()