# profplate's picture
# Create app.py
# 12496be verified
"""
Text Sentiment Analyzer
-----------------------
A Gradio Space that analyzes the sentiment of any block of text
(book review, student essay, social media post, etc.) and surfaces
the five most emotionally charged sentences.
Designed for a free CPU Hugging Face Space.
"""
import html
import logging
import re
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.figure import Figure
from transformers import pipeline
# === Setup Logging ===
# Configure the root logger once, at import time: INFO level and a
# "timestamp - level - message" line format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# === Load model once at startup ===
# DistilBERT SST-2 is small (~250MB), fast on CPU, and gives a clean
# POSITIVE / NEGATIVE label with a confidence score we can use as an
# "emotional intensity" signal.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
logging.info(f"Loading sentiment model: {MODEL_NAME}")
# Built at module level so every request reuses the same pipeline object.
# truncation=True is forwarded to the pipeline (presumably so over-long
# sentences are cut to the model's maximum length -- confirm against the
# transformers pipeline documentation).
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    truncation=True,
)
logging.info("Model loaded.")
# ---------------------------------------------------------------------------
# Core helpers
# ---------------------------------------------------------------------------
def split_sentences(text: str):
    """Break *text* into sentences using a dependency-free regex heuristic.

    A boundary is any run of whitespace that directly follows ``.``, ``!``
    or ``?``; the punctuation stays attached to the sentence it ends.

    Args:
        text: Raw input text.

    Returns:
        A list of non-empty, whitespace-trimmed sentence strings. Blank or
        whitespace-only input yields an empty list.
    """
    stripped = text.strip()
    if stripped == "":
        return []

    sentences = []
    # The look-behind keeps the terminating punctuation on the left-hand piece.
    for chunk in re.split(r"(?<=[.!?])\s+", stripped):
        chunk = chunk.strip()
        if chunk:
            sentences.append(chunk)
    return sentences
def analyze_sentences(sentences):
    """Score every sentence with the module-level sentiment pipeline.

    Args:
        sentences: Sequence of sentence strings.

    Returns:
        One dict per sentence, in input order, with the keys ``sentence``,
        ``label`` (upper-cased model label), ``confidence`` (model score as
        a float) and ``signed_score`` (the confidence, negated unless the
        label is ``"POSITIVE"``). Empty input yields an empty list without
        calling the model.
    """
    if not sentences:
        return []

    def _to_record(sentence, prediction):
        # Fold one pipeline prediction into the record shape used downstream.
        polarity = prediction["label"].upper()
        confidence = float(prediction["score"])
        return {
            "sentence": sentence,
            "label": polarity,
            "confidence": confidence,
            # Signed intensity: + for positive, - for anything else.
            "signed_score": confidence if polarity == "POSITIVE" else -confidence,
        }

    predictions = sentiment_pipe(sentences)
    return [
        _to_record(sentence, prediction)
        for sentence, prediction in zip(sentences, predictions)
    ]
def overall_summary(sentence_results):
    """Summarize the document-level sentiment as a short multi-line string.

    Args:
        sentence_results: List of per-sentence dicts carrying at least the
            ``label`` and ``signed_score`` keys.

    Returns:
        ``"No text to analyze."`` for empty input; otherwise four lines:
        the overall verdict, the sentence count, the positive/negative
        counts, and the mean signed score.
    """
    if not sentence_results:
        return "No text to analyze."

    total = len(sentence_results)
    label_counts = Counter(item["label"] for item in sentence_results)
    mean_signed = sum(item["signed_score"] for item in sentence_results) / total

    # A mean within +/-0.25 of zero is reported as mixed/neutral.
    if mean_signed < -0.25:
        headline = "Overall tone: NEGATIVE"
    elif mean_signed > 0.25:
        headline = "Overall tone: POSITIVE"
    else:
        headline = "Overall tone: MIXED / NEUTRAL"

    positives = label_counts["POSITIVE"]
    negatives = label_counts["NEGATIVE"]
    report_lines = [
        f"{headline}",
        f"Sentences analyzed: {total}",
        f"Positive: {positives} | Negative: {negatives}",
        f"Average signed sentiment: {mean_signed:+.2f} (range -1.0 to +1.0)",
    ]
    return "\n".join(report_lines)
def plot_pie_chart(sentence_results):
    """Build a pie chart of positive vs. negative sentence counts.

    The figure is created with ``matplotlib.figure.Figure`` instead of
    ``plt.subplots``. Figures made through pyplot stay registered in
    pyplot's global state until explicitly closed, so a long-running server
    that builds one per request accumulates them. A standalone ``Figure``
    is garbage-collected once the caller drops it.

    Args:
        sentence_results: List of per-sentence dicts carrying a ``label``
            key (``"POSITIVE"`` / ``"NEGATIVE"``).

    Returns:
        A 4x4-inch matplotlib ``Figure``. When there is nothing to plot it
        shows a centered "No data" message instead of a pie.
    """
    counts = Counter(r["label"] for r in sentence_results)

    fig = Figure(figsize=(4, 4))
    ax = fig.subplots()

    # (legend label, count, wedge color); zero-count wedges are dropped so
    # the pie never shows an empty "0.0%" slice.
    wedges = [
        ("Positive", counts.get("POSITIVE", 0), "#4CAF50"),
        ("Negative", counts.get("NEGATIVE", 0), "#E53935"),
    ]
    wedges = [wedge for wedge in wedges if wedge[1]]

    if not wedges:
        ax.text(0.5, 0.5, "No data", ha="center", va="center")
        ax.axis("off")
        return fig

    labels = [label for label, _, _ in wedges]
    sizes = [size for _, size, _ in wedges]
    colors = [color for _, _, color in wedges]
    ax.pie(
        sizes,
        labels=labels,
        colors=colors,
        autopct="%1.1f%%",
        startangle=90,
        wedgeprops={"edgecolor": "white", "linewidth": 2},
    )
    ax.set_title("Sentence-Level Sentiment Distribution")
    return fig
def top_charged_sentences(sentence_results, k: int = 5):
    """Tabulate the *k* sentences the model was most confident about.

    Args:
        sentence_results: List of per-sentence dicts with ``sentence``,
            ``label`` and ``confidence`` keys.
        k: Maximum number of rows to return.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank`` (1-based),
        ``Polarity``, ``Confidence`` (formatted to three decimals) and
        ``Sentence``, ordered by descending confidence. Ties keep their
        original relative order.
    """
    by_confidence = list(sentence_results)
    by_confidence.sort(key=lambda item: item["confidence"], reverse=True)

    table_rows = [
        {
            "Rank": position,
            "Polarity": (
                "🟢 POSITIVE" if item["label"] == "POSITIVE" else "🔴 NEGATIVE"
            ),
            "Confidence": f"{item['confidence']:.3f}",
            "Sentence": item["sentence"],
        }
        for position, item in enumerate(by_confidence[:k], start=1)
    ]
    return pd.DataFrame(table_rows)
def render_highlighted(sentence_results, k: int = 5):
    """Render the analyzed text as HTML with the top-k sentences highlighted.

    Every sentence is wrapped in a ``<span>``. The *k* sentences with the
    highest confidence get a colored background and underline (green for
    ``"POSITIVE"``, red otherwise). Sentence text is user-supplied, so it is
    HTML-escaped exactly once with the standard library before being
    embedded in the markup.

    Args:
        sentence_results: List of per-sentence dicts with ``sentence``,
            ``label`` and ``confidence`` keys, in document order.
        k: Number of sentences to highlight.

    Returns:
        An HTML string; a short placeholder paragraph for empty input.
    """
    if not sentence_results:
        return "<p><em>No text to display.</em></p>"

    # Positions of the k most confident sentences (stable for ties).
    ranked_positions = sorted(
        range(len(sentence_results)),
        key=lambda pos: sentence_results[pos]["confidence"],
        reverse=True,
    )
    top_indices = set(ranked_positions[:k])

    parts = ["<div style='line-height:1.7; font-size:1rem;'>"]
    for idx, r in enumerate(sentence_results):
        # Single escaping pass. quote=False because the text only ever lands
        # in element content, never inside an attribute value.
        text = html.escape(r["sentence"], quote=False)
        if idx in top_indices:
            is_positive = r["label"] == "POSITIVE"
            color = "#C8E6C9" if is_positive else "#FFCDD2"
            border = "#2E7D32" if is_positive else "#B71C1C"
            parts.append(
                f"<span style='background:{color}; "
                f"border-bottom:2px solid {border}; padding:2px 4px; "
                f"border-radius:3px; margin-right:2px;'>{text}</span> "
            )
        else:
            parts.append(f"<span>{text}</span> ")
    parts.append("</div>")
    return "".join(parts)
# ---------------------------------------------------------------------------
# Gradio entry point
# ---------------------------------------------------------------------------
def analyze_text(text: str):
    """Gradio callback: run the full analysis on one block of text.

    Args:
        text: Raw text from the input box.

    Returns:
        A 4-tuple ``(summary, chart, table, highlighted_html)`` matching the
        four output components. When there is nothing to analyze, or any
        step raises, the tuple carries a message in the first slot, ``None``
        for the chart and table, and an empty HTML string.
    """

    def _message_only(message):
        # Response shape used whenever no analysis results are available.
        return message, None, None, ""

    try:
        if not (text and text.strip()):
            return _message_only("Please paste some text to analyze.")

        sentences = split_sentences(text)
        if not sentences:
            return _message_only("No sentences detected.")

        scored = analyze_sentences(sentences)
        return (
            overall_summary(scored),
            plot_pie_chart(scored),
            top_charged_sentences(scored, k=5),
            render_highlighted(scored, k=5),
        )
    except Exception as exc:
        # Top-level UI boundary: log the traceback and show the error text
        # to the user instead of letting the callback fail.
        logging.exception(f"Unexpected error: {exc}")
        return _message_only(f"Unexpected error: {exc}")
# Sample inputs offered through gr.Examples in the UI. Each entry is a
# one-element list because the examples feed a single input component.
EXAMPLE_TEXTS = [
    # Book review: mostly positive with a few reservations.
    [
        "I picked up this novel expecting another forgettable thriller, "
        "but I was completely wrong. The prose is luminous and the "
        "characters feel painfully real. By the final chapter I was in "
        "tears. There are a few slow stretches in the middle, and one "
        "subplot never quite pays off, but those are minor complaints. "
        "This is easily the best book I have read all year."
    ],
    # Essay feedback: mixed praise and criticism.
    [
        "The student demonstrates a solid grasp of the source material "
        "and writes with genuine enthusiasm. However, the argument loses "
        "focus in the third section, and several claims go unsupported. "
        "The conclusion is rushed and underwhelming. With more careful "
        "revision, this could become a strong essay."
    ],
    # Product complaint: mostly negative with one positive remark.
    [
        "Honestly, the new update is a disaster. Everything that used to "
        "work is now broken, the interface is hideous, and customer "
        "support has been useless. I cannot believe they shipped this. "
        "On the bright side, the dark mode looks nice."
    ],
]
# Build the UI and wire the Analyze button to analyze_text.
# NOTE(review): the original indentation was lost, so the nesting of the
# `with` blocks below is a reconstruction -- confirm which components belong
# inside the right-hand column versus below the row.
with gr.Blocks(title="Text Sentiment Analyzer") as demo:
    # Page header and one-paragraph description.
    gr.HTML(
        "<h1 style='text-align:center;'>📝 Text Sentiment Analyzer</h1>"
        "<p style='text-align:center;'>Paste any block of text — a book "
        "review, a student essay, a social media post — and get an overall "
        "sentiment read plus the five most emotionally charged sentences.</p>"
    )
    with gr.Row():
        # Input side: text box, submit button, and clickable examples.
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste your text here",
                lines=12,
                placeholder="Paste a review, essay, post, or any prose…",
            )
            submit_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=EXAMPLE_TEXTS,
                inputs=text_in,
                label="Try an example",
            )
        # Output side: text summary and pie chart.
        with gr.Column():
            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
            chart_out = gr.Plot(label="Sentiment Distribution")
    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
    table_out = gr.Dataframe(
        label="Top Charged Sentences",
        wrap=True,
    )
    gr.HTML("<h3>🖍 Highlighted Text</h3>")
    highlighted_out = gr.HTML()
    # The output order must match the 4-tuple returned by analyze_text.
    submit_btn.click(
        analyze_text,
        inputs=[text_in],
        outputs=[summary_out, chart_out, table_out, highlighted_out],
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()