Spaces:

profplate
/

youtube-comments

Paused

File size: 9,353 Bytes

12496be

"""
Text Sentiment Analyzer
-----------------------
A Gradio Space that analyzes the sentiment of any block of text
(book review, student essay, social media post, etc.) and surfaces
the five most emotionally charged sentences.

Designed for a free CPU Hugging Face Space.
"""

import html
import logging
import re
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from transformers import pipeline

# === Setup Logging ===
# Configure the root logger once at import time; every logging.* call in
# this module goes through it.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# === Load model once at startup ===
# DistilBERT SST-2 is small (~250MB), fast on CPU, and gives a clean
# POSITIVE / NEGATIVE label with a confidence score we can use as an
# "emotional intensity" signal.
# The pipeline is built at module import so every request reuses the same
# `sentiment_pipe` object instead of reloading the model.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
logging.info(f"Loading sentiment model: {MODEL_NAME}")
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    # NOTE(review): presumably makes over-long sentences get cut to the
    # model's maximum input length instead of raising - confirm against
    # the transformers pipeline documentation.
    truncation=True,
)
logging.info("Model loaded.")


# ---------------------------------------------------------------------------
# Core helpers
# ---------------------------------------------------------------------------

def split_sentences(text: str):
    """Break *text* into sentences using a single regex (no NLP dependency).

    A boundary is any run of whitespace that directly follows ``.``, ``!``
    or ``?``; the punctuation stays attached to the preceding sentence.
    Blank or whitespace-only input yields an empty list.
    """
    stripped = text.strip()
    if stripped == "":
        return []

    sentences = []
    # The lookbehind keeps the terminator with its sentence while the
    # whitespace run itself is consumed as the separator.
    for piece in re.split(r"(?<=[.!?])\s+", stripped):
        piece = piece.strip()
        if piece:
            sentences.append(piece)
    return sentences


def analyze_sentences(sentences):
    """Classify each sentence and return one result dict per sentence.

    Every dict carries the keys ``sentence``, ``label`` (upper-cased model
    label), ``confidence`` (model score as a float) and ``signed_score``
    (the confidence, negated for anything that is not ``POSITIVE``).
    An empty input returns an empty list without calling the model.
    """
    if not sentences:
        return []

    def _to_record(sentence, prediction):
        polarity = prediction["label"].upper()
        confidence = float(prediction["score"])
        return {
            "sentence": sentence,
            "label": polarity,
            "confidence": confidence,
            # Positive sentences keep the score's sign; everything else
            # is pushed to the negative side of the scale.
            "signed_score": confidence if polarity == "POSITIVE" else -confidence,
        }

    # One batched call for the whole document rather than one per sentence.
    predictions = sentiment_pipe(sentences)
    return [
        _to_record(sentence, prediction)
        for sentence, prediction in zip(sentences, predictions)
    ]


def overall_summary(sentence_results):
    """Summarize document-level sentiment as a short multi-line string.

    The verdict is driven by the mean ``signed_score``: above +0.25 is
    reported as positive, below -0.25 as negative, anything in between
    (boundaries included) as mixed/neutral. The summary also lists the
    sentence count and the positive/negative tallies.
    """
    if not sentence_results:
        return "No text to analyze."

    n_sentences = len(sentence_results)
    labels = [entry["label"] for entry in sentence_results]
    n_positive = labels.count("POSITIVE")
    n_negative = labels.count("NEGATIVE")

    mean_signed = (
        sum(entry["signed_score"] for entry in sentence_results) / n_sentences
    )

    verdict = "Overall tone: MIXED / NEUTRAL"
    if mean_signed > 0.25:
        verdict = "Overall tone: POSITIVE"
    elif mean_signed < -0.25:
        verdict = "Overall tone: NEGATIVE"

    report_lines = [
        f"{verdict}",
        f"Sentences analyzed: {n_sentences}",
        f"Positive: {n_positive}  |  Negative: {n_negative}",
        f"Average signed sentiment: {mean_signed:+.2f}  (range -1.0 to +1.0)",
    ]
    return "\n".join(report_lines)


def plot_pie_chart(sentence_results):
    """Build a pie chart of positive vs. negative sentence counts.

    Args:
        sentence_results: iterable of result dicts, each with a ``label``
            key; only ``POSITIVE`` and ``NEGATIVE`` labels are counted.

    Returns:
        A matplotlib ``Figure``. When neither label occurs, the figure
        contains a centered "No data" message instead of a pie.

    The figure is closed in pyplot before it is returned. ``plt.subplots``
    registers each new figure in pyplot's global state, and in a
    long-running server that registry would otherwise grow by one figure
    per request. The returned ``Figure`` object itself stays usable.
    """
    counts = Counter(r["label"] for r in sentence_results)
    pos = counts.get("POSITIVE", 0)
    neg = counts.get("NEGATIVE", 0)

    fig, ax = plt.subplots(figsize=(4, 4))
    try:
        if pos == 0 and neg == 0:
            ax.text(0.5, 0.5, "No data", ha="center", va="center")
            ax.axis("off")
            return fig

        # Only include wedges with a non-zero count so the chart never
        # shows an empty, zero-percent slice.
        labels, sizes, colors = [], [], []
        if pos:
            labels.append("Positive")
            sizes.append(pos)
            colors.append("#4CAF50")
        if neg:
            labels.append("Negative")
            sizes.append(neg)
            colors.append("#E53935")

        ax.pie(
            sizes,
            labels=labels,
            colors=colors,
            autopct="%1.1f%%",
            startangle=90,
            wedgeprops={"edgecolor": "white", "linewidth": 2},
        )
        ax.set_title("Sentence-Level Sentiment Distribution")
        return fig
    finally:
        # Drop pyplot's reference on every exit path (including errors)
        # so figures do not accumulate across requests.
        plt.close(fig)


def top_charged_sentences(sentence_results, k: int = 5):
    """Tabulate the *k* results with the highest model confidence.

    Returns a DataFrame with the columns ``Rank`` (1-based), ``Polarity``
    (emoji-tagged label), ``Confidence`` (formatted to three decimals) and
    ``Sentence``. Ties keep their original document order.
    """
    by_confidence = list(sentence_results)
    by_confidence.sort(key=lambda entry: entry["confidence"], reverse=True)

    table_rows = [
        {
            "Rank": rank,
            "Polarity": (
                "🟢 POSITIVE" if entry["label"] == "POSITIVE" else "🔴 NEGATIVE"
            ),
            "Confidence": f"{entry['confidence']:.3f}",
            "Sentence": entry["sentence"],
        }
        for rank, entry in enumerate(by_confidence[:k], start=1)
    ]
    return pd.DataFrame(table_rows)


def render_highlighted(sentence_results, k: int = 5):
    """Return an HTML fragment in which the top-k sentences are highlighted.

    Args:
        sentence_results: list of result dicts with ``sentence``, ``label``
            and ``confidence`` keys, in document order.
        k: number of highest-confidence sentences to highlight.

    Returns:
        A ``<div>`` containing one ``<span>`` per sentence. The k most
        confident sentences get a green (``POSITIVE``) or red (any other
        label) background. An empty input returns a placeholder paragraph.

    Sentence text is user-supplied, so it is HTML-escaped exactly once
    with ``html.escape(..., quote=False)``. Quotes are left alone because
    the text is only ever placed in element content, never in an attribute.
    """
    if not sentence_results:
        return "<p><em>No text to display.</em></p>"

    # Rank by confidence while remembering each sentence's position, so
    # the highlight can be applied without reordering the document.
    ranked = sorted(
        enumerate(sentence_results),
        key=lambda pair: pair[1]["confidence"],
        reverse=True,
    )
    top_indices = {idx for idx, _ in ranked[:k]}

    parts = ["<div style='line-height:1.7; font-size:1rem;'>"]
    for idx, r in enumerate(sentence_results):
        # Single escaping pass; escaping twice would turn "&" into
        # "&amp;amp;" and show entity text to the reader.
        text = html.escape(r["sentence"], quote=False)
        if idx in top_indices:
            is_positive = r["label"] == "POSITIVE"
            color = "#C8E6C9" if is_positive else "#FFCDD2"
            border = "#2E7D32" if is_positive else "#B71C1C"
            parts.append(
                f"<span style='background:{color}; "
                f"border-bottom:2px solid {border}; padding:2px 4px; "
                f"border-radius:3px; margin-right:2px;'>{text}</span> "
            )
        else:
            parts.append(f"<span>{text}</span> ")
    parts.append("</div>")
    return "".join(parts)


# ---------------------------------------------------------------------------
# Gradio entry point
# ---------------------------------------------------------------------------

def analyze_text(text: str):
    """Gradio callback: run the full analysis for one block of text.

    Returns a 4-tuple ``(summary, chart, table, highlighted_html)``.
    For blank input, or when anything raises, the first element is a
    message for the user, the chart and table are ``None`` and the HTML
    is an empty string. Exceptions are logged with their traceback and
    never propagate to the caller.
    """
    try:
        if not (text and text.strip()):
            return "Please paste some text to analyze.", None, None, ""

        sentences = split_sentences(text)
        if not sentences:
            return "No sentences detected.", None, None, ""

        scored = analyze_sentences(sentences)
        # Tuple elements are evaluated left to right: summary, chart,
        # table, then highlighted HTML.
        return (
            overall_summary(scored),
            plot_pie_chart(scored),
            top_charged_sentences(scored, k=5),
            render_highlighted(scored, k=5),
        )

    except Exception as e:
        logging.exception(f"Unexpected error: {e}")
        return f"Unexpected error: {e}", None, None, ""


# Sample inputs offered in the UI via gr.Examples. Each inner list is one
# example and holds a single string, the value for the one text input.
# The samples are: a mostly positive book review, a mixed piece of essay
# feedback, and a mostly negative product complaint.
EXAMPLE_TEXTS = [
    [
        "I picked up this novel expecting another forgettable thriller, "
        "but I was completely wrong. The prose is luminous and the "
        "characters feel painfully real. By the final chapter I was in "
        "tears. There are a few slow stretches in the middle, and one "
        "subplot never quite pays off, but those are minor complaints. "
        "This is easily the best book I have read all year."
    ],
    [
        "The student demonstrates a solid grasp of the source material "
        "and writes with genuine enthusiasm. However, the argument loses "
        "focus in the third section, and several claims go unsupported. "
        "The conclusion is rushed and underwhelming. With more careful "
        "revision, this could become a strong essay."
    ],
    [
        "Honestly, the new update is a disaster. Everything that used to "
        "work is now broken, the interface is hideous, and customer "
        "support has been useless. I cannot believe they shipped this. "
        "On the bright side, the dark mode looks nice."
    ],
]


# Declarative UI layout. Components are created in the order they appear
# here: header, a two-column input/summary row, the top-sentences table,
# and finally the highlighted rendering of the full text.
with gr.Blocks(title="Text Sentiment Analyzer") as demo:
    # Page header and one-paragraph description.
    gr.HTML(
        "<h1 style='text-align:center;'>📝 Text Sentiment Analyzer</h1>"
        "<p style='text-align:center;'>Paste any block of text — a book "
        "review, a student essay, a social media post — and get an overall "
        "sentiment read plus the five most emotionally charged sentences.</p>"
    )

    with gr.Row():
        # Left column: text input, submit button and example texts.
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste your text here",
                lines=12,
                placeholder="Paste a review, essay, post, or any prose…",
            )
            submit_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=EXAMPLE_TEXTS,
                inputs=text_in,
                label="Try an example",
            )

        # Right column: textual summary and the pie chart.
        with gr.Column():
            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
            chart_out = gr.Plot(label="Sentiment Distribution")

    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
    table_out = gr.Dataframe(
        label="Top Charged Sentences",
        wrap=True,
    )

    gr.HTML("<h3>🖍 Highlighted Text</h3>")
    highlighted_out = gr.HTML()

    # The outputs list must stay in the same order as the 4-tuple that
    # analyze_text returns: summary, chart, table, highlighted HTML.
    submit_btn.click(
        analyze_text,
        inputs=[text_in],
        outputs=[summary_out, chart_out, table_out, highlighted_out],
    )


# Start the Gradio server only when this file is run as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()