Spaces:
Paused
Paused
| """ | |
| Text Sentiment Analyzer | |
| ----------------------- | |
| A Gradio Space that analyzes the sentiment of any block of text | |
| (book review, student essay, social media post, etc.) and surfaces | |
| the five most emotionally charged sentences. | |
| Designed for a free CPU Hugging Face Space. | |
| """ | |
import html
import logging
import re
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from transformers import pipeline
# === Setup Logging ===
# Configure the root logger once at import time so startup progress is
# visible in the Space's container logs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# === Load model once at startup ===
# DistilBERT SST-2 is small (~250MB), fast on CPU, and gives a clean
# POSITIVE / NEGATIVE label with a confidence score we can use as an
# "emotional intensity" signal.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
logging.info(f"Loading sentiment model: {MODEL_NAME}")
# Built at module import so every request reuses the same pipeline;
# truncation=True guards against inputs longer than the model's limit.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    truncation=True,
)
logging.info("Model loaded.")
| # --------------------------------------------------------------------------- | |
| # Core helpers | |
| # --------------------------------------------------------------------------- | |
def split_sentences(text: str):
    """Break *text* into sentences without any extra NLP dependency.

    A sentence boundary is '.', '!' or '?' followed by whitespace.
    Returns a list of stripped, non-empty sentences; whitespace-only
    or empty input yields [].
    """
    cleaned = text.strip()
    if not cleaned:
        return []
    # Lookbehind keeps the terminating punctuation attached to its sentence.
    fragments = re.split(r"(?<=[.!?])\s+", cleaned)
    return [fragment.strip() for fragment in fragments if fragment.strip()]
def analyze_sentences(sentences):
    """Score every sentence with the sentiment model.

    Returns one dict per sentence with keys:
    ``sentence`` (str), ``label`` (POSITIVE/NEGATIVE), ``confidence``
    (model score, 0..1) and ``signed_score`` (confidence, negated for
    NEGATIVE labels). An empty input returns [].
    """
    if not sentences:
        return []
    predictions = sentiment_pipe(sentences)
    analyzed = []
    for text, prediction in zip(sentences, predictions):
        tag = prediction["label"].upper()
        confidence = float(prediction["score"])
        analyzed.append({
            "sentence": text,
            "label": tag,
            "confidence": confidence,
            # Negative sentences contribute below zero so that document
            # averages reflect overall polarity.
            "signed_score": confidence if tag == "POSITIVE" else -confidence,
        })
    return analyzed
def overall_summary(sentence_results):
    """Build a plain-language summary of the document's sentiment.

    The verdict is driven by the mean signed score: above +0.25 reads
    POSITIVE, below -0.25 reads NEGATIVE, anything in between reads
    MIXED / NEUTRAL. Empty input returns a fixed message.
    """
    if not sentence_results:
        return "No text to analyze."
    total = len(sentence_results)
    label_counts = Counter(item["label"] for item in sentence_results)
    mean_signed = sum(item["signed_score"] for item in sentence_results) / total
    if mean_signed > 0.25:
        tone = "POSITIVE"
    elif mean_signed < -0.25:
        tone = "NEGATIVE"
    else:
        tone = "MIXED / NEUTRAL"
    report_lines = [
        f"Overall tone: {tone}",
        f"Sentences analyzed: {total}",
        f"Positive: {label_counts.get('POSITIVE', 0)} | "
        f"Negative: {label_counts.get('NEGATIVE', 0)}",
        f"Average signed sentiment: {mean_signed:+.2f} (range -1.0 to +1.0)",
    ]
    return "\n".join(report_lines)
def plot_pie_chart(sentence_results):
    """Render a pie chart of the positive vs negative sentence split.

    Returns a matplotlib Figure; when there are no labeled sentences
    the figure carries a "No data" placeholder instead of a pie.
    """
    tally = Counter(item["label"] for item in sentence_results)
    positives = tally.get("POSITIVE", 0)
    negatives = tally.get("NEGATIVE", 0)
    fig, ax = plt.subplots(figsize=(4, 4))
    if not (positives or negatives):
        ax.text(0.5, 0.5, "No data", ha="center", va="center")
        ax.axis("off")
        return fig
    # Only include wedges for labels that actually occur, so a
    # single-polarity document renders a full circle.
    wedges = []
    if positives:
        wedges.append(("Positive", positives, "#4CAF50"))
    if negatives:
        wedges.append(("Negative", negatives, "#E53935"))
    names, counts, shades = zip(*wedges)
    ax.pie(
        counts,
        labels=names,
        colors=shades,
        autopct="%1.1f%%",
        startangle=90,
        wedgeprops={"edgecolor": "white", "linewidth": 2},
    )
    ax.set_title("Sentence-Level Sentiment Distribution")
    return fig
def top_charged_sentences(sentence_results, k: int = 5):
    """Tabulate the *k* sentences the model scored most confidently.

    Returns a DataFrame with Rank, Polarity, Confidence (3-decimal
    string) and Sentence columns, ordered by descending confidence.
    """
    by_confidence = sorted(
        sentence_results,
        key=lambda item: item["confidence"],
        reverse=True,
    )
    table_rows = [
        {
            "Rank": rank,
            "Polarity": "🟢 POSITIVE" if item["label"] == "POSITIVE" else "🔴 NEGATIVE",
            "Confidence": f"{item['confidence']:.3f}",
            "Sentence": item["sentence"],
        }
        for rank, item in enumerate(by_confidence[:k], start=1)
    ]
    return pd.DataFrame(table_rows)
def render_highlighted(sentence_results, k: int = 5):
    """Return HTML with the top-k most charged sentences color-highlighted.

    Every sentence is HTML-escaped before being wrapped in markup so
    user-supplied text cannot inject tags or scripts into the rendered
    page. The k sentences with the highest model confidence get a green
    (positive) or red (negative) background; the rest are plain spans.
    Empty input returns a short placeholder paragraph.
    """
    if not sentence_results:
        return "<p><em>No text to display.</em></p>"
    # Indices of the k sentences with the highest model confidence.
    top_indices = {
        idx
        for idx, _ in sorted(
            enumerate(sentence_results),
            key=lambda pair: pair[1]["confidence"],
            reverse=True,
        )[:k]
    }
    parts = ["<div style='line-height:1.7; font-size:1rem;'>"]
    for idx, r in enumerate(sentence_results):
        # html.escape handles &, <, > (and quotes) in one correct pass.
        # The previous hand-rolled replace() chain was a no-op
        # (e.g. replace("&", "&")), so user text reached the page
        # unescaped — an HTML-injection hole; the conditional
        # gr.utils.sanitize_html path also risked double-escaping.
        text = html.escape(r["sentence"])
        if idx in top_indices:
            color = "#C8E6C9" if r["label"] == "POSITIVE" else "#FFCDD2"
            border = "#2E7D32" if r["label"] == "POSITIVE" else "#B71C1C"
            parts.append(
                f"<span style='background:{color}; "
                f"border-bottom:2px solid {border}; padding:2px 4px; "
                f"border-radius:3px; margin-right:2px;'>{text}</span> "
            )
        else:
            parts.append(f"<span>{text}</span> ")
    parts.append("</div>")
    return "".join(parts)
| # --------------------------------------------------------------------------- | |
| # Gradio entry point | |
| # --------------------------------------------------------------------------- | |
def analyze_text(text: str):
    """Gradio callback: run the full analysis pipeline on *text*.

    Returns a 4-tuple of (summary string, matplotlib figure, DataFrame,
    HTML string). On empty input or any failure the figure and table
    slots are None and the summary slot carries the message instead.
    """
    try:
        if not text or not text.strip():
            return "Please paste some text to analyze.", None, None, ""
        sentences = split_sentences(text)
        if not sentences:
            return "No sentences detected.", None, None, ""
        scored = analyze_sentences(sentences)
        return (
            overall_summary(scored),
            plot_pie_chart(scored),
            top_charged_sentences(scored, k=5),
            render_highlighted(scored, k=5),
        )
    except Exception as err:
        # Top-level UI boundary: log the traceback, surface the message.
        logging.exception(f"Unexpected error: {err}")
        return f"Unexpected error: {err}", None, None, ""
# Canned inputs for the gr.Examples widget: a book review, an essay
# critique, and a product complaint — each mixes positive and negative
# sentences so the highlighting and pie chart have something to show.
EXAMPLE_TEXTS = [
    [
        "I picked up this novel expecting another forgettable thriller, "
        "but I was completely wrong. The prose is luminous and the "
        "characters feel painfully real. By the final chapter I was in "
        "tears. There are a few slow stretches in the middle, and one "
        "subplot never quite pays off, but those are minor complaints. "
        "This is easily the best book I have read all year."
    ],
    [
        "The student demonstrates a solid grasp of the source material "
        "and writes with genuine enthusiasm. However, the argument loses "
        "focus in the third section, and several claims go unsupported. "
        "The conclusion is rushed and underwhelming. With more careful "
        "revision, this could become a strong essay."
    ],
    [
        "Honestly, the new update is a disaster. Everything that used to "
        "work is now broken, the interface is hideous, and customer "
        "support has been useless. I cannot believe they shipped this. "
        "On the bright side, the dark mode looks nice."
    ],
]
# UI layout: input textbox + examples on the left, summary and chart on
# the right, with the ranked table and highlighted text below.
with gr.Blocks(title="Text Sentiment Analyzer") as demo:
    gr.HTML(
        "<h1 style='text-align:center;'>📝 Text Sentiment Analyzer</h1>"
        "<p style='text-align:center;'>Paste any block of text — a book "
        "review, a student essay, a social media post — and get an overall "
        "sentiment read plus the five most emotionally charged sentences.</p>"
    )
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste your text here",
                lines=12,
                placeholder="Paste a review, essay, post, or any prose…",
            )
            submit_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=EXAMPLE_TEXTS,
                inputs=text_in,
                label="Try an example",
            )
        with gr.Column():
            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
            chart_out = gr.Plot(label="Sentiment Distribution")
    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
    table_out = gr.Dataframe(
        label="Top Charged Sentences",
        wrap=True,
    )
    gr.HTML("<h3>🖍 Highlighted Text</h3>")
    highlighted_out = gr.HTML()
    # Wire the button to the callback; outputs map 1:1 to the four
    # return values of analyze_text.
    submit_btn.click(
        analyze_text,
        inputs=[text_in],
        outputs=[summary_out, chart_out, table_out, highlighted_out],
    )
# Start the Gradio server when run directly as a script.
if __name__ == "__main__":
    demo.launch()