"""
Sentiment Analysis App for HuggingFace Spaces.

Clean, robust implementation with proper visualization.
"""
| |
|
| | import gradio as gr |
| | import torch |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| | import numpy as np |
| | import plotly.graph_objects as go |
| | import pandas as pd |
| | from typing import Dict, List, Tuple |
| | import logging |
| |
|
| | |
# Configure root logging once at import time; module-level logger for this app.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
| |
|
| | |
| | |
| | |
| |
|
class SentimentAnalyzer:
    """Production-ready sentiment analyzer backed by a HuggingFace checkpoint.

    Loads a sequence-classification model once at construction time and
    exposes single- and batch-text prediction helpers. Falls back to the
    stock SST-2 DistilBERT checkpoint if the primary model fails to load.
    """

    def __init__(self, model_name: str = "MartinRodrigo/distilbert-sentiment-imdb"):
        self.model_name = model_name
        self.tokenizer = None  # set by load_model()
        self.model = None      # set by load_model()
        # Prefer GPU when available; all input tensors are moved to this device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.load_model()

    def _load(self, model_name: str) -> None:
        """Load tokenizer + model for `model_name`, move to device, set eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()

    def load_model(self):
        """Load the configured model from HuggingFace Hub.

        On any failure, falls back to the public SST-2 DistilBERT model so the
        app still starts. If the fallback also fails, the exception propagates.
        """
        try:
            logger.info(f"Loading model: {self.model_name}")
            self._load(self.model_name)
            logger.info(f"Model loaded successfully on {self.device}")
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            logger.info("Falling back to base DistilBERT model...")
            self.model_name = "distilbert-base-uncased-finetuned-sst-2-english"
            self._load(self.model_name)

    def predict(self, text: str) -> Dict:
        """Predict sentiment for a single text.

        Returns a dict with keys:
            sentiment:     "POSITIVE" / "NEGATIVE" / "ERROR"
            confidence:    probability of the predicted class (float)
            probabilities: {"Negative": p0, "Positive": p1}
            error:         None on success, message string otherwise
        Never raises: failures are reported via the "error" key.
        """
        if not text or not text.strip():
            return {
                "sentiment": "ERROR",
                "confidence": 0.0,
                "probabilities": {"Negative": 0.5, "Positive": 0.5},
                "error": "Please enter some text"
            }

        try:
            # Tokenize and move tensors onto the model's device.
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=512,  # DistilBERT's maximum sequence length
                padding=True
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Inference only -- no gradients needed.
            with torch.no_grad():
                outputs = self.model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

            probs_cpu = probs.cpu().numpy()[0]
            predicted_class = int(np.argmax(probs_cpu))
            confidence = float(probs_cpu[predicted_class])

            # Label convention assumed here: index 0 = negative, index 1 = positive.
            sentiment = "POSITIVE" if predicted_class == 1 else "NEGATIVE"

            return {
                "sentiment": sentiment,
                "confidence": confidence,
                "probabilities": {
                    "Negative": float(probs_cpu[0]),
                    "Positive": float(probs_cpu[1])
                },
                "error": None
            }

        except Exception as e:
            logger.error(f"Prediction error: {e}")
            return {
                "sentiment": "ERROR",
                "confidence": 0.0,
                "probabilities": {"Negative": 0.5, "Positive": 0.5},
                "error": str(e)
            }

    def predict_batch(self, texts: List[str]) -> List[Dict]:
        """Predict sentiment for multiple texts, skipping blank entries."""
        return [self.predict(text) for text in texts if text.strip()]
| |
|
| | |
| | |
| | |
| |
|
def create_probability_chart(probabilities: Dict[str, float]) -> go.Figure:
    """Render per-class sentiment probabilities as a dark-themed bar chart."""
    labels = list(probabilities.keys())
    probs = [probabilities[label] for label in labels]
    # Red for the negative class, green for the positive one.
    bar_colors = ["#ff4444" if label == "Negative" else "#44ff44" for label in labels]

    bar = go.Bar(
        x=labels,
        y=probs,
        marker_color=bar_colors,
        text=[f"{p:.1%}" for p in probs],
        textposition='outside',
        textfont=dict(size=14, color='white'),
        hovertemplate='<b>%{x}</b><br>Probability: %{y:.2%}<extra></extra>'
    )
    fig = go.Figure(data=[bar])

    fig.update_layout(
        title={
            'text': "Sentiment Probability Distribution",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16, 'color': 'white'}
        },
        xaxis_title="Sentiment",
        yaxis_title="Probability",
        yaxis_range=[0, 1],
        height=350,
        template="plotly_dark",
        showlegend=False,
        margin=dict(t=60, b=60, l=60, r=40),
        # Transparent backgrounds so the page gradient shows through.
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )

    return fig
| |
|
def create_batch_chart(results: List[Dict], texts: List[str]) -> go.Figure:
    """Visualize batch results: one confidence bar per text, colored by sentiment."""
    labels = [f"Text {idx + 1}" for idx in range(len(results))]
    conf_values = [entry['confidence'] for entry in results]
    sentiment_values = [entry['sentiment'] for entry in results]
    bar_colors = ["#ff4444" if s == "NEGATIVE" else "#44ff44" for s in sentiment_values]

    # Truncate long inputs so hover tooltips stay readable.
    previews = []
    for t in texts:
        previews.append(t if len(t) <= 100 else t[:100] + "...")

    bar = go.Bar(
        x=labels,
        y=conf_values,
        marker_color=bar_colors,
        text=[f"{c:.1%}" for c in conf_values],
        textposition='outside',
        textfont=dict(size=12, color='white'),
        customdata=list(zip(sentiment_values, previews)),
        hovertemplate=(
            '<b>%{x}</b><br>'
            'Sentiment: %{customdata[0]}<br>'
            'Confidence: %{y:.1%}<br>'
            'Text: %{customdata[1]}<extra></extra>'
        )
    )
    fig = go.Figure(data=[bar])

    fig.update_layout(
        title={
            'text': "Batch Analysis Results",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16, 'color': 'white'}
        },
        xaxis_title="Text Number",
        yaxis_title="Confidence",
        # Headroom above 1.0 so outside-positioned labels are not clipped.
        yaxis_range=[0, 1.1],
        height=400,
        template="plotly_dark",
        showlegend=False,
        margin=dict(t=60, b=80, l=60, r=40),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )

    return fig
| |
|
| | |
| | |
| | |
| |
|
def analyze_text(text: str) -> Tuple[str, str, go.Figure]:
    """Analyze one text; return (sentiment markdown, confidence text, chart)."""
    result = analyzer.predict(text)

    # Guard: surface prediction errors with a neutral 50/50 chart.
    if result['error']:
        fallback = create_probability_chart({"Negative": 0.5, "Positive": 0.5})
        return f"β οΈ **Error:** {result['error']}", "0.0", fallback

    is_positive = result['sentiment'] == "POSITIVE"
    emoji = "π" if is_positive else "π"

    return (
        f"{emoji} **{result['sentiment']}**",
        f"{result['confidence']:.1%}",
        create_probability_chart(result['probabilities']),
    )
| |
|
def _empty_batch_figure(message: str) -> go.Figure:
    """Placeholder dark-themed figure with `message` centered (empty-input state)."""
    fig = go.Figure()
    fig.update_layout(
        template="plotly_dark",
        paper_bgcolor='rgba(0,0,0,0)',
        annotations=[{
            'text': message,
            'xref': 'paper',
            'yref': 'paper',
            'x': 0.5,
            'y': 0.5,
            'showarrow': False,
            'font': {'size': 16, 'color': 'gray'}
        }]
    )
    return fig


def analyze_batch(text_input: str) -> Tuple[str, go.Figure]:
    """Analyze newline-separated texts; return (markdown summary, bar chart).

    Blank lines are discarded. Empty input yields a placeholder figure and a
    prompt message instead of raising.
    """
    if not text_input or not text_input.strip():
        return (
            "Please enter texts (one per line)",
            _empty_batch_figure("Please enter texts to analyze"),
        )

    # One text per non-blank line.
    texts = [line.strip() for line in text_input.split('\n') if line.strip()]

    if not texts:
        # Defensive only: unreachable once the strip() guard above has passed.
        return "No valid texts found", _empty_batch_figure("No valid texts found")

    results = analyzer.predict_batch(texts)

    # Build a markdown summary line per text, with a short preview.
    summary_lines = ["### π Analysis Results\n"]
    for i, (text, result) in enumerate(zip(texts, results), 1):
        emoji = "π" if result['sentiment'] == "POSITIVE" else "π"
        text_preview = text[:60] + "..." if len(text) > 60 else text
        summary_lines.append(
            f"{i}. {emoji} **{result['sentiment']}** ({result['confidence']:.1%}) - *{text_preview}*"
        )

    summary = "\n".join(summary_lines)

    return summary, create_batch_chart(results, texts)
| |
|
| | |
| | |
| | |
| |
|
# Module-level singleton: the model loads once at import so every Gradio
# callback shares the same analyzer instance.
logger.info("Initializing sentiment analyzer...")
analyzer = SentimentAnalyzer()
| |
|
| | |
| | |
| | |
| |
|
# Canned inputs for the single-analysis tab. gr.Examples expects one inner
# list per example row (here: a single textbox value each).
SINGLE_EXAMPLES = [
    ["This movie is absolutely fantastic! I loved every minute of it."],
    ["Terrible experience. Waste of time and money."],
    ["The product is okay, nothing special but it works."],
    ["Best purchase ever! Highly recommend to everyone!"],
    ["Disappointed with the quality. Expected much better."]
]

# Pre-filled multi-line example for the batch tab (one text per line).
BATCH_EXAMPLE = """This movie is absolutely fantastic! I loved every minute of it.
Terrible experience. Waste of time and money.
The product is okay, nothing special but it works.
Best purchase ever! Highly recommend to everyone!"""
| |
|
| | |
| | |
| | |
| |
|
# ---------------------------------------------------------------------------
# Gradio UI definition. Layout is declared here; the actual work happens in
# analyze_text / analyze_batch, wired to the buttons at the bottom.
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="green",
    ),
    # Custom CSS: cap overall width and put a gradient behind the root component.
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    #component-0 {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    }
    """
) as demo:

    # Page header.
    gr.Markdown("""
    # π Sentiment Analysis with DistilBERT

    Analyze text sentiment with a fine-tuned transformer model trained on IMDB reviews.

    **Model:** DistilBERT | **Accuracy:** 80% | **F1:** 0.7981
    """)

    with gr.Tabs():

        # Tab 1: analyze one text at a time.
        with gr.TabItem("π Single Analysis"):
            gr.Markdown("### Analyze individual texts")

            with gr.Row():
                # Left column: input + examples.
                with gr.Column(scale=1):
                    single_input = gr.Textbox(
                        label="Enter your text",
                        placeholder="Type or paste your text here...",
                        lines=6,
                        max_lines=10
                    )
                    single_btn = gr.Button(
                        "π Analyze",
                        variant="primary",
                        size="lg"
                    )

                    gr.Examples(
                        examples=SINGLE_EXAMPLES,
                        inputs=single_input,
                        label="Try these examples:"
                    )

                # Right column: sentiment, confidence, probability chart.
                with gr.Column(scale=1):
                    single_sentiment = gr.Markdown(
                        label="Result",
                        value="*Results will appear here*"
                    )
                    single_confidence = gr.Textbox(
                        label="Confidence Score",
                        interactive=False
                    )
                    single_plot = gr.Plot(label="Probability Distribution")

        # Tab 2: analyze many texts, one per line.
        with gr.TabItem("π Batch Processing"):
            gr.Markdown("### Process multiple texts at once (one per line)")

            with gr.Row():
                with gr.Column(scale=1):
                    batch_input = gr.Textbox(
                        label="Enter multiple texts (one per line)",
                        placeholder="Enter texts, one per line...",
                        lines=10,
                        value=BATCH_EXAMPLE
                    )
                    batch_btn = gr.Button(
                        "π Process Batch",
                        variant="primary",
                        size="lg"
                    )

                with gr.Column(scale=1):
                    batch_results = gr.Markdown(
                        label="Results Summary",
                        value="*Results will appear here*"
                    )
                    batch_plot = gr.Plot(label="Batch Analytics")

        # Tab 3: static project / model information.
        with gr.TabItem("βΉοΈ About"):
            gr.Markdown("""
            ## About This Model

            ### ποΈ Architecture
            - **Base Model:** DistilBERT (Distilled BERT)
            - **Parameters:** 66 million
            - **Training Data:** IMDB Movie Reviews (50k reviews)
            - **Fine-tuning:** Binary sentiment classification

            ### π Performance Metrics
            - **Test Accuracy:** 80.0%
            - **F1 Score:** 0.7981
            - **Precision:** High
            - **Recall:** Balanced

            ### β‘ Features
            - Fast inference (~100ms per prediction)
            - Batch processing support
            - Interactive visualizations
            - Production-ready deployment

            ### π Resources
            - **Model Repository:** [MartinRodrigo/distilbert-sentiment-imdb](https://huggingface.co/MartinRodrigo/distilbert-sentiment-imdb)
            - **Space:** [transformer-sentiment-analysis](https://huggingface.co/spaces/MartinRodrigo/transformer-sentiment-analysis)
            - **GitHub:** [ransformer-sentiment-analysis](https://github.com/mrdesautu/ransformer-sentiment-analysis)

            ### π οΈ Tech Stack
            - **Framework:** PyTorch + Transformers
            - **UI:** Gradio
            - **Visualization:** Plotly
            - **Tracking:** MLflow (local development)

            ---

            Built with β€οΈ by Martin Rodrigo
            """)

    # Event wiring: buttons call the module-level analysis functions.
    single_btn.click(
        fn=analyze_text,
        inputs=[single_input],
        outputs=[single_sentiment, single_confidence, single_plot]
    )

    batch_btn.click(
        fn=analyze_batch,
        inputs=[batch_input],
        outputs=[batch_results, batch_plot]
    )
| |
|
| | |
| | |
| | |
| |
|
if __name__ == "__main__":
    # Launch the Gradio server. share=False keeps it local (HF Spaces provides
    # its own public URL); show_error surfaces Python exceptions in the UI.
    demo.launch(
        share=False,
        show_error=True
    )
| |
|