# NOTE: Hugging Face Spaces page scaffolding ("Spaces: Sleeping") removed — not part of app.py.
| # app.py | |
| import gradio as gr | |
| import torch | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| import json | |
class AdvancedSentimentAnalyzer:
    """5-point multilingual sentiment analyzer.

    Wraps a Hugging Face ``text-classification`` pipeline (default model:
    ``tabularisai/multilingual-sentiment-analysis``) and layers on top of it:
    keyword-based language detection, a weighted sentiment score on a
    -2..+2 scale, and an "emotional intensity" metric (spread between the
    strongest and weakest class probabilities).
    """

    def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
        """Load the tokenizer/model; degrade instead of crashing on failure.

        If loading fails (no network, bad model id), ``self.classifier`` is
        left as ``None`` and analyze_sentiment() returns an error result.
        """
        print("Loading model and tokenizer...")
        self.model_name = model_name
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
            # top_k=None returns the score of every class, replacing the
            # deprecated return_all_scores=True pipeline argument.
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                top_k=None,
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback: analyze_sentiment() checks for None before running.
            self.classifier = None
        # Class index -> human-readable name, in ascending positivity order.
        self.sentiment_map = {
            0: "Very Negative",
            1: "Negative",
            2: "Neutral",
            3: "Positive",
            4: "Very Positive",
        }
        # Chart/HTML colors keyed by sentiment name.
        self.sentiment_colors = {
            "Very Negative": "#FF6B6B",
            "Negative": "#FFA8A8",
            "Neutral": "#FFD93D",
            "Positive": "#6BCF7F",
            "Very Positive": "#4ECDC4",
        }
        # High-frequency function words per language, used by the substring
        # heuristic in detect_language().
        self.language_detection_keywords = {
            'english': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for'],
            'spanish': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'por'],
            'french': ['le', 'la', 'de', 'et', 'que', 'en', 'un', 'pour'],
            'german': ['der', 'die', 'das', 'und', 'zu', 'in', 'den', 'mit'],
            'italian': ['il', 'la', 'di', 'e', 'che', 'in', 'un', 'per'],
            'portuguese': ['o', 'a', 'de', 'e', 'que', 'em', 'um', 'para'],
            'dutch': ['de', 'het', 'en', 'van', 'te', 'in', 'een', 'voor'],
            'russian': ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с'],
            'chinese': ['的', '是', '在', '了', '有', '和', '为', '我'],
            'japanese': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て'],
            # BUGFIX: the original 'korean' list mixed in Japanese kana
            # ('は', 'を', 'が', 'で', 'て'); these are Korean particles.
            'korean': ['이', '가', '은', '는', '을', '를', '에', '의'],
            'arabic': ['ال', 'في', 'من', 'على', 'أن', 'ما', 'هو', 'إلى'],
            'hindi': ['की', 'से', 'है', 'और', 'के', 'में', 'यह', 'को'],
            'turkish': ['ve', 'bir', 'bu', 'ile', 'için', 'ama', 'da', 'de'],
        }
        # BUGFIX: the original printed success unconditionally, even after
        # the except branch above had already reported a load failure.
        if self.classifier is not None:
            print("Model loaded successfully!")

    def detect_language(self, text):
        """Best-effort language guess from common-word substring counts.

        Returns a capitalized language name, or 'Unknown' when the input is
        empty/non-string or no keyword matched. NOTE: matching is substring
        based (not word-boundary) so short tokens can over-match; this is a
        UI hint, not a reliable detector.
        """
        if not text or not isinstance(text, str):
            return 'Unknown'
        text_lower = text.lower()
        scores = {}
        for lang, keywords in self.language_detection_keywords.items():
            scores[lang] = sum(1 for keyword in keywords if keyword in text_lower)
        # Only return a language if at least one keyword matched.
        if scores and max(scores.values()) > 0:
            detected_lang = max(scores, key=scores.get)
        else:
            detected_lang = 'unknown'
        return detected_lang.capitalize()

    def _error_result(self, text, message):
        """Uniform neutral placeholder returned when analysis cannot run."""
        return {
            'text': text,
            'sentiment': 'Neutral',
            'confidence': 0.0,
            'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
            'sentiment_score': 0,
            'language': 'Unknown',
            'emotional_intensity': 0.0,
            'error': message,
        }

    def analyze_sentiment(self, text):
        """Classify one text.

        Returns a dict with: sentiment (dominant class name), confidence,
        scores (per-class probabilities), sentiment_score (-2..+2 weighted
        mean), language, emotional_intensity and an ISO timestamp. On empty
        input, a missing model, or a pipeline error, a neutral placeholder
        carrying an 'error' key is returned instead of raising.
        """
        if not text or not text.strip():
            return self._error_result(text, 'No text provided')
        if self.classifier is None:
            # BUGFIX: the model failed to load in __init__; the original
            # called None(text) and only survived via the blanket except.
            return self._error_result(text, 'Model not loaded')
        try:
            # With top_k=None the pipeline returns a list of dicts per input.
            predictions = self.classifier(text)[0]
            sentiment_scores = {}
            for pred in predictions:
                label_lc = pred['label'].lower()
                score = pred['score']
                # BUGFIX: the 'very ...' labels must be matched BEFORE the
                # plain ones — 'very positive' contains the substring
                # 'positive', so the original ordering routed every
                # "Very Positive" probability into the "Positive" bucket.
                if 'very negative' in label_lc or pred['label'] == 'LABEL_0':
                    sentiment_scores["Very Negative"] = score
                elif 'very positive' in label_lc or pred['label'] == 'LABEL_4':
                    sentiment_scores["Very Positive"] = score
                elif 'negative' in label_lc or pred['label'] == 'LABEL_1':
                    sentiment_scores["Negative"] = score
                elif 'neutral' in label_lc or pred['label'] == 'LABEL_2':
                    sentiment_scores["Neutral"] = score
                elif 'positive' in label_lc or pred['label'] == 'LABEL_3':
                    sentiment_scores["Positive"] = score
                else:
                    # Unrecognized label: fill the first empty slot in order.
                    for key in self.sentiment_map.values():
                        if key not in sentiment_scores:
                            sentiment_scores[key] = score
                            break
            # Ensure all five categories are present for downstream charts.
            for sentiment in self.sentiment_map.values():
                sentiment_scores.setdefault(sentiment, 0.0)
            dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
            confidence = sentiment_scores[dominant_sentiment]
            # Weighted mean on a -2..+2 scale (Neutral contributes 0).
            sentiment_score = (
                sentiment_scores["Very Positive"] * 2 +
                sentiment_scores["Positive"] * 1 +
                sentiment_scores["Neutral"] * 0 +
                sentiment_scores["Negative"] * -1 +
                sentiment_scores["Very Negative"] * -2
            )
            detected_language = self.detect_language(text)
            # Spread between the strongest and weakest class probabilities.
            emotional_intensity = max(sentiment_scores.values()) - min(sentiment_scores.values())
            return {
                'text': text,
                'sentiment': dominant_sentiment,
                'confidence': confidence,
                'scores': sentiment_scores,
                'sentiment_score': sentiment_score,
                'language': detected_language,
                'emotional_intensity': emotional_intensity,
                'timestamp': datetime.now().isoformat(),
            }
        except Exception as e:
            print(f"Error in sentiment analysis: {e}")
            return self._error_result(text, str(e))

    def batch_analyze(self, texts):
        """Run analyze_sentiment() over *texts*, logging progress every 10."""
        results = []
        for i, text in enumerate(texts):
            if i % 10 == 0:
                print(f"Processing {i}/{len(texts)}...")
            results.append(self.analyze_sentiment(text))
        return results
# Initialize analyzer
print("Initializing sentiment analyzer...")
# Module-level singleton shared by all Gradio callbacks below; loading the
# model happens once at import time (Space startup), not per request.
analyzer = AdvancedSentimentAnalyzer()
def create_sentiment_chart(scores):
    """Render the per-class confidence scores as a colored bar chart.

    *scores* maps sentiment name -> probability. Returns a plotly Figure,
    or None if chart construction fails for any reason.
    """
    try:
        labels = list(scores)
        values = [scores[name] for name in labels]
        bars = go.Bar(
            x=labels,
            y=values,
            marker_color=[analyzer.sentiment_colors[name] for name in labels],
            text=[f'{value:.1%}' for value in values],
            textposition='auto',
        )
        figure = go.Figure(data=[bars])
        figure.update_layout(
            title="Sentiment Distribution",
            xaxis_title="Sentiment",
            yaxis_title="Confidence Score",
            template="plotly_white",
            height=300,
        )
        return figure
    except Exception as e:
        print(f"Error creating chart: {e}")
        return None
def create_radar_chart(scores):
    """Plot the five sentiment scores on a 0-1 radial (radar) axis.

    Returns a plotly Figure, or None if chart construction fails.
    """
    try:
        trace = go.Scatterpolar(
            r=[scores[name] for name in scores],
            theta=[name for name in scores],
            fill='toself',
            line=dict(color='#4ECDC4'),
            marker=dict(size=8),
        )
        figure = go.Figure(data=trace)
        figure.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=False,
            template="plotly_white",
            height=300,
        )
        return figure
    except Exception as e:
        print(f"Error creating radar chart: {e}")
        return None
def analyze_single_review(review_text):
    """Analyze one review; return (HTML summary, bar chart, radar chart)."""
    # Guard clause: nothing to analyze.
    if not review_text or not review_text.strip():
        return "❌ Please enter some text to analyze.", None, None
    print(f"Analyzing: {review_text[:100]}...")
    result = analyzer.analyze_sentiment(review_text)
    # Pull the fields used by the HTML template into locals for readability.
    sentiment = result['sentiment']
    color = analyzer.sentiment_colors.get(sentiment, '#FFD93D')
    quoted = result['text']
    confidence = result['confidence']
    language = result['language']
    intensity = result['emotional_intensity']
    output_html = f"""
    <div style="padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {color}20, {color}40); border-left: 5px solid {color};">
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
            <h3 style="margin: 0; color: #2D3748;">🎯 Analysis Result</h3>
            <span style="background-color: {color}; color: white; padding: 5px 15px; border-radius: 20px; font-weight: bold;">
                {sentiment.upper()}
            </span>
        </div>
        <div style="background: white; padding: 15px; border-radius: 10px; margin: 10px 0;">
            <p style="margin: 0; font-style: italic;">"{quoted}"</p>
        </div>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 20px;">
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {color}; margin-bottom: 5px;">📊</div>
                <div style="font-weight: bold; color: #4A5568;">Confidence</div>
                <div style="font-size: 18px; color: #2D3748;">{confidence:.1%}</div>
            </div>
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {color}; margin-bottom: 5px;">🌐</div>
                <div style="font-weight: bold; color: #4A5568;">Language</div>
                <div style="font-size: 18px; color: #2D3748;">{language}</div>
            </div>
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {color}; margin-bottom: 5px;">⚡</div>
                <div style="font-weight: bold; color: #4A5568;">Intensity</div>
                <div style="font-size: 18px; color: #2D3748;">{intensity:.2f}</div>
            </div>
        </div>
    </div>
    """
    # Companion visualisations built from the same per-class score dict.
    return (
        output_html,
        create_sentiment_chart(result['scores']),
        create_radar_chart(result['scores']),
    )
def analyze_csv_file(csv_file):
    """Batch-analyze a CSV of reviews.

    The first column of the uploaded file is treated as review text.
    Returns (markdown summary, path of the generated results CSV, plotly
    dashboard figure), or (error message, None, None) on any failure.
    """
    try:
        if csv_file is None:
            return "❌ Please upload a CSV file.", None, None
        print("Reading CSV file...")
        # BUGFIX: depending on the Gradio version, gr.File passes the handler
        # either a plain filepath string or a tempfile wrapper exposing
        # .name; accept both instead of assuming .name exists.
        csv_path = csv_file if isinstance(csv_file, str) else csv_file.name
        df = pd.read_csv(csv_path)
        # Assume first column contains reviews.
        review_column = df.columns[0]
        # BUGFIX: astype(str) guards against numeric columns —
        # analyze_sentiment() calls .strip() on its input, and a float/int
        # cell would raise AttributeError and abort the entire batch.
        reviews = df[review_column].dropna().astype(str).tolist()
        if not reviews:
            return "❌ No reviews found in the CSV file.", None, None
        print(f"Analyzing {len(reviews)} reviews...")
        results = analyzer.batch_analyze(reviews)
        # Flatten the per-review result dicts into a tabular frame.
        results_df = pd.DataFrame({
            'Review': [r['text'] for r in results],
            'Sentiment': [r['sentiment'] for r in results],
            'Confidence': [r['confidence'] for r in results],
            'Sentiment_Score': [r['sentiment_score'] for r in results],
            'Language': [r['language'] for r in results],
            'Emotional_Intensity': [r['emotional_intensity'] for r in results],
            'Very_Negative_Score': [r['scores']['Very Negative'] for r in results],
            'Negative_Score': [r['scores']['Negative'] for r in results],
            'Neutral_Score': [r['scores']['Neutral'] for r in results],
            'Positive_Score': [r['scores']['Positive'] for r in results],
            'Very_Positive_Score': [r['scores']['Very Positive'] for r in results],
        })
        # Aggregate analytics for the summary text and the dashboard.
        sentiment_counts = results_df['Sentiment'].value_counts()
        avg_confidence = results_df['Confidence'].mean()
        avg_sentiment_score = results_df['Sentiment_Score'].mean()
        language_distribution = results_df['Language'].value_counts()
        # 2x2 dashboard: two pies on top, two histograms below.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Sentiment Distribution', 'Language Distribution',
                            'Confidence Distribution', 'Sentiment Scores'),
            specs=[[{"type": "pie"}, {"type": "pie"}],
                   [{"type": "histogram"}, {"type": "histogram"}]]
        )
        # Sentiment pie chart, colored consistently with the single view.
        fig.add_trace(
            go.Pie(
                labels=sentiment_counts.index,
                values=sentiment_counts.values,
                marker_colors=[analyzer.sentiment_colors.get(sent, '#FFD93D') for sent in sentiment_counts.index]
            ), 1, 1
        )
        # Language pie chart (top 10 languages only, to stay readable).
        top_languages = language_distribution.head(10)
        fig.add_trace(
            go.Pie(labels=top_languages.index, values=top_languages.values),
            1, 2
        )
        # Confidence and sentiment-score histograms.
        fig.add_trace(go.Histogram(x=results_df['Confidence'], nbinsx=20), 2, 1)
        fig.add_trace(go.Histogram(x=results_df['Sentiment_Score'], nbinsx=20), 2, 2)
        fig.update_layout(height=600, showlegend=False, template="plotly_white")
        # Persist detailed results to the working directory for download.
        output_filename = f"advanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        results_df.to_csv(output_filename, index=False)
        # Markdown summary rendered in the "Analysis Summary" panel.
        summary = f"""
## 📊 BATCH ANALYSIS COMPLETE
**Dataset Overview:**
- 📝 **Total Reviews Analyzed:** {len(results):,}
- 🌐 **Languages Detected:** {len(language_distribution)}
- ⏱️ **Analysis Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
**Sentiment Breakdown:**
- 🟢 **Very Positive:** {sentiment_counts.get('Very Positive', 0):,}
- 🟡 **Positive:** {sentiment_counts.get('Positive', 0):,}
- ⚪ **Neutral:** {sentiment_counts.get('Neutral', 0):,}
- 🟠 **Negative:** {sentiment_counts.get('Negative', 0):,}
- 🔴 **Very Negative:** {sentiment_counts.get('Very Negative', 0):,}
**Performance Metrics:**
- 📈 **Average Confidence:** {avg_confidence:.1%}
- 🎯 **Average Sentiment Score:** {avg_sentiment_score:.2f}
- 🏆 **Most Common Language:** {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}
**Files Generated:**
- 💾 **Results CSV:** `{output_filename}`
- 📊 **Analytics Dashboard:** See chart below
**Next Steps:**
- Download the CSV for detailed analysis
- Use filters to segment by sentiment or language
- Identify trends and patterns in customer feedback
"""
        return summary, output_filename, fig
    except Exception as e:
        error_msg = f"❌ Error processing file: {str(e)}"
        print(error_msg)
        return error_msg, None, None
# Create simple Gradio interface without any unsupported parameters
# UI layout: three tabs (single review, batch CSV, about). The callbacks
# wired below are analyze_single_review and analyze_csv_file.
with gr.Blocks() as demo:
    # App header shown above all tabs.
    gr.Markdown("""
    # 🌍 Advanced Multilingual Sentiment Analysis
    *Powered by fine-tuned multilingual transformer model supporting 23 languages*
    Analyze customer reviews, social media posts, and feedback across multiple languages with state-of-the-art accuracy.
    """)
    with gr.Tab("🔍 Single Review Analysis"):
        with gr.Row():
            # Left column: free-text input and trigger button.
            with gr.Column():
                gr.Markdown("### 📥 Input Review")
                single_review = gr.Textbox(
                    label="Enter text in any supported language",
                    placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
                    lines=4
                )
                analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                gr.Markdown("""
                **Supported Languages:**
                English, Chinese, Spanish, Hindi, Arabic, Bengali, Portuguese, Russian,
                Japanese, German, Malay, Telugu, Vietnamese, Korean, French, Turkish,
                Italian, Polish, Ukrainian, Tagalog, Dutch, Swiss German, Swahili
                """)
            # Right column: HTML summary plus two plotly charts.
            with gr.Column():
                gr.Markdown("### 📊 Analysis Results")
                output_html = gr.HTML(label="Detailed Analysis")
                with gr.Row():
                    bar_chart = gr.Plot(label="Sentiment Distribution")
                    radar_chart = gr.Plot(label="Sentiment Radar")
        # Wire the button to the single-review handler (3 outputs).
        analyze_btn.click(
            analyze_single_review,
            inputs=single_review,
            outputs=[output_html, bar_chart, radar_chart]
        )
    with gr.Tab("📁 Batch CSV Analysis"):
        with gr.Row():
            # Left column: CSV upload, format notes, trigger button.
            with gr.Column():
                gr.Markdown("### 📤 Upload CSV File")
                csv_upload = gr.File(
                    label="Upload CSV file with reviews",
                    file_types=[".csv"]
                )
                gr.Markdown("""
                **CSV Format Requirements:**
                - First column should contain the review text
                - File should be UTF-8 encoded
                - Maximum file size: 100MB
                - Supports up to 10,000 reviews per batch
                """)
                batch_analyze_btn = gr.Button("📈 Analyze Batch", variant="primary")
            # Right column: markdown summary, downloadable CSV, dashboard.
            with gr.Column():
                gr.Markdown("### 📋 Analysis Summary")
                batch_output = gr.Markdown(label="Batch Summary")
                download_output = gr.File(label="Download Results")
        batch_chart = gr.Plot(label="Batch Analytics")
        # Wire the button to the batch handler (summary, file, figure).
        batch_analyze_btn.click(
            analyze_csv_file,
            inputs=csv_upload,
            outputs=[batch_output, download_output, batch_chart]
        )
    with gr.Tab("ℹ️ About & Instructions"):
        # Static documentation tab; no callbacks.
        gr.Markdown("""
        ## 🎯 About This Tool
        This advanced sentiment analysis system uses a fine-tuned multilingual transformer model to analyze text in 23 languages.
        ### 🌟 Key Features
        - **Multilingual Support**: Analyze sentiment in 23 languages
        - **5-Point Scale**: Very Negative → Negative → Neutral → Positive → Very Positive
        - **Advanced Analytics**: Confidence scores, emotional intensity, language detection
        - **Batch Processing**: Analyze thousands of reviews via CSV upload
        - **Visual Analytics**: Interactive charts and comprehensive dashboards
        ### 🚀 Use Cases
        - **E-commerce**: Product reviews from global marketplaces
        - **Customer Support**: Analyze support tickets and feedback
        - **Social Media**: Monitor brand sentiment across languages
        - **Market Research**: Understand international customer opinions
        ### 🔧 Technical Details
        - **Base Model**: DistilBERT Multilingual
        - **Languages**: 23 languages
        - **Sentiment Scale**: 5-point (Very Negative to Very Positive)
        - **Processing**: Real-time analysis with batch capabilities
        """)
# Launch the application
if __name__ == "__main__":
    # share=False: no public gradio.live tunnel; debug=True enables verbose
    # server logging (intended for Space/local development).
    demo.launch(share=False, debug=True)