# app.py
"""Multilingual sentiment-analysis app.

Wraps a 5-class multilingual transformer (Very Negative .. Very Positive)
in a Gradio UI with single-text and batch-CSV modes, plus Plotly analytics.
"""
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json


class AdvancedSentimentAnalyzer:
    """5-class sentiment analyzer with keyword-based language detection."""

    def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
        """Load model/tokenizer; on failure, classifier stays None and
        analyze_sentiment() returns its neutral error payload instead."""
        print("Loading model and tokenizer...")
        self.model_name = model_name
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
            # top_k=None returns scores for every class
            # (modern replacement for the deprecated return_all_scores=True).
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                top_k=None,
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback: analyze_sentiment() degrades gracefully.
            self.classifier = None

        # Index -> human-readable class name (matches the model's 5 classes).
        self.sentiment_map = {
            0: "Very Negative",
            1: "Negative",
            2: "Neutral",
            3: "Positive",
            4: "Very Positive",
        }
        # Display colors used by the HTML card and the charts.
        self.sentiment_colors = {
            "Very Negative": "#FF6B6B",
            "Negative": "#FFA8A8",
            "Neutral": "#FFD93D",
            "Positive": "#6BCF7F",
            "Very Positive": "#4ECDC4",
        }
        # Common function words / particles per language; detect_language()
        # counts substring hits (substrings, not tokens, so this also works
        # for unspaced scripts like Chinese/Japanese).
        self.language_detection_keywords = {
            'english': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for'],
            'spanish': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'por'],
            'french': ['le', 'la', 'de', 'et', 'que', 'en', 'un', 'pour'],
            'german': ['der', 'die', 'das', 'und', 'zu', 'in', 'den', 'mit'],
            'italian': ['il', 'la', 'di', 'e', 'che', 'in', 'un', 'per'],
            'portuguese': ['o', 'a', 'de', 'e', 'que', 'em', 'um', 'para'],
            'dutch': ['de', 'het', 'en', 'van', 'te', 'in', 'een', 'voor'],
            'russian': ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с'],
            'chinese': ['的', '是', '在', '了', '有', '和', '为', '我'],
            'japanese': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て'],
            # BUG FIX: this row previously contained Japanese kana copy-pasted
            # from the line above ('は', 'を', 'が', ...), so Korean text could
            # never score; replaced with common Korean particles.
            'korean': ['이', '에', '은', '는', '다', '가', '를', '고'],
            'arabic': ['ال', 'في', 'من', 'على', 'أن', 'ما', 'هو', 'إلى'],
            'hindi': ['की', 'से', 'है', 'और', 'के', 'में', 'यह', 'को'],
            'turkish': ['ve', 'bir', 'bu', 'ile', 'için', 'ama', 'da', 'de'],
        }
        print("Model loaded successfully!")

    def detect_language(self, text):
        """Simple language detection based on common words.

        Returns a capitalized language name, or 'Unknown' when the input is
        empty/non-string or no keyword matches at all.
        """
        if not text or not isinstance(text, str):
            return 'Unknown'

        text_lower = text.lower()
        scores = {
            lang: sum(1 for keyword in keywords if keyword in text_lower)
            for lang, keywords in self.language_detection_keywords.items()
        }

        # Only return a language if at least one keyword actually matched.
        if scores and max(scores.values()) > 0:
            detected_lang = max(scores, key=scores.get)
        else:
            detected_lang = 'unknown'
        return detected_lang.capitalize()

    def analyze_sentiment(self, text):
        """Analyze one text; returns a dict with sentiment, confidence,
        per-class scores, a -2..+2 sentiment_score, detected language and
        emotional intensity. Never raises: errors yield a neutral payload
        with an 'error' key."""
        if not text or not text.strip():
            return {
                'text': text,
                'sentiment': 'Neutral',
                'confidence': 0.0,
                'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
                'sentiment_score': 0,
                'language': 'Unknown',
                'emotional_intensity': 0.0,
                'error': 'No text provided',
            }

        try:
            # top_k=None pipeline output: list (one per input) of dicts
            # with 'label'/'score' for every class.
            predictions = self.classifier(text)[0]

            sentiment_scores = {}
            for pred in predictions:
                label = pred['label']
                lowered = label.lower()
                score = pred['score']
                # BUG FIX: the "very ..." labels must be tested BEFORE the
                # plain ones — 'positive' is a substring of 'very positive',
                # so the old ordering misfiled "Very Positive" as "Positive".
                if 'very negative' in lowered or label == 'LABEL_0':
                    sentiment_scores["Very Negative"] = score
                elif 'very positive' in lowered or label == 'LABEL_4':
                    sentiment_scores["Very Positive"] = score
                elif 'negative' in lowered or label == 'LABEL_1':
                    sentiment_scores["Negative"] = score
                elif 'neutral' in lowered or label == 'LABEL_2':
                    sentiment_scores["Neutral"] = score
                elif 'positive' in lowered or label == 'LABEL_3':
                    sentiment_scores["Positive"] = score
                else:
                    # Fallback for unrecognized labels: fill the first
                    # still-missing bucket in canonical order.
                    for key in self.sentiment_map.values():
                        if key not in sentiment_scores:
                            sentiment_scores[key] = score
                            break

            # Ensure all sentiment categories are present.
            for sentiment in self.sentiment_map.values():
                if sentiment not in sentiment_scores:
                    sentiment_scores[sentiment] = 0.0

            dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
            confidence = sentiment_scores[dominant_sentiment]

            # Weighted average on a -2..+2 scale.
            sentiment_score = (
                sentiment_scores["Very Positive"] * 2
                + sentiment_scores["Positive"] * 1
                + sentiment_scores["Neutral"] * 0
                + sentiment_scores["Negative"] * -1
                + sentiment_scores["Very Negative"] * -2
            )

            detected_language = self.detect_language(text)

            # Spread between strongest and weakest class score.
            emotional_intensity = (
                max(sentiment_scores.values()) - min(sentiment_scores.values())
            )

            return {
                'text': text,
                'sentiment': dominant_sentiment,
                'confidence': confidence,
                'scores': sentiment_scores,
                'sentiment_score': sentiment_score,
                'language': detected_language,
                'emotional_intensity': emotional_intensity,
                'timestamp': datetime.now().isoformat(),
            }
        except Exception as e:
            print(f"Error in sentiment analysis: {e}")
            return {
                'text': text,
                'sentiment': 'Neutral',
                'confidence': 0.0,
                'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
                'sentiment_score': 0,
                'language': 'Unknown',
                'emotional_intensity': 0.0,
                'error': str(e),
            }

    def batch_analyze(self, texts):
        """Analyze multiple texts, logging progress every 10 items."""
        results = []
        for i, text in enumerate(texts):
            if i % 10 == 0:
                print(f"Processing {i}/{len(texts)}...")
            results.append(self.analyze_sentiment(text))
        return results


# Initialize analyzer (module-level singleton shared by the UI callbacks).
print("Initializing sentiment analyzer...")
analyzer = AdvancedSentimentAnalyzer()


def create_sentiment_chart(scores):
    """Bar chart of per-class confidence; returns None on failure."""
    try:
        fig = go.Figure(data=[
            go.Bar(
                x=list(scores.keys()),
                y=list(scores.values()),
                marker_color=[analyzer.sentiment_colors[sent] for sent in scores.keys()],
                text=[f'{score:.1%}' for score in scores.values()],
                textposition='auto',
            )
        ])
        fig.update_layout(
            title="Sentiment Distribution",
            xaxis_title="Sentiment",
            yaxis_title="Confidence Score",
            template="plotly_white",
            height=300,
        )
        return fig
    except Exception as e:
        print(f"Error creating chart: {e}")
        return None


def create_radar_chart(scores):
    """Radar (polar) chart of per-class confidence; returns None on failure."""
    try:
        fig = go.Figure(data=go.Scatterpolar(
            r=list(scores.values()),
            theta=list(scores.keys()),
            fill='toself',
            line=dict(color='#4ECDC4'),
            marker=dict(size=8),
        ))
        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 1],
                )),
            showlegend=False,
            template="plotly_white",
            height=300,
        )
        return fig
    except Exception as e:
        print(f"Error creating radar chart: {e}")
        return None


def analyze_single_review(review_text):
    """Gradio callback: analyze one review and return (HTML, bar, radar)."""
    if not review_text or not review_text.strip():
        return "❌ Please enter some text to analyze.", None, None

    print(f"Analyzing: {review_text[:100]}...")
    result = analyzer.analyze_sentiment(review_text)

    sentiment_color = analyzer.sentiment_colors.get(result['sentiment'], '#FFD93D')

    # NOTE(review): the original HTML markup was lost to whitespace mangling
    # (only its text content survived); reconstructed as a clean result card
    # carrying the same data fields — confirm against the intended design.
    output_html = f"""
    <div style="border: 2px solid {sentiment_color}; border-radius: 12px; padding: 20px;">
        <h3>🎯 Analysis Result</h3>
        <h2 style="color: {sentiment_color}; margin: 8px 0;">{result['sentiment'].upper()}</h2>
        <p><em>"{result['text']}"</em></p>
        <p>📊 <strong>Confidence:</strong> {result['confidence']:.1%}</p>
        <p>🌐 <strong>Language:</strong> {result['language']}</p>
        <p><strong>Intensity:</strong> {result['emotional_intensity']:.2f}</p>
    </div>
    """

    bar_chart = create_sentiment_chart(result['scores'])
    radar_chart = create_radar_chart(result['scores'])
    return output_html, bar_chart, radar_chart


def analyze_csv_file(csv_file):
    """Gradio callback: batch-analyze a CSV of reviews.

    Returns (markdown summary, results-CSV path, Plotly dashboard figure);
    on any error returns (error message, None, None).
    """
    try:
        if csv_file is None:
            return "❌ Please upload a CSV file.", None, None

        print("Reading CSV file...")
        # ROBUSTNESS: newer Gradio versions pass a filepath string instead of
        # a tempfile object with a .name attribute — accept both.
        csv_path = getattr(csv_file, "name", csv_file)
        df = pd.read_csv(csv_path)

        # Assume the first column contains the reviews.
        review_column = df.columns[0]
        # BUG FIX: cast to str — a single numeric cell previously raised
        # AttributeError in text.strip() and aborted the whole batch.
        reviews = df[review_column].dropna().astype(str).tolist()

        if not reviews:
            return "❌ No reviews found in the CSV file.", None, None

        print(f"Analyzing {len(reviews)} reviews...")
        results = analyzer.batch_analyze(reviews)

        results_df = pd.DataFrame({
            'Review': [r['text'] for r in results],
            'Sentiment': [r['sentiment'] for r in results],
            'Confidence': [r['confidence'] for r in results],
            'Sentiment_Score': [r['sentiment_score'] for r in results],
            'Language': [r['language'] for r in results],
            'Emotional_Intensity': [r['emotional_intensity'] for r in results],
            'Very_Negative_Score': [r['scores']['Very Negative'] for r in results],
            'Negative_Score': [r['scores']['Negative'] for r in results],
            'Neutral_Score': [r['scores']['Neutral'] for r in results],
            'Positive_Score': [r['scores']['Positive'] for r in results],
            'Very_Positive_Score': [r['scores']['Very Positive'] for r in results],
        })

        # Aggregate analytics for the summary and dashboard.
        sentiment_counts = results_df['Sentiment'].value_counts()
        avg_confidence = results_df['Confidence'].mean()
        avg_sentiment_score = results_df['Sentiment_Score'].mean()
        language_distribution = results_df['Language'].value_counts()

        # 2x2 dashboard: sentiment pie, language pie, two histograms.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Sentiment Distribution', 'Language Distribution',
                            'Confidence Distribution', 'Sentiment Scores'),
            specs=[[{"type": "pie"}, {"type": "pie"}],
                   [{"type": "histogram"}, {"type": "histogram"}]],
        )

        fig.add_trace(
            go.Pie(
                labels=sentiment_counts.index,
                values=sentiment_counts.values,
                marker_colors=[analyzer.sentiment_colors.get(sent, '#FFD93D')
                               for sent in sentiment_counts.index],
            ),
            1, 1,
        )

        # Language pie chart (top 10 languages).
        top_languages = language_distribution.head(10)
        fig.add_trace(
            go.Pie(labels=top_languages.index, values=top_languages.values),
            1, 2,
        )

        fig.add_trace(go.Histogram(x=results_df['Confidence'], nbinsx=20), 2, 1)
        fig.add_trace(go.Histogram(x=results_df['Sentiment_Score'], nbinsx=20), 2, 2)

        fig.update_layout(height=600, showlegend=False, template="plotly_white")

        # Persist the per-review results for download.
        output_filename = f"advanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        results_df.to_csv(output_filename, index=False)

        summary = f"""
## 📊 BATCH ANALYSIS COMPLETE

**Dataset Overview:**
- 📝 **Total Reviews Analyzed:** {len(results):,}
- 🌐 **Languages Detected:** {len(language_distribution)}
- ⏱️ **Analysis Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

**Sentiment Breakdown:**
- 🟢 **Very Positive:** {sentiment_counts.get('Very Positive', 0):,}
- 🟡 **Positive:** {sentiment_counts.get('Positive', 0):,}
- ⚪ **Neutral:** {sentiment_counts.get('Neutral', 0):,}
- 🟠 **Negative:** {sentiment_counts.get('Negative', 0):,}
- 🔴 **Very Negative:** {sentiment_counts.get('Very Negative', 0):,}

**Performance Metrics:**
- 📈 **Average Confidence:** {avg_confidence:.1%}
- 🎯 **Average Sentiment Score:** {avg_sentiment_score:.2f}
- 🏆 **Most Common Language:** {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}

**Files Generated:**
- 💾 **Results CSV:** `{output_filename}`
- 📊 **Analytics Dashboard:** See chart below

**Next Steps:**
- Download the CSV for detailed analysis
- Use filters to segment by sentiment or language
- Identify trends and patterns in customer feedback
"""
        return summary, output_filename, fig

    except Exception as e:
        error_msg = f"❌ Error processing file: {str(e)}"
        print(error_msg)
        return error_msg, None, None


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🌍 Advanced Multilingual Sentiment Analysis
    *Powered by fine-tuned multilingual transformer model supporting 23 languages*

    Analyze customer reviews, social media posts, and feedback across multiple languages with state-of-the-art accuracy.
    """)

    with gr.Tab("🔍 Single Review Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📥 Input Review")
                single_review = gr.Textbox(
                    label="Enter text in any supported language",
                    placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
                    lines=4,
                )
                analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                gr.Markdown("""
                **Supported Languages:**
                English, Chinese, Spanish, Hindi, Arabic, Bengali, Portuguese, Russian, Japanese, German, Malay, Telugu, Vietnamese, Korean, French, Turkish, Italian, Polish, Ukrainian, Tagalog, Dutch, Swiss German, Swahili
                """)
            with gr.Column():
                gr.Markdown("### 📊 Analysis Results")
                output_html = gr.HTML(label="Detailed Analysis")
                with gr.Row():
                    bar_chart = gr.Plot(label="Sentiment Distribution")
                    radar_chart = gr.Plot(label="Sentiment Radar")

        analyze_btn.click(
            analyze_single_review,
            inputs=single_review,
            outputs=[output_html, bar_chart, radar_chart],
        )

    with gr.Tab("📁 Batch CSV Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📤 Upload CSV File")
                csv_upload = gr.File(
                    label="Upload CSV file with reviews",
                    file_types=[".csv"],
                )
                gr.Markdown("""
                **CSV Format Requirements:**
                - First column should contain the review text
                - File should be UTF-8 encoded
                - Maximum file size: 100MB
                - Supports up to 10,000 reviews per batch
                """)
                batch_analyze_btn = gr.Button("📈 Analyze Batch", variant="primary")
            with gr.Column():
                gr.Markdown("### 📋 Analysis Summary")
                batch_output = gr.Markdown(label="Batch Summary")
                download_output = gr.File(label="Download Results")
        batch_chart = gr.Plot(label="Batch Analytics")

        batch_analyze_btn.click(
            analyze_csv_file,
            inputs=csv_upload,
            outputs=[batch_output, download_output, batch_chart],
        )

    with gr.Tab("ℹ️ About & Instructions"):
        gr.Markdown("""
        ## 🎯 About This Tool

        This advanced sentiment analysis system uses a fine-tuned multilingual transformer model to analyze text in 23 languages.

        ### 🌟 Key Features
        - **Multilingual Support**: Analyze sentiment in 23 languages
        - **5-Point Scale**: Very Negative → Negative → Neutral → Positive → Very Positive
        - **Advanced Analytics**: Confidence scores, emotional intensity, language detection
        - **Batch Processing**: Analyze thousands of reviews via CSV upload
        - **Visual Analytics**: Interactive charts and comprehensive dashboards

        ### 🚀 Use Cases
        - **E-commerce**: Product reviews from global marketplaces
        - **Customer Support**: Analyze support tickets and feedback
        - **Social Media**: Monitor brand sentiment across languages
        - **Market Research**: Understand international customer opinions

        ### 🔧 Technical Details
        - **Base Model**: DistilBERT Multilingual
        - **Languages**: 23 languages
        - **Sentiment Scale**: 5-point (Very Negative to Very Positive)
        - **Processing**: Real-time analysis with batch capabilities
        """)

# Launch the application
if __name__ == "__main__":
    demo.launch(share=False, debug=True)