# app.py
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
class AdvancedSentimentAnalyzer:
    """Multilingual 5-class sentiment analyzer.

    Wraps a Hugging Face text-classification pipeline that emits five
    classes (Very Negative .. Very Positive) and adds a lightweight
    keyword-based language guesser plus derived metrics (scalar
    sentiment score, emotional intensity).
    """

    def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
        """Load the tokenizer/model pipeline; degrade gracefully on failure.

        If the model cannot be loaded (no network, bad name, ...) the
        classifier is set to None and analyze_sentiment() returns a
        neutral error result instead of raising.
        """
        print("Loading model and tokenizer...")
        self.model_name = model_name
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
            # top_k=None returns scores for ALL classes (modern replacement
            # for the deprecated return_all_scores=True).
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                top_k=None,
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback: analyze_sentiment() checks for None and reports the error.
            self.classifier = None
        # Class-index -> human-readable label, in the model's output order.
        self.sentiment_map = {
            0: "Very Negative",
            1: "Negative",
            2: "Neutral",
            3: "Positive",
            4: "Very Positive",
        }
        # Display colors used by the chart helpers and HTML output.
        self.sentiment_colors = {
            "Very Negative": "#FF6B6B",
            "Negative": "#FFA8A8",
            "Neutral": "#FFD93D",
            "Positive": "#6BCF7F",
            "Very Positive": "#4ECDC4",
        }
        # Common function words per language for crude detection.
        # NOTE: matching is substring-based, so very short keywords can
        # fire spuriously inside unrelated words.
        self.language_detection_keywords = {
            'english': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for'],
            'spanish': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'por'],
            'french': ['le', 'la', 'de', 'et', 'que', 'en', 'un', 'pour'],
            'german': ['der', 'die', 'das', 'und', 'zu', 'in', 'den', 'mit'],
            'italian': ['il', 'la', 'di', 'e', 'che', 'in', 'un', 'per'],
            'portuguese': ['o', 'a', 'de', 'e', 'que', 'em', 'um', 'para'],
            'dutch': ['de', 'het', 'en', 'van', 'te', 'in', 'een', 'voor'],
            'russian': ['и', 'в', 'не', 'на', 'я', 'что', 'он', 'с'],
            'chinese': ['的', '是', '在', '了', '有', '和', '为', '我'],
            'japanese': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て'],
            # Bug fix: the Korean entry previously contained Japanese
            # particles (は, を, が, で, て) copy-pasted from the Japanese
            # list; replaced with common Korean particles/endings.
            'korean': ['이', '의', '에', '는', '을', '가', '다', '고'],
            'arabic': ['ال', 'في', 'من', 'على', 'أن', 'ما', 'هو', 'إلى'],
            'hindi': ['की', 'से', 'है', 'और', 'के', 'में', 'यह', 'को'],
            'turkish': ['ve', 'bir', 'bu', 'ile', 'için', 'ama', 'da', 'de'],
        }
        print("Model loaded successfully!")

    def _error_result(self, text, message):
        """Return the uniform neutral fallback result carrying *message*."""
        return {
            'text': text,
            'sentiment': 'Neutral',
            'confidence': 0.0,
            'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
            'sentiment_score': 0,
            'language': 'Unknown',
            'emotional_intensity': 0.0,
            'error': message,
        }

    def detect_language(self, text):
        """Best-effort language guess by counting common-word hits.

        Returns a capitalized language name, or 'Unknown' when the input
        is empty/non-string or no keyword matches at all.
        """
        if not text or not isinstance(text, str):
            return 'Unknown'
        text_lower = text.lower()
        scores = {
            lang: sum(1 for keyword in keywords if keyword in text_lower)
            for lang, keywords in self.language_detection_keywords.items()
        }
        # Only commit to a language when at least one keyword matched.
        if not scores or max(scores.values()) == 0:
            return 'Unknown'
        return max(scores, key=scores.get).capitalize()

    def analyze_sentiment(self, text):
        """Run 5-class sentiment analysis over *text*.

        Returns a dict with keys: text, sentiment, confidence, scores,
        sentiment_score (-2..+2 expected value), language,
        emotional_intensity, timestamp — or an error result (with an
        'error' key) on empty input, missing model, or pipeline failure.
        """
        if not text or not text.strip():
            return self._error_result(text, 'No text provided')
        if self.classifier is None:
            # Model failed to load in __init__; report it explicitly
            # instead of crashing with 'NoneType is not callable'.
            return self._error_result(text, 'Model not loaded')
        try:
            # top_k=None pipeline returns a list per input; [0] is this text.
            predictions = self.classifier(text)[0]
            sentiment_scores = {}
            for pred in predictions:
                raw_label = pred['label']
                label = raw_label.lower()
                score = pred['score']
                # Bug fix: test the "very ..." variants FIRST — the original
                # checked 'positive' before 'very positive', so a
                # "Very Positive" label was mis-bucketed as "Positive".
                if 'very negative' in label or raw_label == 'LABEL_0':
                    sentiment_scores["Very Negative"] = score
                elif 'very positive' in label or raw_label == 'LABEL_4':
                    sentiment_scores["Very Positive"] = score
                elif 'negative' in label or raw_label == 'LABEL_1':
                    sentiment_scores["Negative"] = score
                elif 'neutral' in label or raw_label == 'LABEL_2':
                    sentiment_scores["Neutral"] = score
                elif 'positive' in label or raw_label == 'LABEL_3':
                    sentiment_scores["Positive"] = score
                else:
                    # Unrecognized label: assign to the first still-empty
                    # slot in scale order as a last resort.
                    for key in self.sentiment_map.values():
                        if key not in sentiment_scores:
                            sentiment_scores[key] = score
                            break
            # Ensure all five categories are present.
            for sentiment in self.sentiment_map.values():
                sentiment_scores.setdefault(sentiment, 0.0)
            # Dominant class and its probability.
            dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
            confidence = sentiment_scores[dominant_sentiment]
            # Expected value on a -2..+2 scale.
            sentiment_score = (
                sentiment_scores["Very Positive"] * 2 +
                sentiment_scores["Positive"] * 1 +
                sentiment_scores["Neutral"] * 0 +
                sentiment_scores["Negative"] * -1 +
                sentiment_scores["Very Negative"] * -2
            )
            detected_language = self.detect_language(text)
            # Spread between strongest and weakest class — a rough proxy
            # for how "peaked" (emotionally loaded) the distribution is.
            emotional_intensity = max(sentiment_scores.values()) - min(sentiment_scores.values())
            return {
                'text': text,
                'sentiment': dominant_sentiment,
                'confidence': confidence,
                'scores': sentiment_scores,
                'sentiment_score': sentiment_score,
                'language': detected_language,
                'emotional_intensity': emotional_intensity,
                'timestamp': datetime.now().isoformat(),
            }
        except Exception as e:
            print(f"Error in sentiment analysis: {e}")
            return self._error_result(text, str(e))

    def batch_analyze(self, texts):
        """Analyze each text in *texts*; returns a list of result dicts."""
        results = []
        for i, text in enumerate(texts):
            if i % 10 == 0:
                print(f"Processing {i}/{len(texts)}...")
            results.append(self.analyze_sentiment(text))
        return results
# Initialize analyzer
print("Initializing sentiment analyzer...")
# Single module-level instance shared by every Gradio callback below.
# NOTE(review): this downloads/loads the model at import time, so app
# startup blocks until the model is ready (or the load fails).
analyzer = AdvancedSentimentAnalyzer()
def create_sentiment_chart(scores):
    """Build a bar chart of per-class sentiment confidences.

    *scores* maps sentiment label -> confidence; bars use the app's
    shared color palette. Returns a plotly Figure, or None on failure.
    """
    try:
        labels = list(scores.keys())
        values = list(scores.values())
        bars = go.Bar(
            x=labels,
            y=values,
            marker_color=[analyzer.sentiment_colors[label] for label in labels],
            text=[f'{value:.1%}' for value in values],
            textposition='auto',
        )
        fig = go.Figure(data=[bars])
        fig.update_layout(
            title="Sentiment Distribution",
            xaxis_title="Sentiment",
            yaxis_title="Confidence Score",
            template="plotly_white",
            height=300,
        )
        return fig
    except Exception as exc:
        # Charting must never take down the request handler.
        print(f"Error creating chart: {exc}")
        return None
def create_radar_chart(scores):
    """Build a radar (polar) view of the five sentiment confidences.

    Returns a plotly Figure, or None if construction fails.
    """
    try:
        trace = go.Scatterpolar(
            r=list(scores.values()),
            theta=list(scores.keys()),
            fill='toself',
            line=dict(color='#4ECDC4'),
            marker=dict(size=8),
        )
        fig = go.Figure(data=trace)
        # Confidences are probabilities, so pin the radial axis to [0, 1].
        polar_cfg = dict(radialaxis=dict(visible=True, range=[0, 1]))
        fig.update_layout(
            polar=polar_cfg,
            showlegend=False,
            template="plotly_white",
            height=300,
        )
        return fig
    except Exception as exc:
        # Never let a visualization error break the analysis flow.
        print(f"Error creating radar chart: {exc}")
        return None
def analyze_single_review(review_text):
    """Analyze one review and return (HTML summary, bar chart, radar chart).

    Empty input short-circuits with an error message and no charts.
    """
    if not review_text or not review_text.strip():
        return "❌ Please enter some text to analyze.", None, None
    print(f"Analyzing: {review_text[:100]}...")
    result = analyzer.analyze_sentiment(review_text)
    # Headline color keyed to the dominant sentiment (neutral yellow default).
    sentiment_color = analyzer.sentiment_colors.get(result['sentiment'], '#FFD93D')
    # Bug fix: the original fed plain text (no tags) to a gr.HTML component,
    # so line breaks collapsed and the computed sentiment_color was never
    # used. Emit real markup so the result renders as intended.
    output_html = f"""
<div style="text-align:center; font-family:sans-serif;">
  <h3>🎯 Analysis Result</h3>
  <h1 style="color:{sentiment_color}; margin:0.2em 0;">{result['sentiment'].upper()}</h1>
  <p>📊 <b>Confidence:</b> {result['confidence']:.1%}</p>
  <p>🌐 <b>Language:</b> {result['language']}</p>
  <p>⚡ <b>Intensity:</b> {result['emotional_intensity']:.2f}</p>
</div>
"""
    # Create charts
    bar_chart = create_sentiment_chart(result['scores'])
    radar_chart = create_radar_chart(result['scores'])
    return output_html, bar_chart, radar_chart
def analyze_csv_file(csv_file):
    """Analyze reviews from an uploaded CSV file with advanced analytics.

    Expects review text in the FIRST column of the CSV. Returns a tuple
    (markdown summary, path to the results CSV, plotly dashboard figure),
    or (error message, None, None) on any failure.
    """
    try:
        if csv_file is None:
            return "❌ Please upload a CSV file.", None, None
        print("Reading CSV file...")
        # Gradio File components expose the uploaded temp file's path via .name.
        df = pd.read_csv(csv_file.name)
        # Assume first column contains reviews
        review_column = df.columns[0]
        reviews = df[review_column].dropna().tolist()
        if not reviews:
            return "❌ No reviews found in the CSV file.", None, None
        print(f"Analyzing {len(reviews)} reviews...")
        results = analyzer.batch_analyze(reviews)
        # Create comprehensive results dataframe: one row per review with
        # the dominant label, derived metrics, and all five class scores.
        results_df = pd.DataFrame({
            'Review': [r['text'] for r in results],
            'Sentiment': [r['sentiment'] for r in results],
            'Confidence': [r['confidence'] for r in results],
            'Sentiment_Score': [r['sentiment_score'] for r in results],
            'Language': [r['language'] for r in results],
            'Emotional_Intensity': [r['emotional_intensity'] for r in results],
            'Very_Negative_Score': [r['scores']['Very Negative'] for r in results],
            'Negative_Score': [r['scores']['Negative'] for r in results],
            'Neutral_Score': [r['scores']['Neutral'] for r in results],
            'Positive_Score': [r['scores']['Positive'] for r in results],
            'Very_Positive_Score': [r['scores']['Very Positive'] for r in results],
        })
        # Generate aggregate analytics for the summary text below.
        sentiment_counts = results_df['Sentiment'].value_counts()
        avg_confidence = results_df['Confidence'].mean()
        avg_sentiment_score = results_df['Sentiment_Score'].mean()
        language_distribution = results_df['Language'].value_counts()
        # Create a 2x2 summary dashboard: two pies on top, two histograms below.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Sentiment Distribution', 'Language Distribution',
                            'Confidence Distribution', 'Sentiment Scores'),
            specs=[[{"type": "pie"}, {"type": "pie"}],
                   [{"type": "histogram"}, {"type": "histogram"}]]
        )
        # Sentiment pie chart (colored with the shared app palette)
        fig.add_trace(
            go.Pie(
                labels=sentiment_counts.index,
                values=sentiment_counts.values,
                marker_colors=[analyzer.sentiment_colors.get(sent, '#FFD93D') for sent in sentiment_counts.index]
            ), 1, 1
        )
        # Language pie chart (top 10 languages)
        top_languages = language_distribution.head(10)
        fig.add_trace(
            go.Pie(labels=top_languages.index, values=top_languages.values),
            1, 2
        )
        # Confidence histogram
        fig.add_trace(go.Histogram(x=results_df['Confidence'], nbinsx=20), 2, 1)
        # Sentiment score histogram
        fig.add_trace(go.Histogram(x=results_df['Sentiment_Score'], nbinsx=20), 2, 2)
        fig.update_layout(height=600, showlegend=False, template="plotly_white")
        # Save results to a timestamped CSV in the working directory so the
        # gr.File output can offer it for download.
        output_filename = f"advanced_sentiment_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        results_df.to_csv(output_filename, index=False)
        # Generate comprehensive markdown summary for the gr.Markdown output.
        summary = f"""
## 📊 BATCH ANALYSIS COMPLETE
**Dataset Overview:**
- 📝 **Total Reviews Analyzed:** {len(results):,}
- 🌐 **Languages Detected:** {len(language_distribution)}
- ⏱️ **Analysis Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
**Sentiment Breakdown:**
- 🟢 **Very Positive:** {sentiment_counts.get('Very Positive', 0):,}
- 🟡 **Positive:** {sentiment_counts.get('Positive', 0):,}
- ⚪ **Neutral:** {sentiment_counts.get('Neutral', 0):,}
- 🟠 **Negative:** {sentiment_counts.get('Negative', 0):,}
- 🔴 **Very Negative:** {sentiment_counts.get('Very Negative', 0):,}
**Performance Metrics:**
- 📈 **Average Confidence:** {avg_confidence:.1%}
- 🎯 **Average Sentiment Score:** {avg_sentiment_score:.2f}
- 🏆 **Most Common Language:** {language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'}
**Files Generated:**
- 💾 **Results CSV:** `{output_filename}`
- 📊 **Analytics Dashboard:** See chart below
**Next Steps:**
- Download the CSV for detailed analysis
- Use filters to segment by sentiment or language
- Identify trends and patterns in customer feedback
"""
        return summary, output_filename, fig
    except Exception as e:
        error_msg = f"❌ Error processing file: {str(e)}"
        print(error_msg)
        return error_msg, None, None
# Create simple Gradio interface without any unsupported parameters.
# Layout: three tabs — single-review analysis, batch CSV analysis, and an
# about/instructions page. Callbacks are the module-level functions above.
with gr.Blocks() as demo:
    gr.Markdown("""
# 🌍 Advanced Multilingual Sentiment Analysis
*Powered by fine-tuned multilingual transformer model supporting 23 languages*
Analyze customer reviews, social media posts, and feedback across multiple languages with state-of-the-art accuracy.
""")
    with gr.Tab("🔍 Single Review Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📥 Input Review")
                single_review = gr.Textbox(
                    label="Enter text in any supported language",
                    placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
                    lines=4
                )
                analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                gr.Markdown("""
**Supported Languages:**
English, Chinese, Spanish, Hindi, Arabic, Bengali, Portuguese, Russian,
Japanese, German, Malay, Telugu, Vietnamese, Korean, French, Turkish,
Italian, Polish, Ukrainian, Tagalog, Dutch, Swiss German, Swahili
""")
            with gr.Column():
                gr.Markdown("### 📊 Analysis Results")
                output_html = gr.HTML(label="Detailed Analysis")
                with gr.Row():
                    bar_chart = gr.Plot(label="Sentiment Distribution")
                    radar_chart = gr.Plot(label="Sentiment Radar")
        # Wire the button to the single-review callback: one text input,
        # three outputs (HTML summary + two plots).
        analyze_btn.click(
            analyze_single_review,
            inputs=single_review,
            outputs=[output_html, bar_chart, radar_chart]
        )
    with gr.Tab("📁 Batch CSV Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📤 Upload CSV File")
                csv_upload = gr.File(
                    label="Upload CSV file with reviews",
                    file_types=[".csv"]
                )
                gr.Markdown("""
**CSV Format Requirements:**
- First column should contain the review text
- File should be UTF-8 encoded
- Maximum file size: 100MB
- Supports up to 10,000 reviews per batch
""")
                batch_analyze_btn = gr.Button("📈 Analyze Batch", variant="primary")
            with gr.Column():
                gr.Markdown("### 📋 Analysis Summary")
                batch_output = gr.Markdown(label="Batch Summary")
                download_output = gr.File(label="Download Results")
        batch_chart = gr.Plot(label="Batch Analytics")
        # Batch callback: file input -> (markdown summary, downloadable CSV, dashboard).
        batch_analyze_btn.click(
            analyze_csv_file,
            inputs=csv_upload,
            outputs=[batch_output, download_output, batch_chart]
        )
    with gr.Tab("ℹ️ About & Instructions"):
        gr.Markdown("""
## 🎯 About This Tool
This advanced sentiment analysis system uses a fine-tuned multilingual transformer model to analyze text in 23 languages.
### 🌟 Key Features
- **Multilingual Support**: Analyze sentiment in 23 languages
- **5-Point Scale**: Very Negative → Negative → Neutral → Positive → Very Positive
- **Advanced Analytics**: Confidence scores, emotional intensity, language detection
- **Batch Processing**: Analyze thousands of reviews via CSV upload
- **Visual Analytics**: Interactive charts and comprehensive dashboards
### 🚀 Use Cases
- **E-commerce**: Product reviews from global marketplaces
- **Customer Support**: Analyze support tickets and feedback
- **Social Media**: Monitor brand sentiment across languages
- **Market Research**: Understand international customer opinions
### 🔧 Technical Details
- **Base Model**: DistilBERT Multilingual
- **Languages**: 23 languages
- **Sentiment Scale**: 5-point (Very Negative to Very Positive)
- **Processing**: Real-time analysis with batch capabilities
""")
# Launch the application
if __name__ == "__main__":
    # share=False keeps the app local-only; debug=True surfaces tracebacks
    # in the console while developing.
    demo.launch(share=False, debug=True)