DevNumb committed on
Commit
67f1b99
·
verified ·
1 Parent(s): f3713da

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +455 -0
app.py ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
5
+ import pandas as pd
6
+ import numpy as np
7
+ from datetime import datetime
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+ from plotly.subplots import make_subplots
11
+
12
class AdvancedSentimentAnalyzer:
    """Five-class sentiment classifier with a keyword-based language guess.

    Wraps a ``text-classification`` pipeline and converts its per-class
    scores into a result dictionary holding the dominant label, its
    confidence, a weighted score, a guessed language and an intensity value.
    """

    # Languages whose keywords are matched as substrings rather than whole
    # words: they are either written without spaces between words or their
    # keywords are particles/prefixes attached to neighbouring words.
    _SUBSTRING_LANGUAGES = frozenset({'chinese', 'japanese', 'korean', 'arabic'})

    # Characters trimmed from both ends of a whitespace-separated token
    # before it is compared with a keyword.
    _EDGE_PUNCTUATION = ".,;:!?¡¿\"'()[]{}<>«»…—–-।"

    def __init__(self, model_name="tabularisai/multilingual-sentiment-analysis"):
        """Load the tokenizer, the model and the classification pipeline.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
        """
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        # analyze_sentiment() maps scores to labels by position, so it relies
        # on the pipeline returning one score per class in label-id order.
        self.classifier = pipeline(
            "text-classification",
            model=self.model,
            tokenizer=self.tokenizer,
            return_all_scores=True
        )

        # Class index -> human readable label.
        self.sentiment_map = {
            0: "Very Negative",
            1: "Negative",
            2: "Neutral",
            3: "Positive",
            4: "Very Positive"
        }

        # Label -> hex colour used by the charts and the HTML result card.
        self.sentiment_colors = {
            "Very Negative": "#FF6B6B",
            "Negative": "#FFA8A8",
            "Neutral": "#FFD93D",
            "Positive": "#6BCF7F",
            "Very Positive": "#4ECDC4"
        }

        # A handful of very common words per language, used by detect_language().
        self.language_detection_keywords = {
            'english': ['the', 'and', 'is', 'in', 'to'],
            'spanish': ['el', 'la', 'de', 'que', 'y'],
            'french': ['le', 'la', 'de', 'et', 'que'],
            'german': ['der', 'die', 'das', 'und', 'zu'],
            'italian': ['il', 'la', 'di', 'e', 'che'],
            'portuguese': ['o', 'a', 'de', 'e', 'que'],
            'dutch': ['de', 'het', 'en', 'van', 'te'],
            'russian': ['и', 'в', 'не', 'на', 'я'],
            'chinese': ['的', '是', '在', '了', '有'],
            'japanese': ['の', 'に', 'は', 'を', 'た'],
            'korean': ['이', '에', '는', '을', '다'],
            'arabic': ['ال', 'في', 'من', 'على', 'أن'],
            'hindi': ['की', 'से', 'है', 'और', 'के'],
            'turkish': ['ve', 'bir', 'bu', 'ile', 'için']
        }

    def detect_language(self, text: str) -> str:
        """Guess the language of *text* from a small table of common words.

        Keywords are compared against whole whitespace-separated tokens, so a
        one-letter keyword such as ``'e'`` no longer matches inside unrelated
        words. Languages listed in ``_SUBSTRING_LANGUAGES`` keep substring
        matching.

        Args:
            text: The text to inspect.

        Returns:
            The capitalised name of the best scoring language, or
            ``'Unknown'`` when no keyword of any language is found. Ties go
            to the language listed first in the keyword table.
        """
        text_lower = text.lower()
        # Split on whitespace and trim edge punctuation instead of using a
        # ``\w+`` regex, which would cut combining vowel signs off Hindi words.
        tokens = {
            token.strip(self._EDGE_PUNCTUATION) for token in text_lower.split()
        }

        scores = {}
        for lang, keywords in self.language_detection_keywords.items():
            haystack = text_lower if lang in self._SUBSTRING_LANGUAGES else tokens
            scores[lang] = sum(1 for keyword in keywords if keyword in haystack)

        if not scores:
            return 'Unknown'

        detected_lang = max(scores, key=scores.get)
        if scores[detected_lang] == 0:
            # Nothing matched: do not pretend the first table entry was detected.
            return 'Unknown'
        return detected_lang.capitalize()

    def analyze_sentiment(self, text: str) -> dict:
        """Classify *text* and derive summary metrics from the class scores.

        Args:
            text: The text to classify.

        Returns:
            A dict with the keys ``text``, ``sentiment``, ``confidence``,
            ``scores``, ``sentiment_score`` (probability-weighted value on a
            -2..+2 scale), ``language``, ``emotional_intensity`` (highest
            minus lowest class score) and ``timestamp``. If anything fails, a
            neutral placeholder with the same keys plus ``error`` (the
            exception message) is returned instead of raising.
        """
        try:
            # Ask the tokenizer to truncate over-long inputs instead of
            # letting them reach the model untruncated.
            # NOTE(review): assumes the tokenizer defines a model max length - confirm.
            predictions = self.classifier(text, truncation=True)[0]

            # Position i in the prediction list is mapped to class index i.
            sentiment_scores = {
                self.sentiment_map[i]: pred['score']
                for i, pred in enumerate(predictions)
            }

            dominant_sentiment = max(sentiment_scores, key=sentiment_scores.get)
            confidence = sentiment_scores[dominant_sentiment]

            # Weighted score on a -2 (very negative) to +2 (very positive) scale.
            sentiment_score = (
                sentiment_scores["Very Positive"] * 2 +
                sentiment_scores["Positive"] * 1 +
                sentiment_scores["Neutral"] * 0 +
                sentiment_scores["Negative"] * -1 +
                sentiment_scores["Very Negative"] * -2
            )

            detected_language = self.detect_language(text)

            # Spread between the most and least likely class.
            emotional_intensity = max(sentiment_scores.values()) - min(sentiment_scores.values())

            return {
                'text': text,
                'sentiment': dominant_sentiment,
                'confidence': confidence,
                'scores': sentiment_scores,
                'sentiment_score': sentiment_score,
                'language': detected_language,
                'emotional_intensity': emotional_intensity,
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            # Deliberate best-effort: one bad text must not abort a batch.
            # The failure is reported through the 'error' key.
            return {
                'text': text,
                'sentiment': 'Neutral',
                'confidence': 0.0,
                'scores': {sent: 0.2 for sent in self.sentiment_map.values()},
                'sentiment_score': 0,
                'language': 'Unknown',
                'emotional_intensity': 0.0,
                # Same key set as the success path, so consumers can rely on it.
                'timestamp': datetime.now().isoformat(),
                'error': str(e)
            }

    def batch_analyze(self, texts) -> list:
        """Run :meth:`analyze_sentiment` on every item of *texts*, in order."""
        return [self.analyze_sentiment(text) for text in texts]
126
+
127
# Single module-level analyzer instance; the chart helpers and the Gradio
# callbacks below all read from it.
analyzer = AdvancedSentimentAnalyzer()
129
+
130
def create_sentiment_chart(scores):
    """Build a bar chart with one bar per sentiment class.

    Args:
        scores: Mapping of sentiment label to score. Every label must be a
            key of ``analyzer.sentiment_colors``.

    Returns:
        A Plotly figure whose bars are coloured per label and annotated with
        the score formatted as a percentage.
    """
    labels = list(scores.keys())
    values = list(scores.values())

    bars = go.Bar(
        x=labels,
        y=values,
        marker_color=[analyzer.sentiment_colors[label] for label in labels],
        text=[f'{value:.1%}' for value in values],
        textposition='auto',
    )
    chart = go.Figure(data=[bars])

    layout_options = {
        'title': "Sentiment Distribution",
        'xaxis_title': "Sentiment",
        'yaxis_title': "Confidence Score",
        'template': "plotly_white",
        'height': 300,
    }
    chart.update_layout(**layout_options)

    return chart
151
+
152
def create_radar_chart(scores):
    """Build a filled radar (polar) chart of the sentiment scores.

    Args:
        scores: Mapping of sentiment label to score; labels become the
            angular axis and scores the radius.

    Returns:
        A Plotly figure with a radial axis fixed to the range 0..1.
    """
    radii = list(scores.values())
    angles = list(scores.keys())

    trace = go.Scatterpolar(
        r=radii,
        theta=angles,
        fill='toself',
        line={'color': '#4ECDC4'},
        marker={'size': 8},
    )
    chart = go.Figure(data=trace)

    # Fixed radial range so charts of different reviews are comparable.
    radial_axis = {'visible': True, 'range': [0, 1]}
    chart.update_layout(
        polar={'radialaxis': radial_axis},
        showlegend=False,
        template="plotly_white",
        height=300,
    )

    return chart
174
+
175
def analyze_single_review(review_text):
    """Analyze one review and render an HTML result card plus two charts.

    Args:
        review_text: The text entered by the user. ``None``, an empty string
            or whitespace-only text is rejected with a prompt message.

    Returns:
        A 3-tuple ``(html, bar_chart, radar_chart)``. For rejected input the
        tuple is ``("Please enter some text to analyze.", None, None)``.
    """
    # Local import: the module-level import list does not include ``html``.
    from html import escape

    # Guard against None as well as blank text before calling .strip().
    if not review_text or not review_text.strip():
        return "Please enter some text to analyze.", None, None

    result = analyzer.analyze_sentiment(review_text)

    sentiment_color = analyzer.sentiment_colors[result['sentiment']]
    # The review is untrusted user input that ends up inside HTML, so it must
    # be escaped or any markup it contains would be rendered by the browser.
    safe_text = escape(result['text'])

    output_html = f"""
    <div style="padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {sentiment_color}20, {sentiment_color}40); border-left: 5px solid {sentiment_color};">
        <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
            <h3 style="margin: 0; color: #2D3748;">🎯 Analysis Result</h3>
            <span style="background-color: {sentiment_color}; color: white; padding: 5px 15px; border-radius: 20px; font-weight: bold;">
                {result['sentiment'].upper()}
            </span>
        </div>

        <div style="background: white; padding: 15px; border-radius: 10px; margin: 10px 0;">
            <p style="margin: 0; font-style: italic;">"{safe_text}"</p>
        </div>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 20px;">
            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {sentiment_color}; margin-bottom: 5px;">📊</div>
                <div style="font-weight: bold; color: #4A5568;">Confidence</div>
                <div style="font-size: 18px; color: #2D3748;">{result['confidence']:.1%}</div>
            </div>

            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {sentiment_color}; margin-bottom: 5px;">🌐</div>
                <div style="font-weight: bold; color: #4A5568;">Language</div>
                <div style="font-size: 18px; color: #2D3748;">{result['language']}</div>
            </div>

            <div style="background: white; padding: 15px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px; color: {sentiment_color}; margin-bottom: 5px;">⚡</div>
                <div style="font-weight: bold; color: #4A5568;">Intensity</div>
                <div style="font-size: 18px; color: #2D3748;">{result['emotional_intensity']:.2f}</div>
            </div>
        </div>
    </div>
    """

    # Both charts visualise the same per-class score mapping.
    bar_chart = create_sentiment_chart(result['scores'])
    radar_chart = create_radar_chart(result['scores'])

    return output_html, bar_chart, radar_chart
225
+
226
def analyze_csv_file(csv_file):
    """Analyze every review in the first column of an uploaded CSV file.

    Args:
        csv_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object exposing the path as ``.name``. ``None`` means nothing
            was uploaded.

    Returns:
        A 3-tuple ``(summary_markdown, results_csv_path, figure)``. When the
        input is missing, contains no reviews, or processing fails, the tuple
        is ``(message, None, None)``.

    Side effects:
        Writes the per-review results to a timestamped CSV file in the
        current working directory and prints a progress line.
    """
    # Local import: the module-level import list does not include ``os``.
    import os

    if csv_file is None:
        return "Please upload a CSV file to analyze.", None, None

    try:
        # The upload component in this file is configured with
        # type="filepath", so a plain path arrives here; objects carrying the
        # path in ``.name`` are still accepted for backward compatibility.
        if isinstance(csv_file, (str, os.PathLike)):
            csv_path = csv_file
        else:
            csv_path = csv_file.name
        df = pd.read_csv(csv_path)

        # Assume first column contains reviews. Cast to str so numeric-looking
        # cells are analyzed as text rather than failing inside the analyzer.
        review_column = df.columns[0]
        reviews = df[review_column].dropna().astype(str).tolist()

        if not reviews:
            # Nothing to analyze: avoid NaN averages and empty charts.
            return "❌ No reviews found in the first column of the uploaded file.", None, None

        print(f"Analyzing {len(reviews)} reviews...")
        results = analyzer.batch_analyze(reviews)

        # One row per review, including every per-class score.
        results_df = pd.DataFrame({
            'Review': [r['text'] for r in results],
            'Sentiment': [r['sentiment'] for r in results],
            'Confidence': [r['confidence'] for r in results],
            'Sentiment_Score': [r['sentiment_score'] for r in results],
            'Language': [r['language'] for r in results],
            'Emotional_Intensity': [r['emotional_intensity'] for r in results],
            'Very_Negative_Score': [r['scores']['Very Negative'] for r in results],
            'Negative_Score': [r['scores']['Negative'] for r in results],
            'Neutral_Score': [r['scores']['Neutral'] for r in results],
            'Positive_Score': [r['scores']['Positive'] for r in results],
            'Very_Positive_Score': [r['scores']['Very Positive'] for r in results],
        })

        # Aggregate statistics used by both the dashboard and the summary.
        sentiment_counts = results_df['Sentiment'].value_counts()
        avg_confidence = results_df['Confidence'].mean()
        avg_sentiment_score = results_df['Sentiment_Score'].mean()
        language_distribution = results_df['Language'].value_counts()

        # 2x2 dashboard: two pies on top, two histograms below.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Sentiment Distribution', 'Language Distribution',
                            'Confidence Distribution', 'Sentiment Scores'),
            specs=[[{"type": "pie"}, {"type": "pie"}],
                   [{"type": "histogram"}, {"type": "histogram"}]]
        )

        # Sentiment pie chart, coloured with the analyzer's palette.
        fig.add_trace(
            go.Pie(
                labels=sentiment_counts.index,
                values=sentiment_counts.values,
                marker_colors=[analyzer.sentiment_colors[sent] for sent in sentiment_counts.index]
            ), 1, 1
        )

        # Language pie chart
        fig.add_trace(
            go.Pie(labels=language_distribution.index, values=language_distribution.values),
            1, 2
        )

        # Confidence histogram
        fig.add_trace(go.Histogram(x=results_df['Confidence'], nbinsx=20), 2, 1)

        # Sentiment score histogram
        fig.add_trace(go.Histogram(x=results_df['Sentiment_Score'], nbinsx=20), 2, 2)

        fig.update_layout(height=600, showlegend=False, template="plotly_white")

        # One timestamp for both the file name and the summary text.
        finished_at = datetime.now()
        output_filename = f"advanced_sentiment_analysis_{finished_at.strftime('%Y%m%d_%H%M%S')}.csv"
        results_df.to_csv(output_filename, index=False)

        most_common_language = (
            language_distribution.index[0] if len(language_distribution) > 0 else 'N/A'
        )

        # Built line by line so the Markdown never carries source-code
        # indentation, which Markdown would treat as a code block.
        summary_lines = [
            "",
            "📊 **BATCH ANALYSIS COMPLETE**",
            "",
            "**Dataset Overview:**",
            f"- 📝 Total Reviews Analyzed: {len(results):,}",
            f"- 🌐 Languages Detected: {len(language_distribution)}",
            f"- ⏱️ Analysis Timestamp: {finished_at.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "**Sentiment Breakdown:**",
            f"🟢 Very Positive: {sentiment_counts.get('Very Positive', 0):,}",
            f"🟡 Positive: {sentiment_counts.get('Positive', 0):,}",
            f"⚪ Neutral: {sentiment_counts.get('Neutral', 0):,}",
            f"🟠 Negative: {sentiment_counts.get('Negative', 0):,}",
            f"🔴 Very Negative: {sentiment_counts.get('Very Negative', 0):,}",
            "",
            "**Performance Metrics:**",
            f"- 📈 Average Confidence: {avg_confidence:.1%}",
            f"- 🎯 Average Sentiment Score: {avg_sentiment_score:.2f}",
            f"- 🏆 Most Common Language: {most_common_language}",
            "",
            "**Files Generated:**",
            f"- 💾 Results CSV: `{output_filename}`",
            "- 📊 Analytics Dashboard: See chart below",
            "",
            "**Next Steps:**",
            "- Download the CSV for detailed analysis",
            "- Use filters to segment by sentiment or language",
            "- Identify trends and patterns in customer feedback",
            "",
        ]
        summary = "\n".join(summary_lines)

        return summary, output_filename, fig

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Error processing file: {str(e)}", None, None
330
+
331
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
# Static styling and copy are kept apart from the layout code below.
_CUSTOM_CSS = """
.gradio-container {
max-width: 1200px !important;
}
.sentiment-positive { border-left: 4px solid #6BCF7F !important; }
.sentiment-negative { border-left: 4px solid #FF6B6B !important; }
.sentiment-neutral { border-left: 4px solid #FFD93D !important; }
"""

_HEADER_MD = """
# 🌍 Advanced Multilingual Sentiment Analysis

*Powered by fine-tuned multilingual transformer model supporting 23 languages*

Analyze customer reviews, social media posts, and feedback across multiple languages with state-of-the-art accuracy.
"""

_SUPPORTED_LANGUAGES_MD = """
**Supported Languages:**
English, Chinese, Spanish, Hindi, Arabic, Bengali, Portuguese, Russian,
Japanese, German, Malay, Telugu, Vietnamese, Korean, French, Turkish,
Italian, Polish, Ukrainian, Tagalog, Dutch, Swiss German, Swahili
"""

_CSV_REQUIREMENTS_MD = """
**CSV Format Requirements:**
- First column should contain the review text
- File should be UTF-8 encoded
- Maximum file size: 100MB
- Supports up to 10,000 reviews per batch
"""

_ABOUT_MD = """
## 🎯 About This Tool

This advanced sentiment analysis system uses a fine-tuned multilingual transformer model to analyze text in 23 languages.

### 🌟 Key Features

- **Multilingual Support**: Analyze sentiment in 23 languages
- **5-Point Scale**: Very Negative → Negative → Neutral → Positive → Very Positive
- **Advanced Analytics**: Confidence scores, emotional intensity, language detection
- **Batch Processing**: Analyze thousands of reviews via CSV upload
- **Visual Analytics**: Interactive charts and comprehensive dashboards

### 🚀 Use Cases

- **E-commerce**: Product reviews from global marketplaces
- **Customer Support**: Analyze support tickets and feedback
- **Social Media**: Monitor brand sentiment across languages
- **Market Research**: Understand international customer opinions

### 📊 Model Information

- **Base Model**: `distilbert-base-multilingual-cased`
- **Fine-tuned on**: Synthetic multilingual data
- **Languages**: 23 languages including major world languages
- **Accuracy**: State-of-the-art performance across languages

### 🔧 Technical Details

The model uses a transformer architecture fine-tuned specifically for sentiment analysis across multiple languages and cultural contexts.
"""

with gr.Blocks(
    theme=gr.themes.Soft(),
    title="🌍 Multilingual Sentiment Analyzer",
    css=_CUSTOM_CSS,
) as demo:

    gr.Markdown(_HEADER_MD)

    # Tab 1: free-text input on the left, result card and charts on the right.
    with gr.Tab("🔍 Single Review Analysis"):
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Input Review")
                single_review = gr.Textbox(
                    label="Enter text in any supported language",
                    placeholder="Type your review here... (Supports 23 languages including English, Spanish, Chinese, French, German, Arabic, etc.)",
                    lines=4,
                    elem_id="review-input",
                )
                analyze_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")

                gr.Markdown(_SUPPORTED_LANGUAGES_MD)

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Analysis Results")
                output_html = gr.HTML(label="Detailed Analysis")

                with gr.Row():
                    bar_chart = gr.Plot(label="Sentiment Distribution")
                    radar_chart = gr.Plot(label="Sentiment Radar")

        # Button click -> analyze_single_review -> (html, bar chart, radar chart).
        analyze_btn.click(
            fn=analyze_single_review,
            inputs=single_review,
            outputs=[output_html, bar_chart, radar_chart],
        )

    # Tab 2: CSV upload on the left, summary / download / dashboard on the right.
    with gr.Tab("📁 Batch CSV Analysis"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📤 Upload CSV File")
                csv_upload = gr.File(
                    label="Upload CSV file with reviews",
                    file_types=[".csv"],
                    type="filepath",
                )
                gr.Markdown(_CSV_REQUIREMENTS_MD)

                batch_analyze_btn = gr.Button("📈 Analyze Batch", variant="primary")

            with gr.Column():
                gr.Markdown("### 📋 Analysis Summary")
                batch_output = gr.Markdown(label="Batch Summary")
                download_output = gr.File(label="Download Results")
                batch_chart = gr.Plot(label="Batch Analytics")

        # Button click -> analyze_csv_file -> (summary, results file, dashboard).
        batch_analyze_btn.click(
            fn=analyze_csv_file,
            inputs=csv_upload,
            outputs=[batch_output, download_output, batch_chart],
        )

    # Tab 3: static description of the tool.
    with gr.Tab("ℹ️ About & Instructions"):
        gr.Markdown(_ABOUT_MD)
449
+
450
if __name__ == "__main__":
    # Start the web app only when this file is executed as a script.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "show_error": True,
    }
    demo.launch(**launch_options)