entropy25 commited on
Commit
5bbfe55
·
verified ·
1 Parent(s): f982ed9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +685 -312
app.py CHANGED
@@ -12,23 +12,35 @@ import tempfile
12
  from datetime import datetime
13
  import pandas as pd
14
  from lime.lime_text import LimeTextExplainer
15
- import shap
16
  import logging
 
 
17
 
18
- # Simple Configuration
19
  class Config:
20
- MAX_HISTORY = 500
21
  BATCH_LIMIT = 100
22
  THEMES = {
23
- 'default': {'pos': '#4CAF50', 'neg': '#F44336'},
24
- 'ocean': {'pos': '#2196F3', 'neg': '#FF5722'},
25
- 'dark': {'pos': '#00E676', 'neg': '#FF1744'}
 
26
  }
27
 
28
  config = Config()
29
  logging.basicConfig(level=logging.INFO)
30
 
31
- # Model Manager - Simplified
 
 
 
 
 
 
 
 
 
 
32
  class ModelManager:
33
  def __init__(self):
34
  self.custom_model = None
@@ -37,7 +49,6 @@ class ModelManager:
37
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
 
39
  def load_distilbert(self):
40
- """Load DistilBERT for fast analysis"""
41
  if self.distilbert is None:
42
  self.distilbert = pipeline(
43
  "sentiment-analysis",
@@ -47,7 +58,6 @@ class ModelManager:
47
  return self.distilbert
48
 
49
  def load_custom_model(self):
50
- """Load custom model for advanced analysis"""
51
  if self.custom_model is None:
52
  try:
53
  self.custom_tokenizer = BertTokenizer.from_pretrained("entropy25/sentimentanalysis")
@@ -59,13 +69,14 @@ class ModelManager:
59
  raise
60
  return self.custom_model, self.custom_tokenizer
61
 
62
- # Simple History Manager
63
  class HistoryManager:
64
  def __init__(self):
65
  self.history = []
66
 
67
  def add(self, entry):
68
  entry['timestamp'] = datetime.now().isoformat()
 
69
  self.history.append(entry)
70
  if len(self.history) > config.MAX_HISTORY:
71
  self.history = self.history[-config.MAX_HISTORY:]
@@ -73,6 +84,24 @@ class HistoryManager:
73
  def get_all(self):
74
  return self.history.copy()
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def clear(self):
77
  count = len(self.history)
78
  self.history.clear()
@@ -81,44 +110,139 @@ class HistoryManager:
81
  def size(self):
82
  return len(self.history)
83
 
84
- # Core Analysis Engine
85
  class SentimentEngine:
86
  def __init__(self):
87
  self.model_manager = ModelManager()
88
  self.lime_explainer = LimeTextExplainer(class_names=['Negative', 'Positive'])
89
 
90
- # SINGLE ANALYSIS - DistilBERT with visualizations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def analyze_single(self, text, theme='default'):
92
- """Single analysis with DistilBERT - fast and visual"""
93
  if not text.strip():
94
- return "Please enter text", None, None, None
95
 
96
  try:
97
  # Use DistilBERT for fast analysis
98
  distilbert = self.model_manager.load_distilbert()
99
  result = distilbert(text)[0]
100
 
101
- # Convert to our format
102
  sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
103
  confidence = result['score']
104
  pos_prob = confidence if sentiment == "Positive" else 1 - confidence
105
  neg_prob = 1 - pos_prob
106
 
 
 
 
 
 
 
107
  # Create visualizations
108
  prob_chart = self._create_probability_chart([neg_prob, pos_prob], theme)
109
  confidence_gauge = self._create_confidence_gauge(confidence, sentiment, theme)
110
  wordcloud = self._create_wordcloud(text, sentiment, theme)
 
111
 
112
- result_text = f"Sentiment: {sentiment}\nConfidence: {confidence:.3f}"
 
 
 
 
 
113
 
114
- return result_text, prob_chart, confidence_gauge, wordcloud
115
 
116
  except Exception as e:
117
- return f"Analysis failed: {str(e)}", None, None, None
118
 
119
- # ADVANCED ANALYSIS - Custom model with SHAP/LIME
120
  def analyze_advanced(self, text):
121
- """Advanced analysis with custom model and explanations"""
122
  if not text.strip():
123
  return "Please enter text", None, None
124
 
@@ -136,28 +260,183 @@ class SentimentEngine:
136
  sentiment = "Positive" if probs[1] > probs[0] else "Negative"
137
  confidence = probs.max()
138
 
139
- # Extract keywords using LIME
140
- keywords = self._extract_keywords_lime(text, model, tokenizer)
141
 
142
- # Create SHAP explanation
143
- shap_html = self._create_shap_explanation(text, model, tokenizer)
 
144
 
145
- result_text = f"Sentiment: {sentiment}\nConfidence: {confidence:.3f}\n\nTop Keywords:\n"
146
- result_text += "\n".join([f" {word}: {score:.3f}" for word, score in keywords[:10]])
 
 
 
 
147
 
148
- return result_text, shap_html, self._create_keyword_chart(keywords, sentiment)
 
 
 
 
149
 
150
  except Exception as e:
151
  return f"Advanced analysis failed: {str(e)}", None, None
152
 
153
- # BATCH ANALYSIS - DistilBERT for efficiency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  def analyze_batch(self, texts, progress_callback=None):
155
- """Batch analysis with DistilBERT"""
156
  if not texts or len(texts) < 2:
157
  return None, "Need at least 2 texts for batch analysis"
158
 
159
  try:
160
- # Limit batch size
161
  if len(texts) > config.BATCH_LIMIT:
162
  texts = texts[:config.BATCH_LIMIT]
163
 
@@ -177,253 +456,175 @@ class SentimentEngine:
177
  'text': text[:100] + "..." if len(text) > 100 else text,
178
  'full_text': text,
179
  'sentiment': sentiment,
180
- 'confidence': confidence
 
181
  })
182
  except:
183
  continue
184
 
185
  if results:
186
- chart = self._create_batch_chart(results)
187
- return chart, f"Analyzed {len(results)} texts successfully"
188
  else:
189
  return None, "Batch analysis failed"
190
 
191
  except Exception as e:
192
  return None, f"Batch analysis error: {str(e)}"
193
 
194
- # Helper methods for visualizations
195
- def _create_probability_chart(self, probs, theme):
196
- """Create probability bar chart"""
197
- fig, ax = plt.subplots(figsize=(8, 5))
198
- colors = [config.THEMES[theme]['neg'], config.THEMES[theme]['pos']]
199
- labels = ['Negative', 'Positive']
200
-
201
- bars = ax.bar(labels, probs, color=colors, alpha=0.8)
202
- ax.set_title('Sentiment Probabilities', fontsize=16, fontweight='bold')
203
- ax.set_ylabel('Probability')
204
- ax.set_ylim(0, 1)
205
-
206
- # Add value labels
207
- for bar, prob in zip(bars, probs):
208
- ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.02,
209
- f'{prob:.3f}', ha='center', va='bottom', fontweight='bold')
210
-
211
- plt.tight_layout()
212
- return fig
213
-
214
- def _create_confidence_gauge(self, confidence, sentiment, theme):
215
- """Create confidence gauge"""
216
- fig, ax = plt.subplots(figsize=(8, 5))
217
-
218
- # Create semi-circular gauge
219
- theta = np.linspace(0, np.pi, 100)
220
- r = 1
221
-
222
- # Background
223
- ax.fill_between(theta, 0, r, alpha=0.3, color='lightgray')
224
-
225
- # Confidence indicator
226
- conf_theta = np.pi * (0.5 + (0.4 if sentiment == 'Positive' else -0.4) * confidence)
227
- ax.plot([conf_theta, conf_theta], [0, r], 'k-', linewidth=8)
228
- ax.plot(conf_theta, r, 'ko', markersize=12)
229
-
230
- ax.set_xlim(0, np.pi)
231
- ax.set_ylim(0, 1.2)
232
- ax.set_title(f'{sentiment} - Confidence: {confidence:.3f}', fontsize=16, fontweight='bold')
233
- ax.set_xticks([0, np.pi/2, np.pi])
234
- ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
235
- ax.axis('off')
236
-
237
- plt.tight_layout()
238
- return fig
239
-
240
- def _create_wordcloud(self, text, sentiment, theme):
241
- """Create word cloud"""
242
- if len(text.split()) < 5:
243
- return None
244
-
245
- fig, ax = plt.subplots(figsize=(10, 6))
246
- colormap = 'Greens' if sentiment == 'Positive' else 'Reds'
247
-
248
- wc = WordCloud(width=800, height=400, background_color='white',
249
- colormap=colormap, max_words=50).generate(text)
250
-
251
- ax.imshow(wc, interpolation='bilinear')
252
- ax.axis('off')
253
- ax.set_title(f'{sentiment} Word Cloud', fontsize=16, fontweight='bold')
254
-
255
- plt.tight_layout()
256
- return fig
257
-
258
- def _extract_keywords_lime(self, text, model, tokenizer):
259
- """Extract keywords using LIME"""
260
- try:
261
- def predict_proba(texts):
262
- results = []
263
- for text in texts:
264
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
265
- inputs = {k: v.to(self.model_manager.device) for k, v in inputs.items()}
266
- with torch.no_grad():
267
- outputs = model(**inputs)
268
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
269
- results.append(probs)
270
- return np.array(results)
271
-
272
- explanation = self.lime_explainer.explain_instance(
273
- text, predict_proba, num_features=20, num_samples=100
274
- )
275
-
276
- return [(word, abs(score)) for word, score in explanation.as_list()]
277
-
278
- except Exception as e:
279
- logging.error(f"LIME extraction failed: {e}")
280
- return []
281
-
282
- def _create_shap_explanation(self, text, model, tokenizer):
283
- """Create SHAP explanation HTML"""
284
- try:
285
- # Simple word highlighting based on attention
286
- words = text.split()
287
- html_parts = ['<div style="font-family: Arial; font-size: 16px; line-height: 1.8;">']
288
-
289
- for word in words:
290
- # Simple heuristic for demonstration
291
- if any(pos_word in word.lower() for pos_word in ['good', 'great', 'excellent', 'amazing']):
292
- color = 'rgba(76, 175, 80, 0.3)'
293
- elif any(neg_word in word.lower() for neg_word in ['bad', 'terrible', 'awful', 'horrible']):
294
- color = 'rgba(244, 67, 54, 0.3)'
295
- else:
296
- color = 'transparent'
297
-
298
- html_parts.append(
299
- f'<span style="background-color: {color}; padding: 2px; margin: 1px; '
300
- f'border-radius: 3px;">{word}</span> '
301
- )
302
-
303
- html_parts.append('</div>')
304
- return ''.join(html_parts)
305
-
306
- except Exception as e:
307
- return f"SHAP explanation failed: {str(e)}"
308
-
309
- def _create_keyword_chart(self, keywords, sentiment):
310
- """Create keyword importance chart"""
311
- if not keywords:
312
- return None
313
-
314
- fig, ax = plt.subplots(figsize=(10, 6))
315
- words = [word for word, _ in keywords[:15]]
316
- scores = [score for _, score in keywords[:15]]
317
-
318
- color = config.THEMES['default']['pos'] if sentiment == 'Positive' else config.THEMES['default']['neg']
319
- bars = ax.barh(range(len(words)), scores, color=color, alpha=0.7)
320
-
321
- ax.set_yticks(range(len(words)))
322
- ax.set_yticklabels(words)
323
- ax.set_xlabel('Importance Score')
324
- ax.set_title(f'Top Keywords - {sentiment}', fontsize=16, fontweight='bold')
325
- ax.invert_yaxis()
326
-
327
- plt.tight_layout()
328
- return fig
329
-
330
- def _create_batch_chart(self, results):
331
- """Create batch analysis visualization"""
332
- fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
333
-
334
- # Sentiment distribution
335
- sent_counts = Counter([r['sentiment'] for r in results])
336
- colors = [config.THEMES['default']['pos'], config.THEMES['default']['neg']]
337
- ax1.pie(sent_counts.values(), labels=sent_counts.keys(), autopct='%1.1f%%', colors=colors)
338
- ax1.set_title('Sentiment Distribution')
339
-
340
- # Confidence distribution
341
- confidences = [r['confidence'] for r in results]
342
- ax2.hist(confidences, bins=15, alpha=0.7, color='skyblue', edgecolor='black')
343
- ax2.set_title('Confidence Distribution')
344
- ax2.set_xlabel('Confidence Score')
345
-
346
- # Sentiment over sequence
347
- sentiments = [1 if r['sentiment'] == 'Positive' else 0 for r in results]
348
- ax3.plot(sentiments, marker='o', alpha=0.7)
349
- ax3.set_title('Sentiment Sequence')
350
- ax3.set_ylabel('Sentiment (0=Neg, 1=Pos)')
351
- ax3.set_xlabel('Review Index')
352
-
353
- # Top words
354
- all_text = ' '.join([r['full_text'] for r in results])
355
- words = re.findall(r'\b\w{3,}\b', all_text.lower())
356
- word_counts = Counter(words).most_common(10)
357
-
358
- if word_counts:
359
- words, counts = zip(*word_counts)
360
- ax4.bar(range(len(words)), counts, alpha=0.7)
361
- ax4.set_xticks(range(len(words)))
362
- ax4.set_xticklabels(words, rotation=45)
363
- ax4.set_title('Most Frequent Words')
364
-
365
- plt.tight_layout()
366
- return fig
367
 
368
- # File Processor
369
  class FileProcessor:
370
  @staticmethod
371
  def process_file(file):
372
- """Simple file processing"""
373
  if not file:
374
  return ""
375
 
376
  try:
377
  if file.name.endswith('.csv'):
378
- df = pd.read_csv(file.name)
379
- # Find text column
380
- text_col = None
381
- for col in df.columns:
382
- if df[col].dtype == 'object':
383
- text_col = col
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  break
 
 
 
385
 
386
- if text_col:
387
- texts = df[text_col].dropna().astype(str).tolist()
388
- return '\n'.join([t for t in texts if len(t.strip()) > 10])
389
 
390
  else:
391
- with open(file.name, 'r', encoding='utf-8') as f:
392
- return f.read()
393
-
 
 
 
 
 
 
 
394
  except Exception as e:
395
- return f"File processing failed: {str(e)}"
396
-
397
- return ""
398
 
399
- # Data Exporter
400
  class DataExporter:
401
  @staticmethod
402
  def export_csv(data):
403
- """Export to CSV"""
404
  if not data:
405
  return None, "No data to export"
406
 
407
  try:
408
  temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv')
409
  writer = csv.writer(temp_file)
410
- writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence'])
 
 
 
411
 
412
  for entry in data:
 
 
 
 
 
 
413
  writer.writerow([
414
  entry.get('timestamp', ''),
415
  entry.get('text', ''),
 
416
  entry.get('sentiment', ''),
417
- f"{entry.get('confidence', 0):.4f}"
 
 
418
  ])
419
 
420
  temp_file.close()
421
- return temp_file.name, f"Exported {len(data)} entries"
422
 
423
  except Exception as e:
424
  return None, f"Export failed: {str(e)}"
425
 
426
- # Main Application
427
  class SentimentApp:
428
  def __init__(self):
429
  self.engine = SentimentEngine()
@@ -431,7 +632,6 @@ class SentimentApp:
431
  self.file_processor = FileProcessor()
432
  self.exporter = DataExporter()
433
 
434
- # Sample examples
435
  self.examples = [
436
  "This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout.",
437
  "Terrible film with poor acting and a confusing storyline. Complete waste of time.",
@@ -439,18 +639,102 @@ class SentimentApp:
439
  "An masterpiece of cinema! Every scene was beautifully crafted and emotionally powerful.",
440
  "The worst movie I've ever seen. Bad acting, terrible script, and boring plot."
441
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
 
443
  def create_interface():
444
- """Create the Gradio interface"""
445
  app = SentimentApp()
446
 
447
- with gr.Blocks(theme=gr.themes.Soft(), title="Sentiment Analysis Suite") as demo:
448
- gr.Markdown("# 🎭 Movie Sentiment Analysis Suite")
449
- gr.Markdown("**Three specialized analysis modes: Single, Advanced, and Batch processing**")
450
 
451
- # TAB 1: SINGLE ANALYSIS
452
  with gr.Tab("🎯 Single Analysis"):
453
- gr.Markdown("### Fast analysis with DistilBERT + Rich visualizations")
454
 
455
  with gr.Row():
456
  with gr.Column():
@@ -464,7 +748,7 @@ def create_interface():
464
  theme_dropdown = gr.Dropdown(
465
  choices=list(config.THEMES.keys()),
466
  value="default",
467
- label="Theme"
468
  )
469
 
470
  gr.Examples(
@@ -473,99 +757,138 @@ def create_interface():
473
  )
474
 
475
  with gr.Column():
476
- single_result = gr.Textbox(label="Analysis Result", lines=4)
 
477
 
478
  with gr.Row():
479
- prob_plot = gr.Plot(label="Probability Distribution")
480
  confidence_plot = gr.Plot(label="Confidence Gauge")
481
 
482
- wordcloud_plot = gr.Plot(label="Word Cloud Visualization")
 
 
483
 
484
- # TAB 2: ADVANCED ANALYSIS
485
  with gr.Tab("🧠 Advanced Analysis"):
486
- gr.Markdown("### Deep analysis with custom model + SHAP/LIME explanations")
487
 
488
  with gr.Row():
489
  with gr.Column():
490
  advanced_input = gr.Textbox(
491
  label="Text for Deep Analysis",
492
- placeholder="Enter text for detailed analysis...",
493
  lines=6
494
  )
495
  advanced_btn = gr.Button("Deep Analyze", variant="primary", size="lg")
496
 
497
  with gr.Column():
498
- advanced_result = gr.Textbox(label="Detailed Results", lines=8)
499
 
500
  with gr.Row():
501
- shap_output = gr.HTML(label="SHAP Explanation")
502
- keyword_plot = gr.Plot(label="Key Contributing Words")
503
 
504
- # TAB 3: BATCH ANALYSIS
505
  with gr.Tab("📊 Batch Analysis"):
506
- gr.Markdown("### Efficient batch processing with DistilBERT")
507
 
508
  with gr.Row():
509
  with gr.Column():
510
- file_upload = gr.File(label="Upload CSV/TXT file")
 
 
 
511
  batch_input = gr.Textbox(
512
- label="Batch Text (one review per line)",
513
  placeholder="Paste multiple reviews, one per line...",
514
  lines=8
515
  )
516
 
517
  with gr.Column():
518
- load_file_btn = gr.Button("Load File")
519
  batch_btn = gr.Button("Analyze Batch", variant="primary", size="lg")
520
- batch_status = gr.Textbox(label="Status", lines=2)
521
 
522
- batch_plot = gr.Plot(label="Batch Analysis Results")
523
 
524
- # TAB 4: HISTORY & EXPORT
525
- with gr.Tab("📈 History & Export"):
526
- gr.Markdown("### Analysis history and data export")
527
 
528
  with gr.Row():
529
- refresh_btn = gr.Button("Refresh History")
530
- clear_btn = gr.Button("Clear History", variant="stop")
531
- status_display = gr.Textbox(label="History Status", value="Ready")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
- with gr.Row():
534
- export_csv_btn = gr.Button("Export as CSV", variant="secondary")
535
- export_file = gr.File(label="Download Export")
536
 
537
- history_plot = gr.Plot(label="Analysis Trends")
 
 
 
538
 
539
- # Event handlers
540
- def single_analysis_handler(text, theme):
541
- result_text, prob_fig, conf_fig, cloud_fig = app.engine.analyze_single(text, theme)
542
 
543
- # Add to history
544
- if "failed" not in result_text.lower():
545
- app.history.add({
546
- 'text': text[:100],
547
- 'full_text': text,
548
- 'sentiment': result_text.split('\n')[0].split(': ')[1] if ': ' in result_text else 'Unknown',
549
- 'confidence': float(result_text.split('Confidence: ')[1]) if 'Confidence: ' in result_text else 0.0
550
- })
 
 
 
 
 
 
 
551
 
552
- return result_text, prob_fig, conf_fig, cloud_fig
553
 
554
- def advanced_analysis_handler(text):
555
- result_text, shap_html, keyword_fig = app.engine.analyze_advanced(text)
556
 
557
- # Add to history
558
- if "failed" not in result_text.lower():
559
- app.history.add({
560
- 'text': text[:100],
561
- 'full_text': text,
562
- 'sentiment': result_text.split('\n')[0].split(': ')[1] if ': ' in result_text else 'Unknown',
563
- 'confidence': float(result_text.split('Confidence: ')[1].split('\n')[0]) if 'Confidence: ' in result_text else 0.0
564
- })
 
 
 
 
 
 
 
 
 
 
565
 
566
- return result_text, shap_html, keyword_fig
567
 
568
- def batch_analysis_handler(text):
569
  if not text.strip():
570
  return None, "Please provide text for batch analysis"
571
 
@@ -575,29 +898,68 @@ def create_interface():
575
 
576
  chart, status = app.engine.analyze_batch(texts)
577
 
578
- # Add successful results to history
579
- if chart and "successfully" in status:
580
- for text in texts:
581
- app.history.add({
582
- 'text': text[:100],
583
- 'full_text': text,
584
- 'sentiment': 'Batch',
585
- 'confidence': 0.0
586
- })
 
 
 
 
 
 
 
 
 
 
587
 
588
  return chart, status
589
 
590
- # Bind events
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  single_btn.click(
592
- single_analysis_handler,
593
  inputs=[single_input, theme_dropdown],
594
- outputs=[single_result, prob_plot, confidence_plot, wordcloud_plot]
595
  )
596
 
597
  advanced_btn.click(
598
- advanced_analysis_handler,
599
  inputs=advanced_input,
600
- outputs=[advanced_result, shap_output, keyword_plot]
601
  )
602
 
603
  load_file_btn.click(
@@ -607,31 +969,42 @@ def create_interface():
607
  )
608
 
609
  batch_btn.click(
610
- batch_analysis_handler,
611
  inputs=batch_input,
612
  outputs=[batch_plot, batch_status]
613
  )
614
 
615
- refresh_btn.click(
616
- lambda: f"History: {app.history.size()} analyses",
617
- outputs=status_display
618
  )
619
 
620
- clear_btn.click(
621
- lambda: f"Cleared {app.history.clear()} entries",
622
- outputs=status_display
623
  )
624
 
625
  export_csv_btn.click(
626
- lambda: app.exporter.export_csv(app.history.get_all()),
627
- outputs=[export_file, status_display]
 
 
 
 
 
 
 
 
 
 
 
628
  )
629
 
630
  return demo
631
 
632
- # Launch the application
633
  if __name__ == "__main__":
634
- logging.info("Starting Sentiment Analysis Suite...")
635
  demo = create_interface()
636
  demo.launch(
637
  share=True,
 
12
  from datetime import datetime
13
  import pandas as pd
14
  from lime.lime_text import LimeTextExplainer
 
15
  import logging
16
+ from contextlib import contextmanager
17
+ import gc
18
 
19
+ # Enhanced Configuration
20
  class Config:
21
+ MAX_HISTORY = 1000
22
  BATCH_LIMIT = 100
23
  THEMES = {
24
+ 'default': {'pos': '#4ecdc4', 'neg': '#ff6b6b'},
25
+ 'ocean': {'pos': '#0077be', 'neg': '#ff6b35'},
26
+ 'forest': {'pos': '#228b22', 'neg': '#dc143c'},
27
+ 'sunset': {'pos': '#ff8c00', 'neg': '#8b0000'}
28
  }
29
 
30
  config = Config()
31
  logging.basicConfig(level=logging.INFO)
32
 
33
+ @contextmanager
34
+ def managed_figure(*args, **kwargs):
35
+ """Context manager for matplotlib figures to prevent memory leaks"""
36
+ fig = plt.figure(*args, **kwargs)
37
+ try:
38
+ yield fig
39
+ finally:
40
+ plt.close(fig)
41
+ gc.collect()
42
+
43
+ # Enhanced Model Manager
44
  class ModelManager:
45
  def __init__(self):
46
  self.custom_model = None
 
49
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
 
51
  def load_distilbert(self):
 
52
  if self.distilbert is None:
53
  self.distilbert = pipeline(
54
  "sentiment-analysis",
 
58
  return self.distilbert
59
 
60
  def load_custom_model(self):
 
61
  if self.custom_model is None:
62
  try:
63
  self.custom_tokenizer = BertTokenizer.from_pretrained("entropy25/sentimentanalysis")
 
69
  raise
70
  return self.custom_model, self.custom_tokenizer
71
 
72
+ # Enhanced History Manager
73
  class HistoryManager:
74
  def __init__(self):
75
  self.history = []
76
 
77
  def add(self, entry):
78
  entry['timestamp'] = datetime.now().isoformat()
79
+ entry['analysis_type'] = entry.get('analysis_type', 'single')
80
  self.history.append(entry)
81
  if len(self.history) > config.MAX_HISTORY:
82
  self.history = self.history[-config.MAX_HISTORY:]
 
84
  def get_all(self):
85
  return self.history.copy()
86
 
87
+ def get_by_type(self, analysis_type):
88
+ return [h for h in self.history if h.get('analysis_type') == analysis_type]
89
+
90
+ def get_stats(self):
91
+ if not self.history:
92
+ return {'total': 0, 'positive': 0, 'negative': 0, 'avg_confidence': 0}
93
+
94
+ positive = sum(1 for h in self.history if h.get('sentiment') == 'Positive')
95
+ negative = len(self.history) - positive
96
+ avg_conf = np.mean([h.get('confidence', 0) for h in self.history])
97
+
98
+ return {
99
+ 'total': len(self.history),
100
+ 'positive': positive,
101
+ 'negative': negative,
102
+ 'avg_confidence': avg_conf
103
+ }
104
+
105
  def clear(self):
106
  count = len(self.history)
107
  self.history.clear()
 
110
  def size(self):
111
  return len(self.history)
112
 
113
+ # Enhanced Core Analysis Engine
114
  class SentimentEngine:
115
  def __init__(self):
116
  self.model_manager = ModelManager()
117
  self.lime_explainer = LimeTextExplainer(class_names=['Negative', 'Positive'])
118
 
119
+ def predict_proba_for_lime(self, texts):
120
+ """Prediction function for LIME using custom model"""
121
+ if isinstance(texts, str):
122
+ texts = [texts]
123
+
124
+ model, tokenizer = self.model_manager.load_custom_model()
125
+ results = []
126
+
127
+ for text in texts:
128
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
129
+ inputs = {k: v.to(self.model_manager.device) for k, v in inputs.items()}
130
+
131
+ with torch.no_grad():
132
+ outputs = model(**inputs)
133
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
134
+ results.append(probs)
135
+
136
+ return np.array(results)
137
+
138
+ def extract_keywords_lime(self, text, top_k=15):
139
+ """Enhanced keyword extraction using LIME"""
140
+ try:
141
+ explanation = self.lime_explainer.explain_instance(
142
+ text, self.predict_proba_for_lime, num_features=top_k*2, num_samples=200
143
+ )
144
+
145
+ word_scores = []
146
+ for word, score in explanation.as_list():
147
+ if len(word.strip()) >= 2:
148
+ word_scores.append((word.strip().lower(), score))
149
+
150
+ # Sort by absolute importance
151
+ word_scores.sort(key=lambda x: abs(x[1]), reverse=True)
152
+ return word_scores[:top_k]
153
+
154
+ except Exception as e:
155
+ logging.error(f"LIME extraction failed: {e}")
156
+ return []
157
+
158
+ def create_heatmap_html(self, text, word_scores):
159
+ """Create enhanced HTML heatmap visualization"""
160
+ words = text.split()
161
+ html_parts = ['<div style="font-family: Arial; font-size: 16px; line-height: 1.8; padding: 10px; border: 1px solid #ddd; border-radius: 8px;">']
162
+
163
+ # Create word score dictionary
164
+ score_dict = dict(word_scores) if word_scores else {}
165
+
166
+ # Normalize scores for color intensity
167
+ if score_dict:
168
+ max_abs_score = max(abs(score) for score in score_dict.values())
169
+ else:
170
+ max_abs_score = 1
171
+
172
+ for word in words:
173
+ clean_word = re.sub(r'[^\w]', '', word.lower())
174
+ score = score_dict.get(clean_word, 0)
175
+
176
+ if score > 0:
177
+ # Positive contribution - green shades
178
+ intensity = min(0.8, abs(score) / max_abs_score) if max_abs_score > 0 else 0
179
+ color = f"rgba(76, 175, 80, {intensity})"
180
+ border_color = "#4CAF50"
181
+ elif score < 0:
182
+ # Negative contribution - red shades
183
+ intensity = min(0.8, abs(score) / max_abs_score) if max_abs_score > 0 else 0
184
+ color = f"rgba(244, 67, 54, {intensity})"
185
+ border_color = "#F44336"
186
+ else:
187
+ # Neutral - light gray
188
+ color = "rgba(158, 158, 158, 0.1)"
189
+ border_color = "transparent"
190
+
191
+ html_parts.append(
192
+ f'<span style="background-color: {color}; padding: 3px 6px; margin: 2px; '
193
+ f'border-radius: 4px; border: 1px solid {border_color}; '
194
+ f'display: inline-block;" title="Impact Score: {score:.4f}">{word}</span> '
195
+ )
196
+
197
+ html_parts.append('</div>')
198
+ html_parts.append('<div style="margin-top: 10px; font-size: 12px; color: #666;">')
199
+ html_parts.append('<span style="background: rgba(76, 175, 80, 0.6); padding: 2px 8px; margin: 0 5px;">Positive Impact</span>')
200
+ html_parts.append('<span style="background: rgba(244, 67, 54, 0.6); padding: 2px 8px; margin: 0 5px;">Negative Impact</span>')
201
+ html_parts.append('</div>')
202
+
203
+ return ''.join(html_parts)
204
+
205
+ # SINGLE ANALYSIS - Enhanced with detailed LIME
206
  def analyze_single(self, text, theme='default'):
 
207
  if not text.strip():
208
+ return "Please enter text", None, None, None, None, None
209
 
210
  try:
211
  # Use DistilBERT for fast analysis
212
  distilbert = self.model_manager.load_distilbert()
213
  result = distilbert(text)[0]
214
 
 
215
  sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
216
  confidence = result['score']
217
  pos_prob = confidence if sentiment == "Positive" else 1 - confidence
218
  neg_prob = 1 - pos_prob
219
 
220
+ # Enhanced keyword extraction with LIME
221
+ keywords = self.extract_keywords_lime(text)
222
+
223
+ # Create heatmap
224
+ heatmap_html = self.create_heatmap_html(text, keywords)
225
+
226
  # Create visualizations
227
  prob_chart = self._create_probability_chart([neg_prob, pos_prob], theme)
228
  confidence_gauge = self._create_confidence_gauge(confidence, sentiment, theme)
229
  wordcloud = self._create_wordcloud(text, sentiment, theme)
230
+ keyword_chart = self._create_keyword_chart(keywords, sentiment, theme)
231
 
232
+ # Enhanced result text
233
+ top_keywords = ", ".join([f"{word}({score:+.3f})" for word, score in keywords[:5]])
234
+ result_text = (f"Sentiment: {sentiment}\n"
235
+ f"Confidence: {confidence:.3f}\n"
236
+ f"Positive Probability: {pos_prob:.3f}\n"
237
+ f"Top Keywords: {top_keywords}")
238
 
239
+ return result_text, prob_chart, confidence_gauge, wordcloud, keyword_chart, heatmap_html
240
 
241
  except Exception as e:
242
+ return f"Analysis failed: {str(e)}", None, None, None, None, None
243
 
244
+ # ADVANCED ANALYSIS - Enhanced
245
  def analyze_advanced(self, text):
 
246
  if not text.strip():
247
  return "Please enter text", None, None
248
 
 
260
  sentiment = "Positive" if probs[1] > probs[0] else "Negative"
261
  confidence = probs.max()
262
 
263
+ # Enhanced keyword extraction
264
+ keywords = self.extract_keywords_lime(text, top_k=20)
265
 
266
+ # Create enhanced visualizations
267
+ keyword_chart = self._create_detailed_keyword_chart(keywords, sentiment)
268
+ heatmap_html = self.create_heatmap_html(text, keywords)
269
 
270
+ # Enhanced result text
271
+ result_text = f"Advanced Analysis Results:\n"
272
+ result_text += f"Sentiment: {sentiment} (Confidence: {confidence:.3f})\n"
273
+ result_text += f"Positive Probability: {probs[1]:.3f}\n"
274
+ result_text += f"Negative Probability: {probs[0]:.3f}\n\n"
275
+ result_text += f"Top Contributing Words:\n"
276
 
277
+ for i, (word, score) in enumerate(keywords[:10], 1):
278
+ impact = "Positive" if score > 0 else "Negative"
279
+ result_text += f"{i:2}. {word:15} {score:+.4f} ({impact})\n"
280
+
281
+ return result_text, heatmap_html, keyword_chart
282
 
283
  except Exception as e:
284
  return f"Advanced analysis failed: {str(e)}", None, None
285
 
286
+ # Enhanced visualization methods
287
+ def _create_probability_chart(self, probs, theme):
288
+ with managed_figure(figsize=(8, 5)) as fig:
289
+ ax = fig.add_subplot(111)
290
+ colors = [config.THEMES[theme]['neg'], config.THEMES[theme]['pos']]
291
+ labels = ['Negative', 'Positive']
292
+
293
+ bars = ax.bar(labels, probs, color=colors, alpha=0.8, edgecolor='white', linewidth=2)
294
+ ax.set_title('Sentiment Probabilities', fontsize=16, fontweight='bold', pad=20)
295
+ ax.set_ylabel('Probability', fontsize=12)
296
+ ax.set_ylim(0, 1)
297
+ ax.grid(axis='y', alpha=0.3)
298
+
299
+ for bar, prob in zip(bars, probs):
300
+ ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.02,
301
+ f'{prob:.3f}', ha='center', va='bottom', fontweight='bold', fontsize=12)
302
+
303
+ return fig
304
+
305
+ def _create_confidence_gauge(self, confidence, sentiment, theme):
306
+ with managed_figure(figsize=(8, 5)) as fig:
307
+ ax = fig.add_subplot(111)
308
+
309
+ # Enhanced gauge design
310
+ theta = np.linspace(0, np.pi, 100)
311
+ r_outer = 1.0
312
+ r_inner = 0.7
313
+
314
+ # Background gauge
315
+ ax.fill_between(theta, r_inner, r_outer, alpha=0.2, color='lightgray')
316
+
317
+ # Confidence zones
318
+ low_zone = theta[theta <= np.pi/3]
319
+ med_zone = theta[(theta > np.pi/3) & (theta <= 2*np.pi/3)]
320
+ high_zone = theta[theta > 2*np.pi/3]
321
+
322
+ ax.fill_between(low_zone, r_inner, r_outer, alpha=0.3, color='red', label='Low')
323
+ ax.fill_between(med_zone, r_inner, r_outer, alpha=0.3, color='orange', label='Medium')
324
+ ax.fill_between(high_zone, r_inner, r_outer, alpha=0.3, color='green', label='High')
325
+
326
+ # Needle
327
+ angle = np.pi * (0.5 + (0.4 if sentiment == 'Positive' else -0.4) * confidence)
328
+ ax.plot([angle, angle], [0, r_outer*0.9], 'k-', linewidth=6)
329
+ ax.plot(angle, r_outer*0.9, 'ko', markersize=12)
330
+
331
+ ax.set_xlim(0, np.pi)
332
+ ax.set_ylim(0, 1.2)
333
+ ax.set_title(f'{sentiment} Confidence: {confidence:.3f}', fontsize=16, fontweight='bold')
334
+ ax.axis('off')
335
+
336
+ return fig
337
+
338
+ def _create_keyword_chart(self, keywords, sentiment, theme):
339
+ if not keywords:
340
+ return None
341
+
342
+ with managed_figure(figsize=(10, 6)) as fig:
343
+ ax = fig.add_subplot(111)
344
+
345
+ words = [word for word, _ in keywords[:12]]
346
+ scores = [abs(score) for _, score in keywords[:12]] # Use absolute values for bar height
347
+ colors = [config.THEMES[theme]['pos'] if keywords[i][1] > 0
348
+ else config.THEMES[theme]['neg'] for i in range(len(words))]
349
+
350
+ bars = ax.barh(range(len(words)), scores, color=colors, alpha=0.7, edgecolor='white')
351
+ ax.set_yticks(range(len(words)))
352
+ ax.set_yticklabels(words)
353
+ ax.set_xlabel('Importance Score (Absolute)', fontsize=12)
354
+ ax.set_title(f'Top Contributing Words - {sentiment}', fontsize=14, fontweight='bold')
355
+ ax.invert_yaxis()
356
+ ax.grid(axis='x', alpha=0.3)
357
+
358
+ # Add score labels
359
+ for i, (bar, score) in enumerate(zip(bars, scores)):
360
+ original_score = keywords[i][1]
361
+ ax.text(bar.get_width() + max(scores)*0.01, bar.get_y() + bar.get_height()/2.,
362
+ f'{original_score:+.3f}', ha='left', va='center', fontsize=9)
363
+
364
+ return fig
365
+
366
+ def _create_detailed_keyword_chart(self, keywords, sentiment):
367
+ if not keywords:
368
+ return None
369
+
370
+ with managed_figure(figsize=(12, 8)) as fig:
371
+ ax = fig.add_subplot(111)
372
+
373
+ # Separate positive and negative keywords
374
+ pos_keywords = [(w, s) for w, s in keywords if s > 0]
375
+ neg_keywords = [(w, s) for w, s in keywords if s < 0]
376
+
377
+ # Plot positive keywords (right side)
378
+ if pos_keywords:
379
+ pos_words = [w for w, _ in pos_keywords[:10]]
380
+ pos_scores = [s for _, s in pos_keywords[:10]]
381
+ y_pos = np.arange(len(pos_words))
382
+
383
+ ax.barh(y_pos, pos_scores, alpha=0.7, color='#4ecdc4',
384
+ label=f'Positive Impact ({len(pos_keywords)} words)')
385
+
386
+ for i, (word, score) in enumerate(zip(pos_words, pos_scores)):
387
+ ax.text(score + max(pos_scores)*0.02, i, f'{score:.3f}',
388
+ va='center', fontsize=9)
389
+
390
+ ax.set_yticks(y_pos)
391
+ ax.set_yticklabels(pos_words)
392
+
393
+ # Plot negative keywords (left side)
394
+ if neg_keywords:
395
+ neg_words = [w for w, _ in neg_keywords[:10]]
396
+ neg_scores = [s for _, s in neg_keywords[:10]]
397
+ y_neg = np.arange(len(neg_words)) - len(pos_keywords) - 1
398
+
399
+ ax.barh(y_neg, neg_scores, alpha=0.7, color='#ff6b6b',
400
+ label=f'Negative Impact ({len(neg_keywords)} words)')
401
+
402
+ for i, (word, score) in enumerate(zip(neg_words, neg_scores)):
403
+ ax.text(score - abs(min(neg_scores))*0.02, y_neg[i], f'{score:.3f}',
404
+ va='center', ha='right', fontsize=9)
405
+
406
+ ax.set_yticks(y_neg)
407
+ ax.set_yticklabels(neg_words)
408
+
409
+ ax.axvline(x=0, color='black', linestyle='-', alpha=0.3)
410
+ ax.set_xlabel('Impact Score', fontsize=12)
411
+ ax.set_title('Detailed Word Impact Analysis', fontsize=14, fontweight='bold')
412
+ ax.legend()
413
+ ax.grid(axis='x', alpha=0.3)
414
+
415
+ return fig
416
+
417
+ def _create_wordcloud(self, text, sentiment, theme):
418
+ if len(text.split()) < 5:
419
+ return None
420
+
421
+ with managed_figure(figsize=(10, 6)) as fig:
422
+ ax = fig.add_subplot(111)
423
+ colormap = 'Greens' if sentiment == 'Positive' else 'Reds'
424
+
425
+ wc = WordCloud(width=800, height=400, background_color='white',
426
+ colormap=colormap, max_words=50, relative_scaling=0.5).generate(text)
427
+
428
+ ax.imshow(wc, interpolation='bilinear')
429
+ ax.axis('off')
430
+ ax.set_title(f'{sentiment} Word Cloud', fontsize=16, fontweight='bold')
431
+
432
+ return fig
433
+
434
+ # BATCH ANALYSIS - Keep existing but enhanced
435
  def analyze_batch(self, texts, progress_callback=None):
 
436
  if not texts or len(texts) < 2:
437
  return None, "Need at least 2 texts for batch analysis"
438
 
439
  try:
 
440
  if len(texts) > config.BATCH_LIMIT:
441
  texts = texts[:config.BATCH_LIMIT]
442
 
 
456
  'text': text[:100] + "..." if len(text) > 100 else text,
457
  'full_text': text,
458
  'sentiment': sentiment,
459
+ 'confidence': confidence,
460
+ 'analysis_type': 'batch'
461
  })
462
  except:
463
  continue
464
 
465
  if results:
466
+ chart = self._create_enhanced_batch_chart(results)
467
+ return chart, f"Successfully analyzed {len(results)} texts"
468
  else:
469
  return None, "Batch analysis failed"
470
 
471
  except Exception as e:
472
  return None, f"Batch analysis error: {str(e)}"
473
 
474
+ def _create_enhanced_batch_chart(self, results):
475
+ with managed_figure(figsize=(15, 10)) as fig:
476
+ gs = fig.add_gridspec(2, 3, hspace=0.3, wspace=0.3)
477
+
478
+ # Sentiment distribution pie
479
+ ax1 = fig.add_subplot(gs[0, 0])
480
+ sent_counts = Counter([r['sentiment'] for r in results])
481
+ colors = ['#4ecdc4' if sent == 'Positive' else '#ff6b6b' for sent in sent_counts.keys()]
482
+ wedges, texts, autotexts = ax1.pie(sent_counts.values(), labels=sent_counts.keys(),
483
+ autopct='%1.1f%%', colors=colors, startangle=90)
484
+ ax1.set_title('Sentiment Distribution', fontweight='bold')
485
+
486
+ # Confidence histogram
487
+ ax2 = fig.add_subplot(gs[0, 1])
488
+ confidences = [r['confidence'] for r in results]
489
+ ax2.hist(confidences, bins=15, alpha=0.7, color='skyblue', edgecolor='black')
490
+ ax2.set_title('Confidence Distribution', fontweight='bold')
491
+ ax2.set_xlabel('Confidence Score')
492
+ ax2.set_ylabel('Frequency')
493
+ ax2.grid(axis='y', alpha=0.3)
494
+
495
+ # Sentiment vs Confidence scatter
496
+ ax3 = fig.add_subplot(gs[0, 2])
497
+ pos_results = [r for r in results if r['sentiment'] == 'Positive']
498
+ neg_results = [r for r in results if r['sentiment'] == 'Negative']
499
+
500
+ if pos_results:
501
+ ax3.scatter([1]*len(pos_results), [r['confidence'] for r in pos_results],
502
+ c='#4ecdc4', alpha=0.6, s=50, label='Positive')
503
+ if neg_results:
504
+ ax3.scatter([0]*len(neg_results), [r['confidence'] for r in neg_results],
505
+ c='#ff6b6b', alpha=0.6, s=50, label='Negative')
506
+
507
+ ax3.set_xlim(-0.5, 1.5)
508
+ ax3.set_ylim(0, 1)
509
+ ax3.set_xticks([0, 1])
510
+ ax3.set_xticklabels(['Negative', 'Positive'])
511
+ ax3.set_ylabel('Confidence')
512
+ ax3.set_title('Sentiment vs Confidence', fontweight='bold')
513
+ ax3.legend()
514
+ ax3.grid(alpha=0.3)
515
+
516
+ # Sentiment sequence
517
+ ax4 = fig.add_subplot(gs[1, :])
518
+ sentiments = [1 if r['sentiment'] == 'Positive' else 0 for r in results]
519
+ confidences = [r['confidence'] for r in results]
520
+ colors = ['#4ecdc4' if s == 1 else '#ff6b6b' for s in sentiments]
521
+
522
+ scatter = ax4.scatter(range(len(results)), sentiments, c=colors, s=confidences*100, alpha=0.7)
523
+ ax4.set_title('Sentiment Sequence (bubble size = confidence)', fontweight='bold')
524
+ ax4.set_ylabel('Sentiment')
525
+ ax4.set_xlabel('Review Index')
526
+ ax4.set_yticks([0, 1])
527
+ ax4.set_yticklabels(['Negative', 'Positive'])
528
+ ax4.grid(alpha=0.3)
529
+
530
+ return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
 
532
# Enhanced File Processor
class FileProcessor:
    """Reads uploaded CSV/TXT files and returns their textual content.

    CSV files get smart text-column detection; both branches try a list of
    encodings in order.
    """

    # Encoding fallback order. NOTE: 'latin-1' decodes ANY byte sequence
    # without error, so it must be tried LAST -- in the original order
    # ('utf-8', 'latin-1', 'cp1252') the cp1252 attempt was unreachable.
    _ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    @staticmethod
    def process_file(file):
        """Extract text from an uploaded file.

        Args:
            file: object exposing a ``.name`` path attribute (e.g. a Gradio
                upload), or a falsy value for "no file".

        Returns:
            str: newline-joined texts for CSV, stripped content for plain
            text, '' when no file was given, or an 'Error: ...' /
            'File processing error: ...' message on failure.
        """
        if not file:
            return ""

        try:
            if file.name.endswith('.csv'):
                for encoding in FileProcessor._ENCODINGS:
                    try:
                        df = pd.read_csv(file.name, encoding=encoding)

                        # Smart column detection: prefer the first column whose
                        # sampled values are mostly non-trivial strings.
                        text_columns = []
                        for col in df.columns:
                            sample_values = df[col].dropna().head(10)
                            if len(sample_values) > 0:
                                text_count = sum(1 for val in sample_values
                                                 if isinstance(val, str) and len(str(val).strip()) > 10)
                                if text_count > len(sample_values) * 0.7:
                                    text_columns.append(col)

                        selected_column = text_columns[0] if text_columns else df.columns[0]
                        texts = df[selected_column].dropna().astype(str).tolist()

                        # Drop short rows and 'nan' placeholders.
                        cleaned_texts = []
                        for text in texts:
                            text = text.strip()
                            if len(text) > 10 and text.lower() != 'nan':
                                cleaned_texts.append(text)

                        if cleaned_texts:
                            return '\n'.join(cleaned_texts)
                        # File parsed but held no usable text: stop retrying.
                        break

                    except Exception:
                        continue

                return "Error: Could not process CSV file"

            else:
                # Plain text files: first encoding that decodes cleanly wins.
                for encoding in FileProcessor._ENCODINGS:
                    try:
                        with open(file.name, 'r', encoding=encoding) as f:
                            return f.read().strip()
                    except Exception:
                        continue

                return "Error: Could not read text file"

        except Exception as e:
            return f"File processing error: {str(e)}"
589
# Enhanced Data Exporter
class DataExporter:
    """Serializes analysis-history entries to a downloadable CSV file."""

    @staticmethod
    def export_csv(data):
        """Write history entries to a temporary CSV file.

        Args:
            data: list of history dicts; recognized keys are 'timestamp',
                'text', 'full_text', 'sentiment', 'confidence',
                'analysis_type' and 'key_words' (list of (word, score)).

        Returns:
            (file_path, message) on success, (None, message) when there is
            nothing to export or the write fails.

        Fixes: the file is now opened with ``newline=''`` (required by the
        csv module -- without it every row is followed by a blank line on
        Windows), with an explicit UTF-8 encoding, and is closed via a
        context manager even if a row write raises.
        """
        if not data:
            return None, "No data to export"

        try:
            temp_file = tempfile.NamedTemporaryFile(
                mode='w', delete=False, suffix='.csv',
                newline='', encoding='utf-8'
            )
            with temp_file:
                writer = csv.writer(temp_file)

                # Enhanced headers
                writer.writerow(['Timestamp', 'Text_Preview', 'Full_Text', 'Sentiment',
                                 'Confidence', 'Analysis_Type', 'Top_Keywords'])

                for entry in data:
                    # Flatten up to five (word, score) pairs into one cell.
                    keywords_str = ""
                    if entry.get('key_words'):
                        keywords_str = "; ".join(f"{word}:{score:.3f}"
                                                 for word, score in entry['key_words'][:5])

                    writer.writerow([
                        entry.get('timestamp', ''),
                        entry.get('text', ''),
                        entry.get('full_text', entry.get('text', '')),
                        entry.get('sentiment', ''),
                        f"{entry.get('confidence', 0):.4f}",
                        entry.get('analysis_type', 'single'),
                        keywords_str
                    ])

            return temp_file.name, f"Exported {len(data)} entries with enhanced data"

        except Exception as e:
            return None, f"Export failed: {str(e)}"
  return None, f"Export failed: {str(e)}"
626
 
627
+ # Enhanced Main Application
628
  class SentimentApp:
629
  def __init__(self):
630
  self.engine = SentimentEngine()
 
632
  self.file_processor = FileProcessor()
633
  self.exporter = DataExporter()
634
 
 
635
  self.examples = [
636
  "This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout.",
637
  "Terrible film with poor acting and a confusing storyline. Complete waste of time.",
 
639
  "An masterpiece of cinema! Every scene was beautifully crafted and emotionally powerful.",
640
  "The worst movie I've ever seen. Bad acting, terrible script, and boring plot."
641
  ]
642
+
643
    def create_history_trends(self, theme='default'):
        """Build a multi-panel figure summarizing the analysis history.

        Panels: a timeline of sentiments (bubble size = confidence), a
        confidence trend line, a pie of analysis types, and a text summary
        panel built from ``self.history.get_stats()``.

        Args:
            theme: accepted for interface symmetry with other chart methods;
                not used by this function (colors are hard-coded).

        Returns:
            (figure, status_message), or (None, message) when fewer than
            two history entries exist.
        """
        history = self.history.get_all()
        if len(history) < 2:
            return None, f"Need at least 2 analyses for trends. Current: {len(history)}"

        # NOTE(review): `managed_figure` is a project context manager defined
        # elsewhere in this file; presumably it handles figure cleanup.
        with managed_figure(figsize=(14, 10)) as fig:
            gs = fig.add_gridspec(3, 2, hspace=0.4, wspace=0.3)

            # Extract parallel lists from the history entries.  Every entry
            # gets a timestamp in HistoryManager.add(), so fromisoformat is safe.
            timestamps = [datetime.fromisoformat(h['timestamp']) for h in history]
            sentiments = [h.get('sentiment', 'Unknown') for h in history]
            confidences = [h.get('confidence', 0) for h in history]
            analysis_types = [h.get('analysis_type', 'single') for h in history]

            # 1. Sentiment timeline (top row, full width)
            ax1 = fig.add_subplot(gs[0, :])
            pos_mask = [s == 'Positive' for s in sentiments]
            neg_mask = [s == 'Negative' for s in sentiments]

            pos_times = [t for t, m in zip(timestamps, pos_mask) if m]
            neg_times = [t for t, m in zip(timestamps, neg_mask) if m]
            pos_confs = [c for c, m in zip(confidences, pos_mask) if m]
            neg_confs = [c for c, m in zip(confidences, neg_mask) if m]

            if pos_times:
                ax1.scatter(pos_times, [1]*len(pos_times), c='#4ecdc4', s=[c*100 for c in pos_confs],
                           alpha=0.7, label='Positive', edgecolors='white')
            if neg_times:
                ax1.scatter(neg_times, [0]*len(neg_times), c='#ff6b6b', s=[c*100 for c in neg_confs],
                           alpha=0.7, label='Negative', edgecolors='white')

            ax1.set_ylim(-0.5, 1.5)
            ax1.set_yticks([0, 1])
            ax1.set_yticklabels(['Negative', 'Positive'])
            ax1.set_title('Sentiment Timeline (bubble size = confidence)', fontweight='bold', fontsize=14)
            ax1.legend()
            ax1.grid(alpha=0.3)

            # 2. Confidence trend line with per-point sentiment coloring
            ax2 = fig.add_subplot(gs[1, 0])
            colors = ['#4ecdc4' if s == 'Positive' else '#ff6b6b' for s in sentiments]
            ax2.plot(range(len(confidences)), confidences, 'o-', alpha=0.7, markersize=6)
            ax2.scatter(range(len(confidences)), confidences, c=colors, s=60, alpha=0.8, edgecolors='white')
            ax2.set_title('Confidence Trends', fontweight='bold')
            ax2.set_xlabel('Analysis Number')
            ax2.set_ylabel('Confidence Score')
            ax2.grid(alpha=0.3)

            # 3. Analysis type distribution (single/advanced/batch)
            ax3 = fig.add_subplot(gs[1, 1])
            type_counts = Counter(analysis_types)
            colors_pie = ['#2E86C1', '#28B463', '#F39C12'][:len(type_counts)]
            ax3.pie(type_counts.values(), labels=type_counts.keys(), autopct='%1.1f%%',
                   colors=colors_pie, startangle=90)
            ax3.set_title('Analysis Types', fontweight='bold')

            # 4. Statistics summary rendered as text (bottom row, full width).
            # NOTE(review): assumes get_stats() returns 'total', 'positive',
            # 'negative' and 'avg_confidence'; total >= 2 is guaranteed by the
            # early return above, so the percentage divisions are safe.
            ax4 = fig.add_subplot(gs[2, :])
            stats = self.history.get_stats()

            summary_text = f"""
            Analysis Summary:
            • Total Analyses: {stats['total']}
            • Positive Reviews: {stats['positive']} ({stats['positive']/stats['total']*100:.1f}%)
            • Negative Reviews: {stats['negative']} ({stats['negative']/stats['total']*100:.1f}%)
            • Average Confidence: {stats['avg_confidence']:.3f}

            Recent Activity:
            • Last 10 analyses: {len(history[-10:])} entries
            • Most recent: {history[-1]['sentiment'] if history else 'None'}
            """

            ax4.text(0.1, 0.5, summary_text, transform=ax4.transAxes, fontsize=12,
                    verticalalignment='center', bbox=dict(boxstyle="round,pad=0.3",
                    facecolor='lightblue', alpha=0.3))
            ax4.set_xlim(0, 1)
            ax4.set_ylim(0, 1)
            ax4.axis('off')
            ax4.set_title('Statistics Summary', fontweight='bold')

            return fig, f"History trends generated for {len(history)} analyses"
726
 
727
  def create_interface():
728
+ """Create enhanced Gradio interface"""
729
  app = SentimentApp()
730
 
731
+ with gr.Blocks(theme=gr.themes.Soft(), title="Enhanced Movie Sentiment Analyzer") as demo:
732
+ gr.Markdown("# 🎭 Enhanced Movie Sentiment Analyzer")
733
+ gr.Markdown("**Advanced sentiment analysis with LIME explanations, keyword extraction, and comprehensive visualizations**")
734
 
735
+ # TAB 1: SINGLE ANALYSIS - Enhanced
736
  with gr.Tab("🎯 Single Analysis"):
737
+ gr.Markdown("### Fast DistilBERT analysis with enhanced LIME keyword extraction")
738
 
739
  with gr.Row():
740
  with gr.Column():
 
748
  theme_dropdown = gr.Dropdown(
749
  choices=list(config.THEMES.keys()),
750
  value="default",
751
+ label="Color Theme"
752
  )
753
 
754
  gr.Examples(
 
757
  )
758
 
759
  with gr.Column():
760
+ single_result = gr.Textbox(label="Analysis Results", lines=6)
761
+ heatmap_display = gr.HTML(label="Word Importance Heatmap")
762
 
763
  with gr.Row():
764
+ prob_plot = gr.Plot(label="Sentiment Probabilities")
765
  confidence_plot = gr.Plot(label="Confidence Gauge")
766
 
767
+ with gr.Row():
768
+ wordcloud_plot = gr.Plot(label="Word Cloud")
769
+ keyword_plot = gr.Plot(label="Key Contributing Words")
770
 
771
+ # TAB 2: ADVANCED ANALYSIS - Enhanced
772
  with gr.Tab("🧠 Advanced Analysis"):
773
+ gr.Markdown("### Deep BERT analysis with detailed LIME explanations and keyword impact")
774
 
775
  with gr.Row():
776
  with gr.Column():
777
  advanced_input = gr.Textbox(
778
  label="Text for Deep Analysis",
779
+ placeholder="Enter text for comprehensive analysis...",
780
  lines=6
781
  )
782
  advanced_btn = gr.Button("Deep Analyze", variant="primary", size="lg")
783
 
784
  with gr.Column():
785
+ advanced_result = gr.Textbox(label="Detailed Analysis Results", lines=12)
786
 
787
  with gr.Row():
788
+ advanced_heatmap = gr.HTML(label="Enhanced Word Impact Heatmap")
789
+ advanced_keywords = gr.Plot(label="Detailed Keyword Analysis")
790
 
791
+ # TAB 3: BATCH ANALYSIS - Enhanced
792
  with gr.Tab("📊 Batch Analysis"):
793
+ gr.Markdown("### Enhanced batch processing with comprehensive statistics")
794
 
795
  with gr.Row():
796
  with gr.Column():
797
+ file_upload = gr.File(
798
+ label="Upload CSV/TXT File",
799
+ file_types=[".csv", ".txt"]
800
+ )
801
  batch_input = gr.Textbox(
802
+ label="Batch Reviews (one per line)",
803
  placeholder="Paste multiple reviews, one per line...",
804
  lines=8
805
  )
806
 
807
  with gr.Column():
808
+ load_file_btn = gr.Button("Load File", variant="secondary")
809
  batch_btn = gr.Button("Analyze Batch", variant="primary", size="lg")
810
+ batch_status = gr.Textbox(label="Batch Status", lines=2)
811
 
812
+ batch_plot = gr.Plot(label="Enhanced Batch Analysis Results")
813
 
814
+ # TAB 4: ENHANCED HISTORY & EXPORT
815
+ with gr.Tab("📈 History & Analytics"):
816
+ gr.Markdown("### Comprehensive analysis history with advanced statistics and trends")
817
 
818
  with gr.Row():
819
+ with gr.Column():
820
+ with gr.Row():
821
+ refresh_history_btn = gr.Button("Refresh History", variant="secondary")
822
+ clear_history_btn = gr.Button("Clear All History", variant="stop")
823
+
824
+ history_stats = gr.Textbox(
825
+ label="History Statistics",
826
+ lines=3,
827
+ value="No analyses yet"
828
+ )
829
+
830
+ with gr.Column():
831
+ with gr.Row():
832
+ export_csv_btn = gr.Button("Export CSV", variant="secondary")
833
+ export_json_btn = gr.Button("Export JSON", variant="secondary")
834
+
835
+ export_status = gr.Textbox(label="Export Status", lines=2)
836
 
837
+ # Enhanced history visualization
838
+ history_trends_plot = gr.Plot(label="Advanced History Trends & Analytics")
 
839
 
840
+ # File downloads
841
+ with gr.Row():
842
+ csv_download = gr.File(label="CSV Download", visible=False)
843
+ json_download = gr.File(label="JSON Download", visible=False)
844
 
845
+ # Enhanced Event Handlers
846
+ def enhanced_single_analysis(text, theme):
847
+ result_text, prob_fig, conf_fig, cloud_fig, keyword_fig, heatmap_html = app.engine.analyze_single(text, theme)
848
 
849
+ # Add enhanced data to history
850
+ if "failed" not in result_text.lower() and text.strip():
851
+ # Extract keywords for history (simplified)
852
+ try:
853
+ keywords = app.engine.extract_keywords_lime(text, top_k=10)
854
+ app.history.add({
855
+ 'text': text[:100],
856
+ 'full_text': text,
857
+ 'sentiment': result_text.split('\n')[0].split(': ')[1] if ': ' in result_text else 'Unknown',
858
+ 'confidence': float(result_text.split('Confidence: ')[1].split('\n')[0]) if 'Confidence: ' in result_text else 0.0,
859
+ 'analysis_type': 'single',
860
+ 'key_words': keywords
861
+ })
862
+ except:
863
+ pass
864
 
865
+ return result_text, prob_fig, conf_fig, cloud_fig, keyword_fig, heatmap_html
866
 
867
+ def enhanced_advanced_analysis(text):
868
+ result_text, heatmap_html, keyword_fig = app.engine.analyze_advanced(text)
869
 
870
+ # Add to history with enhanced data
871
+ if "failed" not in result_text.lower() and text.strip():
872
+ try:
873
+ keywords = app.engine.extract_keywords_lime(text, top_k=15)
874
+ sentiment = result_text.split('Sentiment: ')[1].split(' (')[0] if 'Sentiment: ' in result_text else 'Unknown'
875
+ confidence_str = result_text.split('Confidence: ')[1].split(')')[0] if 'Confidence: ' in result_text else '0'
876
+ confidence = float(confidence_str)
877
+
878
+ app.history.add({
879
+ 'text': text[:100],
880
+ 'full_text': text,
881
+ 'sentiment': sentiment,
882
+ 'confidence': confidence,
883
+ 'analysis_type': 'advanced',
884
+ 'key_words': keywords
885
+ })
886
+ except:
887
+ pass
888
 
889
+ return result_text, heatmap_html, keyword_fig
890
 
891
+ def enhanced_batch_analysis(text):
892
  if not text.strip():
893
  return None, "Please provide text for batch analysis"
894
 
 
898
 
899
  chart, status = app.engine.analyze_batch(texts)
900
 
901
+ # Add results to history
902
+ if chart and "Successfully" in status:
903
+ for review_text in texts:
904
+ # Quick analysis for history
905
+ try:
906
+ distilbert = app.engine.model_manager.load_distilbert()
907
+ result = distilbert(review_text)[0]
908
+ sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
909
+ confidence = result['score']
910
+
911
+ app.history.add({
912
+ 'text': review_text[:100],
913
+ 'full_text': review_text,
914
+ 'sentiment': sentiment,
915
+ 'confidence': confidence,
916
+ 'analysis_type': 'batch'
917
+ })
918
+ except:
919
+ continue
920
 
921
  return chart, status
922
 
923
+ def get_history_stats():
924
+ stats = app.history.get_stats()
925
+ if stats['total'] == 0:
926
+ return "No analyses performed yet"
927
+
928
+ return f"""Current Statistics:
929
+ • Total Analyses: {stats['total']}
930
+ • Positive: {stats['positive']} ({stats['positive']/stats['total']*100:.1f}%)
931
+ • Negative: {stats['negative']} ({stats['negative']/stats['total']*100:.1f}%)
932
+ • Average Confidence: {stats['avg_confidence']:.3f}"""
933
+
934
+ def export_csv_data():
935
+ file_path, message = app.exporter.export_csv(app.history.get_all())
936
+ return file_path, message
937
+
938
+ def export_json_data():
939
+ # Create JSON export
940
+ data = app.history.get_all()
941
+ if not data:
942
+ return None, "No data to export"
943
+
944
+ try:
945
+ temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json')
946
+ json.dump(data, temp_file, indent=2, ensure_ascii=False, default=str)
947
+ temp_file.close()
948
+ return temp_file.name, f"Exported {len(data)} entries as JSON"
949
+ except Exception as e:
950
+ return None, f"JSON export failed: {str(e)}"
951
+
952
+ # Bind enhanced events
953
  single_btn.click(
954
+ enhanced_single_analysis,
955
  inputs=[single_input, theme_dropdown],
956
+ outputs=[single_result, prob_plot, confidence_plot, wordcloud_plot, keyword_plot, heatmap_display]
957
  )
958
 
959
  advanced_btn.click(
960
+ enhanced_advanced_analysis,
961
  inputs=advanced_input,
962
+ outputs=[advanced_result, advanced_heatmap, advanced_keywords]
963
  )
964
 
965
  load_file_btn.click(
 
969
  )
970
 
971
  batch_btn.click(
972
+ enhanced_batch_analysis,
973
  inputs=batch_input,
974
  outputs=[batch_plot, batch_status]
975
  )
976
 
977
+ refresh_history_btn.click(
978
+ lambda: [app.create_history_trends()[0], get_history_stats()],
979
+ outputs=[history_trends_plot, history_stats]
980
  )
981
 
982
+ clear_history_btn.click(
983
+ lambda: [None, f"Cleared {app.history.clear()} entries", "History cleared"],
984
+ outputs=[history_trends_plot, export_status, history_stats]
985
  )
986
 
987
  export_csv_btn.click(
988
+ export_csv_data,
989
+ outputs=[csv_download, export_status]
990
+ )
991
+
992
+ export_json_btn.click(
993
+ export_json_data,
994
+ outputs=[json_download, export_status]
995
+ )
996
+
997
+ # Auto-refresh history stats on load
998
+ demo.load(
999
+ get_history_stats,
1000
+ outputs=history_stats
1001
  )
1002
 
1003
  return demo
1004
 
1005
+ # Launch Enhanced Application
1006
  if __name__ == "__main__":
1007
+ logging.info("Starting Enhanced Sentiment Analysis Suite...")
1008
  demo = create_interface()
1009
  demo.launch(
1010
  share=True,