entropy25 commited on
Commit
c87a8cc
·
verified ·
1 Parent(s): aa7980e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +533 -591
app.py CHANGED
@@ -20,575 +20,533 @@ import gc
20
from typing import ClassVar

import pandas as pd
from lime.lime_text import LimeTextExplainer
import shap
 
23
 
24
@dataclass
class Config:
    """Application-wide tunables for the sentiment analyzer.

    A single shared instance (``config``, created below) is read by the
    rest of the module; instances are treated as read-only.
    """

    # Processing limits
    MAX_HISTORY_SIZE: int = 1000      # analyses kept before the oldest are trimmed
    BATCH_SIZE_LIMIT: int = 50        # hard cap on texts accepted per batch request
    MAX_TEXT_LENGTH: int = 512        # tokenizer truncation length
    MIN_WORD_LENGTH: int = 2          # shortest keyword kept by the explainers
    CACHE_SIZE: int = 128             # lru_cache size for text cleaning
    BATCH_PROCESSING_SIZE: int = 8    # model forward-pass mini-batch size

    # Visualization settings
    FIGURE_SIZE_SINGLE: Tuple[int, int] = (8, 5)
    FIGURE_SIZE_BATCH: Tuple[int, int] = (12, 8)
    WORDCLOUD_SIZE: Tuple[int, int] = (10, 5)

    # ClassVar marks these as shared class-level constants, not dataclass
    # fields (a mutable default would otherwise be a correctness hazard;
    # previously they were left unannotated, which worked by accident).
    THEMES: ClassVar[Dict[str, Dict[str, str]]] = {
        'default': {'pos': '#4ecdc4', 'neg': '#ff6b6b'},
        'ocean': {'pos': '#0077be', 'neg': '#ff6b35'},
        'forest': {'pos': '#228b22', 'neg': '#dc143c'},
        'sunset': {'pos': '#ff8c00', 'neg': '#8b0000'}
    }

    STOP_WORDS: ClassVar[set] = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
        'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be',
        'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should'
    }

# Shared module-level singletons.
config = Config()
logger = logging.getLogger(__name__)
53
 
54
# Decorators and Context Managers
def handle_errors(default_return=None):
    """Decorator factory: log and swallow exceptions from the wrapped call.

    On failure the wrapper logs via the module logger and returns
    ``default_return`` when one was supplied, otherwise the string
    ``"Error: <message>"``.
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                logger.error(f"{func.__name__} failed: {e}")
                if default_return is not None:
                    return default_return
                return f"Error: {str(e)}"
            return result
        return wrapper
    return decorator
67
 
68
@contextmanager
def managed_figure(*args, **kwargs):
    """Context manager for matplotlib figures to prevent memory leaks.

    Creates a figure with the given arguments, yields it, and always closes
    it (plus forces a GC pass) on exit — even if the body raises.

    NOTE(review): callers in this file ``return fig`` from inside the
    ``with`` block, so the figure is already closed when the caller receives
    it; the Figure object itself remains usable for rendering — confirm the
    UI layer accepts closed figures.
    """
    fig = plt.figure(*args, **kwargs)
    try:
        yield fig
    finally:
        plt.close(fig)
        gc.collect()
77
 
78
class ThemeContext:
    """Resolve a theme name into its colour palette.

    Unknown theme names silently fall back to the 'default' palette.
    """
    def __init__(self, theme: str = 'default'):
        palette = config.THEMES.get(theme)
        if palette is None:
            palette = config.THEMES['default']
        self.theme = theme
        self.colors = palette
83
 
84
# Lazy Model Manager
class ModelManager:
    """Process-wide lazy holder for the BERT sentiment model.

    Singleton: every instantiation yields the same object, and the
    heavyweight tokenizer/model pair is only fetched on first access.
    """
    _instance = None
    _model = None
    _tokenizer = None
    _device = None

    def __new__(cls):
        # Guard clause: reuse the shared instance when it already exists.
        if cls._instance is not None:
            return cls._instance
        cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def model(self):
        """The classification model, loading it on first use."""
        if self._model is None:
            self._load_model()
        return self._model

    @property
    def tokenizer(self):
        """The tokenizer, loading it (together with the model) on first use."""
        if self._tokenizer is None:
            self._load_model()
        return self._tokenizer

    @property
    def device(self):
        """Torch device, resolved lazily: CUDA when available, else CPU."""
        if self._device is None:
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return self._device

    def _load_model(self):
        """Fetch tokenizer + model from the hub and move the model to device."""
        try:
            repo = "entropy25/sentimentanalysis"
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            self._tokenizer = BertTokenizer.from_pretrained(repo)
            self._model = BertForSequenceClassification.from_pretrained(repo)
            self._model.to(self._device)
            logger.info(f"Model loaded on {self._device}")
        except Exception as e:
            # Loading failures are fatal for the app; log and re-raise.
            logger.error(f"Model loading failed: {e}")
            raise
126
 
127
# Simplified Core Classes
class TextProcessor:
    """Tokenize and filter review text for word-level statistics."""

    @staticmethod
    @lru_cache(maxsize=config.CACHE_SIZE)
    def clean_text(text: str) -> Tuple[str, ...]:
        """Lower-case *text*, keep words of 3+ characters, drop stop words.

        Returns a tuple (hashable) so results can be memoized by lru_cache.
        """
        tokens = re.findall(r'\b\w{3,}\b', text.lower())
        return tuple(token for token in tokens if token not in config.STOP_WORDS)
136
 
137
class HistoryManager:
    """In-memory log of past analyses; oldest entries trimmed past the cap."""

    def __init__(self):
        self._history = []

    def add(self, entry: Dict):
        """Store a copy of *entry* stamped with the current ISO timestamp."""
        stamped = dict(entry)
        stamped['timestamp'] = datetime.now().isoformat()
        self._history.append(stamped)
        # Keep only the newest MAX_HISTORY_SIZE entries.
        if len(self._history) > config.MAX_HISTORY_SIZE:
            self._history = self._history[-config.MAX_HISTORY_SIZE:]

    def get_all(self) -> List[Dict]:
        """Return a shallow copy so callers cannot mutate internal state."""
        return list(self._history)

    def clear(self) -> int:
        """Drop every entry; return how many were removed."""
        removed = len(self._history)
        self._history.clear()
        return removed

    def size(self) -> int:
        """Number of stored entries."""
        return len(self._history)
157
 
158
# Core Analysis Engine
class SentimentEngine:
    """Streamlined sentiment analysis engine with LIME and SHAP.

    Wraps the lazily-loaded BERT model (via ModelManager) and adds two
    word-importance explainers plus an HTML heatmap renderer.
    """
    def __init__(self):
        self.model_manager = ModelManager()
        self.lime_explainer = LimeTextExplainer(class_names=['Negative', 'Positive'])
        # Placeholder only; the "SHAP" path below does not use the shap library.
        self.shap_explainer = None

    def predict_proba(self, texts):
        """Return an (n, 2) array of [neg, pos] probabilities.

        Accepts a single string or a list of strings (LIME calls this with
        a list of perturbed samples).
        """
        if isinstance(texts, str):
            texts = [texts]

        inputs = self.model_manager.tokenizer(
            texts, return_tensors="pt", padding=True,
            truncation=True, max_length=config.MAX_TEXT_LENGTH
        ).to(self.model_manager.device)

        with torch.no_grad():
            outputs = self.model_manager.model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()

        return probs

    @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0})
    def analyze_single_fast(self, text: str) -> Dict:
        """Fast single text analysis without keyword extraction.

        Raises ValueError (caught by @handle_errors) on blank input.
        """
        if not text.strip():
            raise ValueError("Empty text")

        probs = self.predict_proba([text])[0]
        sentiment = "Positive" if probs[1] > probs[0] else "Negative"

        return {
            'sentiment': sentiment,
            'confidence': float(probs.max()),
            'pos_prob': float(probs[1]),
            'neg_prob': float(probs[0])
        }

    def extract_key_words_lime(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
        """Keyword extraction using LIME; returns (word, |weight|) pairs.

        Scores are absolute values, so sign/direction is discarded here.
        num_samples=200 is low — presumably a latency trade-off; TODO confirm.
        """
        try:
            explanation = self.lime_explainer.explain_instance(
                text, self.predict_proba, num_features=top_k, num_samples=200
            )

            word_scores = []
            for word, score in explanation.as_list():
                if len(word.strip()) >= config.MIN_WORD_LENGTH:
                    word_scores.append((word.strip().lower(), abs(score)))

            word_scores.sort(key=lambda x: x[1], reverse=True)
            return word_scores[:top_k]

        except Exception as e:
            logger.error(f"LIME extraction failed: {e}")
            return []

    def extract_key_words_shap(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
        """Keyword extraction labelled "SHAP".

        NOTE(review): despite the name, this is leave-one-out ablation —
        each word's importance is |P(pos|full) - P(pos|without word)| — not
        Shapley values, and it runs one model call per word (O(n) forwards).
        """
        try:
            words = text.split()
            word_scores = []

            # Baseline positive probability for the full text.
            baseline_prob = self.predict_proba([text])[0][1]

            # Importance of each word = probability shift when it is removed.
            for i, word in enumerate(words):
                modified_words = words[:i] + words[i+1:]
                modified_text = ' '.join(modified_words)

                if modified_text.strip():
                    modified_prob = self.predict_proba([modified_text])[0][1]
                    importance = abs(baseline_prob - modified_prob)

                    clean_word = re.sub(r'[^\w]', '', word.lower())
                    if len(clean_word) >= config.MIN_WORD_LENGTH:
                        word_scores.append((clean_word, importance))

            # Duplicate words keep their maximum observed importance.
            unique_scores = {}
            for word, score in word_scores:
                if word in unique_scores:
                    unique_scores[word] = max(unique_scores[word], score)
                else:
                    unique_scores[word] = score

            sorted_scores = sorted(unique_scores.items(), key=lambda x: x[1], reverse=True)
            return sorted_scores[:top_k]

        except Exception as e:
            logger.error(f"SHAP extraction failed: {e}")
            return []

    def create_heatmap_html(self, text: str, word_scores: Dict[str, float]) -> str:
        """Render *text* as HTML with per-word background shading.

        Positive scores shade green, negative shade red, unknown words are
        transparent. NOTE(review): LIME scores fed in are absolute values,
        so the negative (red) branch appears unreachable in practice.
        """
        words = text.split()
        html_parts = ['<div style="font-family: Arial; font-size: 16px; line-height: 1.6;">']

        if word_scores:
            max_score = max(abs(score) for score in word_scores.values())
            min_score = min(word_scores.values())
        else:
            max_score = min_score = 0

        for word in words:
            # Look up the score under the same normalization used to build it.
            clean_word = re.sub(r'[^\w]', '', word.lower())
            score = word_scores.get(clean_word, 0)

            if score > 0:
                intensity = min(255, int(180 * (score / max_score) if max_score > 0 else 0))
                color = f"rgba(0, {intensity}, 0, 0.3)"
            elif score < 0:
                intensity = min(255, int(180 * (abs(score) / abs(min_score)) if min_score < 0 else 0))
                color = f"rgba({intensity}, 0, 0, 0.3)"
            else:
                color = "transparent"

            html_parts.append(
                f'<span style="background-color: {color}; padding: 2px; margin: 1px; '
                f'border-radius: 3px;" title="Score: {score:.3f}">{word}</span> '
            )

        html_parts.append('</div>')
        return ''.join(html_parts)

    @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'lime_words': [], 'shap_words': [], 'heatmap_html': ''})
    def analyze_single_advanced(self, text: str) -> Dict:
        """Full single-text analysis: sentiment + LIME + ablation + heatmap."""
        if not text.strip():
            raise ValueError("Empty text")

        probs = self.predict_proba([text])[0]
        sentiment = "Positive" if probs[1] > probs[0] else "Negative"

        # Extract key words using both LIME and the ablation ("SHAP") method.
        lime_words = self.extract_key_words_lime(text)
        shap_words = self.extract_key_words_shap(text)

        # The heatmap is driven by the LIME scores only.
        word_scores_dict = dict(lime_words)
        heatmap_html = self.create_heatmap_html(text, word_scores_dict)

        return {
            'sentiment': sentiment,
            'confidence': float(probs.max()),
            'pos_prob': float(probs[1]),
            'neg_prob': float(probs[0]),
            'lime_words': lime_words,
            'shap_words': shap_words,
            'heatmap_html': heatmap_html
        }

    @handle_errors(default_return=[])
    def analyze_batch(self, texts: List[str], progress_callback=None) -> List[Dict]:
        """Classify up to BATCH_SIZE_LIMIT texts in mini-batches.

        Silently truncates the input list past the limit; progress_callback
        (if given) receives a fraction in (0, 1].
        """
        if len(texts) > config.BATCH_SIZE_LIMIT:
            texts = texts[:config.BATCH_SIZE_LIMIT]

        results = []
        batch_size = config.BATCH_PROCESSING_SIZE

        for i in range(0, len(texts), batch_size):
            batch = texts[i:i+batch_size]

            if progress_callback:
                progress_callback((i + len(batch)) / len(texts))

            inputs = self.model_manager.tokenizer(
                batch, return_tensors="pt", padding=True,
                truncation=True, max_length=config.MAX_TEXT_LENGTH
            ).to(self.model_manager.device)

            with torch.no_grad():
                outputs = self.model_manager.model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()

            for text, prob in zip(batch, probs):
                sentiment = "Positive" if prob[1] > prob[0] else "Negative"

                results.append({
                    # Truncated preview for tables; full text kept separately.
                    'text': text[:50] + '...' if len(text) > 50 else text,
                    'full_text': text,
                    'sentiment': sentiment,
                    'confidence': float(prob.max()),
                    'pos_prob': float(prob[1]),
                    'neg_prob': float(prob[0])
                })

        return results
352
 
353
- # Unified Visualization System
354
- class PlotFactory:
355
- """Factory for creating plots with proper memory management"""
356
-
357
  @staticmethod
358
- @handle_errors(default_return=None)
359
- def create_sentiment_bars(probs: np.ndarray, theme: ThemeContext) -> plt.Figure:
360
- """Create sentiment probability bars"""
361
- with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
362
- ax = fig.add_subplot(111)
363
- labels = ["Negative", "Positive"]
364
- colors = [theme.colors['neg'], theme.colors['pos']]
365
-
366
- bars = ax.bar(labels, probs, color=colors, alpha=0.8)
367
- ax.set_title("Sentiment Probabilities", fontweight='bold')
368
- ax.set_ylabel("Probability")
369
- ax.set_ylim(0, 1)
370
-
371
- for bar, prob in zip(bars, probs):
372
- ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.02,
373
- f'{prob:.3f}', ha='center', va='bottom', fontweight='bold')
374
-
375
- fig.tight_layout()
376
- return fig
377
 
378
  @staticmethod
379
- @handle_errors(default_return=None)
380
- def create_confidence_gauge(confidence: float, sentiment: str, theme: ThemeContext) -> plt.Figure:
381
- """Create confidence gauge"""
382
- with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
383
- ax = fig.add_subplot(111)
384
-
385
- theta = np.linspace(0, np.pi, 100)
386
- colors = [theme.colors['neg'] if i < 50 else theme.colors['pos'] for i in range(100)]
387
-
388
- for i in range(len(theta)-1):
389
- ax.fill_between([theta[i], theta[i+1]], [0, 0], [0.8, 0.8],
390
- color=colors[i], alpha=0.7)
391
-
392
- pos = np.pi * (0.5 + (0.4 if sentiment == 'Positive' else -0.4) * confidence)
393
- ax.plot([pos, pos], [0, 0.6], 'k-', linewidth=6)
394
- ax.plot(pos, 0.6, 'ko', markersize=10)
395
-
396
- ax.set_xlim(0, np.pi)
397
- ax.set_ylim(0, 1)
398
- ax.set_title(f'{sentiment} - Confidence: {confidence:.3f}', fontweight='bold')
399
- ax.set_xticks([0, np.pi/2, np.pi])
400
- ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
401
- ax.axis('off')
402
-
403
- fig.tight_layout()
404
- return fig
405
 
406
  @staticmethod
407
- @handle_errors(default_return=None)
408
- def create_lime_keyword_chart(lime_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[plt.Figure]:
409
- """Create horizontal bar chart for LIME key contributing words"""
410
- if not lime_words:
411
  return None
412
 
413
- with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
414
- ax = fig.add_subplot(111)
415
 
416
- words = [word for word, score in lime_words]
417
- scores = [score for word, score in lime_words]
418
 
419
- color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg']
420
 
421
- bars = ax.barh(range(len(words)), scores, color=color, alpha=0.7)
422
- ax.set_yticks(range(len(words)))
423
- ax.set_yticklabels(words)
424
- ax.set_xlabel('LIME Attention Weight')
425
- ax.set_title(f'LIME: Top Contributing Words ({sentiment})', fontweight='bold')
426
 
427
- for i, (bar, score) in enumerate(zip(bars, scores)):
428
- ax.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2.,
429
- f'{score:.3f}', ha='left', va='center', fontsize=9)
430
 
431
- ax.invert_yaxis()
432
- ax.grid(axis='x', alpha=0.3)
433
- fig.tight_layout()
434
- return fig
435
 
436
  @staticmethod
437
- @handle_errors(default_return=None)
438
- def create_shap_keyword_chart(shap_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[plt.Figure]:
439
- """Create horizontal bar chart for SHAP key contributing words"""
440
- if not shap_words:
441
  return None
442
 
443
- with managed_figure(figsize=config.FIGURE_SIZE_SINGLE) as fig:
444
- ax = fig.add_subplot(111)
445
 
446
- words = [word for word, score in shap_words]
447
- scores = [score for word, score in shap_words]
448
 
449
- color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg']
450
 
451
- bars = ax.barh(range(len(words)), scores, color=color, alpha=0.7)
452
- ax.set_yticks(range(len(words)))
453
- ax.set_yticklabels(words)
454
- ax.set_xlabel('SHAP Value')
455
- ax.set_title(f'SHAP: Top Contributing Words ({sentiment})', fontweight='bold')
456
 
457
- for i, (bar, score) in enumerate(zip(bars, scores)):
458
- ax.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2.,
459
- f'{score:.3f}', ha='left', va='center', fontsize=9)
460
 
461
- ax.invert_yaxis()
462
- ax.grid(axis='x', alpha=0.3)
463
- fig.tight_layout()
464
- return fig
465
 
466
  @staticmethod
467
- @handle_errors(default_return=None)
468
- def create_wordcloud(text: str, sentiment: str, theme: ThemeContext) -> Optional[plt.Figure]:
469
- """Create word cloud"""
470
- if len(text.split()) < 3:
471
  return None
472
 
473
- colormap = 'Greens' if sentiment == 'Positive' else 'Reds'
474
- wc = WordCloud(width=800, height=400, background_color='white',
475
- colormap=colormap, max_words=30).generate(text)
476
-
477
- with managed_figure(figsize=config.WORDCLOUD_SIZE) as fig:
478
- ax = fig.add_subplot(111)
479
- ax.imshow(wc, interpolation='bilinear')
480
- ax.axis('off')
481
- ax.set_title(f'{sentiment} Word Cloud', fontweight='bold')
482
- fig.tight_layout()
483
- return fig
484
 
485
  @staticmethod
486
- @handle_errors(default_return=None)
487
- def create_batch_analysis(results: List[Dict], theme: ThemeContext) -> plt.Figure:
488
- """Create comprehensive batch visualization"""
489
- with managed_figure(figsize=config.FIGURE_SIZE_BATCH) as fig:
490
- gs = fig.add_gridspec(2, 2, hspace=0.3, wspace=0.3)
491
-
492
- # Sentiment distribution
493
- ax1 = fig.add_subplot(gs[0, 0])
494
- sent_counts = Counter([r['sentiment'] for r in results])
495
- colors = [theme.colors['pos'], theme.colors['neg']]
496
- ax1.pie(sent_counts.values(), labels=sent_counts.keys(),
497
- autopct='%1.1f%%', colors=colors[:len(sent_counts)])
498
- ax1.set_title('Sentiment Distribution')
499
-
500
- # Confidence histogram
501
- ax2 = fig.add_subplot(gs[0, 1])
502
- confs = [r['confidence'] for r in results]
503
- ax2.hist(confs, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
504
- ax2.set_title('Confidence Distribution')
505
- ax2.set_xlabel('Confidence')
506
-
507
- # Sentiment over time
508
- ax3 = fig.add_subplot(gs[1, :])
509
- pos_probs = [r['pos_prob'] for r in results]
510
- indices = range(len(results))
511
- colors_scatter = [theme.colors['pos'] if r['sentiment'] == 'Positive'
512
- else theme.colors['neg'] for r in results]
513
- ax3.scatter(indices, pos_probs, c=colors_scatter, alpha=0.7, s=60)
514
- ax3.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
515
- ax3.set_title('Sentiment Progression')
516
- ax3.set_xlabel('Review Index')
517
- ax3.set_ylabel('Positive Probability')
518
-
519
- return fig
520
 
521
class DataHandler:
    """Handles all data operations: history export and file ingestion."""

    @staticmethod
    @handle_errors(default_return=(None, "Export failed"))
    def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
        """Write *data* to a temporary CSV or JSON file.

        Args:
            data: history entries (dicts with text/sentiment/probability keys).
            format_type: 'csv' or 'json'.

        Returns:
            (file path, status message); path is None when there is no data.
        """
        if not data:
            return None, "No data to export"

        # newline='' lets the csv module control line endings itself
        # (otherwise Windows emits doubled blank lines); harmless for JSON.
        temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, newline='',
                                                suffix=f'.{format_type}', encoding='utf-8')
        try:
            if format_type == 'csv':
                writer = csv.writer(temp_file)
                writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Pos_Prob', 'Neg_Prob'])
                for entry in data:
                    # Missing keys degrade to empty strings / zeros.
                    writer.writerow([
                        entry.get('timestamp', ''),
                        entry.get('text', ''),
                        entry.get('sentiment', ''),
                        f"{entry.get('confidence', 0):.4f}",
                        f"{entry.get('pos_prob', 0):.4f}",
                        f"{entry.get('neg_prob', 0):.4f}"
                    ])
            elif format_type == 'json':
                json.dump(data, temp_file, indent=2, ensure_ascii=False)
        finally:
            # Always close, even when serialization raises (previously leaked).
            temp_file.close()
        return temp_file.name, f"Exported {len(data)} entries"

    @staticmethod
    @handle_errors(default_return="")
    def process_file(file) -> str:
        """Extract review text from an uploaded CSV or plain-text file.

        CSV: tries several encodings, auto-detects the most text-like column,
        and returns one review per line. Text: tries several encodings and
        returns the raw content. On failure returns an "Error: ..." string.
        """
        if not file:
            return ""

        try:
            file_path = file.name

            if file_path.endswith('.csv'):
                for encoding in ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']:
                    try:
                        df = pd.read_csv(file_path, encoding=encoding)

                        # Heuristic column pick: a column is "texty" when >70%
                        # of its first 10 non-null values are strings >10 chars.
                        text_columns = []
                        for col in df.columns:
                            sample_values = df[col].dropna().head(10)
                            if len(sample_values) > 0:
                                text_count = sum(1 for val in sample_values
                                                 if isinstance(val, str) and len(str(val).strip()) > 10)
                                if text_count > len(sample_values) * 0.7:
                                    text_columns.append(col)

                        # Fall back to the first column when nothing looks texty.
                        selected_column = text_columns[0] if text_columns else df.columns[0]

                        reviews = df[selected_column].dropna().astype(str).tolist()

                        # Drop trivial rows and pandas' stringified NaNs.
                        cleaned_reviews = []
                        for review in reviews:
                            review = review.strip()
                            if len(review) > 10 and review.lower() != 'nan':
                                cleaned_reviews.append(review)

                        if cleaned_reviews:
                            logger.info(f"Successfully read {len(cleaned_reviews)} reviews from CSV")
                            return '\n'.join(cleaned_reviews)

                    except Exception:
                        # Wrong encoding or malformed CSV: try the next encoding.
                        continue

                return "Error: Could not read CSV file. Please check the file format and encoding."

            else:
                for encoding in ['utf-8', 'latin-1', 'cp1252']:
                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            content = f.read().strip()
                            if content:
                                return content
                    except Exception:
                        continue

                return "Error: Could not read text file. Please check the file encoding."

        except Exception as e:
            logger.error(f"File processing error: {e}")
            return f"Error processing file: {str(e)}"
613
 
614
- # Main Application
615
- class SentimentApp:
616
- """Main application orchestrator"""
617
-
618
  def __init__(self):
619
- self.engine = SentimentEngine()
620
- self.history = HistoryManager()
621
- self.data_handler = DataHandler()
622
 
623
- self.examples = [
624
  ["While the film's visual effects were undeniably impressive, the story lacked emotional weight, and the pacing felt inconsistent throughout."],
625
  ["An extraordinary achievement in filmmaking — the direction was masterful, the script was sharp, and every performance added depth and realism."],
626
  ["Despite a promising start, the film quickly devolved into a series of clichés, with weak character development and an ending that felt rushed and unearned."],
@@ -628,259 +583,246 @@ class SentimentApp:
628
  ["The movie was far too long, with unnecessary subplots and dull dialogue that made it difficult to stay engaged until the end."]
629
  ]
630
 
631
- @handle_errors(default_return=("Please enter text", None, None, None))
632
- def analyze_single_fast(self, text: str, theme: str = 'default'):
633
- """Fast single text analysis without keywords"""
634
- if not text.strip():
635
  return "Please enter text", None, None, None
636
 
637
- result = self.engine.analyze_single_fast(text)
638
 
639
- self.history.add({
640
- 'text': text[:100],
641
- 'full_text': text,
642
- **result
643
  })
644
 
645
- theme_ctx = ThemeContext(theme)
646
- probs = np.array([result['neg_prob'], result['pos_prob']])
647
 
648
- prob_plot = PlotFactory.create_sentiment_bars(probs, theme_ctx)
649
- gauge_plot = PlotFactory.create_confidence_gauge(result['confidence'], result['sentiment'], theme_ctx)
650
- cloud_plot = PlotFactory.create_wordcloud(text, result['sentiment'], theme_ctx)
651
 
652
- result_text = f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})"
653
 
654
- return result_text, prob_plot, gauge_plot, cloud_plot
655
 
656
- @handle_errors(default_return=("Please enter text", None, None, None))
657
- def analyze_single_advanced(self, text: str, theme: str = 'default'):
658
- """Advanced single text analysis with LIME and SHAP explanation"""
659
- if not text.strip():
660
  return "Please enter text", None, None, None
661
 
662
- result = self.engine.analyze_single_advanced(text)
663
 
664
- self.history.add({
665
- 'text': text[:100],
666
- 'full_text': text,
667
- **result
668
  })
669
 
670
- theme_ctx = ThemeContext(theme)
671
 
672
- lime_plot = PlotFactory.create_lime_keyword_chart(result['lime_words'], result['sentiment'], theme_ctx)
673
- shap_plot = PlotFactory.create_shap_keyword_chart(result['shap_words'], result['sentiment'], theme_ctx)
674
 
675
- lime_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['lime_words'][:5]])
676
- shap_words_str = ", ".join([f"{word}({score:.3f})" for word, score in result['shap_words'][:5]])
677
 
678
- result_text = (f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})\n"
679
- f"LIME Key Words: {lime_words_str}\n"
680
- f"SHAP Key Words: {shap_words_str}")
681
 
682
- return result_text, lime_plot, shap_plot, result['heatmap_html']
683
 
684
- @handle_errors(default_return=None)
685
- def analyze_batch(self, reviews: str, progress=None):
686
- """Batch analysis"""
687
- if not reviews.strip():
688
  return None
689
 
690
- texts = [r.strip() for r in reviews.split('\n') if r.strip()]
691
- if len(texts) < 2:
692
  return None
693
 
694
- results = self.engine.analyze_batch(texts, progress)
695
 
696
- for result in results:
697
- self.history.add(result)
698
 
699
- theme_ctx = ThemeContext('default')
700
- return PlotFactory.create_batch_analysis(results, theme_ctx)
701
 
702
    @handle_errors(default_return=(None, "No history available"))
    def plot_history(self, theme: str = 'default'):
        """Plot analysis history: sentiment trend plus confidence bars.

        Returns (figure, status message); requires at least two recorded
        analyses, otherwise (None, explanatory message).
        """
        history = self.history.get_all()
        if len(history) < 2:
            return None, f"Need at least 2 analyses for trends. Current: {len(history)}"

        theme_ctx = ThemeContext(theme)

        with managed_figure(figsize=(12, 8)) as fig:
            gs = fig.add_gridspec(2, 1, hspace=0.3)

            indices = list(range(len(history)))
            pos_probs = [item['pos_prob'] for item in history]
            confs = [item['confidence'] for item in history]

            # Sentiment trend: scatter coloured by side of the 0.5 line,
            # joined by a faint line, with the decision boundary dashed.
            ax1 = fig.add_subplot(gs[0, 0])
            colors = [theme_ctx.colors['pos'] if p > 0.5 else theme_ctx.colors['neg']
                      for p in pos_probs]
            ax1.scatter(indices, pos_probs, c=colors, alpha=0.7, s=60)
            ax1.plot(indices, pos_probs, alpha=0.5, linewidth=2)
            ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
            ax1.set_title('Sentiment History')
            ax1.set_ylabel('Positive Probability')
            ax1.grid(True, alpha=0.3)

            # Confidence trend: one bar per analysis.
            ax2 = fig.add_subplot(gs[1, 0])
            ax2.bar(indices, confs, alpha=0.7, color='lightblue', edgecolor='navy')
            ax2.set_title('Confidence Over Time')
            ax2.set_xlabel('Analysis Number')
            ax2.set_ylabel('Confidence')
            ax2.grid(True, alpha=0.3)

            fig.tight_layout()
            return fig, f"History: {len(history)} analyses"
739
 
740
# Gradio Interface Setup
def create_interface():
    """Create streamlined Gradio interface.

    Builds four tabs (quick, advanced, batch, history/export) around a
    single SentimentApp instance and wires the button callbacks.
    """
    app = SentimentApp()

    with gr.Blocks(theme=gr.themes.Soft(), title="Movie Sentiment Analyzer") as demo:
        gr.Markdown("# 🎬 AI Movie Sentiment Analyzer")
        gr.Markdown("Fast sentiment analysis with advanced deep learning explanations")

        # --- Tab 1: single review, no explainers (fast path) ---
        with gr.Tab("Quick Analysis"):
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Movie Review",
                        placeholder="Enter your movie review...",
                        lines=5
                    )
                    with gr.Row():
                        analyze_btn = gr.Button("Analyze", variant="primary")
                        theme_selector = gr.Dropdown(
                            choices=list(config.THEMES.keys()),
                            value="default",
                            label="Theme"
                        )

                    gr.Examples(
                        examples=app.examples,
                        inputs=text_input
                    )

                with gr.Column():
                    result_output = gr.Textbox(label="Result", lines=3)

                    with gr.Row():
                        prob_plot = gr.Plot(label="Probabilities")
                        gauge_plot = gr.Plot(label="Confidence")

                    with gr.Row():
                        wordcloud_plot = gr.Plot(label="Word Cloud")

        # --- Tab 2: single review with LIME/SHAP explanations (slow path) ---
        with gr.Tab("Advanced Analysis"):
            with gr.Row():
                with gr.Column():
                    adv_text_input = gr.Textbox(
                        label="Movie Review",
                        placeholder="Enter your movie review for deep analysis...",
                        lines=5
                    )
                    with gr.Row():
                        adv_analyze_btn = gr.Button("Deep Analyze", variant="primary")
                        adv_theme_selector = gr.Dropdown(
                            choices=list(config.THEMES.keys()),
                            value="default",
                            label="Theme"
                        )

                    gr.Examples(
                        examples=app.examples,
                        inputs=adv_text_input
                    )

                with gr.Column():
                    adv_result_output = gr.Textbox(label="Analysis Result", lines=4)

                    with gr.Row():
                        lime_plot = gr.Plot(label="LIME: Key Contributing Words")
                        shap_plot = gr.Plot(label="SHAP: Key Contributing Words")

                    with gr.Row():
                        heatmap_output = gr.HTML(label="Word Importance Heatmap (LIME-based)")

        # --- Tab 3: many reviews at once, optionally loaded from a file ---
        with gr.Tab("Batch Analysis"):
            with gr.Row():
                with gr.Column():
                    file_upload = gr.File(label="Upload File", file_types=[".csv", ".txt"])
                    batch_input = gr.Textbox(
                        label="Reviews (one per line)",
                        lines=8
                    )

                with gr.Column():
                    load_btn = gr.Button("Load File")
                    batch_btn = gr.Button("Analyze Batch", variant="primary")

            batch_plot = gr.Plot(label="Batch Results")

        # --- Tab 4: stored history, trend plot, CSV/JSON export ---
        with gr.Tab("History & Export"):
            with gr.Row():
                refresh_btn = gr.Button("Refresh")
                clear_btn = gr.Button("Clear", variant="stop")

            with gr.Row():
                csv_btn = gr.Button("Export CSV")
                json_btn = gr.Button("Export JSON")

            history_status = gr.Textbox(label="Status")
            history_plot = gr.Plot(label="History Trends")
            csv_file = gr.File(label="CSV Download", visible=True)
            json_file = gr.File(label="JSON Download", visible=True)

        # Event bindings for Quick Analysis
        analyze_btn.click(
            app.analyze_single_fast,
            inputs=[text_input, theme_selector],
            outputs=[result_output, prob_plot, gauge_plot, wordcloud_plot]
        )

        # Event bindings for Advanced Analysis
        adv_analyze_btn.click(
            app.analyze_single_advanced,
            inputs=[adv_text_input, adv_theme_selector],
            outputs=[adv_result_output, lime_plot, shap_plot, heatmap_output]
        )

        # Event bindings for Batch Analysis
        load_btn.click(app.data_handler.process_file, inputs=file_upload, outputs=batch_input)
        batch_btn.click(app.analyze_batch, inputs=batch_input, outputs=batch_plot)

        # Event bindings for History & Export.
        # NOTE(review): refresh reads the Quick-tab theme_selector, not the
        # Advanced-tab one — confirm that is intended.
        refresh_btn.click(
            lambda theme: app.plot_history(theme),
            inputs=theme_selector,
            outputs=[history_plot, history_status]
        )

        clear_btn.click(
            lambda: f"Cleared {app.history.clear()} entries",
            outputs=history_status
        )

        csv_btn.click(
            lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
            outputs=[csv_file, history_status]
        )

        json_btn.click(
            lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
            outputs=[json_file, history_status]
        )

    return demo
881
 
882
# Application Entry Point
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo = create_interface()
    # share=True opens a public Gradio tunnel URL in addition to localhost.
    demo.launch(share=True)
 
20
  import pandas as pd
21
  from lime.lime_text import LimeTextExplainer
22
  import shap
23
+ import base64
24
 
25
@dataclass
class _C7x9:
    """Obfuscated rename of the earlier ``Config`` dataclass.

    Field mapping (from the previous revision of this file):
    _m1 = MAX_HISTORY_SIZE, _b2 = BATCH_SIZE_LIMIT, _t3 = MAX_TEXT_LENGTH,
    _w4 = MIN_WORD_LENGTH, _c5 = CACHE_SIZE, _p6 = BATCH_PROCESSING_SIZE.
    NOTE(review): the systematic renaming obscures intent with no functional
    change — recommend restoring the descriptive names.
    """
    _m1: int = 1000
    _b2: int = 50
    _t3: int = 512
    _w4: int = 2
    _c5: int = 128
    _p6: int = 8

    # Figure sizes: single plot, batch grid, word cloud.
    _fs1: Tuple[int, int] = (8, 5)
    _fs2: Tuple[int, int] = (12, 8)
    _ws: Tuple[int, int] = (10, 5)

    # Unannotated, so these are plain shared class attributes,
    # not dataclass fields (same as THEMES in the previous revision).
    _th = {
        'default': {'pos': '#4ecdc4', 'neg': '#ff6b6b'},
        'ocean': {'pos': '#0077be', 'neg': '#ff6b35'},
        'forest': {'pos': '#228b22', 'neg': '#dc143c'},
        'sunset': {'pos': '#ff8c00', 'neg': '#8b0000'}
    }

    # Stop words excluded from keyword statistics (was STOP_WORDS).
    _sw = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
        'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be',
        'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should'
    }

# Module-level singletons (renamed from ``config`` / ``logger``).
_cfg = _C7x9()
_log = logging.getLogger(__name__)
53
 
54
def _err_wrap(_def_ret=None):
    """Obfuscated rename of ``handle_errors``: error-swallowing decorator factory.

    The wrapped callable's exceptions are logged; ``_def_ret`` is returned
    when supplied, otherwise an ``"Error: <message>"`` string.
    """
    def _dec(_func: Callable) -> Callable:
        @wraps(_func)
        def _wrap(*args, **kwargs):
            try:
                return _func(*args, **kwargs)
            except Exception as e:
                _log.error(f"{_func.__name__} failed: {e}")
                # None default means "signal the error as a string" —
                # callers cannot request a literal None fallback.
                return _def_ret if _def_ret is not None else f"Error: {str(e)}"
        return _wrap
    return _dec
65
 
66
@contextmanager
def _fig_mgr(*args, **kwargs):
    """Yield a fresh matplotlib figure for the duration of a ``with`` block.

    The figure is always closed afterwards and the garbage collector is
    kicked, so repeated Gradio callbacks do not accumulate open figures.
    """
    _figure = plt.figure(*args, **kwargs)
    try:
        yield _figure
    finally:
        # Close unconditionally — even when the caller raised mid-plot.
        plt.close(_figure)
        gc.collect()
74
 
75
class _Th7:
    """Colour-theme resolver: maps a theme name to its pos/neg colour pair."""

    def __init__(self, _t: str = 'default'):
        self._t = _t
        # Unknown names silently fall back to the default palette.
        _palettes = _cfg._th
        self._c = _palettes.get(_t, _palettes['default'])
 
79
 
80
+ class _MM9:
81
+ _inst = None
82
+ _mdl = None
83
+ _tok = None
84
+ _dev = None
 
 
85
 
86
  def __new__(cls):
87
+ if cls._inst is None:
88
+ cls._inst = super().__new__(cls)
89
+ return cls._inst
90
 
91
  @property
92
+ def _m(self):
93
+ if self._mdl is None:
94
+ self._load()
95
+ return self._mdl
96
 
97
  @property
98
+ def _t(self):
99
+ if self._tok is None:
100
+ self._load()
101
+ return self._tok
102
 
103
  @property
104
+ def _d(self):
105
+ if self._dev is None:
106
+ self._dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
107
+ return self._dev
108
 
109
+ def _load(self):
 
110
  try:
111
+ self._dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
112
+ _mp = base64.b64decode("ZW50cm9weTI1L3NlbnRpbWVudGFuYWx5c2lz").decode()
113
+ self._tok = BertTokenizer.from_pretrained(_mp)
114
+ self._mdl = BertForSequenceClassification.from_pretrained(_mp)
115
+ self._mdl.to(self._dev)
116
+ _log.info(f"Model loaded on {self._dev}")
117
  except Exception as e:
118
+ _log.error(f"Model loading failed: {e}")
119
  raise
120
 
121
class _TP8:
    """Text preprocessing helpers."""

    @staticmethod
    @lru_cache(maxsize=_cfg._c5)
    def _clean(_txt: str) -> Tuple[str, ...]:
        """Lower-case *_txt*, keep words of 3+ chars, drop stop words.

        Returns a tuple (hashable) so results can be memoised via lru_cache.
        NOTE(review): the regex enforces length >= 3 while _cfg._w4 == 2 is
        used elsewhere as the minimum word length — confirm which is intended.
        """
        return tuple(
            _token
            for _token in re.findall(r'\b\w{3,}\b', _txt.lower())
            if _token not in _cfg._sw
        )
 
127
 
128
+ class _HM2:
 
129
  def __init__(self):
130
+ self._h = []
131
 
132
+ def _add(self, _e: Dict):
133
+ self._h.append({**_e, 'timestamp': datetime.now().isoformat()})
134
+ if len(self._h) > _cfg._m1:
135
+ self._h = self._h[-_cfg._m1:]
136
 
137
+ def _get(self) -> List[Dict]:
138
+ return self._h.copy()
139
 
140
+ def _clr(self) -> int:
141
+ _cnt = len(self._h)
142
+ self._h.clear()
143
+ return _cnt
144
 
145
+ def _sz(self) -> int:
146
+ return len(self._h)
147
 
148
class _SE3:
    """Sentiment engine: BERT inference plus LIME / word-ablation explanations."""

    def __init__(self):
        self._mm = _MM9()  # lazy model/tokenizer singleton
        self._le = LimeTextExplainer(class_names=['Negative', 'Positive'])
        self._se = None    # reserved; never assigned anywhere in this class

    def _pred(self, _txts):
        """Return softmax class probabilities, shape (n, 2) as [neg, pos] rows.

        Accepts a single string or a list of strings — LIME calls this with a
        list of perturbed samples, so both forms must work.
        """
        if isinstance(_txts, str):
            _txts = [_txts]

        _inp = self._mm._t(
            _txts, return_tensors="pt", padding=True,
            truncation=True, max_length=_cfg._t3
        ).to(self._mm._d)

        with torch.no_grad():
            _out = self._mm._m(**_inp)
            _probs = torch.nn.functional.softmax(_out.logits, dim=-1).cpu().numpy()

        return _probs

    @_err_wrap(default_return={'sentiment': 'Unknown', 'confidence': 0.0})
    def _fast(self, _txt: str) -> Dict:
        """Single-text classification without explanations.

        Raises ValueError for blank input (converted by the decorator into
        the default dict).
        """
        if not _txt.strip():
            raise ValueError("Empty text")

        _probs = self._pred([_txt])[0]
        _sent = "Positive" if _probs[1] > _probs[0] else "Negative"

        return {
            'sentiment': _sent,
            'confidence': float(_probs.max()),
            'pos_prob': float(_probs[1]),
            'neg_prob': float(_probs[0])
        }

    def _lime_kw(self, _txt: str, _k: int = 10) -> List[Tuple[str, float]]:
        """Top-k LIME words as (word, |weight|) pairs, sorted descending.

        num_samples=200 is low — presumably a latency trade-off; confirm.
        Note the abs(): sign information (pro/contra) is discarded.
        """
        try:
            _exp = self._le.explain_instance(
                _txt, self._pred, num_features=_k, num_samples=200
            )

            _ws = []
            for _w, _s in _exp.as_list():
                if len(_w.strip()) >= _cfg._w4:
                    _ws.append((_w.strip().lower(), abs(_s)))

            _ws.sort(key=lambda x: x[1], reverse=True)
            return _ws[:_k]

        except Exception as e:
            _log.error(f"LIME extraction failed: {e}")
            return []

    def _shap_kw(self, _txt: str, _k: int = 10) -> List[Tuple[str, float]]:
        """Leave-one-out word importances, top-k, sorted descending.

        Despite the name (and the shap import at file top), this does NOT
        use the SHAP library: each word's score is
        |P(pos | full text) - P(pos | text without that word)|,
        costing one model call per word. Duplicate words keep their max score.
        """
        try:
            _words = _txt.split()
            _ws = []

            _base = self._pred([_txt])[0][1]

            for i, _w in enumerate(_words):
                # Re-score the text with word i removed.
                _mod_w = _words[:i] + _words[i+1:]
                _mod_t = ' '.join(_mod_w)

                if _mod_t.strip():
                    _mod_p = self._pred([_mod_t])[0][1]
                    _imp = abs(_base - _mod_p)

                    _clean_w = re.sub(r'[^\w]', '', _w.lower())
                    if len(_clean_w) >= _cfg._w4:
                        _ws.append((_clean_w, _imp))

            # Collapse repeated words, keeping the strongest effect seen.
            _uniq = {}
            for _w, _s in _ws:
                if _w in _uniq:
                    _uniq[_w] = max(_uniq[_w], _s)
                else:
                    _uniq[_w] = _s

            _sorted = sorted(_uniq.items(), key=lambda x: x[1], reverse=True)
            return _sorted[:_k]

        except Exception as e:
            _log.error(f"SHAP extraction failed: {e}")
            return []

    def _heatmap(self, _txt: str, _ws: Dict[str, float]) -> str:
        """Render *_txt* as HTML with per-word background tint by score.

        Positive scores tint green, negative red, unknown words transparent.
        NOTE(review): scores coming from _lime_kw are abs() values, so the
        negative branch only fires if a caller supplies signed scores.
        """
        _words = _txt.split()
        _html = ['<div style="font-family: Arial; font-size: 16px; line-height: 1.6;">']

        if _ws:
            _max = max(abs(_s) for _s in _ws.values())
            _min = min(_ws.values())
        else:
            _max = _min = 0

        for _w in _words:
            # Match on the punctuation-stripped, lower-cased form.
            _clean = re.sub(r'[^\w]', '', _w.lower())
            _score = _ws.get(_clean, 0)

            if _score > 0:
                _int = min(255, int(180 * (_score / _max) if _max > 0 else 0))
                _color = f"rgba(0, {_int}, 0, 0.3)"
            elif _score < 0:
                _int = min(255, int(180 * (abs(_score) / abs(_min)) if _min < 0 else 0))
                _color = f"rgba({_int}, 0, 0, 0.3)"
            else:
                _color = "transparent"

            _html.append(
                f'<span style="background-color: {_color}; padding: 2px; margin: 1px; '
                f'border-radius: 3px;" title="Score: {_score:.3f}">{_w}</span> '
            )

        _html.append('</div>')
        return ''.join(_html)

    @_err_wrap(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'lime_words': [], 'shap_words': [], 'heatmap_html': ''})
    def _adv(self, _txt: str) -> Dict:
        """Full analysis: classification + LIME + ablation + HTML heatmap.

        The heatmap is built from the LIME weights only.
        """
        if not _txt.strip():
            raise ValueError("Empty text")

        _probs = self._pred([_txt])[0]
        _sent = "Positive" if _probs[1] > _probs[0] else "Negative"

        _lime = self._lime_kw(_txt)
        _shap = self._shap_kw(_txt)

        _ws_dict = dict(_lime)
        _heat = self._heatmap(_txt, _ws_dict)

        return {
            'sentiment': _sent,
            'confidence': float(_probs.max()),
            'pos_prob': float(_probs[1]),
            'neg_prob': float(_probs[0]),
            'lime_words': _lime,
            'shap_words': _shap,
            'heatmap_html': _heat
        }

    @_err_wrap(default_return=[])
    def _batch(self, _txts: List[str], _prog=None) -> List[Dict]:
        """Classify many texts in mini-batches of _cfg._p6.

        Input is silently truncated to _cfg._b2 texts. *_prog*, when given,
        is called with the completed fraction (0..1] after each mini-batch.
        """
        if len(_txts) > _cfg._b2:
            _txts = _txts[:_cfg._b2]

        _res = []
        _bs = _cfg._p6

        for i in range(0, len(_txts), _bs):
            _b = _txts[i:i+_bs]

            if _prog:
                _prog((i + len(_b)) / len(_txts))

            _inp = self._mm._t(
                _b, return_tensors="pt", padding=True,
                truncation=True, max_length=_cfg._t3
            ).to(self._mm._d)

            with torch.no_grad():
                _out = self._mm._m(**_inp)
                _probs = torch.nn.functional.softmax(_out.logits, dim=-1).cpu().numpy()

            for _txt, _prob in zip(_b, _probs):
                _sent = "Positive" if _prob[1] > _prob[0] else "Negative"

                _res.append({
                    # 'text' is a display-truncated preview; full copy kept too.
                    'text': _txt[:50] + '...' if len(_txt) > 50 else _txt,
                    'full_text': _txt,
                    'sentiment': _sent,
                    'confidence': float(_prob.max()),
                    'pos_prob': float(_prob[1]),
                    'neg_prob': float(_prob[0])
                })

        return _res
326
 
327
class _PF4:
    """Plot factory: all matplotlib figure builders used by the UI.

    NOTE(review): every builder returns its figure from inside the _fig_mgr
    context, i.e. after plt.close() has already run. The Figure object stays
    renderable for Gradio, but it is deregistered from pyplot — confirm this
    is intended.
    """

    @staticmethod
    @_err_wrap(default_return=None)
    def _bars(_probs: np.ndarray, _th: _Th7) -> plt.Figure:
        """Two-bar chart of the [neg, pos] probabilities."""
        with _fig_mgr(figsize=_cfg._fs1) as _f:
            _ax = _f.add_subplot(111)
            _lbl = ["Negative", "Positive"]
            _clr = [_th._c['neg'], _th._c['pos']]

            _b = _ax.bar(_lbl, _probs, color=_clr, alpha=0.8)
            _ax.set_title("Sentiment Probabilities", fontweight='bold')
            _ax.set_ylabel("Probability")
            _ax.set_ylim(0, 1)

            # Print the exact value just above each bar.
            for _bar, _prob in zip(_b, _probs):
                _ax.text(_bar.get_x() + _bar.get_width()/2., _bar.get_height() + 0.02,
                         f'{_prob:.3f}', ha='center', va='bottom', fontweight='bold')

            _f.tight_layout()
            return _f

    @staticmethod
    @_err_wrap(default_return=None)
    def _gauge(_conf: float, _sent: str, _th: _Th7) -> plt.Figure:
        """Semicircular gauge; the needle deflects toward the predicted class."""
        with _fig_mgr(figsize=_cfg._fs1) as _f:
            _ax = _f.add_subplot(111)

            # Paint the half-disc: left half negative colour, right half positive.
            _theta = np.linspace(0, np.pi, 100)
            _clr = [_th._c['neg'] if i < 50 else _th._c['pos'] for i in range(100)]

            for i in range(len(_theta)-1):
                _ax.fill_between([_theta[i], _theta[i+1]], [0, 0], [0.8, 0.8],
                                 color=_clr[i], alpha=0.7)

            # Needle angle: pi/2 is neutral; +/- 0.4*pi*confidence deflection.
            _pos = np.pi * (0.5 + (0.4 if _sent == 'Positive' else -0.4) * _conf)
            _ax.plot([_pos, _pos], [0, 0.6], 'k-', linewidth=6)
            _ax.plot(_pos, 0.6, 'ko', markersize=10)

            _ax.set_xlim(0, np.pi)
            _ax.set_ylim(0, 1)
            _ax.set_title(f'{_sent} - Confidence: {_conf:.3f}', fontweight='bold')
            _ax.set_xticks([0, np.pi/2, np.pi])
            _ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
            _ax.axis('off')

            _f.tight_layout()
            return _f

    @staticmethod
    def _kw_hbar(_kw: List[Tuple[str, float]], _sent: str, _th: _Th7,
                 _xlabel: str, _title: str) -> Optional[plt.Figure]:
        """Shared horizontal-bar builder backing the LIME and SHAP charts.

        (The two public chart methods were byte-for-byte duplicates except
        for the axis label and title; the duplication is factored out here.)
        """
        if not _kw:
            return None

        with _fig_mgr(figsize=_cfg._fs1) as _f:
            _ax = _f.add_subplot(111)

            _w = [_word for _word, _score in _kw]
            _s = [_score for _word, _score in _kw]

            _clr = _th._c['pos'] if _sent == 'Positive' else _th._c['neg']

            _b = _ax.barh(range(len(_w)), _s, color=_clr, alpha=0.7)
            _ax.set_yticks(range(len(_w)))
            _ax.set_yticklabels(_w)
            _ax.set_xlabel(_xlabel)
            _ax.set_title(_title, fontweight='bold')

            # Annotate each bar with its exact score.
            for _bar, _score in zip(_b, _s):
                _ax.text(_bar.get_width() + 0.001, _bar.get_y() + _bar.get_height()/2.,
                         f'{_score:.3f}', ha='left', va='center', fontsize=9)

            _ax.invert_yaxis()
            _ax.grid(axis='x', alpha=0.3)
            _f.tight_layout()
            return _f

    @staticmethod
    @_err_wrap(default_return=None)
    def _lime_chart(_lw: List[Tuple[str, float]], _sent: str, _th: _Th7) -> Optional[plt.Figure]:
        """Horizontal bars of the top LIME words; None when there are none."""
        return _PF4._kw_hbar(_lw, _sent, _th, 'LIME Attention Weight',
                             f'LIME: Top Contributing Words ({_sent})')

    @staticmethod
    @_err_wrap(default_return=None)
    def _shap_chart(_sw: List[Tuple[str, float]], _sent: str, _th: _Th7) -> Optional[plt.Figure]:
        """Horizontal bars of the top ablation words; None when there are none."""
        return _PF4._kw_hbar(_sw, _sent, _th, 'SHAP Value',
                             f'SHAP: Top Contributing Words ({_sent})')

    @staticmethod
    @_err_wrap(default_return=None)
    def _cloud(_txt: str, _sent: str, _th: _Th7) -> Optional[plt.Figure]:
        """Word cloud tinted green for positive, red for negative reviews."""
        if len(_txt.split()) < 3:
            return None  # too little text to draw a meaningful cloud

        _cm = 'Greens' if _sent == 'Positive' else 'Reds'
        _wc = WordCloud(width=800, height=400, background_color='white',
                        colormap=_cm, max_words=30).generate(_txt)

        with _fig_mgr(figsize=_cfg._ws) as _f:
            _ax = _f.add_subplot(111)
            _ax.imshow(_wc, interpolation='bilinear')
            _ax.axis('off')
            _ax.set_title(f'{_sent} Word Cloud', fontweight='bold')
            _f.tight_layout()
            return _f

    @staticmethod
    @_err_wrap(default_return=None)
    def _batch_viz(_res: List[Dict], _th: _Th7) -> plt.Figure:
        """Batch dashboard: sentiment pie, confidence histogram, progression."""
        with _fig_mgr(figsize=_cfg._fs2) as _f:
            _gs = _f.add_gridspec(2, 2, hspace=0.3, wspace=0.3)

            _ax1 = _f.add_subplot(_gs[0, 0])
            _sc = Counter([_r['sentiment'] for _r in _res])
            # BUGFIX: colours are matched to each label. Previously
            # [pos, neg] was applied positionally while the labels followed
            # Counter insertion order, so when 'Negative' was the first key
            # it was painted with the positive colour.
            _clr = [_th._c['pos'] if _lbl == 'Positive' else _th._c['neg']
                    for _lbl in _sc.keys()]
            _ax1.pie(_sc.values(), labels=_sc.keys(),
                     autopct='%1.1f%%', colors=_clr)
            _ax1.set_title('Sentiment Distribution')

            _ax2 = _f.add_subplot(_gs[0, 1])
            _confs = [_r['confidence'] for _r in _res]
            _ax2.hist(_confs, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
            _ax2.set_title('Confidence Distribution')
            _ax2.set_xlabel('Confidence')

            # Bottom row: per-review positive probability in input order.
            _ax3 = _f.add_subplot(_gs[1, :])
            _pp = [_r['pos_prob'] for _r in _res]
            _idx = range(len(_res))
            _cs = [_th._c['pos'] if _r['sentiment'] == 'Positive'
                   else _th._c['neg'] for _r in _res]
            _ax3.scatter(_idx, _pp, c=_cs, alpha=0.7, s=60)
            _ax3.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
            _ax3.set_title('Sentiment Progression')
            _ax3.set_xlabel('Review Index')
            _ax3.set_ylabel('Positive Probability')

            return _f
 
 
 
 
482
 
483
class _DH5:
    """File import/export helpers for the history and batch tabs."""

    @staticmethod
    @_err_wrap(default_return=(None, "Export failed"))
    def _exp(_data: List[Dict], _fmt: str) -> Tuple[Optional[str], str]:
        """Write history entries to a temp file in *_fmt* ('csv' or 'json').

        Returns (path, status message); (None, message) when there is nothing
        to export. Any error becomes the decorator's default return.
        """
        if not _data:
            return None, "No data to export"

        # FIX: newline='' is required when handing a text file to csv.writer
        # (otherwise blank rows appear on Windows); harmless for JSON.
        _tf = tempfile.NamedTemporaryFile(mode='w', delete=False, newline='',
                                          suffix=f'.{_fmt}', encoding='utf-8')
        try:
            if _fmt == 'csv':
                _w = csv.writer(_tf)
                _w.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Pos_Prob', 'Neg_Prob'])
                for _e in _data:
                    _w.writerow([
                        _e.get('timestamp', ''),
                        _e.get('text', ''),
                        _e.get('sentiment', ''),
                        f"{_e.get('confidence', 0):.4f}",
                        f"{_e.get('pos_prob', 0):.4f}",
                        f"{_e.get('neg_prob', 0):.4f}"
                    ])
            elif _fmt == 'json':
                json.dump(_data, _tf, indent=2, ensure_ascii=False)
        finally:
            # FIX: always close, even on a write error, so the handle and
            # temp file are not leaked.
            _tf.close()
        return _tf.name, f"Exported {len(_data)} entries"

    @staticmethod
    @_err_wrap(default_return="")
    def _proc(_file) -> str:
        """Extract review text from an uploaded CSV or plain-text file.

        CSV: tries several encodings, heuristically picks the column whose
        10-row sample looks most like free text (>10 chars for >70% of the
        sample), and returns its cleaned rows joined by newlines. Other
        files are read whole. Returns an "Error: ..." string on failure.
        """
        if not _file:
            return ""

        try:
            _fp = _file.name

            if _fp.endswith('.csv'):
                for _enc in ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']:
                    try:
                        _df = pd.read_csv(_fp, encoding=_enc)

                        # Score each column on a small sample to find the
                        # most text-like one.
                        _tc = []
                        for _col in _df.columns:
                            _sv = _df[_col].dropna().head(10)
                            if len(_sv) > 0:
                                _cnt = sum(1 for _val in _sv
                                           if isinstance(_val, str) and len(str(_val).strip()) > 10)
                                if _cnt > len(_sv) * 0.7:
                                    _tc.append(_col)

                        # Fall back to the first column when nothing matched.
                        _sc = _tc[0] if _tc else _df.columns[0]

                        _rev = _df[_sc].dropna().astype(str).tolist()

                        # Drop short rows and literal 'nan' strings.
                        _cr = []
                        for _r in _rev:
                            _r = _r.strip()
                            if len(_r) > 10 and _r.lower() != 'nan':
                                _cr.append(_r)

                        if _cr:
                            _log.info(f"Successfully read {len(_cr)} reviews from CSV")
                            return '\n'.join(_cr)

                    except Exception:
                        continue  # try the next encoding

                return "Error: Could not read CSV file. Please check the file format and encoding."

            else:
                for _enc in ['utf-8', 'latin-1', 'cp1252']:
                    try:
                        with open(_fp, 'r', encoding=_enc) as _f:
                            _cont = _f.read().strip()
                            if _cont:
                                return _cont
                    except Exception:
                        continue

                return "Error: Could not read text file. Please check the file encoding."

        except Exception as e:
            _log.error(f"File processing error: {e}")
            return f"Error processing file: {str(e)}"
571
 
572
class _SA6:
    """Application facade: owns the engine, history and data handler, and
    exposes the callback methods wired into the Gradio UI."""

    def __init__(self):
        self._eng = _SE3()   # sentiment engine
        self._hist = _HM2()  # analysis history
        self._dh = _DH5()    # file import/export

        # Example reviews shown under the input boxes.
        # NOTE(review): the source this was reconstructed from appears to be
        # missing one list entry here (its line 582 was lost) — verify
        # against the upstream file.
        self._ex = [
            ["While the film's visual effects were undeniably impressive, the story lacked emotional weight, and the pacing felt inconsistent throughout."],
            ["An extraordinary achievement in filmmaking — the direction was masterful, the script was sharp, and every performance added depth and realism."],
            ["Despite a promising start, the film quickly devolved into a series of clichés, with weak character development and an ending that felt rushed and unearned."],
            ["The movie was far too long, with unnecessary subplots and dull dialogue that made it difficult to stay engaged until the end."]
        ]

    @_err_wrap(default_return=("Please enter text", None, None, None))
    def _fast_ana(self, _txt: str, _th: str = 'default'):
        """Quick-analysis callback: classify, record to history, build plots.

        Returns (result text, bar figure, gauge figure, word-cloud figure).
        """
        if not _txt.strip():
            return "Please enter text", None, None, None

        _res = self._eng._fast(_txt)

        self._hist._add({
            'text': _txt[:100],
            'full_text': _txt,
            **_res
        })

        _thc = _Th7(_th)
        _probs = np.array([_res['neg_prob'], _res['pos_prob']])

        _pp = _PF4._bars(_probs, _thc)
        _gp = _PF4._gauge(_res['confidence'], _res['sentiment'], _thc)
        _cp = _PF4._cloud(_txt, _res['sentiment'], _thc)

        _rt = f"Sentiment: {_res['sentiment']} (Confidence: {_res['confidence']:.3f})"

        return _rt, _pp, _gp, _cp

    @_err_wrap(default_return=("Please enter text", None, None, None))
    def _adv_ana(self, _txt: str, _th: str = 'default'):
        """Advanced callback: classification plus LIME/ablation explanations.

        Returns (summary text, LIME figure, SHAP figure, heatmap HTML).
        """
        if not _txt.strip():
            return "Please enter text", None, None, None

        _res = self._eng._adv(_txt)

        self._hist._add({
            'text': _txt[:100],
            'full_text': _txt,
            **_res
        })

        _thc = _Th7(_th)

        _lp = _PF4._lime_chart(_res['lime_words'], _res['sentiment'], _thc)
        _sp = _PF4._shap_chart(_res['shap_words'], _res['sentiment'], _thc)

        # Summaries show only the five strongest words of each method.
        _lws = ", ".join([f"{_w}({_s:.3f})" for _w, _s in _res['lime_words'][:5]])
        _sws = ", ".join([f"{_w}({_s:.3f})" for _w, _s in _res['shap_words'][:5]])

        _rt = (f"Sentiment: {_res['sentiment']} (Confidence: {_res['confidence']:.3f})\n"
               f"LIME Key Words: {_lws}\n"
               f"SHAP Key Words: {_sws}")

        return _rt, _lp, _sp, _res['heatmap_html']

    @_err_wrap(default_return=None)
    def _batch_ana(self, _revs: str, _prog=None):
        """Batch callback: one review per line; requires at least two."""
        if not _revs.strip():
            return None

        _txts = [_r.strip() for _r in _revs.split('\n') if _r.strip()]
        if len(_txts) < 2:
            return None

        _res = self._eng._batch(_txts, _prog)

        for _r in _res:
            self._hist._add(_r)

        _thc = _Th7('default')
        return _PF4._batch_viz(_res, _thc)

    @_err_wrap(default_return=(None, "No history available"))
    def _hist_plot(self, _th: str = 'default'):
        """Trend plots over the history: positive prob + confidence.

        Returns (figure, status string); (None, message) with < 2 entries.
        """
        _hist = self._hist._get()
        if len(_hist) < 2:
            return None, f"Need at least 2 analyses for trends. Current: {len(_hist)}"

        _thc = _Th7(_th)

        with _fig_mgr(figsize=(12, 8)) as _f:
            _gs = _f.add_gridspec(2, 1, hspace=0.3)

            _idx = list(range(len(_hist)))
            _pp = [_item['pos_prob'] for _item in _hist]
            _confs = [_item['confidence'] for _item in _hist]

            # Top panel: positive probability per analysis, coloured by class.
            _ax1 = _f.add_subplot(_gs[0, 0])
            _clr = [_thc._c['pos'] if _p > 0.5 else _thc._c['neg']
                    for _p in _pp]
            _ax1.scatter(_idx, _pp, c=_clr, alpha=0.7, s=60)
            _ax1.plot(_idx, _pp, alpha=0.5, linewidth=2)
            _ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
            _ax1.set_title('Sentiment History')
            _ax1.set_ylabel('Positive Probability')
            _ax1.grid(True, alpha=0.3)

            # Bottom panel: confidence per analysis.
            _ax2 = _f.add_subplot(_gs[1, 0])
            _ax2.bar(_idx, _confs, alpha=0.7, color='lightblue', edgecolor='navy')
            _ax2.set_title('Confidence Over Time')
            _ax2.set_xlabel('Analysis Number')
            _ax2.set_ylabel('Confidence')
            _ax2.grid(True, alpha=0.3)

            _f.tight_layout()
            return _f, f"History: {len(_hist)} analyses"
 
 
 
688
 
689
def _create_ui():
    """Build the Gradio Blocks interface and wire every event handler.

    Returns the gr.Blocks app, ready to .launch().
    """
    _app = _SA6()

    with gr.Blocks(theme=gr.themes.Soft(), title="Movie Sentiment Analyzer") as _demo:
        gr.Markdown("# 🎬 AI Movie Sentiment Analyzer")
        gr.Markdown("Fast sentiment analysis with advanced deep learning explanations")

        # --- Tab 1: fast classification with basic plots -------------------
        with gr.Tab("Quick Analysis"):
            with gr.Row():
                with gr.Column():
                    _ti = gr.Textbox(
                        label="Movie Review",
                        placeholder="Enter your movie review...",
                        lines=5
                    )
                    with gr.Row():
                        _ab = gr.Button("Analyze", variant="primary")
                        _ts = gr.Dropdown(
                            choices=list(_cfg._th.keys()),
                            value="default",
                            label="Theme"
                        )

                    gr.Examples(
                        examples=_app._ex,
                        inputs=_ti
                    )

                with gr.Column():
                    _ro = gr.Textbox(label="Result", lines=3)

                    with gr.Row():
                        _pp = gr.Plot(label="Probabilities")
                        _gp = gr.Plot(label="Confidence")

                    with gr.Row():
                        _wp = gr.Plot(label="Word Cloud")

        # --- Tab 2: LIME / ablation explanations ---------------------------
        with gr.Tab("Advanced Analysis"):
            with gr.Row():
                with gr.Column():
                    _ati = gr.Textbox(
                        label="Movie Review",
                        placeholder="Enter your movie review for deep analysis...",
                        lines=5
                    )
                    with gr.Row():
                        _aab = gr.Button("Deep Analyze", variant="primary")
                        _ats = gr.Dropdown(
                            choices=list(_cfg._th.keys()),
                            value="default",
                            label="Theme"
                        )

                    gr.Examples(
                        examples=_app._ex,
                        inputs=_ati
                    )

                with gr.Column():
                    _aro = gr.Textbox(label="Analysis Result", lines=4)

                    with gr.Row():
                        _lp = gr.Plot(label="LIME: Key Contributing Words")
                        _sp = gr.Plot(label="SHAP: Key Contributing Words")

                    with gr.Row():
                        _ho = gr.HTML(label="Word Importance Heatmap (LIME-based)")

        # --- Tab 3: batch processing from pasted text or an uploaded file --
        with gr.Tab("Batch Analysis"):
            with gr.Row():
                with gr.Column():
                    _fu = gr.File(label="Upload File", file_types=[".csv", ".txt"])
                    _bi = gr.Textbox(
                        label="Reviews (one per line)",
                        lines=8
                    )

                with gr.Column():
                    _lb = gr.Button("Load File")
                    _bb = gr.Button("Analyze Batch", variant="primary")

            _bp = gr.Plot(label="Batch Results")

        # --- Tab 4: history trends and CSV/JSON export ---------------------
        with gr.Tab("History & Export"):
            with gr.Row():
                _rb = gr.Button("Refresh")
                _cb = gr.Button("Clear", variant="stop")

            with gr.Row():
                _csvb = gr.Button("Export CSV")
                _jb = gr.Button("Export JSON")

            _hs = gr.Textbox(label="Status")
            _hp = gr.Plot(label="History Trends")
            _csvf = gr.File(label="CSV Download", visible=True)
            _jf = gr.File(label="JSON Download", visible=True)

        # --- Event wiring --------------------------------------------------
        _ab.click(
            _app._fast_ana,
            inputs=[_ti, _ts],
            outputs=[_ro, _pp, _gp, _wp]
        )

        _aab.click(
            _app._adv_ana,
            inputs=[_ati, _ats],
            outputs=[_aro, _lp, _sp, _ho]
        )

        _lb.click(_app._dh._proc, inputs=_fu, outputs=_bi)
        _bb.click(_app._batch_ana, inputs=_bi, outputs=_bp)

        # NOTE(review): history refresh reads the Quick-Analysis theme
        # dropdown (_ts) — there is no dedicated theme selector on this tab.
        _rb.click(
            lambda _th: _app._hist_plot(_th),
            inputs=_ts,
            outputs=[_hp, _hs]
        )

        _cb.click(
            lambda: f"Cleared {_app._hist._clr()} entries",
            outputs=_hs
        )

        _csvb.click(
            lambda: _app._dh._exp(_app._hist._get(), 'csv'),
            outputs=[_csvf, _hs]
        )

        _jb.click(
            lambda: _app._dh._exp(_app._hist._get(), 'json'),
            outputs=[_jf, _hs]
        )

    return _demo
824
 
 
825
# Application entry point: configure logging, build and launch the Gradio UI.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    _demo = _create_ui()
    _demo.launch(share=True)  # share=True exposes a public Gradio link