entropy25 committed on
Commit
e57599e
·
verified ·
1 Parent(s): 1aada0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +354 -139
app.py CHANGED
@@ -4,64 +4,80 @@ from transformers import BertTokenizer, BertForSequenceClassification
4
  import matplotlib.pyplot as plt
5
  import numpy as np
6
  from wordcloud import WordCloud
7
- import seaborn as sns
 
 
 
8
 
 
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
- print(f"Using device: {device}")
11
-
12
  tokenizer = BertTokenizer.from_pretrained("entropy25/sentimentanalysis")
13
  model = BertForSequenceClassification.from_pretrained("entropy25/sentimentanalysis")
14
  model.to(device)
15
 
16
- sentiment_history = []
 
 
 
 
 
 
 
 
 
17
 
18
- def analyze_sentiment(text):
 
19
  if not text.strip():
20
- return "Please enter a review", None, None, None
21
 
22
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
23
  with torch.no_grad():
24
  outputs = model(**inputs)
25
- logits = outputs.logits
26
- probabilities = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()[0]
27
- prediction = torch.argmax(logits, dim=-1).item()
28
- confidence = probabilities.max()
29
- sentiment = "Positive" if prediction == 1 else "Negative"
30
 
31
- sentiment_history.append({
 
32
  'text': text[:100],
 
33
  'sentiment': sentiment,
34
- 'confidence': confidence,
35
- 'positive_prob': probabilities[1],
36
- 'negative_prob': probabilities[0]
37
  })
38
 
39
- result_text = f"Sentiment: {sentiment} (Confidence: {confidence:.3f})"
40
- prob_plot = plot_probabilities(probabilities)
41
- gauge_plot = create_gauge(confidence, sentiment)
42
- wordcloud_plot = generate_wordcloud(text, sentiment)
 
 
43
 
44
- return result_text, prob_plot, gauge_plot, wordcloud_plot
45
 
46
- def plot_probabilities(probabilities):
47
- sentiments = ["Negative", "Positive"]
 
 
48
  colors = ['#ff6b6b', '#4ecdc4']
49
 
50
- fig, ax = plt.subplots(figsize=(8, 5))
51
- bars = ax.bar(sentiments, probabilities, color=colors, alpha=0.8)
52
- ax.set_title("Sentiment Probability Distribution", fontsize=14, fontweight='bold')
53
  ax.set_ylabel("Probability")
54
  ax.set_ylim(0, 1)
55
 
56
- for bar, prob in zip(bars, probabilities):
57
- height = bar.get_height()
58
- ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
59
  f'{prob:.3f}', ha='center', va='bottom', fontweight='bold')
60
 
61
  plt.tight_layout()
62
  return fig
63
 
64
- def create_gauge(confidence, sentiment):
 
65
  fig, ax = plt.subplots(figsize=(8, 6))
66
 
67
  theta = np.linspace(0, np.pi, 100)
@@ -71,137 +87,325 @@ def create_gauge(confidence, sentiment):
71
  ax.fill_between([theta[i], theta[i+1]], [0, 0], [0.8, 0.8],
72
  color=colors[i], alpha=0.7)
73
 
74
- pointer_pos = np.pi * (0.5 + (0.4 if sentiment == 'Positive' else -0.4) * confidence)
75
- ax.plot([pointer_pos, pointer_pos], [0, 0.6], 'k-', linewidth=6)
76
- ax.plot(pointer_pos, 0.6, 'ko', markersize=10)
77
 
78
  ax.set_xlim(0, np.pi)
79
  ax.set_ylim(0, 1)
80
- ax.set_title(f'{sentiment} Sentiment - Confidence: {confidence:.3f}',
81
- fontsize=14, fontweight='bold')
82
  ax.set_xticks([0, np.pi/2, np.pi])
83
  ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
84
  ax.set_yticks([])
85
- ax.spines['top'].set_visible(False)
86
- ax.spines['right'].set_visible(False)
87
- ax.spines['left'].set_visible(False)
88
 
89
  plt.tight_layout()
90
  return fig
91
 
92
- def generate_wordcloud(text, sentiment):
 
93
  if len(text.split()) < 3:
94
  return None
95
 
96
- colormap = 'Greens' if sentiment == 'Positive' else 'Reds'
97
-
98
  try:
99
- wordcloud = WordCloud(
100
- width=800, height=400,
101
- background_color='white',
102
- colormap=colormap,
103
- max_words=30,
104
- relative_scaling=0.5
105
- ).generate(text)
106
 
107
  fig, ax = plt.subplots(figsize=(10, 5))
108
- ax.imshow(wordcloud, interpolation='bilinear')
109
  ax.axis('off')
110
- ax.set_title(f'{sentiment} Sentiment - Word Cloud', fontsize=14, fontweight='bold')
111
 
112
  plt.tight_layout()
113
  return fig
114
  except:
115
  return None
116
 
117
- def analyze_batch(reviews_text):
118
- if not reviews_text.strip():
 
119
  return None
120
 
121
- reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
122
- if len(reviews) < 2:
123
  return None
124
 
125
  results = []
126
- for review in reviews:
127
- inputs = tokenizer(review, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
128
  with torch.no_grad():
129
  outputs = model(**inputs)
130
- probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
131
- prediction = torch.argmax(outputs.logits, dim=-1).item()
132
- sentiment = "Positive" if prediction == 1 else "Negative"
133
- confidence = probabilities.max()
134
 
135
  results.append({
136
- 'review': review[:50] + '...' if len(review) > 50 else review,
137
  'sentiment': sentiment,
138
- 'confidence': confidence,
139
- 'positive_prob': probabilities[1]
 
 
 
 
 
 
 
 
 
 
140
  })
141
 
 
142
  fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
143
 
144
- sentiment_counts = {'Positive': 0, 'Negative': 0}
145
- confidences = []
146
- positive_probs = []
147
-
148
- for r in results:
149
- sentiment_counts[r['sentiment']] += 1
150
- confidences.append(r['confidence'])
151
- positive_probs.append(r['positive_prob'])
152
-
153
  colors = ['#4ecdc4', '#ff6b6b']
154
- ax1.pie(sentiment_counts.values(), labels=sentiment_counts.keys(),
155
  autopct='%1.1f%%', colors=colors, startangle=90)
156
  ax1.set_title('Sentiment Distribution')
157
 
158
- ax2.hist(confidences, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
159
- ax2.set_title('Confidence Score Distribution')
160
- ax2.set_xlabel('Confidence Score')
161
- ax2.set_ylabel('Frequency')
162
-
163
- review_indices = range(len(results))
164
- ax3.scatter(review_indices, positive_probs,
165
- c=[colors[0] if r['sentiment'] == 'Positive' else colors[1] for r in results],
 
 
 
 
166
  alpha=0.7, s=100)
167
  ax3.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
168
  ax3.set_title('Positive Probability by Review')
169
  ax3.set_xlabel('Review Index')
170
  ax3.set_ylabel('Positive Probability')
171
 
172
- sentiment_scores = [1 if r['sentiment'] == 'Positive' else 0 for r in results]
173
- confidence_scores = confidences
174
- ax4.scatter(confidence_scores, sentiment_scores, alpha=0.7, s=100,
175
- c=[colors[0] if s == 1 else colors[1] for s in sentiment_scores])
176
  ax4.set_title('Sentiment vs Confidence')
177
- ax4.set_xlabel('Confidence Score')
178
- ax4.set_ylabel('Sentiment (0=Negative, 1=Positive)')
179
  ax4.set_yticks([0, 1])
180
  ax4.set_yticklabels(['Negative', 'Positive'])
181
 
182
  plt.tight_layout()
183
  return fig
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def plot_history():
186
- if len(sentiment_history) < 2:
 
187
  return None
188
 
189
- indices = list(range(len(sentiment_history)))
190
- positive_probs = [item['positive_prob'] for item in sentiment_history]
191
- confidences = [item['confidence'] for item in sentiment_history]
192
 
193
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
194
 
195
- colors = ['#4ecdc4' if prob > 0.5 else '#ff6b6b' for prob in positive_probs]
196
- ax1.scatter(indices, positive_probs, c=colors, alpha=0.7, s=100)
197
- ax1.plot(indices, positive_probs, alpha=0.5, linewidth=2)
198
  ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
199
- ax1.set_title('Sentiment Analysis History - Positive Probability')
200
  ax1.set_xlabel('Analysis Number')
201
  ax1.set_ylabel('Positive Probability')
202
  ax1.grid(True, alpha=0.3)
203
 
204
- ax2.bar(indices, confidences, alpha=0.7, color='lightblue', edgecolor='navy')
205
  ax2.set_title('Confidence Scores Over Time')
206
  ax2.set_xlabel('Analysis Number')
207
  ax2.set_ylabel('Confidence Score')
@@ -210,72 +414,83 @@ def plot_history():
210
  plt.tight_layout()
211
  return fig
212
 
 
213
  with gr.Blocks(theme=gr.themes.Soft(), title="Movie Sentiment Analyzer") as demo:
214
- gr.Markdown("# 🎬 AI Movie Sentiment Analyzer")
215
- gr.Markdown("Advanced sentiment analysis for movie reviews using BERT model with comprehensive visualizations")
216
 
217
- with gr.Tab("Single Review Analysis"):
218
  with gr.Row():
219
- with gr.Column(scale=1):
220
- input_text = gr.Textbox(
221
- label="Enter Movie Review",
222
- placeholder="The cinematography was stunning, but the plot felt predictable...",
223
  lines=5
224
  )
225
- analyze_btn = gr.Button("Analyze Sentiment", variant="primary", size="lg")
226
 
227
- gr.Examples(
228
- examples=[
229
- ["The cinematography was absolutely stunning, but the pacing felt slow at times."],
230
- ["A masterpiece in every way! The performances, direction, and music were phenomenal."],
231
- ["The movie was boring, and I couldn't connect with any of the characters."],
232
- ["Incredible special effects, but the dialogue was cheesy and the plot had holes."],
233
- ["The ending left me speechless, fantastic build-up throughout the entire film."]
234
- ],
235
- inputs=input_text
236
- )
237
 
238
- with gr.Column(scale=1):
239
- sentiment_output = gr.Textbox(label="Analysis Result", lines=2)
240
 
241
  with gr.Row():
242
- prob_plot = gr.Plot(label="Probability Distribution")
243
- gauge_plot = gr.Plot(label="Sentiment Gauge")
244
 
245
- with gr.Row():
246
- wordcloud_plot = gr.Plot(label="Word Cloud Visualization")
247
 
248
  with gr.Tab("Batch Analysis"):
249
- gr.Markdown("### Analyze Multiple Reviews")
250
- gr.Markdown("Enter multiple reviews separated by new lines for comparative analysis")
251
 
252
  batch_input = gr.Textbox(
253
- label="Multiple Reviews (one per line)",
254
- placeholder="First review here...\nSecond review here...\nThird review here...",
255
  lines=8
256
  )
257
- batch_btn = gr.Button("Analyze All Reviews", variant="primary")
258
- batch_plot = gr.Plot(label="Batch Analysis Results")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
- with gr.Tab("Analysis History"):
261
- gr.Markdown("### Historical Analysis Trends")
262
- gr.Markdown("View patterns and trends from your previous analyses")
263
 
264
  with gr.Row():
265
- history_plot = gr.Plot(label="Sentiment History")
266
- refresh_btn = gr.Button("Refresh History", variant="secondary")
267
 
268
- clear_btn = gr.Button("Clear History", variant="stop")
 
 
 
 
 
269
 
270
- analyze_btn.click(
271
- fn=analyze_sentiment,
272
- inputs=input_text,
273
- outputs=[sentiment_output, prob_plot, gauge_plot, wordcloud_plot]
274
- )
275
 
276
- batch_btn.click(fn=analyze_batch, inputs=batch_input, outputs=batch_plot)
277
- refresh_btn.click(fn=plot_history, outputs=history_plot)
278
- clear_btn.click(lambda: sentiment_history.clear(), outputs=None)
279
 
280
  demo.launch(share=True)
281
 
 
4
  import matplotlib.pyplot as plt
5
  import numpy as np
6
  from wordcloud import WordCloud
7
+ from collections import Counter, defaultdict
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ import networkx as nx
10
+ import re
11
 
12
+ # Load model
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
14
  tokenizer = BertTokenizer.from_pretrained("entropy25/sentimentanalysis")
15
  model = BertForSequenceClassification.from_pretrained("entropy25/sentimentanalysis")
16
  model.to(device)
17
 
18
+ # Global storage
19
+ history = []
20
+
21
+ def clean_text(text):
22
+ """Simple text preprocessing"""
23
+ text = re.sub(r'[^\w\s]', '', text.lower())
24
+ words = text.split()
25
+ # Simple stopwords
26
+ stopwords = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them'}
27
+ return [w for w in words if w not in stopwords and len(w) > 2]
28
 
29
+ def analyze_text(text):
30
+ """Core sentiment analysis"""
31
  if not text.strip():
32
+ return "Please enter text", None, None, None
33
 
34
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
35
  with torch.no_grad():
36
  outputs = model(**inputs)
37
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
38
+ pred = torch.argmax(outputs.logits, dim=-1).item()
39
+ conf = probs.max()
40
+ sentiment = "Positive" if pred == 1 else "Negative"
 
41
 
42
+ # Store in history
43
+ history.append({
44
  'text': text[:100],
45
+ 'full_text': text,
46
  'sentiment': sentiment,
47
+ 'confidence': conf,
48
+ 'pos_prob': probs[1],
49
+ 'neg_prob': probs[0]
50
  })
51
 
52
+ result = f"Sentiment: {sentiment} (Confidence: {conf:.3f})"
53
+
54
+ # Generate plots
55
+ prob_plot = plot_probs(probs)
56
+ gauge_plot = plot_gauge(conf, sentiment)
57
+ cloud_plot = plot_wordcloud(text, sentiment)
58
 
59
+ return result, prob_plot, gauge_plot, cloud_plot
60
 
61
+ def plot_probs(probs):
62
+ """Probability bar chart"""
63
+ fig, ax = plt.subplots(figsize=(8, 5))
64
+ labels = ["Negative", "Positive"]
65
  colors = ['#ff6b6b', '#4ecdc4']
66
 
67
+ bars = ax.bar(labels, probs, color=colors, alpha=0.8)
68
+ ax.set_title("Sentiment Probabilities", fontweight='bold')
 
69
  ax.set_ylabel("Probability")
70
  ax.set_ylim(0, 1)
71
 
72
+ for bar, prob in zip(bars, probs):
73
+ ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.02,
 
74
  f'{prob:.3f}', ha='center', va='bottom', fontweight='bold')
75
 
76
  plt.tight_layout()
77
  return fig
78
 
79
+ def plot_gauge(conf, sentiment):
80
+ """Confidence gauge"""
81
  fig, ax = plt.subplots(figsize=(8, 6))
82
 
83
  theta = np.linspace(0, np.pi, 100)
 
87
  ax.fill_between([theta[i], theta[i+1]], [0, 0], [0.8, 0.8],
88
  color=colors[i], alpha=0.7)
89
 
90
+ pos = np.pi * (0.5 + (0.4 if sentiment == 'Positive' else -0.4) * conf)
91
+ ax.plot([pos, pos], [0, 0.6], 'k-', linewidth=6)
92
+ ax.plot(pos, 0.6, 'ko', markersize=10)
93
 
94
  ax.set_xlim(0, np.pi)
95
  ax.set_ylim(0, 1)
96
+ ax.set_title(f'{sentiment} - Confidence: {conf:.3f}', fontweight='bold')
 
97
  ax.set_xticks([0, np.pi/2, np.pi])
98
  ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
99
  ax.set_yticks([])
100
+ ax.axis('off')
 
 
101
 
102
  plt.tight_layout()
103
  return fig
104
 
105
+ def plot_wordcloud(text, sentiment):
106
+ """Word cloud visualization"""
107
  if len(text.split()) < 3:
108
  return None
109
 
 
 
110
  try:
111
+ colormap = 'Greens' if sentiment == 'Positive' else 'Reds'
112
+ wc = WordCloud(width=800, height=400, background_color='white',
113
+ colormap=colormap, max_words=30).generate(text)
 
 
 
 
114
 
115
  fig, ax = plt.subplots(figsize=(10, 5))
116
+ ax.imshow(wc, interpolation='bilinear')
117
  ax.axis('off')
118
+ ax.set_title(f'{sentiment} Word Cloud', fontweight='bold')
119
 
120
  plt.tight_layout()
121
  return fig
122
  except:
123
  return None
124
 
125
+ def batch_analysis(reviews):
126
+ """Analyze multiple reviews"""
127
+ if not reviews.strip():
128
  return None
129
 
130
+ texts = [r.strip() for r in reviews.split('\n') if r.strip()]
131
+ if len(texts) < 2:
132
  return None
133
 
134
  results = []
135
+ for text in texts:
136
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
137
  with torch.no_grad():
138
  outputs = model(**inputs)
139
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
140
+ pred = torch.argmax(outputs.logits, dim=-1).item()
141
+ sentiment = "Positive" if pred == 1 else "Negative"
142
+ conf = probs.max()
143
 
144
  results.append({
145
+ 'text': text[:50] + '...' if len(text) > 50 else text,
146
  'sentiment': sentiment,
147
+ 'confidence': conf,
148
+ 'pos_prob': probs[1]
149
+ })
150
+
151
+ # Add to history
152
+ history.append({
153
+ 'text': text[:100],
154
+ 'full_text': text,
155
+ 'sentiment': sentiment,
156
+ 'confidence': conf,
157
+ 'pos_prob': probs[1],
158
+ 'neg_prob': probs[0]
159
  })
160
 
161
+ # Create visualization
162
  fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
163
 
164
+ # Pie chart
165
+ sent_counts = Counter([r['sentiment'] for r in results])
 
 
 
 
 
 
 
166
  colors = ['#4ecdc4', '#ff6b6b']
167
+ ax1.pie(sent_counts.values(), labels=sent_counts.keys(),
168
  autopct='%1.1f%%', colors=colors, startangle=90)
169
  ax1.set_title('Sentiment Distribution')
170
 
171
+ # Confidence histogram
172
+ confs = [r['confidence'] for r in results]
173
+ ax2.hist(confs, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
174
+ ax2.set_title('Confidence Distribution')
175
+ ax2.set_xlabel('Confidence')
176
+ ax2.set_ylabel('Count')
177
+
178
+ # Probability scatter
179
+ indices = range(len(results))
180
+ pos_probs = [r['pos_prob'] for r in results]
181
+ ax3.scatter(indices, pos_probs,
182
+ c=['#4ecdc4' if r['sentiment'] == 'Positive' else '#ff6b6b' for r in results],
183
  alpha=0.7, s=100)
184
  ax3.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
185
  ax3.set_title('Positive Probability by Review')
186
  ax3.set_xlabel('Review Index')
187
  ax3.set_ylabel('Positive Probability')
188
 
189
+ # Confidence vs Sentiment
190
+ sent_binary = [1 if r['sentiment'] == 'Positive' else 0 for r in results]
191
+ ax4.scatter(confs, sent_binary, alpha=0.7, s=100,
192
+ c=['#4ecdc4' if s == 1 else '#ff6b6b' for s in sent_binary])
193
  ax4.set_title('Sentiment vs Confidence')
194
+ ax4.set_xlabel('Confidence')
195
+ ax4.set_ylabel('Sentiment')
196
  ax4.set_yticks([0, 1])
197
  ax4.set_yticklabels(['Negative', 'Positive'])
198
 
199
  plt.tight_layout()
200
  return fig
201
 
202
+ def keyword_heatmap():
203
+ """Keyword sentiment heatmap"""
204
+ if len(history) < 3:
205
+ return None
206
+
207
+ word_stats = defaultdict(list)
208
+
209
+ for item in history:
210
+ words = clean_text(item['full_text'])
211
+ sentiment_score = item['pos_prob']
212
+
213
+ for word in words:
214
+ word_stats[word].append(sentiment_score)
215
+
216
+ # Filter words with at least 2 occurrences
217
+ filtered = {w: scores for w, scores in word_stats.items() if len(scores) >= 2}
218
+
219
+ if len(filtered) < 5:
220
+ return None
221
+
222
+ # Get top 20 most frequent words
223
+ top_words = sorted(filtered.items(), key=lambda x: len(x[1]), reverse=True)[:20]
224
+
225
+ words = [item[0] for item in top_words]
226
+ avg_sentiments = [np.mean(item[1]) for item in top_words]
227
+ frequencies = [len(item[1]) for item in top_words]
228
+
229
+ # Create heatmap data
230
+ data = np.array([avg_sentiments, [f/max(frequencies) for f in frequencies]]).T
231
+
232
+ fig, ax = plt.subplots(figsize=(12, 8))
233
+
234
+ im = ax.imshow(data, cmap='RdYlGn', aspect='auto')
235
+
236
+ ax.set_xticks([0, 1])
237
+ ax.set_xticklabels(['Avg Sentiment', 'Frequency'])
238
+ ax.set_yticks(range(len(words)))
239
+ ax.set_yticklabels(words)
240
+
241
+ # Add text annotations
242
+ for i in range(len(words)):
243
+ ax.text(0, i, f'{avg_sentiments[i]:.2f}', ha='center', va='center',
244
+ color='black', fontweight='bold')
245
+ ax.text(1, i, f'{frequencies[i]}', ha='center', va='center',
246
+ color='black', fontweight='bold')
247
+
248
+ ax.set_title('Keyword Sentiment Heatmap', fontweight='bold')
249
+ plt.colorbar(im, ax=ax, label='Intensity')
250
+
251
+ plt.tight_layout()
252
+ return fig
253
+
254
+ def cooccurrence_network():
255
+ """Word co-occurrence network"""
256
+ if len(history) < 3:
257
+ return None
258
+
259
+ all_words = []
260
+ for item in history:
261
+ words = clean_text(item['full_text'])
262
+ if len(words) >= 3:
263
+ all_words.extend(words)
264
+
265
+ if len(all_words) < 10:
266
+ return None
267
+
268
+ word_freq = Counter(all_words)
269
+ top_words = [word for word, freq in word_freq.most_common(15) if freq >= 2]
270
+
271
+ if len(top_words) < 5:
272
+ return None
273
+
274
+ # Calculate co-occurrences
275
+ cooccur = defaultdict(int)
276
+
277
+ for item in history:
278
+ words = [w for w in clean_text(item['full_text']) if w in top_words]
279
+
280
+ for i, w1 in enumerate(words):
281
+ for j, w2 in enumerate(words):
282
+ if i != j and w1 != w2:
283
+ pair = tuple(sorted([w1, w2]))
284
+ cooccur[pair] += 1
285
+
286
+ # Create network
287
+ G = nx.Graph()
288
+
289
+ for word in top_words:
290
+ G.add_node(word, size=word_freq[word])
291
+
292
+ for (w1, w2), weight in cooccur.items():
293
+ if weight >= 2:
294
+ G.add_edge(w1, w2, weight=weight)
295
+
296
+ if len(G.edges()) == 0:
297
+ return None
298
+
299
+ # Plot network
300
+ fig, ax = plt.subplots(figsize=(12, 10))
301
+
302
+ pos = nx.spring_layout(G, k=3, iterations=50)
303
+
304
+ node_sizes = [G.nodes[node]['size'] * 200 for node in G.nodes()]
305
+ nx.draw_networkx_nodes(G, pos, node_size=node_sizes,
306
+ node_color='lightblue', alpha=0.7, ax=ax)
307
+
308
+ edges = G.edges()
309
+ weights = [G[u][v]['weight'] for u, v in edges]
310
+ nx.draw_networkx_edges(G, pos, width=[w*0.5 for w in weights],
311
+ alpha=0.6, edge_color='gray', ax=ax)
312
+
313
+ nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold', ax=ax)
314
+
315
+ ax.set_title('Word Co-occurrence Network', fontweight='bold')
316
+ ax.axis('off')
317
+
318
+ plt.tight_layout()
319
+ return fig
320
+
321
+ def tfidf_analysis():
322
+ """TF-IDF keyword analysis"""
323
+ if len(history) < 4:
324
+ return None
325
+
326
+ pos_texts = []
327
+ neg_texts = []
328
+
329
+ for item in history:
330
+ if item['sentiment'] == 'Positive':
331
+ pos_texts.append(' '.join(clean_text(item['full_text'])))
332
+ else:
333
+ neg_texts.append(' '.join(clean_text(item['full_text'])))
334
+
335
+ if len(pos_texts) < 2 or len(neg_texts) < 2:
336
+ return None
337
+
338
+ try:
339
+ # Positive TF-IDF
340
+ vectorizer_pos = TfidfVectorizer(max_features=50, ngram_range=(1, 2))
341
+ pos_tfidf = vectorizer_pos.fit_transform(pos_texts)
342
+ pos_features = vectorizer_pos.get_feature_names_out()
343
+ pos_scores = pos_tfidf.sum(axis=0).A1
344
+
345
+ # Negative TF-IDF
346
+ vectorizer_neg = TfidfVectorizer(max_features=50, ngram_range=(1, 2))
347
+ neg_tfidf = vectorizer_neg.fit_transform(neg_texts)
348
+ neg_features = vectorizer_neg.get_feature_names_out()
349
+ neg_scores = neg_tfidf.sum(axis=0).A1
350
+
351
+ # Top 10 features
352
+ pos_top_idx = np.argsort(pos_scores)[-10:][::-1]
353
+ neg_top_idx = np.argsort(neg_scores)[-10:][::-1]
354
+
355
+ pos_words = [pos_features[i] for i in pos_top_idx]
356
+ pos_vals = [pos_scores[i] for i in pos_top_idx]
357
+
358
+ neg_words = [neg_features[i] for i in neg_top_idx]
359
+ neg_vals = [neg_scores[i] for i in neg_top_idx]
360
+
361
+ # Plot
362
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
363
+
364
+ # Positive
365
+ bars1 = ax1.barh(pos_words, pos_vals, color='#4ecdc4', alpha=0.8)
366
+ ax1.set_title('Positive Keywords (TF-IDF)', fontweight='bold')
367
+ ax1.set_xlabel('TF-IDF Score')
368
+
369
+ for bar, score in zip(bars1, pos_vals):
370
+ ax1.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2,
371
+ f'{score:.3f}', va='center', fontsize=9)
372
+
373
+ # Negative
374
+ bars2 = ax2.barh(neg_words, neg_vals, color='#ff6b6b', alpha=0.8)
375
+ ax2.set_title('Negative Keywords (TF-IDF)', fontweight='bold')
376
+ ax2.set_xlabel('TF-IDF Score')
377
+
378
+ for bar, score in zip(bars2, neg_vals):
379
+ ax2.text(bar.get_width() + 0.001, bar.get_y() + bar.get_height()/2,
380
+ f'{score:.3f}', va='center', fontsize=9)
381
+
382
+ plt.tight_layout()
383
+ return fig
384
+
385
+ except:
386
+ return None
387
+
388
  def plot_history():
389
+ """Analysis history visualization"""
390
+ if len(history) < 2:
391
  return None
392
 
393
+ indices = list(range(len(history)))
394
+ pos_probs = [item['pos_prob'] for item in history]
395
+ confs = [item['confidence'] for item in history]
396
 
397
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
398
 
399
+ colors = ['#4ecdc4' if p > 0.5 else '#ff6b6b' for p in pos_probs]
400
+ ax1.scatter(indices, pos_probs, c=colors, alpha=0.7, s=100)
401
+ ax1.plot(indices, pos_probs, alpha=0.5, linewidth=2)
402
  ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
403
+ ax1.set_title('Sentiment History - Positive Probability')
404
  ax1.set_xlabel('Analysis Number')
405
  ax1.set_ylabel('Positive Probability')
406
  ax1.grid(True, alpha=0.3)
407
 
408
+ ax2.bar(indices, confs, alpha=0.7, color='lightblue', edgecolor='navy')
409
  ax2.set_title('Confidence Scores Over Time')
410
  ax2.set_xlabel('Analysis Number')
411
  ax2.set_ylabel('Confidence Score')
 
414
  plt.tight_layout()
415
  return fig
416
 
417
+ # Gradio Interface
418
  with gr.Blocks(theme=gr.themes.Soft(), title="Movie Sentiment Analyzer") as demo:
419
+ gr.Markdown("# 🎬 Movie Sentiment Analyzer")
420
+ gr.Markdown("Advanced sentiment analysis with comprehensive visualizations")
421
 
422
+ with gr.Tab("Single Analysis"):
423
  with gr.Row():
424
+ with gr.Column():
425
+ text_input = gr.Textbox(
426
+ label="Movie Review",
427
+ placeholder="Enter your movie review here...",
428
  lines=5
429
  )
430
+ analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
431
 
432
+ gr.Examples([
433
+ ["The cinematography was stunning, but the plot felt predictable."],
434
+ ["A masterpiece! Amazing performances and direction."],
435
+ ["Boring movie with terrible acting and weak plot."],
436
+ ["Great special effects but cheesy dialogue."],
437
+ ["Incredible ending that left me speechless!"]
438
+ ], inputs=text_input)
 
 
 
439
 
440
+ with gr.Column():
441
+ result_output = gr.Textbox(label="Result", lines=2)
442
 
443
  with gr.Row():
444
+ prob_plot = gr.Plot(label="Probabilities")
445
+ gauge_plot = gr.Plot(label="Confidence Gauge")
446
 
447
+ wordcloud_plot = gr.Plot(label="Word Cloud")
 
448
 
449
  with gr.Tab("Batch Analysis"):
450
+ gr.Markdown("### Multiple Reviews Analysis")
 
451
 
452
  batch_input = gr.Textbox(
453
+ label="Reviews (one per line)",
454
+ placeholder="Review 1...\nReview 2...\nReview 3...",
455
  lines=8
456
  )
457
+ batch_btn = gr.Button("Analyze Batch", variant="primary")
458
+ batch_plot = gr.Plot(label="Batch Results")
459
+
460
+ with gr.Tab("Advanced Analytics"):
461
+ gr.Markdown("### Advanced Visualizations")
462
+
463
+ with gr.Row():
464
+ heatmap_btn = gr.Button("Keyword Heatmap", variant="primary")
465
+ network_btn = gr.Button("Word Network", variant="primary")
466
+ tfidf_btn = gr.Button("TF-IDF Analysis", variant="primary")
467
+
468
+ heatmap_plot = gr.Plot(label="Keyword Sentiment Heatmap")
469
+ network_plot = gr.Plot(label="Word Co-occurrence Network")
470
+ tfidf_plot = gr.Plot(label="TF-IDF Keywords")
471
+
472
+ gr.Markdown("**Status:** All features implemented")
473
 
474
+ with gr.Tab("History"):
475
+ gr.Markdown("### Analysis History")
 
476
 
477
  with gr.Row():
478
+ refresh_btn = gr.Button("Refresh", variant="secondary")
479
+ clear_btn = gr.Button("Clear History", variant="stop")
480
 
481
+ history_plot = gr.Plot(label="Historical Trends")
482
+
483
+ # Event handlers
484
+ analyze_btn.click(analyze_text, inputs=text_input,
485
+ outputs=[result_output, prob_plot, gauge_plot, wordcloud_plot])
486
+ batch_btn.click(batch_analysis, inputs=batch_input, outputs=batch_plot)
487
 
488
+ heatmap_btn.click(keyword_heatmap, outputs=heatmap_plot)
489
+ network_btn.click(cooccurrence_network, outputs=network_plot)
490
+ tfidf_btn.click(tfidf_analysis, outputs=tfidf_plot)
 
 
491
 
492
+ refresh_btn.click(plot_history, outputs=history_plot)
493
+ clear_btn.click(lambda: history.clear(), outputs=None)
 
494
 
495
  demo.launch(share=True)
496