Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,96 +3,280 @@ import gradio as gr
|
|
| 3 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import numpy as np
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# Set the device to GPU or CPU
|
| 8 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 9 |
print(f"Using device: {device}")
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
model = BertForSequenceClassification.from_pretrained("entropy25/sentimentanalysis") # Replace with your model path
|
| 14 |
model.to(device)
|
| 15 |
|
| 16 |
-
|
|
|
|
| 17 |
def analyze_sentiment(text):
|
|
|
|
|
|
|
|
|
|
| 18 |
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
|
| 19 |
with torch.no_grad():
|
| 20 |
outputs = model(**inputs)
|
| 21 |
logits = outputs.logits
|
|
|
|
| 22 |
prediction = torch.argmax(logits, dim=-1).item()
|
| 23 |
-
confidence =
|
| 24 |
sentiment = "Positive" if prediction == 1 else "Negative"
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# Randomly generate some distribution data for demonstration
|
| 32 |
-
scores = np.random.rand(2)
|
| 33 |
|
| 34 |
-
fig, ax = plt.subplots(figsize=(8,
|
| 35 |
-
bars = ax.bar(sentiments,
|
| 36 |
-
ax.set_title("Sentiment Distribution")
|
| 37 |
-
ax.set_ylabel("
|
| 38 |
ax.set_ylim(0, 1)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
for bar, score in zip(bars, scores):
|
| 42 |
height = bar.get_height()
|
| 43 |
-
ax.text(bar.get_x() + bar.get_width()/2., height + 0.
|
| 44 |
-
f'{
|
| 45 |
|
| 46 |
plt.tight_layout()
|
| 47 |
return fig
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
gr.Markdown("# 🎬 AI Movie Sentiment Analyzer")
|
| 52 |
-
gr.Markdown("
|
| 53 |
-
|
| 54 |
-
with gr.
|
| 55 |
-
with gr.
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
with gr.
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
| 91 |
fn=analyze_sentiment,
|
| 92 |
-
|
|
|
|
| 93 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
# Launch the interface - 移除 examples 参数
|
| 96 |
demo.launch(share=True)
|
| 97 |
|
| 98 |
|
|
|
|
| 3 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import numpy as np
|
| 6 |
+
from wordcloud import WordCloud
|
| 7 |
+
import seaborn as sns
|
| 8 |
|
|
|
|
| 9 |
# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the fine-tuned BERT sentiment model and its tokenizer once at startup.
tokenizer = BertTokenizer.from_pretrained("entropy25/sentimentanalysis")
model = BertForSequenceClassification.from_pretrained("entropy25/sentimentanalysis")
model.to(device)

# In-memory log of every single-review analysis; read by the History tab.
# NOTE(review): not persisted — history is lost on restart.
sentiment_history = []
|
| 17 |
+
|
| 18 |
def analyze_sentiment(text):
    """Classify one movie review and build its visualizations.

    Args:
        text: The raw review text entered by the user.

    Returns:
        A 4-tuple (result_text, probability_figure, gauge_figure,
        wordcloud_figure) matching the Gradio outputs; the figures are
        None when the input is empty.
    """
    if not text.strip():
        # Keep the output arity stable so Gradio can clear all four outputs.
        return "Please enter a review", None, None, None

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    probabilities = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()[0]
    prediction = torch.argmax(logits, dim=-1).item()
    confidence = probabilities.max()
    # Label convention: index 1 = positive, index 0 = negative.
    sentiment = "Positive" if prediction == 1 else "Negative"

    # Record the analysis for the History tab (truncate text to keep it light).
    sentiment_history.append({
        'text': text[:100],
        'sentiment': sentiment,
        'confidence': confidence,
        'positive_prob': probabilities[1],
        'negative_prob': probabilities[0]
    })

    result_text = f"Sentiment: {sentiment} (Confidence: {confidence:.3f})"
    prob_plot = plot_probabilities(probabilities)
    gauge_plot = create_gauge(confidence, sentiment)
    wordcloud_plot = generate_wordcloud(text, sentiment)

    return result_text, prob_plot, gauge_plot, wordcloud_plot
|
| 45 |
|
| 46 |
+
def plot_probabilities(probabilities):
    """Bar chart of the class probabilities.

    Args:
        probabilities: length-2 array-like, [negative_prob, positive_prob].

    Returns:
        A matplotlib Figure.
    """
    sentiments = ["Negative", "Positive"]
    colors = ['#ff6b6b', '#4ecdc4']  # red for negative, teal for positive

    fig, ax = plt.subplots(figsize=(8, 5))
    bars = ax.bar(sentiments, probabilities, color=colors, alpha=0.8)
    ax.set_title("Sentiment Probability Distribution", fontsize=14, fontweight='bold')
    ax.set_ylabel("Probability")
    ax.set_ylim(0, 1)

    # Annotate each bar with its exact probability, just above the bar top.
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
                f'{prob:.3f}', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    return fig
|
| 63 |
|
| 64 |
+
def create_gauge(confidence, sentiment):
    """Semicircular gauge whose needle leans toward the predicted sentiment.

    Args:
        confidence: max class probability in [0, 1].
        sentiment: "Positive" or "Negative".

    Returns:
        A matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Paint the semicircle background as thin vertical strips with a
    # red-to-green gradient (x axis spans 0..pi, negative to positive).
    theta = np.linspace(0, np.pi, 100)
    colors = plt.cm.RdYlGn(np.linspace(0.2, 0.8, 100))

    for i in range(len(theta)-1):
        ax.fill_between([theta[i], theta[i+1]], [0, 0], [0.8, 0.8],
                        color=colors[i], alpha=0.7)

    # Needle position: centre (pi/2) shifted toward the positive or negative
    # end in proportion to the confidence score (max swing 0.4*pi).
    pointer_pos = np.pi * (0.5 + (0.4 if sentiment == 'Positive' else -0.4) * confidence)
    ax.plot([pointer_pos, pointer_pos], [0, 0.6], 'k-', linewidth=6)
    ax.plot(pointer_pos, 0.6, 'ko', markersize=10)

    ax.set_xlim(0, np.pi)
    ax.set_ylim(0, 1)
    ax.set_title(f'{sentiment} Sentiment - Confidence: {confidence:.3f}',
                 fontsize=14, fontweight='bold')
    ax.set_xticks([0, np.pi/2, np.pi])
    ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
    ax.set_yticks([])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)

    plt.tight_layout()
    return fig
|
| 91 |
+
|
| 92 |
+
def generate_wordcloud(text, sentiment):
    """Word cloud of the review, tinted green or red by sentiment.

    Args:
        text: The review text.
        sentiment: "Positive" or "Negative".

    Returns:
        A matplotlib Figure, or None when the text is too short (fewer
        than 3 words) or rendering fails.
    """
    if len(text.split()) < 3:
        return None

    colormap = 'Greens' if sentiment == 'Positive' else 'Reds'

    try:
        wordcloud = WordCloud(
            width=800, height=400,
            background_color='white',
            colormap=colormap,
            max_words=30,
            relative_scaling=0.5
        ).generate(text)

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title(f'{sentiment} Sentiment - Word Cloud', fontsize=14, fontweight='bold')

        plt.tight_layout()
        return fig
    except Exception:
        # WordCloud raises ValueError when no words survive its filtering;
        # degrade gracefully instead of breaking the whole UI. Narrowed from
        # a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
        return None
|
| 116 |
+
|
| 117 |
+
def analyze_batch(reviews_text):
    """Classify several reviews (one per line) and plot summary charts.

    Args:
        reviews_text: Newline-separated reviews.

    Returns:
        A 2x2 matplotlib Figure (pie, confidence histogram, per-review
        scatter, sentiment-vs-confidence scatter), or None when fewer
        than two non-empty reviews are provided.
    """
    if not reviews_text.strip():
        return None

    reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
    if len(reviews) < 2:
        return None

    # Classify each review independently (same pipeline as analyze_sentiment).
    results = []
    for review in reviews:
        inputs = tokenizer(review, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
        prediction = torch.argmax(outputs.logits, dim=-1).item()
        sentiment = "Positive" if prediction == 1 else "Negative"
        confidence = probabilities.max()

        results.append({
            'review': review[:50] + '...' if len(review) > 50 else review,
            'sentiment': sentiment,
            'confidence': confidence,
            'positive_prob': probabilities[1]
        })

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

    # Aggregate counts and per-review series for the four charts.
    sentiment_counts = {'Positive': 0, 'Negative': 0}
    confidences = []
    positive_probs = []

    for r in results:
        sentiment_counts[r['sentiment']] += 1
        confidences.append(r['confidence'])
        positive_probs.append(r['positive_prob'])

    # Pie chart of the overall sentiment split (teal=Positive, red=Negative,
    # matching the dict insertion order above).
    colors = ['#4ecdc4', '#ff6b6b']
    ax1.pie(sentiment_counts.values(), labels=sentiment_counts.keys(),
            autopct='%1.1f%%', colors=colors, startangle=90)
    ax1.set_title('Sentiment Distribution')

    # Histogram of model confidence scores.
    ax2.hist(confidences, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
    ax2.set_title('Confidence Score Distribution')
    ax2.set_xlabel('Confidence Score')
    ax2.set_ylabel('Frequency')

    # Per-review positive probability, colored by the predicted sentiment.
    review_indices = range(len(results))
    ax3.scatter(review_indices, positive_probs,
                c=[colors[0] if r['sentiment'] == 'Positive' else colors[1] for r in results],
                alpha=0.7, s=100)
    ax3.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
    ax3.set_title('Positive Probability by Review')
    ax3.set_xlabel('Review Index')
    ax3.set_ylabel('Positive Probability')

    # Predicted label (0/1) versus confidence score.
    sentiment_scores = [1 if r['sentiment'] == 'Positive' else 0 for r in results]
    confidence_scores = confidences
    ax4.scatter(confidence_scores, sentiment_scores, alpha=0.7, s=100,
                c=[colors[0] if s == 1 else colors[1] for s in sentiment_scores])
    ax4.set_title('Sentiment vs Confidence')
    ax4.set_xlabel('Confidence Score')
    ax4.set_ylabel('Sentiment (0=Negative, 1=Positive)')
    ax4.set_yticks([0, 1])
    ax4.set_yticklabels(['Negative', 'Positive'])

    plt.tight_layout()
    return fig
|
| 184 |
+
|
| 185 |
+
def plot_history():
    """Plot positive-probability and confidence trends across past analyses.

    Reads the module-level `sentiment_history` list populated by
    analyze_sentiment.

    Returns:
        A matplotlib Figure with two stacked charts, or None until at
        least two analyses have been recorded.
    """
    if len(sentiment_history) < 2:
        return None

    indices = list(range(len(sentiment_history)))
    positive_probs = [item['positive_prob'] for item in sentiment_history]
    confidences = [item['confidence'] for item in sentiment_history]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

    # Positive probability per analysis; teal above the 0.5 decision line,
    # red below it.
    colors = ['#4ecdc4' if prob > 0.5 else '#ff6b6b' for prob in positive_probs]
    ax1.scatter(indices, positive_probs, c=colors, alpha=0.7, s=100)
    ax1.plot(indices, positive_probs, alpha=0.5, linewidth=2)
    ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
    ax1.set_title('Sentiment Analysis History - Positive Probability')
    ax1.set_xlabel('Analysis Number')
    ax1.set_ylabel('Positive Probability')
    ax1.grid(True, alpha=0.3)

    # Confidence score per analysis.
    ax2.bar(indices, confidences, alpha=0.7, color='lightblue', edgecolor='navy')
    ax2.set_title('Confidence Scores Over Time')
    ax2.set_xlabel('Analysis Number')
    ax2.set_ylabel('Confidence Score')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig
|
| 212 |
+
|
| 213 |
+
# Gradio UI: three tabs (single review, batch, history) wired to the
# analysis functions above.
# NOTE(review): this layout is reconstructed from a flattened diff — the
# nesting of the plot Rows and of clear_btn could not be read off exactly;
# confirm against the deployed Space.
with gr.Blocks(theme=gr.themes.Soft(), title="Movie Sentiment Analyzer") as demo:
    gr.Markdown("# 🎬 AI Movie Sentiment Analyzer")
    gr.Markdown("Advanced sentiment analysis for movie reviews using BERT model with comprehensive visualizations")

    with gr.Tab("Single Review Analysis"):
        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="Enter Movie Review",
                    placeholder="The cinematography was stunning, but the plot felt predictable...",
                    lines=5
                )
                analyze_btn = gr.Button("Analyze Sentiment", variant="primary", size="lg")

                gr.Examples(
                    examples=[
                        ["The cinematography was absolutely stunning, but the pacing felt slow at times."],
                        ["A masterpiece in every way! The performances, direction, and music were phenomenal."],
                        ["The movie was boring, and I couldn't connect with any of the characters."],
                        ["Incredible special effects, but the dialogue was cheesy and the plot had holes."],
                        ["The ending left me speechless, fantastic build-up throughout the entire film."]
                    ],
                    inputs=input_text
                )

            with gr.Column(scale=1):
                sentiment_output = gr.Textbox(label="Analysis Result", lines=2)

        with gr.Row():
            prob_plot = gr.Plot(label="Probability Distribution")
            gauge_plot = gr.Plot(label="Sentiment Gauge")

        with gr.Row():
            wordcloud_plot = gr.Plot(label="Word Cloud Visualization")

    with gr.Tab("Batch Analysis"):
        gr.Markdown("### Analyze Multiple Reviews")
        gr.Markdown("Enter multiple reviews separated by new lines for comparative analysis")

        batch_input = gr.Textbox(
            label="Multiple Reviews (one per line)",
            placeholder="First review here...\nSecond review here...\nThird review here...",
            lines=8
        )
        batch_btn = gr.Button("Analyze All Reviews", variant="primary")
        batch_plot = gr.Plot(label="Batch Analysis Results")

    with gr.Tab("Analysis History"):
        gr.Markdown("### Historical Analysis Trends")
        gr.Markdown("View patterns and trends from your previous analyses")

        with gr.Row():
            history_plot = gr.Plot(label="Sentiment History")
            refresh_btn = gr.Button("Refresh History", variant="secondary")

        clear_btn = gr.Button("Clear History", variant="stop")

    # Event wiring.
    analyze_btn.click(
        fn=analyze_sentiment,
        inputs=input_text,
        outputs=[sentiment_output, prob_plot, gauge_plot, wordcloud_plot]
    )

    batch_btn.click(fn=analyze_batch, inputs=batch_input, outputs=batch_plot)
    refresh_btn.click(fn=plot_history, outputs=history_plot)
    # list.clear() returns None, which Gradio treats as "no output update".
    clear_btn.click(lambda: sentiment_history.clear(), outputs=None)

demo.launch(share=True)
|
| 281 |
|
| 282 |
|