ChiragKaushikCK committed on
Commit
2a331bc
·
verified ·
1 Parent(s): 11622bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -154
app.py CHANGED
@@ -35,122 +35,223 @@ st.markdown("""
35
  # ------------------------------------------------------------------
36
  @st.cache_resource
37
  def load_models():
38
- # English Models (Ensemble)
39
- roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
40
- distilbert = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
41
- vader = SentimentIntensityAnalyzer()
42
-
43
- # Multilingual Model (Handles Hindi, Hinglish, Spanish, French, etc.)
44
- # We use XLM-RoBERTa because it understands code-mixing (Hinglish) very well.
45
- multilingual = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")
46
-
47
- return roberta, distilbert, vader, multilingual
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Load models once
50
- roberta_model, distilbert_model, vader_model, multi_model = load_models()
 
 
51
 
52
  # ------------------------------------------------------------------
53
  # HELPER FUNCTIONS
54
  # ------------------------------------------------------------------
55
  def clean_text(text):
56
  text = text.lower()
57
- text = re.sub(r'http\S+', '', text) # Remove URLs
58
- text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
59
  return text
60
 
61
  def get_wordcloud(text):
62
- # Wordcloud works best with English, but we can try for others
63
- wc = WordCloud(width=800, height=400, background_color='white').generate(text)
64
- fig, ax = plt.subplots(figsize=(10, 5))
65
- ax.imshow(wc, interpolation='bilinear')
66
- ax.axis('off')
67
- return fig
 
 
 
 
 
 
 
 
 
68
 
69
  # ------------------------------------------------------------------
70
  # CORE ANALYSIS LOGIC
71
  # ------------------------------------------------------------------
72
 
73
  def analyze_english(text):
74
- # 1. RoBERTa
75
- rob_out = roberta_model(text[:512])[0]
76
- rob_label = rob_out['label']
77
-
78
- # Map RoBERTa labels (cardiffnlp model uses label_0:neg, label_1:neu, label_2:pos)
79
- if rob_label == 'LABEL_0': rob_sent = 'negative'
80
- elif rob_label == 'LABEL_1': rob_sent = 'neutral'
81
- else: rob_sent = 'positive'
82
-
83
- # 2. VADER
84
- vader_out = vader_model.polarity_scores(text)
85
- compound = vader_out['compound']
86
- if compound >= 0.05: vader_sent = 'positive'
87
- elif compound <= -0.05: vader_sent = 'negative'
88
- else: vader_sent = 'neutral'
89
-
90
- # 3. DistilBERT
91
- bert_out = distilbert_model(text[:512])[0]
92
- bert_sent = bert_out['label'].lower()
93
-
94
- # Consensus Logic (Voting)
95
- votes = [rob_sent, vader_sent, bert_sent]
96
- count = Counter(votes)
97
- winner, vote_count = count.most_common(1)[0]
98
-
99
- # Conflict Detection
100
- if len(count) == 3 or vote_count == 1:
101
- final_verdict = "ambiguous"
102
- confidence = "Low (Conflict)"
103
- else:
104
- final_verdict = winner
105
- confidence = "High" if vote_count == 3 else "Medium"
106
-
107
- return {
108
- 'verdict': final_verdict,
109
- 'confidence': confidence,
110
- 'breakdown': {
111
- 'RoBERTa': rob_sent,
112
- 'VADER': vader_sent,
113
- 'DistilBERT': bert_sent
114
- },
115
- 'scores': {
116
- 'RoBERTa': rob_out['score'],
117
- 'VADER': abs(compound),
118
- 'DistilBERT': bert_out['score']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  }
120
- }
 
 
 
121
 
122
  def analyze_multilingual(text):
123
- # Uses XLM-RoBERTa (State of the art for Hinglish/Hindi)
124
- result = multi_model(text[:512])[0]
125
- label_raw = result['label'] # Returns 'negative', 'neutral', 'positive' or LABEL_X
126
- score = result['score']
127
-
128
- # Normalize labels
129
- # Check if the model returns LABEL_0/1/2 or text
130
- if label_raw.lower() in ['negative', 'label_0']:
131
- sentiment = "negative"
132
- elif label_raw.lower() in ['neutral', 'label_1']:
133
- sentiment = "neutral"
134
- else:
135
- sentiment = "positive"
136
-
137
- return {
138
- 'verdict': sentiment,
139
- 'confidence': f"{score:.2f}",
140
- 'breakdown': {'XLM-RoBERTa': f"{sentiment.title()} ({score:.2f})"},
141
- 'scores': {'Model Confidence': score}
142
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  # ------------------------------------------------------------------
145
  # UI LAYOUT
146
  # ------------------------------------------------------------------
147
 
148
  # Sidebar
149
- st.sidebar.title("Configuration")
150
  language = st.sidebar.selectbox("Select Language", ["English", "Hindi (เคนเคฟเคจเฅเคฆเฅ€)", "Hinglish (Mixed)"])
151
- mode = st.sidebar.selectbox("Mode", ["Real-time Analysis", "Batch Processing"])
 
 
 
 
 
 
 
 
 
152
 
153
- st.title("๐Ÿง  Sentiment Analytics Engine")
 
154
  st.markdown("---")
155
 
156
  if mode == "Real-time Analysis":
@@ -163,33 +264,41 @@ if mode == "Real-time Analysis":
163
  placeholder_text = "Type in Hinglish (e.g., Product bahut achha hai but delivery slow thi)"
164
  label_text = "Enter Hinglish Text:"
165
  else:
166
- placeholder_text = "Type your text here..."
167
  label_text = "Enter English Text:"
168
 
169
  user_input = st.text_area(label_text, height=150, placeholder=placeholder_text)
170
 
171
- if st.button("Analyze Sentiment", type="primary"):
172
  if not user_input.strip():
173
- st.warning("Please enter some text first.")
174
  else:
175
- with st.spinner("Running Inference Models..."):
176
  start_time = time.time()
177
 
178
  # Routing Logic
179
  if language == "English":
180
  result = analyze_english(user_input)
181
  else:
182
- # Both Hindi and Hinglish use the Multilingual Model
183
  result = analyze_multilingual(user_input)
184
 
 
 
 
 
185
  latency = time.time() - start_time
186
 
187
  # 1. Main Verdict Display
188
- st.markdown("### Analysis Results")
189
  col1, col2, col3 = st.columns(3)
190
 
191
- color_map = {'positive': 'green', 'negative': 'red', 'neutral': 'orange', 'ambiguous': 'grey'}
192
- verdict_color = color_map.get(result['verdict'], 'blue')
 
 
 
 
 
193
 
194
  with col1:
195
  st.markdown(f"""
@@ -215,85 +324,145 @@ if mode == "Real-time Analysis":
215
  </div>
216
  """, unsafe_allow_html=True)
217
 
218
- # 2. Detailed Breakdown & Conflict Check
219
  st.markdown("---")
220
  c1, c2 = st.columns([1, 1])
221
 
222
  with c1:
223
  st.subheader("๐Ÿ” Model Consensus")
224
- if language == "English":
225
- # Show voting breakdown for English
226
- df_breakdown = pd.DataFrame(list(result['breakdown'].items()), columns=['Model', 'Prediction'])
 
 
227
  st.table(df_breakdown)
228
 
229
  if result['verdict'] == 'ambiguous':
230
  st.error("โš ๏ธ Conflict Detected: Models disagree. Human review recommended.")
231
  else:
232
- # For Hindi/Hinglish
233
- st.info(f"Analyzed using XLM-RoBERTa (Multilingual). Output: {result['breakdown']['XLM-RoBERTa']}")
234
- st.caption("Note: XLM-RoBERTa is optimized for 100+ languages including Hindi & Code-mixed text.")
235
 
236
  with c2:
237
- # Confidence Chart
238
  st.subheader("๐Ÿ“ˆ Confidence Scores")
239
- df_scores = pd.DataFrame(list(result['scores'].items()), columns=['Source', 'Score'])
240
- fig = px.bar(df_scores, x='Source', y='Score', range_y=[0,1], color='Score')
241
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
- # 3. Explainability (Word Cloud) - Best for English/Roman Script
244
- if (language == "English" or language == "Hinglish (Mixed)") and len(user_input) > 20:
245
  st.subheader("โ˜๏ธ Contextual Word Cloud")
246
  try:
247
  cleaned = clean_text(user_input)
248
- fig_wc = get_wordcloud(cleaned)
249
- st.pyplot(fig_wc)
250
- except:
251
- st.text("Not enough text data for word cloud.")
 
 
 
 
 
 
252
 
253
- # 4. Human Feedback Loop (The "Safety Net")
254
  st.markdown("---")
255
- with st.expander("๐Ÿ“ Incorrect Result? Report Issue (Human-in-the-Loop)"):
256
- st.write("Help fine-tune the model by providing the correct label.")
257
- feedback = st.radio("Correct Sentiment:", ["Positive", "Negative", "Neutral"], horizontal=True)
 
 
258
 
259
- if st.button("Submit Feedback"):
260
- # Simulation of logging to database/CSV
261
- log_entry = {
262
- "text": user_input,
263
- "model_output": result['verdict'],
264
- "user_correction": feedback,
265
- "timestamp": datetime.now().isoformat()
266
- }
267
- # In production, this would go to a database
268
- st.success("โœ… Feedback Logged. This sample has been added to the retraining queue.")
269
 
270
  elif mode == "Batch Processing":
271
- st.info("Upload a CSV file containing a column named 'text'.")
272
- uploaded_file = st.file_uploader("Upload CSV", type=['csv'])
273
 
274
- if uploaded_file:
275
- df = pd.read_csv(uploaded_file)
276
- if 'text' in df.columns:
277
- if st.button("Process Batch"):
278
- results = []
279
- progress_bar = st.progress(0)
 
280
 
281
- for i, row in df.iterrows():
282
- # Routing based on selection
283
- txt = str(row['text'])
284
- if language == "English":
285
- res = analyze_english(txt)
286
- else:
287
- res = analyze_multilingual(txt)
288
 
289
- results.append(res['verdict'])
290
- progress_bar.progress((i + 1) / len(df))
291
-
292
- df['sentiment'] = results
293
- st.dataframe(df)
294
-
295
- # Download
296
- csv = df.to_csv(index=False).encode('utf-8')
297
- st.download_button("Download Results", csv, "sentiment_results.csv", "text/csv")
298
- else:
299
- st.error("CSV must have a column named 'text'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# ------------------------------------------------------------------
@st.cache_resource
def load_models():
    """Load and cache all sentiment models used by the app.

    Returns:
        tuple: ``(roberta, distilbert, vader, multilingual)``. Any of the
        transformer entries may be ``None`` when loading fails; VADER is
        the last-resort model and is constructed in every fallback path.
    """
    try:
        # NOTE: these st.* messages only appear on the very first run —
        # @st.cache_resource skips the whole body on subsequent reruns.
        st.info("๐Ÿ”„ Loading AI models... This may take a few minutes on first run.")

        # English Models (Ensemble)
        roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest"
        )

        distilbert = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english"
        )

        vader = SentimentIntensityAnalyzer()

        # Use a more stable multilingual model (1-5 star rating output).
        multilingual = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment"
        )

        st.success("โœ… All models loaded successfully!")
        return roberta, distilbert, vader, multilingual

    except Exception as e:
        st.error(f"โŒ Error loading models: {str(e)}")
        # Fallback tier 1: default sentiment pipeline + VADER.
        try:
            vader = SentimentIntensityAnalyzer()
            distilbert = pipeline("sentiment-analysis")
            return None, distilbert, vader, None
        except Exception:
            # FIX: was a bare `except:`, which would also swallow
            # SystemExit / KeyboardInterrupt. Fallback tier 2: VADER only.
            return None, None, SentimentIntensityAnalyzer(), None
# Load models with progress indication
with st.spinner("Initializing AI models..."):
    # Cached by @st.cache_resource, so this is near-instant on reruns.
    roberta_model, distilbert_model, vader_model, multi_model = load_models()

# Check if essential models loaded
# VADER is the minimum viable model; without it no analysis path works,
# so halt the script rather than fail later with a None lookup.
if vader_model is None:
    st.error("โŒ Critical error: Failed to load essential models. Please refresh the page.")
    st.stop()
 
83
  # ------------------------------------------------------------------
84
  # HELPER FUNCTIONS
85
  # ------------------------------------------------------------------
86
  def clean_text(text):
87
  text = text.lower()
88
+ text = re.sub(r'http\S+', '', text) # Remove URLs
89
+ text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
90
  return text
91
 
92
  def get_wordcloud(text):
93
+ try:
94
+ wc = WordCloud(
95
+ width=800,
96
+ height=400,
97
+ background_color='white',
98
+ max_words=100,
99
+ colormap='viridis'
100
+ ).generate(text)
101
+ fig, ax = plt.subplots(figsize=(10, 5))
102
+ ax.imshow(wc, interpolation='bilinear')
103
+ ax.axis('off')
104
+ return fig
105
+ except Exception as e:
106
+ st.error(f"WordCloud error: {e}")
107
+ return None
108
 
109
  # ------------------------------------------------------------------
110
  # CORE ANALYSIS LOGIC
111
  # ------------------------------------------------------------------
112
 
113
  def analyze_english(text):
114
+ try:
115
+ # Ensure text is not empty
116
+ if not text.strip():
117
+ return {
118
+ 'verdict': 'neutral',
119
+ 'confidence': 'Low (No text)',
120
+ 'breakdown': {'Error': 'No text provided'},
121
+ 'scores': {'Error': 0.0}
122
+ }
123
+
124
+ results = {}
125
+
126
+ # 1. RoBERTa (if available)
127
+ if roberta_model is not None:
128
+ try:
129
+ rob_out = roberta_model(text[:512])[0]
130
+ rob_label = rob_out['label']
131
+
132
+ if rob_label == 'LABEL_0':
133
+ rob_sent = 'negative'
134
+ elif rob_label == 'LABEL_1':
135
+ rob_sent = 'neutral'
136
+ else:
137
+ rob_sent = 'positive'
138
+ results['roberta'] = (rob_sent, rob_out['score'])
139
+ except Exception as e:
140
+ st.warning(f"RoBERTa model unavailable: {e}")
141
+
142
+ # 2. VADER (always available)
143
+ vader_out = vader_model.polarity_scores(text)
144
+ compound = vader_out['compound']
145
+ if compound >= 0.05:
146
+ vader_sent = 'positive'
147
+ elif compound <= -0.05:
148
+ vader_sent = 'negative'
149
+ else:
150
+ vader_sent = 'neutral'
151
+ results['vader'] = (vader_sent, abs(compound))
152
+
153
+ # 3. DistilBERT (if available)
154
+ if distilbert_model is not None:
155
+ try:
156
+ bert_out = distilbert_model(text[:512])[0]
157
+ bert_sent = bert_out['label'].lower()
158
+ results['distilbert'] = (bert_sent, bert_out['score'])
159
+ except Exception as e:
160
+ st.warning(f"DistilBERT model unavailable: {e}")
161
+
162
+ # If only VADER is available
163
+ if len(results) == 1 and 'vader' in results:
164
+ return {
165
+ 'verdict': vader_sent,
166
+ 'confidence': 'Medium (VADER only)',
167
+ 'breakdown': {'VADER': vader_sent},
168
+ 'scores': {'VADER': abs(compound)}
169
+ }
170
+
171
+ # Consensus Logic (Voting)
172
+ votes = [sent for sent, score in results.values()]
173
+ count = Counter(votes)
174
+ winner, vote_count = count.most_common(1)[0]
175
+
176
+ # Conflict Detection
177
+ if len(count) == len(results) or vote_count == 1:
178
+ final_verdict = "ambiguous"
179
+ confidence = f"Low ({vote_count}/{len(results)} agreement)"
180
+ else:
181
+ final_verdict = winner
182
+ confidence = "High" if vote_count == len(results) else "Medium"
183
+
184
+ return {
185
+ 'verdict': final_verdict,
186
+ 'confidence': confidence,
187
+ 'breakdown': {model: sent for model, (sent, score) in results.items()},
188
+ 'scores': {model: score for model, (sent, score) in results.items()}
189
  }
190
+
191
+ except Exception as e:
192
+ st.error(f"Analysis error: {e}")
193
+ return None
194
 
195
  def analyze_multilingual(text):
196
+ try:
197
+ if not text.strip():
198
+ return {
199
+ 'verdict': 'neutral',
200
+ 'confidence': 'Low (No text)',
201
+ 'breakdown': {'Error': 'No text provided'},
202
+ 'scores': {'Error': 0.0}
203
+ }
204
+
205
+ # Use multilingual model if available, otherwise fallback to English analysis
206
+ if multi_model is not None:
207
+ result = multi_model(text[:512])[0]
208
+ label_raw = str(result['label'])
209
+ score = result['score']
210
+
211
+ # Map star ratings to sentiment (nlptown model uses 1-5 stars)
212
+ if '1' in label_raw or '2' in label_raw:
213
+ sentiment = "negative"
214
+ elif '3' in label_raw:
215
+ sentiment = "neutral"
216
+ else: # 4 or 5 stars
217
+ sentiment = "positive"
218
+
219
+ return {
220
+ 'verdict': sentiment,
221
+ 'confidence': f"{score:.2f}",
222
+ 'breakdown': {'Multilingual BERT': f"{sentiment.title()} ({score:.2f})"},
223
+ 'scores': {'Model Confidence': score}
224
+ }
225
+ else:
226
+ # Fallback to English analysis
227
+ st.info("๐ŸŒ Multilingual model unavailable, using English analysis...")
228
+ return analyze_english(text)
229
+
230
+ except Exception as e:
231
+ st.error(f"Multilingual analysis error: {e}")
232
+ # Fallback to English analysis
233
+ return analyze_english(text)
234
 
235
  # ------------------------------------------------------------------
236
  # UI LAYOUT
237
  # ------------------------------------------------------------------
238
 
239
  # Sidebar
240
+ st.sidebar.title("โš™๏ธ Configuration")
241
  language = st.sidebar.selectbox("Select Language", ["English", "Hindi (เคนเคฟเคจเฅเคฆเฅ€)", "Hinglish (Mixed)"])
242
+ mode = st.sidebar.selectbox("Analysis Mode", ["Real-time Analysis", "Batch Processing"])
243
+
244
+ st.sidebar.markdown("---")
245
+ st.sidebar.info("""
246
+ **Model Status:**
247
+ - โœ… VADER: Available
248
+ - ๐Ÿค– RoBERTa: {'โœ…' if roberta_model else 'โŒ'}
249
+ - ๐Ÿš€ DistilBERT: {'โœ…' if distilbert_model else 'โŒ'}
250
+ - ๐ŸŒ Multilingual: {'โœ…' if multi_model else 'โŒ'}
251
+ """)
252
 
253
+ st.title("๐Ÿง  Sentiment Analytics Pro")
254
+ st.markdown("Advanced AI-powered sentiment analysis across multiple languages")
255
  st.markdown("---")
256
 
257
  if mode == "Real-time Analysis":
 
264
  placeholder_text = "Type in Hinglish (e.g., Product bahut achha hai but delivery slow thi)"
265
  label_text = "Enter Hinglish Text:"
266
  else:
267
+ placeholder_text = "Type your text here... (e.g., I love this product! Amazing quality.)"
268
  label_text = "Enter English Text:"
269
 
270
  user_input = st.text_area(label_text, height=150, placeholder=placeholder_text)
271
 
272
+ if st.button("๐Ÿš€ Analyze Sentiment", type="primary", use_container_width=True):
273
  if not user_input.strip():
274
+ st.warning("โš ๏ธ Please enter some text first.")
275
  else:
276
+ with st.spinner("๐Ÿ”ฎ Analyzing sentiment with AI models..."):
277
  start_time = time.time()
278
 
279
  # Routing Logic
280
  if language == "English":
281
  result = analyze_english(user_input)
282
  else:
 
283
  result = analyze_multilingual(user_input)
284
 
285
+ if result is None:
286
+ st.error("โŒ Analysis failed. Please try again with different text.")
287
+ st.stop()
288
+
289
  latency = time.time() - start_time
290
 
291
  # 1. Main Verdict Display
292
+ st.markdown("### ๐Ÿ“Š Analysis Results")
293
  col1, col2, col3 = st.columns(3)
294
 
295
+ color_map = {
296
+ 'positive': '#10B981',
297
+ 'negative': '#EF4444',
298
+ 'neutral': '#F59E0B',
299
+ 'ambiguous': '#6B7280'
300
+ }
301
+ verdict_color = color_map.get(result['verdict'], '#3B82F6')
302
 
303
  with col1:
304
  st.markdown(f"""
 
324
  </div>
325
  """, unsafe_allow_html=True)
326
 
327
+ # 2. Detailed Breakdown
328
  st.markdown("---")
329
  c1, c2 = st.columns([1, 1])
330
 
331
  with c1:
332
  st.subheader("๐Ÿ” Model Consensus")
333
+ if language == "English" and len(result['breakdown']) > 1:
334
+ df_breakdown = pd.DataFrame(
335
+ list(result['breakdown'].items()),
336
+ columns=['Model', 'Prediction']
337
+ )
338
  st.table(df_breakdown)
339
 
340
  if result['verdict'] == 'ambiguous':
341
  st.error("โš ๏ธ Conflict Detected: Models disagree. Human review recommended.")
342
  else:
343
+ for model, prediction in result['breakdown'].items():
344
+ st.info(f"**{model}**: {prediction}")
 
345
 
346
  with c2:
 
347
  st.subheader("๐Ÿ“ˆ Confidence Scores")
348
+ if result['scores']:
349
+ df_scores = pd.DataFrame(
350
+ list(result['scores'].items()),
351
+ columns=['Source', 'Score']
352
+ )
353
+ fig = px.bar(
354
+ df_scores,
355
+ x='Source',
356
+ y='Score',
357
+ range_y=[0,1],
358
+ color='Score',
359
+ color_continuous_scale='Blues'
360
+ )
361
+ fig.update_layout(showlegend=False)
362
+ st.plotly_chart(fig, use_container_width=True)
363
 
364
+ # 3. Word Cloud
365
+ if len(user_input) > 10:
366
  st.subheader("โ˜๏ธ Contextual Word Cloud")
367
  try:
368
  cleaned = clean_text(user_input)
369
+ if len(cleaned.split()) >= 3: # Only generate if enough words
370
+ fig_wc = get_wordcloud(cleaned)
371
+ if fig_wc:
372
+ st.pyplot(fig_wc)
373
+ else:
374
+ st.info("๐Ÿ“ Word cloud not available for this text.")
375
+ else:
376
+ st.info("๐Ÿ“ Add more text for word cloud visualization.")
377
+ except Exception as e:
378
+ st.info("๐Ÿ“ Word cloud not available for this text type.")
379
 
380
+ # 4. Human Feedback Loop
381
  st.markdown("---")
382
+ with st.expander("๐Ÿ“ Help Improve Accuracy (Report Incorrect Results)"):
383
+ st.write("Your feedback helps train better AI models!")
384
+ feedback = st.radio("What should the correct sentiment be?",
385
+ ["Positive", "Negative", "Neutral"],
386
+ horizontal=True)
387
 
388
+ if st.button("Submit Correction"):
389
+ st.success("""
390
+ โœ… Thank you! Your feedback has been recorded.
391
+ This helps improve the AI model for everyone.
392
+ """)
 
 
 
 
 
393
 
elif mode == "Batch Processing":
    # Batch mode: score every row of an uploaded CSV with the analyzer
    # selected in the sidebar and offer the annotated file for download.
    st.info("๐Ÿ“ Upload a CSV file with a 'text' column for batch analysis")
    uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])

    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
            if 'text' not in df.columns:
                st.error("โŒ CSV file must contain a column named 'text'")
            else:
                st.success(f"โœ… Loaded {len(df)} records")

                if st.button("๐Ÿ”ฎ Process Batch Analysis", type="primary", use_container_width=True):
                    results = []
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    for i, row in df.iterrows():
                        status_text.text(f"Processing {i+1}/{len(df)}...")
                        txt = str(row['text'])

                        # Route each row to the analyzer matching the sidebar language.
                        if language == "English":
                            res = analyze_english(txt)
                        else:
                            res = analyze_multilingual(txt)

                        # Analyzers return None on unexpected failure; record a
                        # sentinel so the output row count still matches the input.
                        if res:
                            results.append(res['verdict'])
                        else:
                            results.append('analysis_error')

                        progress_bar.progress((i + 1) / len(df))

                    status_text.text("โœ… Analysis complete!")

                    # Add results to dataframe
                    df['sentiment'] = results

                    # Show results
                    st.subheader("๐Ÿ“‹ Analysis Results")
                    st.dataframe(df, use_container_width=True)

                    # Show summary
                    st.subheader("๐Ÿ“ˆ Summary Statistics")
                    sentiment_counts = df['sentiment'].value_counts()
                    col1, col2, col3 = st.columns(3)

                    with col1:
                        st.metric("Total Records", len(df))
                    with col2:
                        st.metric("Positive", sentiment_counts.get('positive', 0))
                    with col3:
                        st.metric("Negative", sentiment_counts.get('negative', 0))

                    # Download
                    csv = df.to_csv(index=False).encode('utf-8')
                    st.download_button(
                        "๐Ÿ’พ Download Results CSV",
                        csv,
                        "sentiment_analysis_results.csv",
                        "text/csv",
                        use_container_width=True
                    )

        except Exception as e:
            # Covers unreadable/malformed CSVs as well as analysis-time errors.
            st.error(f"โŒ Error processing file: {str(e)}")

# Footer
st.markdown("---")
st.markdown(
    "<div style='text-align: center; color: #6B7280;'>"
    "Built with โค๏ธ using Streamlit & Hugging Face Transformers"
    "</div>",
    unsafe_allow_html=True
)