CLICK committed on
Commit
f5790a1
·
verified ·
1 Parent(s): f8d5588

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -22
app.py CHANGED
@@ -204,10 +204,10 @@ def main():
204
  'name_has_digits': int(bool(re.search(r'\d', name))),
205
  'description_length': len(description),
206
  'has_location': int(bool(location.strip())),
207
- # 'has_url': int(has_url),
208
  'default_profile': int(default_profile),
209
  'default_profile_image': int(default_profile_image),
210
- # 'has_extended_profile': int(has_extended_profile)
211
  }])
212
 
213
  # Make prediction
@@ -269,13 +269,18 @@ def main():
269
  model_components = load_model()
270
  if model_components is None:
271
  st.stop()
 
 
 
272
 
273
  predictions = []
274
  confidences = []
 
275
 
276
  with st.spinner("Processing accounts..."):
277
  for idx, row in data.iterrows():
278
- features = pd.DataFrame([{
 
279
  'followers_count': row['followers_count'],
280
  'friends_count': row['friends_count'],
281
  'listed_count': row['listed_count'],
@@ -288,23 +293,32 @@ def main():
288
  'account_age_days': row['account_age (days)'],
289
  'name_length': len(row['username']),
290
  'name_has_digits': int(bool(re.search(r'\d', row['username']))),
291
- 'description_length': len(row['description']),
292
- 'has_location': int(bool(row['location'].strip())),
293
- 'has_url': int(row['has_url']),
294
  'default_profile': int(row['default_profile']),
295
  'default_profile_image': int(row['default_profile_image']),
296
- 'has_extended_profile': int(row['has_extended_profile'])
297
- }])
 
 
 
 
298
 
299
  tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
300
  pred, conf, _ = make_prediction(features, tweet_text, model_components)
301
  predictions.append(pred)
302
  confidences.append(conf)
 
 
303
 
304
  data['prediction'] = predictions
305
  data['confidence'] = confidences
 
 
306
  st.markdown("### Batch Prediction Results")
307
- st.dataframe(data)
 
 
308
 
309
  # If ground truth labels are provided, compute evaluation metrics
310
  if 'label' in data.columns:
@@ -321,7 +335,7 @@ def main():
321
  st.write("Precision:", precision)
322
  st.write("Recall:", recall)
323
  st.text(report)
324
-
325
  elif page == "About":
326
  st.title("About the Bot Detection System")
327
  st.markdown("""
@@ -365,7 +379,7 @@ def main():
365
 
366
  st.markdown("""
367
  <div class='info-box'>
368
- <h3>⚙️ Technical Implementation</h3>
369
  <p>The system employs a hierarchical classification approach:</p>
370
  <ul>
371
  <li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
@@ -389,10 +403,10 @@ def main():
389
 
390
  st.markdown("""
391
  ### 🎯 Common Use Cases
392
- - **Social Media Management**: Identify and remove bot accounts
393
- - **Research**: Analyze social media manipulation
394
- - **Marketing**: Verify authentic engagement
395
- - **Security**: Protect against automated threats
396
  """)
397
 
398
  else: # Statistics page
@@ -402,7 +416,7 @@ def main():
402
  with col1:
403
  detection_data = {
404
  'Category': ['Bots', 'Humans'],
405
- 'Count': [324, 676]
406
  }
407
  fig = px.pie(detection_data,
408
  values='Count',
@@ -413,8 +427,8 @@ def main():
413
 
414
  with col2:
415
  confidence_data = {
416
- 'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
417
- 'Count': [250, 300, 200, 150, 100]
418
  }
419
  fig = px.bar(confidence_data,
420
  x='Score',
@@ -443,11 +457,11 @@ def main():
443
  with metric_col1:
444
  st.metric("Total Analyses", "1,000", "+12%")
445
  with metric_col2:
446
- st.metric("Avg. Accuracy", "94.5%", "+2.3%")
447
  with metric_col3:
448
- st.metric("Bot Detection Rate", "32.4%", "-5.2%")
449
  with metric_col4:
450
  st.metric("Processing Time", "1.2s", "-0.3s")
451
 
452
- if __name__ == "__main__":
453
- main()
 
204
  'name_has_digits': int(bool(re.search(r'\d', name))),
205
  'description_length': len(description),
206
  'has_location': int(bool(location.strip())),
207
+ 'has_url': True,
208
  'default_profile': int(default_profile),
209
  'default_profile_image': int(default_profile_image),
210
+ 'has_extended_profile': True
211
  }])
212
 
213
  # Make prediction
 
269
  model_components = load_model()
270
  if model_components is None:
271
  st.stop()
272
+
273
+ # Get the feature names in the correct order from the scaler
274
+ feature_names = model_components['scaler'].feature_names_in_
275
 
276
  predictions = []
277
  confidences = []
278
+ prediction_labels = [] # New list to store emoji labels
279
 
280
  with st.spinner("Processing accounts..."):
281
  for idx, row in data.iterrows():
282
+ # Create a dictionary with all features initialized to 0
283
+ feature_dict = {
284
  'followers_count': row['followers_count'],
285
  'friends_count': row['friends_count'],
286
  'listed_count': row['listed_count'],
 
293
  'account_age_days': row['account_age (days)'],
294
  'name_length': len(row['username']),
295
  'name_has_digits': int(bool(re.search(r'\d', row['username']))),
296
+ 'description_length': len(str(row['description'])),
297
+ 'has_location': int(bool(str(row['location']).strip())),
 
298
  'default_profile': int(row['default_profile']),
299
  'default_profile_image': int(row['default_profile_image']),
300
+ 'has_url': 0,
301
+ 'has_extended_profile': 0
302
+ }
303
+
304
+ # Create DataFrame with features in the correct order
305
+ features = pd.DataFrame([{name: feature_dict.get(name, 0) for name in feature_names}])
306
 
307
  tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
308
  pred, conf, _ = make_prediction(features, tweet_text, model_components)
309
  predictions.append(pred)
310
  confidences.append(conf)
311
+ # Add emoji based on prediction
312
+ prediction_labels.append('🤖' if pred == 1 else '👤')
313
 
314
  data['prediction'] = predictions
315
  data['confidence'] = confidences
316
+ data['account_type'] = prediction_labels # Add new column with emojis
317
+
318
  st.markdown("### Batch Prediction Results")
319
+ # Reorder columns to show the prediction and emoji first
320
+ cols = ['username', 'account_type', 'prediction', 'confidence'] + [col for col in data.columns if col not in ['username', 'account_type', 'prediction', 'confidence']]
321
+ st.dataframe(data[cols])
322
 
323
  # If ground truth labels are provided, compute evaluation metrics
324
  if 'label' in data.columns:
 
335
  st.write("Precision:", precision)
336
  st.write("Recall:", recall)
337
  st.text(report)
338
+
339
  elif page == "About":
340
  st.title("About the Bot Detection System")
341
  st.markdown("""
 
379
 
380
  st.markdown("""
381
  <div class='info-box'>
382
+ <h3>⚙ Technical Implementation</h3>
383
  <p>The system employs a hierarchical classification approach:</p>
384
  <ul>
385
  <li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
 
403
 
404
  st.markdown("""
405
  ### 🎯 Common Use Cases
406
+ - *Social Media Management*: Identify and remove bot accounts
407
+ - *Research*: Analyze social media manipulation
408
+ - *Marketing*: Verify authentic engagement
409
+ - *Security*: Protect against automated threats
410
  """)
411
 
412
  else: # Statistics page
 
416
  with col1:
417
  detection_data = {
418
  'Category': ['Bots', 'Humans'],
419
+ 'Count': [737, 826]
420
  }
421
  fig = px.pie(detection_data,
422
  values='Count',
 
427
 
428
  with col2:
429
  confidence_data = {
430
+ 'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
431
+ 'Count': [178, 447, 503, 352, 83] # Total = 1563
432
  }
433
  fig = px.bar(confidence_data,
434
  x='Score',
 
457
  with metric_col1:
458
  st.metric("Total Analyses", "1,000", "+12%")
459
  with metric_col2:
460
+ st.metric("Avg. Accuracy", "87%", "+2.3%")
461
  with metric_col3:
462
+ st.metric("Bot Detection Rate", "47.2%", "-3.2%")
463
  with metric_col4:
464
  st.metric("Processing Time", "1.2s", "-0.3s")
465
 
466
+ if __name__ == "__main__":
467
+ main()