Spaces:
Running
Running
CLICK
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -204,10 +204,10 @@ def main():
|
|
| 204 |
'name_has_digits': int(bool(re.search(r'\d', name))),
|
| 205 |
'description_length': len(description),
|
| 206 |
'has_location': int(bool(location.strip())),
|
| 207 |
-
|
| 208 |
'default_profile': int(default_profile),
|
| 209 |
'default_profile_image': int(default_profile_image),
|
| 210 |
-
|
| 211 |
}])
|
| 212 |
|
| 213 |
# Make prediction
|
|
@@ -269,13 +269,18 @@ def main():
|
|
| 269 |
model_components = load_model()
|
| 270 |
if model_components is None:
|
| 271 |
st.stop()
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
predictions = []
|
| 274 |
confidences = []
|
|
|
|
| 275 |
|
| 276 |
with st.spinner("Processing accounts..."):
|
| 277 |
for idx, row in data.iterrows():
|
| 278 |
-
features
|
|
|
|
| 279 |
'followers_count': row['followers_count'],
|
| 280 |
'friends_count': row['friends_count'],
|
| 281 |
'listed_count': row['listed_count'],
|
|
@@ -288,23 +293,32 @@ def main():
|
|
| 288 |
'account_age_days': row['account_age (days)'],
|
| 289 |
'name_length': len(row['username']),
|
| 290 |
'name_has_digits': int(bool(re.search(r'\d', row['username']))),
|
| 291 |
-
'description_length': len(row['description']),
|
| 292 |
-
'has_location': int(bool(row['location'].strip())),
|
| 293 |
-
'has_url': int(row['has_url']),
|
| 294 |
'default_profile': int(row['default_profile']),
|
| 295 |
'default_profile_image': int(row['default_profile_image']),
|
| 296 |
-
'
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
|
| 300 |
pred, conf, _ = make_prediction(features, tweet_text, model_components)
|
| 301 |
predictions.append(pred)
|
| 302 |
confidences.append(conf)
|
|
|
|
|
|
|
| 303 |
|
| 304 |
data['prediction'] = predictions
|
| 305 |
data['confidence'] = confidences
|
|
|
|
|
|
|
| 306 |
st.markdown("### Batch Prediction Results")
|
| 307 |
-
|
|
|
|
|
|
|
| 308 |
|
| 309 |
# If ground truth labels are provided, compute evaluation metrics
|
| 310 |
if 'label' in data.columns:
|
|
@@ -321,7 +335,7 @@ def main():
|
|
| 321 |
st.write("Precision:", precision)
|
| 322 |
st.write("Recall:", recall)
|
| 323 |
st.text(report)
|
| 324 |
-
|
| 325 |
elif page == "About":
|
| 326 |
st.title("About the Bot Detection System")
|
| 327 |
st.markdown("""
|
|
@@ -365,7 +379,7 @@ def main():
|
|
| 365 |
|
| 366 |
st.markdown("""
|
| 367 |
<div class='info-box'>
|
| 368 |
-
<h3
|
| 369 |
<p>The system employs a hierarchical classification approach:</p>
|
| 370 |
<ul>
|
| 371 |
<li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
|
|
@@ -389,10 +403,10 @@ def main():
|
|
| 389 |
|
| 390 |
st.markdown("""
|
| 391 |
### 🎯 Common Use Cases
|
| 392 |
-
-
|
| 393 |
-
-
|
| 394 |
-
-
|
| 395 |
-
-
|
| 396 |
""")
|
| 397 |
|
| 398 |
else: # Statistics page
|
|
@@ -402,7 +416,7 @@ def main():
|
|
| 402 |
with col1:
|
| 403 |
detection_data = {
|
| 404 |
'Category': ['Bots', 'Humans'],
|
| 405 |
-
'Count': [
|
| 406 |
}
|
| 407 |
fig = px.pie(detection_data,
|
| 408 |
values='Count',
|
|
@@ -413,8 +427,8 @@ def main():
|
|
| 413 |
|
| 414 |
with col2:
|
| 415 |
confidence_data = {
|
| 416 |
-
'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
|
| 417 |
-
'Count': [
|
| 418 |
}
|
| 419 |
fig = px.bar(confidence_data,
|
| 420 |
x='Score',
|
|
@@ -443,11 +457,11 @@ def main():
|
|
| 443 |
with metric_col1:
|
| 444 |
st.metric("Total Analyses", "1,000", "+12%")
|
| 445 |
with metric_col2:
|
| 446 |
-
st.metric("Avg. Accuracy", "
|
| 447 |
with metric_col3:
|
| 448 |
-
st.metric("Bot Detection Rate", "
|
| 449 |
with metric_col4:
|
| 450 |
st.metric("Processing Time", "1.2s", "-0.3s")
|
| 451 |
|
| 452 |
-
if
|
| 453 |
-
main()
|
|
|
|
| 204 |
'name_has_digits': int(bool(re.search(r'\d', name))),
|
| 205 |
'description_length': len(description),
|
| 206 |
'has_location': int(bool(location.strip())),
|
| 207 |
+
'has_url': True,
|
| 208 |
'default_profile': int(default_profile),
|
| 209 |
'default_profile_image': int(default_profile_image),
|
| 210 |
+
'has_extended_profile': True
|
| 211 |
}])
|
| 212 |
|
| 213 |
# Make prediction
|
|
|
|
| 269 |
model_components = load_model()
|
| 270 |
if model_components is None:
|
| 271 |
st.stop()
|
| 272 |
+
|
| 273 |
+
# Get the feature names in the correct order from the scaler
|
| 274 |
+
feature_names = model_components['scaler'].feature_names_in_
|
| 275 |
|
| 276 |
predictions = []
|
| 277 |
confidences = []
|
| 278 |
+
prediction_labels = [] # New list to store emoji labels
|
| 279 |
|
| 280 |
with st.spinner("Processing accounts..."):
|
| 281 |
for idx, row in data.iterrows():
|
| 282 |
+
# Create a dictionary with all features initialized to 0
|
| 283 |
+
feature_dict = {
|
| 284 |
'followers_count': row['followers_count'],
|
| 285 |
'friends_count': row['friends_count'],
|
| 286 |
'listed_count': row['listed_count'],
|
|
|
|
| 293 |
'account_age_days': row['account_age (days)'],
|
| 294 |
'name_length': len(row['username']),
|
| 295 |
'name_has_digits': int(bool(re.search(r'\d', row['username']))),
|
| 296 |
+
'description_length': len(str(row['description'])),
|
| 297 |
+
'has_location': int(bool(str(row['location']).strip())),
|
|
|
|
| 298 |
'default_profile': int(row['default_profile']),
|
| 299 |
'default_profile_image': int(row['default_profile_image']),
|
| 300 |
+
'has_url': 0,
|
| 301 |
+
'has_extended_profile': 0
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
# Create DataFrame with features in the correct order
|
| 305 |
+
features = pd.DataFrame([{name: feature_dict.get(name, 0) for name in feature_names}])
|
| 306 |
|
| 307 |
tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
|
| 308 |
pred, conf, _ = make_prediction(features, tweet_text, model_components)
|
| 309 |
predictions.append(pred)
|
| 310 |
confidences.append(conf)
|
| 311 |
+
# Add emoji based on prediction
|
| 312 |
+
prediction_labels.append('🤖' if pred == 1 else '👤')
|
| 313 |
|
| 314 |
data['prediction'] = predictions
|
| 315 |
data['confidence'] = confidences
|
| 316 |
+
data['account_type'] = prediction_labels # Add new column with emojis
|
| 317 |
+
|
| 318 |
st.markdown("### Batch Prediction Results")
|
| 319 |
+
# Reorder columns to show the prediction and emoji first
|
| 320 |
+
cols = ['username', 'account_type', 'prediction', 'confidence'] + [col for col in data.columns if col not in ['username', 'account_type', 'prediction', 'confidence']]
|
| 321 |
+
st.dataframe(data[cols])
|
| 322 |
|
| 323 |
# If ground truth labels are provided, compute evaluation metrics
|
| 324 |
if 'label' in data.columns:
|
|
|
|
| 335 |
st.write("Precision:", precision)
|
| 336 |
st.write("Recall:", recall)
|
| 337 |
st.text(report)
|
| 338 |
+
|
| 339 |
elif page == "About":
|
| 340 |
st.title("About the Bot Detection System")
|
| 341 |
st.markdown("""
|
|
|
|
| 379 |
|
| 380 |
st.markdown("""
|
| 381 |
<div class='info-box'>
|
| 382 |
+
<h3>⚙ Technical Implementation</h3>
|
| 383 |
<p>The system employs a hierarchical classification approach:</p>
|
| 384 |
<ul>
|
| 385 |
<li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
|
|
|
|
| 403 |
|
| 404 |
st.markdown("""
|
| 405 |
### 🎯 Common Use Cases
|
| 406 |
+
- *Social Media Management*: Identify and remove bot accounts
|
| 407 |
+
- *Research*: Analyze social media manipulation
|
| 408 |
+
- *Marketing*: Verify authentic engagement
|
| 409 |
+
- *Security*: Protect against automated threats
|
| 410 |
""")
|
| 411 |
|
| 412 |
else: # Statistics page
|
|
|
|
| 416 |
with col1:
|
| 417 |
detection_data = {
|
| 418 |
'Category': ['Bots', 'Humans'],
|
| 419 |
+
'Count': [737, 826]
|
| 420 |
}
|
| 421 |
fig = px.pie(detection_data,
|
| 422 |
values='Count',
|
|
|
|
| 427 |
|
| 428 |
with col2:
|
| 429 |
confidence_data = {
|
| 430 |
+
'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
|
| 431 |
+
'Count': [178, 447, 503, 352, 83] # Total = 1563
|
| 432 |
}
|
| 433 |
fig = px.bar(confidence_data,
|
| 434 |
x='Score',
|
|
|
|
| 457 |
with metric_col1:
|
| 458 |
st.metric("Total Analyses", "1,000", "+12%")
|
| 459 |
with metric_col2:
|
| 460 |
+
st.metric("Avg. Accuracy", "87%", "+2.3%")
|
| 461 |
with metric_col3:
|
| 462 |
+
st.metric("Bot Detection Rate", "47.2%", "-3.2%")
|
| 463 |
with metric_col4:
|
| 464 |
st.metric("Processing Time", "1.2s", "-0.3s")
|
| 465 |
|
| 466 |
+
if __name__ == "__main__":
|
| 467 |
+
main()
|