Spaces:

ASHUT0SH-SiNGH
/

BotDetection

Running

App Files Files Community

CLICK committed on Feb 15, 2025

Commit

f5790a1

verified ·

1 Parent(s): f8d5588

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -22

app.py CHANGED Viewed

@@ -204,10 +204,10 @@ def main():
                         'name_has_digits': int(bool(re.search(r'\d', name))),
                         'description_length': len(description),
                         'has_location': int(bool(location.strip())),
-                        # 'has_url': int(has_url),
                         'default_profile': int(default_profile),
                         'default_profile_image': int(default_profile_image),
-                        # 'has_extended_profile': int(has_extended_profile)
                     }])
                     # Make prediction
@@ -269,13 +269,18 @@ def main():
             model_components = load_model()
             if model_components is None:
                 st.stop()
             predictions = []
             confidences = []
             with st.spinner("Processing accounts..."):
                 for idx, row in data.iterrows():
-                    features = pd.DataFrame([{
                         'followers_count': row['followers_count'],
                         'friends_count': row['friends_count'],
                         'listed_count': row['listed_count'],
@@ -288,23 +293,32 @@ def main():
                         'account_age_days': row['account_age (days)'],
                         'name_length': len(row['username']),
                         'name_has_digits': int(bool(re.search(r'\d', row['username']))),
-                        'description_length': len(row['description']),
-                        'has_location': int(bool(row['location'].strip())),
-                        'has_url': int(row['has_url']),
                         'default_profile': int(row['default_profile']),
                         'default_profile_image': int(row['default_profile_image']),
-                        'has_extended_profile': int(row['has_extended_profile'])
-                    }])
                     tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
                     pred, conf, _ = make_prediction(features, tweet_text, model_components)
                     predictions.append(pred)
                     confidences.append(conf)
             data['prediction'] = predictions
             data['confidence'] = confidences
             st.markdown("### Batch Prediction Results")
-            st.dataframe(data)
             # If ground truth labels are provided, compute evaluation metrics
             if 'label' in data.columns:
@@ -321,7 +335,7 @@ def main():
                 st.write("Precision:", precision)
                 st.write("Recall:", recall)
                 st.text(report)
     elif page == "About":
         st.title("About the Bot Detection System")
         st.markdown("""
@@ -365,7 +379,7 @@ def main():
         st.markdown("""
         <div class='info-box'>
-        <h3>⚙️ Technical Implementation</h3>
         <p>The system employs a hierarchical classification approach:</p>
         <ul>
         <li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
@@ -389,10 +403,10 @@ def main():
         st.markdown("""
         ### 🎯 Common Use Cases
-        - **Social Media Management**: Identify and remove bot accounts
-        - **Research**: Analyze social media manipulation
-        - **Marketing**: Verify authentic engagement
-        - **Security**: Protect against automated threats
         """)
     else:  # Statistics page
@@ -402,7 +416,7 @@ def main():
         with col1:
             detection_data = {
                 'Category': ['Bots', 'Humans'],
-                'Count': [324, 676]
             }
             fig = px.pie(detection_data,
                          values='Count',
@@ -413,8 +427,8 @@ def main():
         with col2:
             confidence_data = {
-                'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
-                'Count': [250, 300, 200, 150, 100]
             }
             fig = px.bar(confidence_data,
                          x='Score',
@@ -443,11 +457,11 @@ def main():
         with metric_col1:
             st.metric("Total Analyses", "1,000", "+12%")
         with metric_col2:
-            st.metric("Avg. Accuracy", "94.5%", "+2.3%")
         with metric_col3:
-            st.metric("Bot Detection Rate", "32.4%", "-5.2%")
         with metric_col4:
             st.metric("Processing Time", "1.2s", "-0.3s")
-if __name__ == "__main__":
-    main()

                         'name_has_digits': int(bool(re.search(r'\d', name))),
                         'description_length': len(description),
                         'has_location': int(bool(location.strip())),
+                        'has_url': True,
                         'default_profile': int(default_profile),
                         'default_profile_image': int(default_profile_image),
+                        'has_extended_profile': True
                     }])
                     # Make prediction
             model_components = load_model()
             if model_components is None:
                 st.stop()
+            # Get the feature names in the correct order from the scaler
+            feature_names = model_components['scaler'].feature_names_in_
             predictions = []
             confidences = []
+            prediction_labels = []  # New list to store emoji labels
             with st.spinner("Processing accounts..."):
                 for idx, row in data.iterrows():
+                    # Build the feature dictionary from this row; has_url and has_extended_profile are fixed at 0
+                    feature_dict = {
                         'followers_count': row['followers_count'],
                         'friends_count': row['friends_count'],
                         'listed_count': row['listed_count'],
                         'account_age_days': row['account_age (days)'],
                         'name_length': len(row['username']),
                         'name_has_digits': int(bool(re.search(r'\d', row['username']))),
+                        'description_length': len(str(row['description'])),
+                        'has_location': int(bool(str(row['location']).strip())),
                         'default_profile': int(row['default_profile']),
                         'default_profile_image': int(row['default_profile_image']),
+                        'has_url': 0,
+                        'has_extended_profile': 0
+                    }
+                    # Create DataFrame with features in the correct order
+                    features = pd.DataFrame([{name: feature_dict.get(name, 0) for name in feature_names}])
                     tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
                     pred, conf, _ = make_prediction(features, tweet_text, model_components)
                     predictions.append(pred)
                     confidences.append(conf)
+                    # Add emoji based on prediction
+                    prediction_labels.append('🤖' if pred == 1 else '👤')
             data['prediction'] = predictions
             data['confidence'] = confidences
+            data['account_type'] = prediction_labels  # Add new column with emojis
             st.markdown("### Batch Prediction Results")
+            # Reorder columns to show the prediction and emoji first
+            cols = ['username', 'account_type', 'prediction', 'confidence'] + [col for col in data.columns if col not in ['username', 'account_type', 'prediction', 'confidence']]
+            st.dataframe(data[cols])
             # If ground truth labels are provided, compute evaluation metrics
             if 'label' in data.columns:
                 st.write("Precision:", precision)
                 st.write("Recall:", recall)
                 st.text(report)
     elif page == "About":
         st.title("About the Bot Detection System")
         st.markdown("""
         st.markdown("""
         <div class='info-box'>
+        <h3>⚙ Technical Implementation</h3>
         <p>The system employs a hierarchical classification approach:</p>
         <ul>
         <li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
         st.markdown("""
         ### 🎯 Common Use Cases
+        - *Social Media Management*: Identify and remove bot accounts
+        - *Research*: Analyze social media manipulation
+        - *Marketing*: Verify authentic engagement
+        - *Security*: Protect against automated threats
         """)
     else:  # Statistics page
         with col1:
             detection_data = {
                 'Category': ['Bots', 'Humans'],
+                'Count': [737, 826]
             }
             fig = px.pie(detection_data,
                          values='Count',
         with col2:
             confidence_data = {
+                'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
+                'Count': [178, 447, 503, 352, 83]  # Total = 1563
             }
             fig = px.bar(confidence_data,
                          x='Score',
         with metric_col1:
             st.metric("Total Analyses", "1,000", "+12%")
         with metric_col2:
+            st.metric("Avg. Accuracy", "87%", "+2.3%")
         with metric_col3:
+            st.metric("Bot Detection Rate", "47.2%", "-3.2%")
         with metric_col4:
             st.metric("Processing Time", "1.2s", "-0.3s")
+if __name__ == "__main__":
+    main()