Spaces:

ASHUT0SH-SiNGH
/

BotDetection

Sleeping

App Files Community

Update app.py

by SurajJha21 - opened Feb 14, 2025

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+200

-158

Files changed (1) hide show

app.py +200 -158

app.py CHANGED Viewed

@@ -62,7 +62,6 @@ st.markdown("""
     </style>
     """, unsafe_allow_html=True)
 @st.cache_resource
 def load_model(model_path='bot_detector_model.pkl'):
     try:
@@ -77,7 +76,7 @@ def make_prediction(features, tweet_content, model_components):
     features_scaled = model_components['scaler'].transform(features)
     behavioral_probs = model_components['behavioral_model'].predict_proba(features_scaled)[0]
-    if tweet_content:
         tweet_features = model_components['tweet_vectorizer'].transform([tweet_content])
         tweet_probs = model_components['tweet_model'].predict_proba(tweet_features)[0]
         final_probs = 0.8 * behavioral_probs + 0.2 * tweet_probs
@@ -86,7 +85,6 @@ def make_prediction(features, tweet_content, model_components):
     prediction = (final_probs[1] > 0.5)
     confidence = final_probs[1] if prediction else final_probs[0]
     return prediction, confidence, final_probs
 def create_gauge_chart(confidence, prediction):
@@ -128,10 +126,10 @@ def create_probability_chart(probs):
     return fig
 def main():
-    # Sidebar
     st.sidebar.image("piclumen-1739279351872.png", width=100)  # Replace with your logo
     st.sidebar.title("Navigation")
-    page = st.sidebar.radio("Go to", ["Bot Detection", "About", "Statistics"])
     if page == "Bot Detection":
         st.title("🤖 Twitter Bot Detection System")
@@ -148,7 +146,7 @@ def main():
         if model_components is None:
             st.stop()
-        # Create tabs
         tab1, tab2 = st.tabs(["📝 Input Details", "📊 Analysis Results"])
         with tab1:
@@ -172,7 +170,7 @@ def main():
                 location = st.text_input("Location")
             st.markdown("### Account Properties")
-            prop_col1, prop_col2, prop_col3, prop_col4 = st.columns(4)
             with prop_col1:
                 verified = st.checkbox("Verified Account")
@@ -181,15 +179,16 @@ def main():
             with prop_col3:
                 default_profile_image = st.checkbox("Default Profile Image")
             has_extended_profile = True
             has_url = True
             st.markdown("### Tweet Content")
-            tweet_content = st.text_area("Sample Tweet ", height=100)
             if st.button("🔍 Analyze Account"):
                 with st.spinner('Analyzing account characteristics...'):
-                    # Prepare features
                     features = pd.DataFrame([{
                         'followers_count': followers_count,
                         'friends_count': friends_count,
@@ -215,28 +214,22 @@ def main():
                     prediction, confidence, probs = make_prediction(features, tweet_content, model_components)
                     # Switch to results tab
-                    time.sleep(1)  # Add small delay for effect
                     tab2.markdown("### Analysis Complete!")
                     with tab2:
-                        # Display main result
                         if prediction:
                             st.error("🤖 Bot Account Detected!")
                         else:
                             st.success("👤 Human Account Detected!")
-                        # Create three columns for visualizations
                         metric_col1, metric_col2 = st.columns(2)
                         with metric_col1:
-                            # Gauge chart
                             st.plotly_chart(create_gauge_chart(confidence, prediction), use_container_width=True)
                         with metric_col2:
-                            # Probability distribution
                             st.plotly_chart(create_probability_chart(probs), use_container_width=True)
-                        # Feature importance
                         st.markdown("### Feature Analysis")
                         feature_importance = pd.DataFrame({
                             'Feature': model_components['feature_names'],
@@ -244,168 +237,217 @@ def main():
                         }).sort_values('Importance', ascending=False)
                         fig = px.bar(feature_importance,
-                                   x='Importance',
-                                   y='Feature',
-                                   orientation='h',
-                                   title='Feature Importance Analysis')
                         fig.update_layout(height=400)
                         st.plotly_chart(fig, use_container_width=True)
-                        # Account metrics comparison
                         metrics_data = {
                             'Metric': ['Followers', 'Friends', 'Tweets', 'Favorites'],
                             'Count': [followers_count, friends_count, statuses_count, favorites_count]
                         }
                         fig = px.bar(metrics_data,
-                                   x='Metric',
-                                   y='Count',
-                                   title='Account Metrics Overview',
-                                   color='Count',
-                                   color_continuous_scale='Viridis')
                         st.plotly_chart(fig, use_container_width=True)
-    elif page == "About":
-            st.title("About the Bot Detection System")
-            # System Overview
-            st.markdown("""
-            <div class='info-box'>
-            <h3>🎯 System Overview</h3>
-            <p>Our Twitter Bot Detection System uses state-of-the-art machine learning algorithms to analyze Twitter accounts
-            and determine whether they are automated bots or genuine human users. The system achieves this through multi-faceted
-            analysis of various account characteristics and behaviors.</p>
-            </div>
-            """, unsafe_allow_html=True)
-            # Key Features
-            st.markdown("### 🔑 Key Features Analyzed")
-            col1, col2 = st.columns(2)
-            with col1:
-                st.markdown("""
-                #### Account Characteristics
-                - Profile completeness
-                - Account age and verification status
-                - Username patterns
-                - Profile description analysis
-                #### Behavioral Patterns
-                - Posting frequency
-                - Engagement rates
-                - Temporal patterns
-                - Content similarity
-                """)
-            with col2:
-                st.markdown("""
-                #### Network Analysis
-                - Follower-following ratio
-                - Friend acquisition rate
-                - Network growth patterns
-                #### Content Analysis
-                - Tweet sentiment
-                - Language patterns
-                - URL sharing frequency
-                - Hashtag usage
-                """)
-            # Technical Details
             st.markdown("""
-            <div class='info-box'>
-            <h3>⚙️ Technical Implementation</h3>
-            <p>The system employs a hierarchical classification approach:</p>
-            <ul>
-            <li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
-            <li><strong>Secondary Analysis:</strong> Natural Language Processing for content analysis</li>
-            <li><strong>Final Decision:</strong> Weighted ensemble of multiple models</li>
-            </ul>
-            </div>
-            """, unsafe_allow_html=True)
-            # Accuracy Metrics
-            st.markdown("### 📊 System Performance")
-            metrics_col1, metrics_col2, metrics_col3, metrics_col4 = st.columns(4)
-            with metrics_col1:
-                st.metric("Accuracy", "87%")
-            with metrics_col2:
-                st.metric("Precision", "89%")
-            with metrics_col3:
-                st.metric("Recall", "83%")
-            with metrics_col4:
-                st.metric("F1 Score", "86%")
-            # Use Cases
             st.markdown("""
-            ### 🎯 Common Use Cases
-            - **Social Media Management**: Identify and remove bot accounts
-            - **Research**: Analyze social media manipulation
-            - **Marketing**: Verify authentic engagement
-            - **Security**: Protect against automated threats
             """)
     else:  # Statistics page
-            st.title("System Statistics")
-            # Add some sample statistics
-            col1, col2 = st.columns(2)
-            with col1:
-                # Sample detection distribution
-                detection_data = {
-                    'Category': ['Bots', 'Humans'],
-                    'Count': [324, 676]
-                }
-                fig = px.pie(detection_data,
-                            values='Count',
-                            names='Category',
-                            title='Detection Distribution',
-                            color_discrete_sequence=['#FF4B4B', '#00CC96'])
-                st.plotly_chart(fig, use_container_width=True)
-            with col2:
-                # Confidence score distribution
-                confidence_data = {
-                    'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
-                    'Count': [250, 300, 200, 150, 100]
-                }
-                fig = px.bar(confidence_data,
-                            x='Score',
-                            y='Count',
-                            title='Confidence Score Distribution',
-                            color='Count',
-                            color_continuous_scale='Viridis')
-                st.plotly_chart(fig, use_container_width=True)
-            # Monthly statistics
-            st.markdown("### Monthly Detection Trends")
-            monthly_data = {
-                'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'],
-                'Bots Detected': [45, 52, 38, 65, 48, 76],
-                'Accuracy': [92, 94, 93, 95, 94, 96]
             }
-            fig = px.line(monthly_data,
-                        x='Month',
-                        y=['Bots Detected', 'Accuracy'],
-                        title='Monthly Performance Metrics',
-                        markers=True)
             st.plotly_chart(fig, use_container_width=True)
-            # Key metrics
-            st.markdown("### Key System Metrics")
-            metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
-            with metric_col1:
-                st.metric("Total Analyses", "1,000", "+12%")
-            with metric_col2:
-                st.metric("Avg. Accuracy", "94.5%", "+2.3%")
-            with metric_col3:
-                st.metric("Bot Detection Rate", "32.4%", "-5.2%")
-            with metric_col4:
-                st.metric("Processing Time", "1.2s", "-0.3s")
 if __name__ == "__main__":
-    main()

     </style>
     """, unsafe_allow_html=True)
 @st.cache_resource
 def load_model(model_path='bot_detector_model.pkl'):
     try:
     features_scaled = model_components['scaler'].transform(features)
     behavioral_probs = model_components['behavioral_model'].predict_proba(features_scaled)[0]
+    if tweet_content and tweet_content.strip():
         tweet_features = model_components['tweet_vectorizer'].transform([tweet_content])
         tweet_probs = model_components['tweet_model'].predict_proba(tweet_features)[0]
         final_probs = 0.8 * behavioral_probs + 0.2 * tweet_probs
     prediction = (final_probs[1] > 0.5)
     confidence = final_probs[1] if prediction else final_probs[0]
     return prediction, confidence, final_probs
 def create_gauge_chart(confidence, prediction):
     return fig
 def main():
+    # Sidebar with extended navigation
     st.sidebar.image("piclumen-1739279351872.png", width=100)  # Replace with your logo
     st.sidebar.title("Navigation")
+    page = st.sidebar.radio("Go to", ["Bot Detection", "CSV Analysis", "About", "Statistics"])
     if page == "Bot Detection":
         st.title("🤖 Twitter Bot Detection System")
         if model_components is None:
             st.stop()
+        # Create tabs for individual account analysis
         tab1, tab2 = st.tabs(["📝 Input Details", "📊 Analysis Results"])
         with tab1:
                 location = st.text_input("Location")
             st.markdown("### Account Properties")
+            prop_col1, prop_col2, prop_col3 = st.columns(3)
             with prop_col1:
                 verified = st.checkbox("Verified Account")
             with prop_col3:
                 default_profile_image = st.checkbox("Default Profile Image")
+            # These can be fixed or computed; here we assume True as default
             has_extended_profile = True
             has_url = True
             st.markdown("### Tweet Content")
+            tweet_content = st.text_area("Sample Tweet", height=100)
             if st.button("🔍 Analyze Account"):
                 with st.spinner('Analyzing account characteristics...'):
+                    # Prepare features for the single account
                     features = pd.DataFrame([{
                         'followers_count': followers_count,
                         'friends_count': friends_count,
                     prediction, confidence, probs = make_prediction(features, tweet_content, model_components)
                     # Switch to results tab
+                    time.sleep(1)
                     tab2.markdown("### Analysis Complete!")
                     with tab2:
                         if prediction:
                             st.error("🤖 Bot Account Detected!")
                         else:
                             st.success("👤 Human Account Detected!")
                         metric_col1, metric_col2 = st.columns(2)
                         with metric_col1:
                             st.plotly_chart(create_gauge_chart(confidence, prediction), use_container_width=True)
                         with metric_col2:
                             st.plotly_chart(create_probability_chart(probs), use_container_width=True)
                         st.markdown("### Feature Analysis")
                         feature_importance = pd.DataFrame({
                             'Feature': model_components['feature_names'],
                         }).sort_values('Importance', ascending=False)
                         fig = px.bar(feature_importance,
+                                     x='Importance',
+                                     y='Feature',
+                                     orientation='h',
+                                     title='Feature Importance Analysis')
                         fig.update_layout(height=400)
                         st.plotly_chart(fig, use_container_width=True)
                         metrics_data = {
                             'Metric': ['Followers', 'Friends', 'Tweets', 'Favorites'],
                             'Count': [followers_count, friends_count, statuses_count, favorites_count]
                         }
                         fig = px.bar(metrics_data,
+                                     x='Metric',
+                                     y='Count',
+                                     title='Account Metrics Overview',
+                                     color='Count',
+                                     color_continuous_scale='Viridis')
                         st.plotly_chart(fig, use_container_width=True)
+    elif page == "CSV Analysis":
+        st.title("CSV Batch Analysis")
+        st.markdown("Upload a CSV file with account data to run batch predictions.")
+        uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
+        if uploaded_file is not None:
+            data = pd.read_csv(uploaded_file)
+            st.markdown("### CSV Data Preview")
+            st.dataframe(data.head())
+            model_components = load_model()
+            if model_components is None:
+                st.stop()
+            predictions = []
+            confidences = []
+            with st.spinner("Processing accounts..."):
+                for idx, row in data.iterrows():
+                    features = pd.DataFrame([{
+                        'followers_count': row['followers_count'],
+                        'friends_count': row['friends_count'],
+                        'listed_count': row['listed_count'],
+                        'favorites_count': row['favorites_count'],
+                        'statuses_count': row['statuses_count'],
+                        'verified': int(row['verified']),
+                        'followers_friends_ratio': row['followers_count'] / (row['friends_count'] + 1),
+                        'statuses_per_day': row['statuses_count'] / (row['account_age (days)'] + 1),
+                        'engagement_ratio': row['favorites_count'] / (row['statuses_count'] + 1),
+                        'account_age_days': row['account_age (days)'],
+                        'name_length': len(row['username']),
+                        'name_has_digits': int(bool(re.search(r'\d', row['username']))),
+                        'description_length': len(row['description']),
+                        'has_location': int(bool(row['location'].strip())),
+                        'has_url': int(row['has_url']),
+                        'default_profile': int(row['default_profile']),
+                        'default_profile_image': int(row['default_profile_image']),
+                        'has_extended_profile': int(row['has_extended_profile'])
+                    }])
+                    tweet_text = row['tweet_content'] if 'tweet_content' in row else ""
+                    pred, conf, _ = make_prediction(features, tweet_text, model_components)
+                    predictions.append(pred)
+                    confidences.append(conf)
+            data['prediction'] = predictions
+            data['confidence'] = confidences
+            st.markdown("### Batch Prediction Results")
+            st.dataframe(data)
+            # If ground truth labels are provided, compute evaluation metrics
+            if 'label' in data.columns:
+                y_true = data['label'].tolist()
+                y_pred = [int(p) for p in predictions]
+                from sklearn.metrics import f1_score, precision_score, recall_score, classification_report
+                f1 = f1_score(y_true, y_pred, average='weighted')
+                precision = precision_score(y_true, y_pred, average='weighted')
+                recall = recall_score(y_true, y_pred, average='weighted')
+                report = classification_report(y_true, y_pred)
+                st.markdown("### Evaluation Metrics")
+                st.write("F1 Score:", f1)
+                st.write("Precision:", precision)
+                st.write("Recall:", recall)
+                st.text(report)
+    elif page == "About":
+        st.title("About the Bot Detection System")
+        st.markdown("""
+        <div class='info-box'>
+        <h3>🎯 System Overview</h3>
+        <p>Our Twitter Bot Detection System uses state-of-the-art machine learning algorithms to analyze Twitter accounts
+        and determine whether they are automated bots or genuine human users. The system achieves this through multi-faceted
+        analysis of various account characteristics and behaviors.</p>
+        </div>
+        """, unsafe_allow_html=True)
+        st.markdown("### 🔑 Key Features Analyzed")
+        col1, col2 = st.columns(2)
+        with col1:
             st.markdown("""
+            #### Account Characteristics
+            - Profile completeness
+            - Account age and verification status
+            - Username patterns
+            - Profile description analysis
+            #### Behavioral Patterns
+            - Posting frequency
+            - Engagement rates
+            - Temporal patterns
+            - Content similarity
+            """)
+        with col2:
             st.markdown("""
+            #### Network Analysis
+            - Follower-following ratio
+            - Friend acquisition rate
+            - Network growth patterns
+            #### Content Analysis
+            - Tweet sentiment
+            - Language patterns
+            - URL sharing frequency
+            - Hashtag usage
             """)
+        st.markdown("""
+        <div class='info-box'>
+        <h3>⚙️ Technical Implementation</h3>
+        <p>The system employs a hierarchical classification approach:</p>
+        <ul>
+        <li><strong>Primary Analysis:</strong> Random Forest Classifier for behavioral patterns</li>
+        <li><strong>Secondary Analysis:</strong> Natural Language Processing for content analysis</li>
+        <li><strong>Final Decision:</strong> Weighted ensemble of multiple models</li>
+        </ul>
+        </div>
+        """, unsafe_allow_html=True)
+        st.markdown("### 📊 System Performance")
+        metrics_col1, metrics_col2, metrics_col3, metrics_col4 = st.columns(4)
+        with metrics_col1:
+            st.metric("Accuracy", "87%")
+        with metrics_col2:
+            st.metric("Precision", "89%")
+        with metrics_col3:
+            st.metric("Recall", "83%")
+        with metrics_col4:
+            st.metric("F1 Score", "86%")
+        st.markdown("""
+        ### 🎯 Common Use Cases
+        - **Social Media Management**: Identify and remove bot accounts
+        - **Research**: Analyze social media manipulation
+        - **Marketing**: Verify authentic engagement
+        - **Security**: Protect against automated threats
+        """)
     else:  # Statistics page
+        st.title("System Statistics")
+        col1, col2 = st.columns(2)
+        with col1:
+            detection_data = {
+                'Category': ['Bots', 'Humans'],
+                'Count': [324, 676]
             }
+            fig = px.pie(detection_data,
+                         values='Count',
+                         names='Category',
+                         title='Detection Distribution',
+                         color_discrete_sequence=['#FF4B4B', '#00CC96'])
             st.plotly_chart(fig, use_container_width=True)
+        with col2:
+            confidence_data = {
+                'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
+                'Count': [250, 300, 200, 150, 100]
+            }
+            fig = px.bar(confidence_data,
+                         x='Score',
+                         y='Count',
+                         title='Confidence Score Distribution',
+                         color='Count',
+                         color_continuous_scale='Viridis')
+            st.plotly_chart(fig, use_container_width=True)
+        st.markdown("### Monthly Detection Trends")
+        monthly_data = {
+            'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'],
+            'Bots Detected': [45, 52, 38, 65, 48, 76],
+            'Accuracy': [92, 94, 93, 95, 94, 96]
+        }
+        fig = px.line(monthly_data,
+                      x='Month',
+                      y=['Bots Detected', 'Accuracy'],
+                      title='Monthly Performance Metrics',
+                      markers=True)
+        st.plotly_chart(fig, use_container_width=True)
+        st.markdown("### Key System Metrics")
+        metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
+        with metric_col1:
+            st.metric("Total Analyses", "1,000", "+12%")
+        with metric_col2:
+            st.metric("Avg. Accuracy", "94.5%", "+2.3%")
+        with metric_col3:
+            st.metric("Bot Detection Rate", "32.4%", "-5.2%")
+        with metric_col4:
+            st.metric("Processing Time", "1.2s", "-0.3s")
 if __name__ == "__main__":
+    main()