"""Streamlit app for metadata-based Twitter bot detection.

Pages:
    * Bot Detection  - score a single account entered through the UI.
    * CSV Analysis   - batch-score accounts from an uploaded CSV file.
    * About          - static description of the approach.
    * Statistics     - demo dashboard with illustrative (hard-coded) numbers.
"""

import base64
import re
import time
from datetime import datetime
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st


def get_default_robot_icon():
    """Return the URL of a generic robot icon, used when no local logo exists."""
    return "https://raw.githubusercontent.com/FortAwesome/Font-Awesome/master/svgs/solid/robot.svg"


# Set page configuration
st.set_page_config(
    page_title="Twitter Bot Detector",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS
st.markdown(""" """, unsafe_allow_html=True)

# Model was trained with these 11 features, in this order.
MODEL_FEATURES = [
    "followers_count",
    "friends_count",
    "listedcount",
    "favourites_count",
    "statuses_count",
    "verified",
    "default_profile",
    "default_profile_image",
    "has_extended_profile",
    "follow_ratio",
    "account_age_days",
]

# Local logo shown in the sidebar; replace with your own file.
SIDEBAR_LOGO_PATH = "piclumen-1739279351872.png"

# Text spellings accepted as "true" for boolean CSV columns.
_TRUTHY_STRINGS = frozenset({"1", "true", "t", "yes", "y"})


@st.cache_resource
def _load_model_cached(model_path):
    """Load the model from disk; only successful loads end up in the cache."""
    # NOTE(security): joblib uses pickle under the hood. Only load model files
    # that come from a trusted source.
    return joblib.load(model_path)


def load_model(model_path="bot_model.joblib"):
    """Load the trained classifier, reporting failures in the UI.

    The actual load is cached by ``_load_model_cached``. Failures are handled
    here, outside the cached function, so that a missing or broken file is not
    remembered as ``None`` for the lifetime of the server process.

    Args:
        model_path: Path of the joblib file to load.

    Returns:
        The loaded model, or ``None`` if loading failed (an error message has
        already been rendered with ``st.error`` in that case).
    """
    try:
        return _load_model_cached(model_path)
    except FileNotFoundError:
        st.error(f"Model file not found. Please ensure '{model_path}' exists in the project folder.")
        return None
    except Exception as e:  # top-level UI boundary: show the reason, don't crash
        st.error(f"Failed to load model: {e}")
        return None


def make_prediction(features_df, model):
    """Predict the class of a single account.

    Args:
        features_df: One-row DataFrame with the columns in ``MODEL_FEATURES``.
        model: Fitted classifier exposing ``predict_proba``.

    Returns:
        Tuple ``(pred_class, confidence, probs)`` where ``pred_class`` is the
        index of the most probable class (0 = Human, 1 = Bot), ``confidence``
        is that class's probability and ``probs`` is the full probability row.
    """
    probs = model.predict_proba(features_df)[0]
    pred_class = int(np.argmax(probs))  # 0 = Human, 1 = Bot
    confidence = float(probs[pred_class])
    return pred_class, confidence, probs


def make_batch_predictions(features_df, model):
    """Predict classes for many accounts with a single ``predict_proba`` call.

    Args:
        features_df: DataFrame with the columns in ``MODEL_FEATURES``; one row
            per account. Must contain at least one row.
        model: Fitted classifier exposing ``predict_proba``.

    Returns:
        Tuple ``(pred_classes, confidences)`` of equal-length NumPy arrays: the
        most probable class index per row and the probability of that class.
    """
    probs = np.asarray(model.predict_proba(features_df))
    pred_classes = np.argmax(probs, axis=1)
    confidences = probs[np.arange(len(probs)), pred_classes]
    return pred_classes.astype(int), confidences.astype(float)


def create_gauge_chart(confidence, prediction_is_bot):
    """Build a gauge figure showing ``confidence`` (0-1) as a percentage.

    The bar is dark red for bot predictions and dark green otherwise.
    """
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=confidence * 100,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Confidence Score"},
        gauge={
            'axis': {'range': [None, 100]},
            'bar': {'color': "darkred" if prediction_is_bot else "darkgreen"},
            'steps': [
                {'range': [0, 33], 'color': 'lightgray'},
                {'range': [33, 66], 'color': 'gray'},
                {'range': [66, 100], 'color': 'darkgray'}
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 50
            }
        }
    ))
    fig.update_layout(height=300)
    return fig


def create_probability_chart(probs):
    """Build a donut chart of the Human (``probs[0]``) / Bot (``probs[1]``) split."""
    labels = ['Human', 'Bot']
    fig = go.Figure(data=[go.Pie(
        labels=labels,
        values=[probs[0] * 100, probs[1] * 100],
        hole=.3,
        marker_colors=['#00CC96', '#EF553B']
    )])
    fig.update_layout(
        title="Probability Distribution",
        height=300
    )
    return fig


def build_model_features_from_ui(
    followers_count: int,
    friends_count: int,
    listed_count: int,
    favorites_count: int,
    statuses_count: int,
    verified: bool,
    default_profile: bool,
    default_profile_image: bool,
    has_extended_profile: bool,
    account_age_days: int
) -> pd.DataFrame:
    """Convert UI inputs into the one-row frame expected by the model.

    Mapping from parameter to model column:
        listed_count    -> listedcount
        favorites_count -> favourites_count
        (derived)       -> follow_ratio = followers_count / (friends_count + 1)

    Boolean inputs are converted to 0/1. All other parameters keep their name.

    Returns:
        One-row DataFrame whose columns are exactly ``MODEL_FEATURES``, in
        that order.
    """
    # +1 in the denominator avoids division by zero for accounts with no friends.
    follow_ratio = followers_count / (friends_count + 1)

    features = pd.DataFrame([{
        "followers_count": followers_count,
        "friends_count": friends_count,
        "listedcount": listed_count,
        "favourites_count": favorites_count,
        "statuses_count": statuses_count,
        "verified": int(verified),
        "default_profile": int(default_profile),
        "default_profile_image": int(default_profile_image),
        "has_extended_profile": int(has_extended_profile),
        "follow_ratio": follow_ratio,
        "account_age_days": account_age_days,
    }])

    # enforce correct order
    features = features[MODEL_FEATURES]
    return features


def _to_flag(value) -> int:
    """Convert one CSV cell to a 0/1-style integer flag.

    Missing values and unparseable text become 0; text such as "true" or "yes"
    becomes 1; numeric values (or numeric text) are truncated with ``int``.
    """
    if isinstance(value, str):
        text = value.strip().lower()
        if text in _TRUTHY_STRINGS:
            return 1
        try:
            return int(float(text))
        except (ValueError, OverflowError):
            return 0
    if pd.isna(value):
        return 0
    return int(value)


def _numeric_column(data: pd.DataFrame, candidates) -> pd.Series:
    """Return the first column of ``candidates`` present in ``data`` as numbers.

    Non-numeric and missing cells become 0. If none of the candidate columns
    exists, a column of zeros aligned to ``data`` is returned.
    """
    for column in candidates:
        if column in data.columns:
            return pd.to_numeric(data[column], errors="coerce").fillna(0)
    return pd.Series(0, index=data.index)


def _flag_column(data: pd.DataFrame, column: str) -> pd.Series:
    """Return ``data[column]`` as integer flags, or zeros if the column is absent."""
    if column not in data.columns:
        return pd.Series(0, index=data.index)
    return data[column].map(_to_flag).astype(int)


def build_model_features_from_frame(data: pd.DataFrame) -> pd.DataFrame:
    """Build the model feature matrix for every row of an uploaded CSV.

    Accepted column aliases:
        listedcount / listed_count
        favourites_count / favorites_count
        account_age_days / "account_age (days)"

    Missing columns default to 0. ``follow_ratio`` is taken from the CSV when
    present; absent or unparseable values are computed as
    ``followers_count / (friends_count + 1)``.

    Returns:
        DataFrame indexed like ``data`` with exactly the ``MODEL_FEATURES``
        columns, in that order.
    """
    followers = _numeric_column(data, ("followers_count",))
    friends = _numeric_column(data, ("friends_count",))

    # A friends count of -1 would divide by zero; treat such ratios as 0.
    computed_ratio = (followers / (friends + 1)).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    if "follow_ratio" in data.columns:
        follow_ratio = pd.to_numeric(data["follow_ratio"], errors="coerce").fillna(computed_ratio)
    else:
        follow_ratio = computed_ratio

    features = pd.DataFrame({
        "followers_count": followers,
        "friends_count": friends,
        "listedcount": _numeric_column(data, ("listedcount", "listed_count")),
        "favourites_count": _numeric_column(data, ("favourites_count", "favorites_count")),
        "statuses_count": _numeric_column(data, ("statuses_count",)),
        "verified": _flag_column(data, "verified"),
        "default_profile": _flag_column(data, "default_profile"),
        "default_profile_image": _flag_column(data, "default_profile_image"),
        "has_extended_profile": _flag_column(data, "has_extended_profile"),
        "follow_ratio": follow_ratio,
        "account_age_days": _numeric_column(data, ("account_age_days", "account_age (days)")),
    }, index=data.index)
    return features[MODEL_FEATURES]


def _render_detection_page():
    """Render the single-account page: input form, prediction and charts."""
    st.title("🤖 Social Media Bot Detection System")
    st.markdown("""

Welcome to the Social Media Bot Detection System

This application demonstrates a metadata-based machine learning approach for detecting automated social media accounts.

""", unsafe_allow_html=True)

    # Load model
    model = load_model()
    if model is None:
        st.stop()

    # Create tabs for individual account analysis
    tab1, tab2 = st.tabs(["📝 Input Details", "📊 Analysis Results"])

    with tab1:
        st.markdown("### Account Information")
        col1, col2, col3 = st.columns([1, 1, 1])

        with col1:
            # Shown for completeness; the value is not used by the model.
            st.text_input("Account Name", placeholder="@username")
            followers_count = st.number_input("Followers Count", min_value=0)
            friends_count = st.number_input("Friends Count", min_value=0)
            listed_count = st.number_input("Listed Count", min_value=0)

        with col2:
            favorites_count = st.number_input("Favorites Count", min_value=0)
            statuses_count = st.number_input("Statuses Count", min_value=0)
            account_age = st.number_input("Account Age (days)", min_value=0)

        with col3:
            # Text fields are part of the UI only; they do not feed the model.
            st.text_area("Profile Description")
            st.text_input("Location")

        st.markdown("### Account Properties")
        prop_col1, prop_col2, prop_col3 = st.columns(3)
        with prop_col1:
            verified = st.checkbox("Verified Account")
        with prop_col2:
            default_profile = st.checkbox("Default Profile")
        with prop_col3:
            default_profile_image = st.checkbox("Default Profile Image")

        # No widget exists for this feature; the UI always sends True.
        has_extended_profile = True

        st.markdown("### Tweet Content")
        st.text_area("Sample Tweet", height=100)  # UI stays, ignored in logic

        st.caption(
            "Note: The prediction model uses only profile and activity metadata. "
            "Text fields are shown for completeness and are not used in model inference."
        )

        if st.button("🔍 Analyze Account"):
            with st.spinner('Analyzing account characteristics...'):
                # Build ONLY the exact 11 features the model expects
                features = build_model_features_from_ui(
                    followers_count=followers_count,
                    friends_count=friends_count,
                    listed_count=listed_count,
                    favorites_count=favorites_count,
                    statuses_count=statuses_count,
                    verified=verified,
                    default_profile=default_profile,
                    default_profile_image=default_profile_image,
                    has_extended_profile=has_extended_profile,
                    account_age_days=account_age
                )

                pred_class, confidence, probs = make_prediction(features, model)
                prediction_is_bot = (pred_class == 1)

                # Cosmetic pause so the spinner is visible.
                time.sleep(1)

            tab2.markdown("### Analysis Complete!")

            with tab2:
                if prediction_is_bot:
                    st.error("🤖 Bot Account Detected!")
                else:
                    st.success("👤 Human Account Detected!")

                # Confidence gauge directly below the result
                st.plotly_chart(
                    create_gauge_chart(confidence, prediction_is_bot),
                    use_container_width=True
                )

                st.markdown("### Feature Analysis")

                # Only plot importances when they line up one-to-one with the
                # known feature names; otherwise the DataFrame build would fail.
                importances = getattr(model, "feature_importances_", None)
                if importances is not None and len(importances) == len(MODEL_FEATURES):
                    feature_importance = pd.DataFrame({
                        'Feature': MODEL_FEATURES,
                        'Importance': importances
                    }).sort_values('Importance', ascending=False)

                    fig = px.bar(
                        feature_importance,
                        x='Importance',
                        y='Feature',
                        orientation='h',
                        title='Feature Importance Analysis'
                    )
                    fig.update_layout(height=400)
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.info("Feature importance is not available for this model type.")

                metrics_data = {
                    'Metric': ['Followers', 'Friends', 'Tweets', 'Favorites'],
                    'Count': [followers_count, friends_count, statuses_count, favorites_count]
                }
                fig = px.bar(
                    metrics_data,
                    x='Metric',
                    y='Count',
                    title='Account Metrics Overview',
                    color='Count',
                    color_continuous_scale='Viridis'
                )
                st.plotly_chart(fig, use_container_width=True)


def _render_evaluation(labels: pd.Series, predictions: pd.Series):
    """Show weighted F1/precision/recall and a report for labelled rows.

    Rows whose label is missing are ignored. Labels that scikit-learn cannot
    compare with the integer predictions produce a warning instead of a crash.
    """
    from sklearn.metrics import f1_score, precision_score, recall_score, classification_report

    labelled = labels.notna()
    if not labelled.any():
        st.info("The 'label' column has no values; skipping evaluation.")
        return

    y_true = labels[labelled].tolist()
    y_pred = predictions[labelled].astype(int).tolist()

    try:
        f1 = f1_score(y_true, y_pred, average='weighted')
        precision = precision_score(y_true, y_pred, average='weighted')
        recall = recall_score(y_true, y_pred, average='weighted')
        report = classification_report(y_true, y_pred)
    except (ValueError, TypeError) as exc:
        st.warning(f"Could not compute evaluation metrics from the 'label' column: {exc}")
        return

    st.markdown("### Evaluation Metrics")
    st.write("F1 Score:", f1)
    st.write("Precision:", precision)
    st.write("Recall:", recall)
    st.text(report)


def _render_csv_page():
    """Render the batch page: upload a CSV, score every row, show results."""
    st.title("CSV Batch Analysis")
    st.markdown(
        "Upload a CSV file with account data to run batch predictions. "
        "You can use \"testClick.csv\" from Dataset folder of this repository."
    )
    uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
    if uploaded_file is None:
        return

    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return

    st.markdown("### CSV Data Preview")
    st.dataframe(data.head())

    model = load_model()
    if model is None:
        st.stop()

    if data.empty:
        st.warning("The uploaded CSV contains no rows to analyze.")
        return

    with st.spinner("Processing accounts..."):
        features = build_model_features_from_frame(data)
        pred_classes, confidences = make_batch_predictions(features, model)

    data['prediction'] = pred_classes
    data['confidence'] = confidences
    data['account_type'] = np.where(pred_classes == 1, '🤖', '👤')

    st.markdown("### Batch Prediction Results")
    # Put the summary columns first; 'username' is optional in the upload.
    leading = [
        col for col in ('username', 'account_type', 'prediction', 'confidence')
        if col in data.columns
    ]
    cols = leading + [col for col in data.columns if col not in leading]
    st.dataframe(data[cols])

    # Optional evaluation if labels exist
    if 'label' in data.columns:
        _render_evaluation(data['label'], data['prediction'])


def _render_about_page():
    """Render the static "About" page."""
    st.title("About the Bot Detection System")

    st.markdown("""

🎯 System Overview

Our Twitter Bot Detection System demonstrates a supervised machine learning approach for detecting automated social media accounts using structured profile and activity metadata. The goal of the system is to understand how different behavioral and account-level attributes contribute to identifying bot-like patterns, rather than relying on text or content-based signals.

""", unsafe_allow_html=True)

    st.markdown("### 🔑 Key Features Analyzed")
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        #### Account Characteristics
        - Profile completeness
        - Account age and verification status
        - Username patterns
        - Profile description analysis

        #### Behavioral Patterns
        - Posting frequency
        - Engagement rates
        - Temporal patterns
        - Content similarity
        """)

    with col2:
        st.markdown("""
        #### Network Analysis
        - Follower-following ratio
        - Friend acquisition rate
        - Network growth patterns
        """)

    st.markdown("""

⚙ Technical Implementation

Data Processing: Cleaned and structured profile and activity metadata.
Feature Engineering: Derived behavioral features such as follower–following ratio, posting activity, and account age.
Modeling: Trained a Random Forest classifier on the engineered features.
Explainability: Used feature importance to interpret model predictions.

""", unsafe_allow_html=True)

    st.markdown("### 📊 System Performance")
    metrics_col1, metrics_col2, metrics_col3, metrics_col4 = st.columns(4)
    with metrics_col1:
        st.metric("Accuracy", "87%")
    with metrics_col2:
        st.metric("Precision", "89%")
    with metrics_col3:
        st.metric("Recall", "83%")
    with metrics_col4:
        st.metric("F1 Score", "86%")

    st.markdown("""
    ### 🎯 Common Use Cases
    - *Social Media Management*: Identify and remove bot accounts
    - *Research*: Analyze social media manipulation
    - *Marketing*: Verify authentic engagement
    - *Security*: Protect against automated threats
    """)


def _render_statistics_page():
    """Render the demo statistics dashboard (all numbers are illustrative)."""
    st.title("System Statistics")
    st.info(
        "This dashboard is a demo visualization intended to illustrate how system-level statistics and trends could be presented. The data shown here is illustrative and not generated from live usage or production logs."
    )

    col1, col2 = st.columns(2)

    with col1:
        detection_data = {
            'Category': ['Bots', 'Humans'],
            'Count': [737, 826]
        }
        fig = px.pie(
            detection_data,
            values='Count',
            names='Category',
            title='Detection Distribution',
            color_discrete_sequence=['#FF4B4B', '#00CC96']
        )
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        confidence_data = {
            'Score': ['90-100%', '80-90%', '70-80%', '60-70%', '50-60%'],
            'Count': [178, 447, 503, 352, 83]
        }
        fig = px.bar(
            confidence_data,
            x='Score',
            y='Count',
            title='Confidence Score Distribution',
            color='Count',
            color_continuous_scale='Viridis'
        )
        st.plotly_chart(fig, use_container_width=True)

    st.markdown("### Monthly Detection Trends")
    monthly_data = {
        'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'],
        'Bots Detected': [45, 52, 38, 65, 48, 76],
        'Accuracy': [92, 94, 93, 95, 94, 96]
    }
    fig = px.line(
        monthly_data,
        x='Month',
        y=['Accuracy', 'Bots Detected'],
        title='Monthly Performance Metrics',
        markers=True
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("### Key System Metrics")
    metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
    with metric_col1:
        st.metric("Total Analyses", "1,000", "+12%")
    with metric_col2:
        st.metric("Avg. Accuracy", "87%", "+2.3%")
    with metric_col3:
        st.metric("Bot Detection Rate", "47.2%", "-3.2%")
    with metric_col4:
        st.metric("Processing Time", "1.2s", "-0.3s")

    st.caption("*Demo Dashboard (Concept Visualization)*")


def main():
    """Draw the sidebar navigation and render the selected page."""
    # Fall back to a hosted icon rather than crashing when the logo file
    # has not been added to the project folder.
    if Path(SIDEBAR_LOGO_PATH).is_file():
        logo = SIDEBAR_LOGO_PATH
    else:
        logo = get_default_robot_icon()
    st.sidebar.image(logo, width=100)

    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Bot Detection", "CSV Analysis", "About", "Statistics"])

    if page == "Bot Detection":
        _render_detection_page()
    elif page == "CSV Analysis":
        _render_csv_page()
    elif page == "About":
        _render_about_page()
    else:  # Statistics page
        _render_statistics_page()


if __name__ == "__main__":
    main()