{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.745657Z",
     "iopub.status.busy": "2026-01-20T09:42:14.744873Z",
     "iopub.status.idle": "2026-01-20T09:42:14.750198Z",
     "shell.execute_reply": "2026-01-20T09:42:14.749406Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.745620Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import accuracy_score, classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.752013Z",
     "iopub.status.busy": "2026-01-20T09:42:14.751712Z",
     "iopub.status.idle": "2026-01-20T09:42:14.831116Z",
     "shell.execute_reply": "2026-01-20T09:42:14.830201Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.751978Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1562, 20)\n"
     ]
    }
   ],
   "source": [
    "# DATA_PATH = \"/kaggle/input/bot-detection-data/bot_detection_data.csv\"\n",
    "DATA_PATH = \"/kaggle/input/bot-detection-data/training_data.csv\"\n",
    "\n",
    "df = pd.read_csv(DATA_PATH)\n",
    "print(df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.833146Z",
     "iopub.status.busy": "2026-01-20T09:42:14.832832Z",
     "iopub.status.idle": "2026-01-20T09:42:14.849605Z",
     "shell.execute_reply": "2026-01-20T09:42:14.848831Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.833119Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>id_str</th>\n",
       "      <th>screen_name</th>\n",
       "      <th>location</th>\n",
       "      <th>description</th>\n",
       "      <th>url</th>\n",
       "      <th>followers_count</th>\n",
       "      <th>friends_count</th>\n",
       "      <th>listedcount</th>\n",
       "      <th>created_at</th>\n",
       "      <th>favourites_count</th>\n",
       "      <th>verified</th>\n",
       "      <th>statuses_count</th>\n",
       "      <th>lang</th>\n",
       "      <th>status</th>\n",
       "      <th>default_profile</th>\n",
       "      <th>default_profile_image</th>\n",
       "      <th>has_extended_profile</th>\n",
       "      <th>name</th>\n",
       "      <th>bot</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.953701e+08</td>\n",
       "      <td>195370058</td>\n",
       "      <td>kanyejordan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>This is what I do. I drop truth bombs.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2925</td>\n",
       "      <td>3</td>\n",
       "      <td>139</td>\n",
       "      <td>9/26/2010 14:45</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>708</td>\n",
       "      <td>en</td>\n",
       "      <td>Status(in_reply_to_status_id=None, favorited=F...</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Kanye Jordan</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7.950000e+17</td>\n",
       "      <td>7.95E+17</td>\n",
       "      <td>astronaut_bot</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Keeping an eye on astronauts coming and going....</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>Fri Nov 04 12:11:27 +0000 2016</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>6</td>\n",
       "      <td>en</td>\n",
       "      <td>{'created_at': 'Tue Nov 22 16:52:31 +0000 2016...</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Astronaut Notifier</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2.976541e+09</td>\n",
       "      <td>2976541239</td>\n",
       "      <td>TheRiddlerBot</td>\n",
       "      <td>Coimbra, Portugal</td>\n",
       "      <td>Solve the riddle by replying only the name of ...</td>\n",
       "      <td>https://t.co/1v8BON9QpT</td>\n",
       "      <td>132</td>\n",
       "      <td>46</td>\n",
       "      <td>24</td>\n",
       "      <td>1/13/2015 15:10</td>\n",
       "      <td>740</td>\n",
       "      <td>False</td>\n",
       "      <td>7346</td>\n",
       "      <td>en</td>\n",
       "      <td>Status(contributors=None, truncated=False, tex...</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>TheRiddlerBot</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.243832e+08</td>\n",
       "      <td>224383150</td>\n",
       "      <td>mlegoudes262</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>54</td>\n",
       "      <td>1351</td>\n",
       "      <td>0</td>\n",
       "      <td>Wed Dec 08 21:29:31 +0000 2010</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>6</td>\n",
       "      <td>en</td>\n",
       "      <td>{'truncated': False, 'entities': {'user_mentio...</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Laurie Poulsen</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.134712e+07</td>\n",
       "      <td>11347122</td>\n",
       "      <td>GavinNewsom</td>\n",
       "      <td>California</td>\n",
       "      <td>Husband &amp; father. 49th Lt. Gov. of California ...</td>\n",
       "      <td>https://t.co/XrGnfzTDJD</td>\n",
       "      <td>1300380</td>\n",
       "      <td>24248</td>\n",
       "      <td>7089</td>\n",
       "      <td>Wed Dec 19 19:53:42 +0000 2007</td>\n",
       "      <td>4184</td>\n",
       "      <td>True</td>\n",
       "      <td>8536</td>\n",
       "      <td>en</td>\n",
       "      <td>{u'contributors': None, u'truncated': True, u'...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>Gavin Newsom</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             id      id_str    screen_name           location  \\\n",
       "0  1.953701e+08   195370058    kanyejordan                NaN   \n",
       "1  7.950000e+17    7.95E+17  astronaut_bot                NaN   \n",
       "2  2.976541e+09  2976541239  TheRiddlerBot  Coimbra, Portugal   \n",
       "3  2.243832e+08   224383150   mlegoudes262                NaN   \n",
       "4  1.134712e+07    11347122    GavinNewsom         California   \n",
       "\n",
       "                                         description                      url  \\\n",
       "0             This is what I do. I drop truth bombs.                      NaN   \n",
       "1  Keeping an eye on astronauts coming and going....                      NaN   \n",
       "2  Solve the riddle by replying only the name of ...  https://t.co/1v8BON9QpT   \n",
       "3                                                NaN                      NaN   \n",
       "4  Husband & father. 49th Lt. Gov. of California ...  https://t.co/XrGnfzTDJD   \n",
       "\n",
       "   followers_count  friends_count  listedcount  \\\n",
       "0             2925              3          139   \n",
       "1                9              0            5   \n",
       "2              132             46           24   \n",
       "3               54           1351            0   \n",
       "4          1300380          24248         7089   \n",
       "\n",
       "                       created_at  favourites_count  verified  statuses_count  \\\n",
       "0                 9/26/2010 14:45                 0     False             708   \n",
       "1  Fri Nov 04 12:11:27 +0000 2016                 0     False               6   \n",
       "2                 1/13/2015 15:10               740     False            7346   \n",
       "3  Wed Dec 08 21:29:31 +0000 2010                 2     False               6   \n",
       "4  Wed Dec 19 19:53:42 +0000 2007              4184      True            8536   \n",
       "\n",
       "  lang                                             status  default_profile  \\\n",
       "0   en  Status(in_reply_to_status_id=None, favorited=F...             True   \n",
       "1   en  {'created_at': 'Tue Nov 22 16:52:31 +0000 2016...             True   \n",
       "2   en  Status(contributors=None, truncated=False, tex...             True   \n",
       "3   en  {'truncated': False, 'entities': {'user_mentio...             True   \n",
       "4   en  {u'contributors': None, u'truncated': True, u'...            False   \n",
       "\n",
       "   default_profile_image has_extended_profile                name  bot  \n",
       "0                  False                False        Kanye Jordan    1  \n",
       "1                  False                False  Astronaut Notifier    1  \n",
       "2                  False                False       TheRiddlerBot    1  \n",
       "3                  False                False      Laurie Poulsen    1  \n",
       "4                  False                False        Gavin Newsom    0  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.851012Z",
     "iopub.status.busy": "2026-01-20T09:42:14.850700Z",
     "iopub.status.idle": "2026-01-20T09:42:14.867213Z",
     "shell.execute_reply": "2026-01-20T09:42:14.866311Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.850985Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "FEATURES = [\n",
    "    \"followers_count\",\n",
    "    \"friends_count\",\n",
    "    \"listedcount\",\n",
    "    \"favourites_count\",\n",
    "    \"statuses_count\",\n",
    "    \"verified\",\n",
    "    \"default_profile\",\n",
    "    \"default_profile_image\",\n",
    "    \"has_extended_profile\"\n",
    "]\n",
    "\n",
    "X = df[FEATURES].fillna(0)\n",
    "y = df[\"bot\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.869442Z",
     "iopub.status.busy": "2026-01-20T09:42:14.869099Z",
     "iopub.status.idle": "2026-01-20T09:42:14.884158Z",
     "shell.execute_reply": "2026-01-20T09:42:14.883300Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.869405Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "bool_cols = [\n",
    "    \"verified\",\n",
    "    \"default_profile\",\n",
    "    \"default_profile_image\",\n",
    "    \"has_extended_profile\"\n",
    "]\n",
    "\n",
    "for col in bool_cols:\n",
    "    X[col] = X[col].astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.885944Z",
     "iopub.status.busy": "2026-01-20T09:42:14.885337Z",
     "iopub.status.idle": "2026-01-20T09:42:14.899788Z",
     "shell.execute_reply": "2026-01-20T09:42:14.898857Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.885913Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "X[\"follow_ratio\"] = X[\"followers_count\"] / (X[\"friends_count\"] + 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.901446Z",
     "iopub.status.busy": "2026-01-20T09:42:14.901029Z",
     "iopub.status.idle": "2026-01-20T09:42:14.920930Z",
     "shell.execute_reply": "2026-01-20T09:42:14.920119Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.901408Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "df[\"created_at\"] = pd.to_datetime(df[\"created_at\"], errors=\"coerce\")\n",
    "\n",
    "X[\"account_age_days\"] = (\n",
    "    pd.Timestamp.now() - df[\"created_at\"]\n",
    ").dt.days.fillna(0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.922400Z",
     "iopub.status.busy": "2026-01-20T09:42:14.922068Z",
     "iopub.status.idle": "2026-01-20T09:42:14.940152Z",
     "shell.execute_reply": "2026-01-20T09:42:14.939293Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.922365Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X,\n",
    "    y,\n",
    "    test_size=0.2,\n",
    "    random_state=42\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:14.941565Z",
     "iopub.status.busy": "2026-01-20T09:42:14.941261Z",
     "iopub.status.idle": "2026-01-20T09:42:15.734600Z",
     "shell.execute_reply": "2026-01-20T09:42:15.733765Z",
     "shell.execute_reply.started": "2026-01-20T09:42:14.941540Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-2 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: #000;\n",
       "  --sklearn-color-text-muted: #666;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-2 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-2 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: flex;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "  align-items: start;\n",
       "  justify-content: space-between;\n",
       "  gap: 0.5em;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label .caption {\n",
       "  font-size: 0.6rem;\n",
       "  font-weight: lighter;\n",
       "  color: var(--sklearn-color-text-muted);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-2 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-2 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 0.5em;\n",
       "  text-align: center;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-2 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(class_weight=&#x27;balanced&#x27;, max_depth=20,\n",
       "                       min_samples_leaf=2, n_estimators=300, n_jobs=-1,\n",
       "                       random_state=42)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>RandomForestClassifier</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.ensemble.RandomForestClassifier.html\">?<span>Documentation for RandomForestClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>RandomForestClassifier(class_weight=&#x27;balanced&#x27;, max_depth=20,\n",
       "                       min_samples_leaf=2, n_estimators=300, n_jobs=-1,\n",
       "                       random_state=42)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "RandomForestClassifier(class_weight='balanced', max_depth=20,\n",
       "                       min_samples_leaf=2, n_estimators=300, n_jobs=-1,\n",
       "                       random_state=42)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "rf = RandomForestClassifier(\n",
    "    n_estimators=300,\n",
    "    max_depth=20,\n",
    "    min_samples_leaf=2,\n",
    "    class_weight=\"balanced\",\n",
    "    random_state=42,\n",
    "    n_jobs=-1\n",
    ")\n",
    "\n",
    "rf.fit(X_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:15.736130Z",
     "iopub.status.busy": "2026-01-20T09:42:15.735775Z",
     "iopub.status.idle": "2026-01-20T09:42:15.851114Z",
     "shell.execute_reply": "2026-01-20T09:42:15.850291Z",
     "shell.execute_reply.started": "2026-01-20T09:42:15.736093Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.8785942492012779\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.90      0.87      0.89       169\n",
      "           1       0.85      0.89      0.87       144\n",
      "\n",
      "    accuracy                           0.88       313\n",
      "   macro avg       0.88      0.88      0.88       313\n",
      "weighted avg       0.88      0.88      0.88       313\n",
      "\n"
     ]
    }
   ],
   "source": [
    "preds = rf.predict(X_test)\n",
    "\n",
    "print(\"Accuracy:\", accuracy_score(y_test, preds))\n",
    "print(classification_report(y_test, preds))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:15.853420Z",
     "iopub.status.busy": "2026-01-20T09:42:15.853099Z",
     "iopub.status.idle": "2026-01-20T09:42:15.919231Z",
     "shell.execute_reply": "2026-01-20T09:42:15.918360Z",
     "shell.execute_reply.started": "2026-01-20T09:42:15.853391Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                  feature  importance\n",
      "1           friends_count    0.204309\n",
      "9            follow_ratio    0.144836\n",
      "3        favourites_count    0.135528\n",
      "0         followers_count    0.109556\n",
      "5                verified    0.099516\n",
      "10       account_age_days    0.090862\n",
      "2             listedcount    0.088300\n",
      "4          statuses_count    0.076216\n",
      "6         default_profile    0.039780\n",
      "8    has_extended_profile    0.008163\n",
      "7   default_profile_image    0.002935\n"
     ]
    }
   ],
   "source": [
    "imp = pd.DataFrame({\n",
    "    \"feature\": X.columns,\n",
    "    \"importance\": rf.feature_importances_\n",
    "}).sort_values(by=\"importance\", ascending=False)\n",
    "\n",
    "print(imp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:15.920668Z",
     "iopub.status.busy": "2026-01-20T09:42:15.920341Z",
     "iopub.status.idle": "2026-01-20T09:42:16.022530Z",
     "shell.execute_reply": "2026-01-20T09:42:16.021678Z",
     "shell.execute_reply.started": "2026-01-20T09:42:15.920632Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['bot_model.joblib']"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import joblib\n",
    "\n",
    "joblib.dump(rf, \"bot_model.joblib\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-01-20T09:42:16.024523Z",
     "iopub.status.busy": "2026-01-20T09:42:16.023646Z",
     "iopub.status.idle": "2026-01-20T09:42:16.029010Z",
     "shell.execute_reply": "2026-01-20T09:42:16.028344Z",
     "shell.execute_reply.started": "2026-01-20T09:42:16.024490Z"
    },
    "trusted": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RF trained feature count: 11\n",
      "RF trained feature names:\n",
      "['followers_count', 'friends_count', 'listedcount', 'favourites_count', 'statuses_count', 'verified', 'default_profile', 'default_profile_image', 'has_extended_profile', 'follow_ratio', 'account_age_days']\n"
     ]
    }
   ],
   "source": [
    "# ✅ After training RF\n",
    "print(\"RF trained feature count:\", len(rf.feature_names_in_))\n",
    "print(\"RF trained feature names:\")\n",
    "print(list(rf.feature_names_in_))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "none",
   "dataSources": [
    {
     "datasetId": 9259817,
     "sourceId": 14497523,
     "sourceType": "datasetVersion"
    }
   ],
   "dockerImageVersionId": 31234,
   "isGpuEnabled": false,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}