{ "cells": [ { "cell_type": "code", "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2026-01-20T09:42:14.745657Z", "iopub.status.busy": "2026-01-20T09:42:14.744873Z", "iopub.status.idle": "2026-01-20T09:42:14.750198Z", "shell.execute_reply": "2026-01-20T09:42:14.749406Z", "shell.execute_reply.started": "2026-01-20T09:42:14.745620Z" }, "trusted": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import accuracy_score, classification_report" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "execution": { "iopub.execute_input": "2026-01-20T09:42:14.752013Z", "iopub.status.busy": "2026-01-20T09:42:14.751712Z", "iopub.status.idle": "2026-01-20T09:42:14.831116Z", "shell.execute_reply": "2026-01-20T09:42:14.830201Z", "shell.execute_reply.started": "2026-01-20T09:42:14.751978Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1562, 20)\n" ] } ], "source": [ "# DATA_PATH = \"/kaggle/input/bot-detection-data/bot_detection_data.csv\"\n", "DATA_PATH = \"/kaggle/input/bot-detection-data/training_data.csv\"\n", "\n", "df = pd.read_csv(DATA_PATH)\n", "print(df.shape)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "execution": { "iopub.execute_input": "2026-01-20T09:42:14.833146Z", "iopub.status.busy": "2026-01-20T09:42:14.832832Z", "iopub.status.idle": "2026-01-20T09:42:14.849605Z", "shell.execute_reply": "2026-01-20T09:42:14.848831Z", "shell.execute_reply.started": "2026-01-20T09:42:14.833119Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "id_str | \n", "screen_name | \n", "location | \n", "description | \n", "url | \n", "followers_count | \n", "friends_count | \n", "listedcount | \n", "created_at | \n", "favourites_count | \n", "verified | \n", "statuses_count | \n", "lang | \n", "status | \n", "default_profile | \n", "default_profile_image | \n", "has_extended_profile | \n", "name | \n", "bot | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1.953701e+08 | \n", "195370058 | \n", "kanyejordan | \n", "NaN | \n", "This is what I do. I drop truth bombs. | \n", "NaN | \n", "2925 | \n", "3 | \n", "139 | \n", "9/26/2010 14:45 | \n", "0 | \n", "False | \n", "708 | \n", "en | \n", "Status(in_reply_to_status_id=None, favorited=F... | \n", "True | \n", "False | \n", "False | \n", "Kanye Jordan | \n", "1 | \n", "
| 1 | \n", "7.950000e+17 | \n", "7.95E+17 | \n", "astronaut_bot | \n", "NaN | \n", "Keeping an eye on astronauts coming and going.... | \n", "NaN | \n", "9 | \n", "0 | \n", "5 | \n", "Fri Nov 04 12:11:27 +0000 2016 | \n", "0 | \n", "False | \n", "6 | \n", "en | \n", "{'created_at': 'Tue Nov 22 16:52:31 +0000 2016... | \n", "True | \n", "False | \n", "False | \n", "Astronaut Notifier | \n", "1 | \n", "
| 2 | \n", "2.976541e+09 | \n", "2976541239 | \n", "TheRiddlerBot | \n", "Coimbra, Portugal | \n", "Solve the riddle by replying only the name of ... | \n", "https://t.co/1v8BON9QpT | \n", "132 | \n", "46 | \n", "24 | \n", "1/13/2015 15:10 | \n", "740 | \n", "False | \n", "7346 | \n", "en | \n", "Status(contributors=None, truncated=False, tex... | \n", "True | \n", "False | \n", "False | \n", "TheRiddlerBot | \n", "1 | \n", "
| 3 | \n", "2.243832e+08 | \n", "224383150 | \n", "mlegoudes262 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "54 | \n", "1351 | \n", "0 | \n", "Wed Dec 08 21:29:31 +0000 2010 | \n", "2 | \n", "False | \n", "6 | \n", "en | \n", "{'truncated': False, 'entities': {'user_mentio... | \n", "True | \n", "False | \n", "False | \n", "Laurie Poulsen | \n", "1 | \n", "
| 4 | \n", "1.134712e+07 | \n", "11347122 | \n", "GavinNewsom | \n", "California | \n", "Husband & father. 49th Lt. Gov. of California ... | \n", "https://t.co/XrGnfzTDJD | \n", "1300380 | \n", "24248 | \n", "7089 | \n", "Wed Dec 19 19:53:42 +0000 2007 | \n", "4184 | \n", "True | \n", "8536 | \n", "en | \n", "{u'contributors': None, u'truncated': True, u'... | \n", "False | \n", "False | \n", "False | \n", "Gavin Newsom | \n", "0 | \n", "
RandomForestClassifier(class_weight='balanced', max_depth=20,\n",
" min_samples_leaf=2, n_estimators=300, n_jobs=-1,\n",
" random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomForestClassifier(class_weight='balanced', max_depth=20,\n",
" min_samples_leaf=2, n_estimators=300, n_jobs=-1,\n",
" random_state=42)