{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "sW2ThP777mBG" }, "source": [ "# **Exploratory Data Analysis for Bot Detection**\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pLAYgHzBCh3U" }, "outputs": [], "source": [ "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score, classification_report\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 486 }, "id": "bRLQu3EuDtQM", "outputId": "a9758b5d-dc44-4347-c07b-97efff2f3c7b" }, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "id_str | \n", "screen_name | \n", "location | \n", "description | \n", "url | \n", "followers_count | \n", "friends_count | \n", "listedcount | \n", "created_at | \n", "favourites_count | \n", "verified | \n", "statuses_count | \n", "lang | \n", "status | \n", "default_profile | \n", "default_profile_image | \n", "has_extended_profile | \n", "name | \n", "bot | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1.953701e+08 | \n", "195370058 | \n", "kanyejordan | \n", "NaN | \n", "This is what I do. I drop truth bombs. | \n", "NaN | \n", "2925 | \n", "3 | \n", "139 | \n", "9/26/2010 14:45 | \n", "0 | \n", "False | \n", "708 | \n", "en | \n", "Status(in_reply_to_status_id=None, favorited=F... | \n", "True | \n", "False | \n", "False | \n", "Kanye Jordan | \n", "1 | \n", "
| 1 | \n", "7.950000e+17 | \n", "7.95E+17 | \n", "astronaut_bot | \n", "NaN | \n", "Keeping an eye on astronauts coming and going.... | \n", "NaN | \n", "9 | \n", "0 | \n", "5 | \n", "Fri Nov 04 12:11:27 +0000 2016 | \n", "0 | \n", "False | \n", "6 | \n", "en | \n", "{'created_at': 'Tue Nov 22 16:52:31 +0000 2016... | \n", "True | \n", "False | \n", "False | \n", "Astronaut Notifier | \n", "1 | \n", "
| 2 | \n", "2.976541e+09 | \n", "2976541239 | \n", "TheRiddlerBot | \n", "Coimbra, Portugal | \n", "Solve the riddle by replying only the name of ... | \n", "https://t.co/1v8BON9QpT | \n", "132 | \n", "46 | \n", "24 | \n", "1/13/2015 15:10 | \n", "740 | \n", "False | \n", "7346 | \n", "en | \n", "Status(contributors=None, truncated=False, tex... | \n", "True | \n", "False | \n", "False | \n", "TheRiddlerBot | \n", "1 | \n", "
| 3 | \n", "2.243832e+08 | \n", "224383150 | \n", "mlegoudes262 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "54 | \n", "1351 | \n", "0 | \n", "Wed Dec 08 21:29:31 +0000 2010 | \n", "2 | \n", "False | \n", "6 | \n", "en | \n", "{'truncated': False, 'entities': {'user_mentio... | \n", "True | \n", "False | \n", "False | \n", "Laurie Poulsen | \n", "1 | \n", "
| 4 | \n", "1.134712e+07 | \n", "11347122 | \n", "GavinNewsom | \n", "California | \n", "Husband & father. 49th Lt. Gov. of California ... | \n", "https://t.co/XrGnfzTDJD | \n", "1300380 | \n", "24248 | \n", "7089 | \n", "Wed Dec 19 19:53:42 +0000 2007 | \n", "4184 | \n", "True | \n", "8536 | \n", "en | \n", "{u'contributors': None, u'truncated': True, u'... | \n", "False | \n", "False | \n", "False | \n", "Gavin Newsom | \n", "0 | \n", "