diff --git "a/Phishing Website Detection_Models & Training.ipynb" "b/Phishing Website Detection_Models & Training.ipynb" new file mode 100644--- /dev/null +++ "b/Phishing Website Detection_Models & Training.ipynb" @@ -0,0 +1,1533 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "colab_type": "code", + "id": "C297HhYulXcb", + "outputId": "d6e2a9df-586e-4192-b8ec-1e7b7025c0c3" + }, + "outputs": [], + "source": [ + "#importing basic packages\n", + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 217 + }, + "colab_type": "code", + "id": "fVPglpaf4REa", + "outputId": "eef4a4ca-e12d-4cd3-e011-20376fc752a2" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Domain | \n", + "Have_IP | \n", + "Have_At | \n", + "URL_Length | \n", + "URL_Depth | \n", + "Redirection | \n", + "https_Domain | \n", + "TinyURL | \n", + "Prefix/Suffix | \n", + "DNS_Record | \n", + "Web_Traffic | \n", + "Domain_Age | \n", + "Domain_End | \n", + "iFrame | \n", + "Mouse_Over | \n", + "Right_Click | \n", + "Web_Forwards | \n", + "Label | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "graphicriver.net | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| 1 | \n", + "ecnavi.jp | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| 2 | \n", + "hubpages.com | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| 3 | \n", + "extratorrent.cc | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| 4 | \n", + "icicibank.com | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| \n", + " | Have_IP | \n", + "Have_At | \n", + "URL_Length | \n", + "URL_Depth | \n", + "Redirection | \n", + "https_Domain | \n", + "TinyURL | \n", + "Prefix/Suffix | \n", + "DNS_Record | \n", + "Web_Traffic | \n", + "Domain_Age | \n", + "Domain_End | \n", + "iFrame | \n", + "Mouse_Over | \n", + "Right_Click | \n", + "Web_Forwards | \n", + "Label | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| 1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "
| 2 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "
| 3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "
| 4 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "4 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "
DecisionTreeClassifier(max_depth=5)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(max_depth=5)
RandomForestClassifier(max_depth=5)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(max_depth=5)
MLPClassifier(alpha=0.001, hidden_layer_sizes=[100, 100, 100])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MLPClassifier(alpha=0.001, hidden_layer_sizes=[100, 100, 100])
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.4, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.4, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=None, ...)
| \n", + " | ML Model | \n", + "Train Accuracy | \n", + "Test Accuracy | \n", + "
|---|---|---|---|
| 0 | \n", + "Decision Tree | \n", + "0.812 | \n", + "0.820 | \n", + "
| 1 | \n", + "Random Forest | \n", + "0.819 | \n", + "0.824 | \n", + "
| 2 | \n", + "Multilayer Perceptrons | \n", + "0.865 | \n", + "0.858 | \n", + "
| 3 | \n", + "Multilayer Perceptrons | \n", + "0.865 | \n", + "0.858 | \n", + "
| 4 | \n", + "XGBoost | \n", + "0.867 | \n", + "0.858 | \n", + "
| 5 | \n", + "AutoEncoder | \n", + "0.002 | \n", + "0.001 | \n", + "
| 6 | \n", + "SVM | \n", + "0.800 | \n", + "0.806 | \n", + "
| \n", + " | ML Model | \n", + "Train Accuracy | \n", + "Test Accuracy | \n", + "
|---|---|---|---|
| 4 | \n", + "XGBoost | \n", + "0.867 | \n", + "0.858 | \n", + "
| 2 | \n", + "Multilayer Perceptrons | \n", + "0.865 | \n", + "0.858 | \n", + "
| 3 | \n", + "Multilayer Perceptrons | \n", + "0.865 | \n", + "0.858 | \n", + "
| 1 | \n", + "Random Forest | \n", + "0.819 | \n", + "0.824 | \n", + "
| 0 | \n", + "Decision Tree | \n", + "0.812 | \n", + "0.820 | \n", + "
| 6 | \n", + "SVM | \n", + "0.800 | \n", + "0.806 | \n", + "
| 5 | \n", + "AutoEncoder | \n", + "0.002 | \n", + "0.001 | \n", + "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.4, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.4, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=7, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=None, ...)