Spaces:

PranavReddy18
/

Smart_Crop_Advisor

Runtime error

File size: 25,601 Bytes

97bf0f6

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np \n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "df=pd.read_csv(\"C:\\\\Users\\\\saipr\\\\Crop_Recommendation\\\\data\\\\Crop_recommendation.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>N</th>\n",
       "      <th>P</th>\n",
       "      <th>K</th>\n",
       "      <th>temperature</th>\n",
       "      <th>humidity</th>\n",
       "      <th>ph</th>\n",
       "      <th>rainfall</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>90</td>\n",
       "      <td>42</td>\n",
       "      <td>43</td>\n",
       "      <td>20.879744</td>\n",
       "      <td>82.002744</td>\n",
       "      <td>6.502985</td>\n",
       "      <td>202.935536</td>\n",
       "      <td>rice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>85</td>\n",
       "      <td>58</td>\n",
       "      <td>41</td>\n",
       "      <td>21.770462</td>\n",
       "      <td>80.319644</td>\n",
       "      <td>7.038096</td>\n",
       "      <td>226.655537</td>\n",
       "      <td>rice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>60</td>\n",
       "      <td>55</td>\n",
       "      <td>44</td>\n",
       "      <td>23.004459</td>\n",
       "      <td>82.320763</td>\n",
       "      <td>7.840207</td>\n",
       "      <td>263.964248</td>\n",
       "      <td>rice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>74</td>\n",
       "      <td>35</td>\n",
       "      <td>40</td>\n",
       "      <td>26.491096</td>\n",
       "      <td>80.158363</td>\n",
       "      <td>6.980401</td>\n",
       "      <td>242.864034</td>\n",
       "      <td>rice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>78</td>\n",
       "      <td>42</td>\n",
       "      <td>42</td>\n",
       "      <td>20.130175</td>\n",
       "      <td>81.604873</td>\n",
       "      <td>7.628473</td>\n",
       "      <td>262.717340</td>\n",
       "      <td>rice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2195</th>\n",
       "      <td>107</td>\n",
       "      <td>34</td>\n",
       "      <td>32</td>\n",
       "      <td>26.774637</td>\n",
       "      <td>66.413269</td>\n",
       "      <td>6.780064</td>\n",
       "      <td>177.774507</td>\n",
       "      <td>coffee</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2196</th>\n",
       "      <td>99</td>\n",
       "      <td>15</td>\n",
       "      <td>27</td>\n",
       "      <td>27.417112</td>\n",
       "      <td>56.636362</td>\n",
       "      <td>6.086922</td>\n",
       "      <td>127.924610</td>\n",
       "      <td>coffee</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2197</th>\n",
       "      <td>118</td>\n",
       "      <td>33</td>\n",
       "      <td>30</td>\n",
       "      <td>24.131797</td>\n",
       "      <td>67.225123</td>\n",
       "      <td>6.362608</td>\n",
       "      <td>173.322839</td>\n",
       "      <td>coffee</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2198</th>\n",
       "      <td>117</td>\n",
       "      <td>32</td>\n",
       "      <td>34</td>\n",
       "      <td>26.272418</td>\n",
       "      <td>52.127394</td>\n",
       "      <td>6.758793</td>\n",
       "      <td>127.175293</td>\n",
       "      <td>coffee</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2199</th>\n",
       "      <td>104</td>\n",
       "      <td>18</td>\n",
       "      <td>30</td>\n",
       "      <td>23.603016</td>\n",
       "      <td>60.396475</td>\n",
       "      <td>6.779833</td>\n",
       "      <td>140.937041</td>\n",
       "      <td>coffee</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2200 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        N   P   K  temperature   humidity        ph    rainfall   label\n",
       "0      90  42  43    20.879744  82.002744  6.502985  202.935536    rice\n",
       "1      85  58  41    21.770462  80.319644  7.038096  226.655537    rice\n",
       "2      60  55  44    23.004459  82.320763  7.840207  263.964248    rice\n",
       "3      74  35  40    26.491096  80.158363  6.980401  242.864034    rice\n",
       "4      78  42  42    20.130175  81.604873  7.628473  262.717340    rice\n",
       "...   ...  ..  ..          ...        ...       ...         ...     ...\n",
       "2195  107  34  32    26.774637  66.413269  6.780064  177.774507  coffee\n",
       "2196   99  15  27    27.417112  56.636362  6.086922  127.924610  coffee\n",
       "2197  118  33  30    24.131797  67.225123  6.362608  173.322839  coffee\n",
       "2198  117  32  34    26.272418  52.127394  6.758793  127.175293  coffee\n",
       "2199  104  18  30    23.603016  60.396475  6.779833  140.937041  coffee\n",
       "\n",
       "[2200 rows x 8 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "label\n",
       "rice           100\n",
       "maize          100\n",
       "jute           100\n",
       "cotton         100\n",
       "coconut        100\n",
       "papaya         100\n",
       "orange         100\n",
       "apple          100\n",
       "muskmelon      100\n",
       "watermelon     100\n",
       "grapes         100\n",
       "mango          100\n",
       "banana         100\n",
       "pomegranate    100\n",
       "lentil         100\n",
       "blackgram      100\n",
       "mungbean       100\n",
       "mothbeans      100\n",
       "pigeonpeas     100\n",
       "kidneybeans    100\n",
       "chickpea       100\n",
       "coffee         100\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['label'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Splitting the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "X=df.drop('label',axis=1)\n",
    "y=df['label']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Assuming X is your feature set and y is the corresponding label\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025/02/16 01:16:03 INFO mlflow.tracking.fluent: Experiment with name 'RandomForest_GridSearch' does not exist. Creating a new experiment.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 216 candidates, totalling 1080 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025/02/16 01:18:02 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best Parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}\n",
      "Best Accuracy: 0.9960227272727271\n",
      "🏃 View run illustrious-cub-960 at: http://127.0.0.1:5000/#/experiments/809510633914373352/runs/7be5a439659d4274a47adfc717813c77\n",
      "🧪 View experiment at: http://127.0.0.1:5000/#/experiments/809510633914373352\n"
     ]
    }
   ],
   "source": [
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "mlflow.set_tracking_uri(\"http://127.0.0.1:5000\")\n",
    "\n",
    "mlflow.set_experiment(\"RandomForest_GridSearch\")\n",
    "\n",
    "with mlflow.start_run():\n",
    "    rf = RandomForestClassifier(random_state=42)\n",
    "\n",
    "    params = {\n",
    "        'n_estimators': [50, 100, 200],\n",
    "        'max_depth': [None, 10, 20, 30], \n",
    "        'min_samples_split': [2, 5, 10],  \n",
    "        'min_samples_leaf': [1, 2, 4],  \n",
    "        'criterion': ['gini', 'entropy']  \n",
    "    }\n",
    "\n",
    "    grid_search_rf = GridSearchCV(estimator=rf, param_grid=params, \n",
    "                               cv=5, scoring='accuracy', n_jobs=-1, verbose=2)\n",
    "\n",
    "    grid_search_rf.fit(X_train, y_train)\n",
    "\n",
    "    best_params = grid_search_rf.best_params_\n",
    "    best_score = grid_search_rf.best_score_\n",
    "\n",
    "    mlflow.log_params(best_params)\n",
    "    mlflow.log_metric(\"best_accuracy\", best_score)\n",
    "\n",
    "    mlflow.sklearn.log_model(grid_search_rf.best_estimator_, \"best_random_forest_model\")\n",
    "\n",
    "    print(\"Best Parameters:\", best_params)\n",
    "    print(\"Best Accuracy:\", best_score)\n",
    "\n",
    "    mlflow.end_run()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_rfc=grid_search_rf.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'criterion': 'gini',\n",
       " 'max_depth': 10,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 5,\n",
       " 'n_estimators': 100}"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search_rf.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9960227272727271"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search_rf.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9954545454545455"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score,confusion_matrix\n",
    "y_pred=model_rfc.predict(X_test)\n",
    "accuracy_score(y_test,y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0 19  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0 19  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20]]\n"
     ]
    }
   ],
   "source": [
    "print(confusion_matrix(y_test,y_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Trying Out Support Vector Machines Algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025/02/16 01:21:04 INFO mlflow.tracking.fluent: Experiment with name 'SVC_GridSearch' does not exist. Creating a new experiment.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 16 candidates, totalling 80 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025/02/16 01:21:10 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best Parameters: {'C': 0.1, 'kernel': 'linear'}\n",
      "Best Accuracy: 0.9857954545454545\n",
      "🏃 View run agreeable-goose-248 at: http://127.0.0.1:5000/#/experiments/308727505154579858/runs/b513565508424ee1883922873cb0fa35\n",
      "🧪 View experiment at: http://127.0.0.1:5000/#/experiments/308727505154579858\n"
     ]
    }
   ],
   "source": [
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "mlflow.set_tracking_uri(\"http://127.0.0.1:5000\")\n",
    "\n",
    "mlflow.set_experiment(\"SVC_GridSearch\")\n",
    "\n",
    "with mlflow.start_run():\n",
    "    svc = SVC()\n",
    "\n",
    "    params = {\n",
    "        'C': [0.1, 1, 10, 100],  \n",
    "        'kernel': ['linear', 'rbf', 'poly', 'sigmoid']\n",
    "    }\n",
    "\n",
    "    grid_search_svc = GridSearchCV(estimator=svc, param_grid=params, \n",
    "                                   cv=5, scoring='accuracy', n_jobs=-1, verbose=2)\n",
    "\n",
    "    grid_search_svc.fit(X_train, y_train)\n",
    "\n",
    "    best_params = grid_search_svc.best_params_\n",
    "    best_score = grid_search_svc.best_score_\n",
    "\n",
    "    mlflow.log_params(best_params)\n",
    "    mlflow.log_metric(\"best_accuracy\", best_score)\n",
    "\n",
    "    mlflow.sklearn.log_model(grid_search_svc.best_estimator_, \"best_svc_model\")\n",
    "\n",
    "    print(\"Best Parameters:\", best_params)\n",
    "    print(\"Best Accuracy:\", best_score)\n",
    "\n",
    "    mlflow.end_run()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_svc=grid_search_svc.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred1=model_svc.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9931818181818182"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy_score(y_test,y_pred1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  1  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  1  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0 19  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20]]\n"
     ]
    }
   ],
   "source": [
    "print(confusion_matrix(y_test,y_pred1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradient Boosting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025/02/16 01:24:17 INFO mlflow.tracking.fluent: Experiment with name 'GradientBoosting_GridSearch' does not exist. Creating a new experiment.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025/02/16 01:25:36 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best Parameters: {'learning_rate': 0.1, 'n_estimators': 100}\n",
      "Best Accuracy: 0.9875\n",
      "🏃 View run rare-auk-421 at: http://127.0.0.1:5000/#/experiments/148959594547896690/runs/417ba78c49f845bd9ff43a5f9a381fb2\n",
      "🧪 View experiment at: http://127.0.0.1:5000/#/experiments/148959594547896690\n"
     ]
    }
   ],
   "source": [
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "mlflow.set_tracking_uri(\"http://127.0.0.1:5000\")\n",
    "\n",
    "mlflow.set_experiment(\"GradientBoosting_GridSearch\")\n",
    "\n",
    "with mlflow.start_run():\n",
    "    gb = GradientBoostingClassifier(random_state=42)\n",
    "\n",
    "    params = {\n",
    "        'n_estimators': [50, 100, 150],  \n",
    "        'learning_rate': [0.01, 0.05, 0.1],  \n",
    "    }\n",
    "\n",
    "    grid_search_gb = GridSearchCV(estimator=gb, param_grid=params, \n",
    "                                  cv=5, scoring='accuracy', n_jobs=-1, verbose=2)\n",
    "\n",
    "    grid_search_gb.fit(X_train, y_train)\n",
    "\n",
    "    best_params = grid_search_gb.best_params_\n",
    "    best_score = grid_search_gb.best_score_\n",
    "\n",
    "    mlflow.log_params(best_params)\n",
    "    mlflow.log_metric(\"best_accuracy\", best_score)\n",
    "\n",
    "    mlflow.sklearn.log_model(grid_search_gb.best_estimator_, \"best_gradient_boosting_model\")\n",
    "\n",
    "    print(\"Best Parameters:\", best_params)\n",
    "    print(\"Best Accuracy:\", best_score)\n",
    "\n",
    "    mlflow.end_run()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_gb=grid_search_gb.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred2=model_gb.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9886363636363636"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy_score(y_test,y_pred2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0 19  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0 19  0  0  1  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0 19  0  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20  0  0]\n",
      " [ 0  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0 18  0]\n",
      " [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 20]]\n"
     ]
    }
   ],
   "source": [
    "print(confusion_matrix(y_test,y_pred2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}