{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# A. Extract Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<module 'submission.utils.utils' from 'c:\\\\Users\\\\sharv\\\\Documents\\\\TUHH\\\\sem-3\\\\intelligent systems in medicine\\\\project\\\\baselines\\\\phase_1a\\\\submission\\\\utils\\\\utils.py'>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import importlib\n",
    "\n",
    "import submission.utils.utils as utils\n",
    "\n",
    "# Re-execute utils.py so edits made after the module was first imported in this\n",
    "# kernel take effect. reload() returns the module, which is what this cell displays.\n",
    "importlib.reload(utils)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A.1. Extract Features for Multiclass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Features shape: (2845, 2013)\n",
      "Labels shape: (2845,)\n",
      "[1 1 1 ... 1 2 1]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "import os\n",
    "import pandas as pd\n",
    "import cv2\n",
    "import numpy as np\n",
    "\n",
    "BASE_PATH = \"C:/Users/sharv/Documents/TUHH/sem-3/intelligent systems in medicine/project/baselines/phase_1a\"\n",
    "PATH_TO_GT = os.path.join(BASE_PATH, \"gt_for_classification_multiclass_from_filenames_0_index.csv\")\n",
    "PATH_TO_IMAGES = os.path.join(BASE_PATH, \"images\")\n",
    "\n",
    "# Ground-truth CSV; the columns read below are file_name and category_id.\n",
    "df = pd.read_csv(PATH_TO_GT)\n",
    "\n",
    "images = df[\"file_name\"].tolist()\n",
    "\n",
    "features = []\n",
    "labels = []\n",
    "\n",
    "# Walk the two columns in lockstep instead of calling df.iloc[i] twice per row,\n",
    "# which builds a temporary row Series on every access.\n",
    "for image_name, label in zip(images, df[\"category_id\"]):\n",
    "    path_to_image = os.path.join(PATH_TO_IMAGES, image_name)\n",
    "    image = cv2.imread(path_to_image)\n",
    "\n",
    "    # cv2.imread does not raise on a missing or undecodable file; it returns None.\n",
    "    # Fail here with the offending path rather than inside feature extraction.\n",
    "    if image is None:\n",
    "        raise FileNotFoundError(f\"Could not read image: {path_to_image}\")\n",
    "\n",
    "    features.append(utils.extract_features_from_image(image))\n",
    "    labels.append(label)\n",
    "\n",
    "# One feature vector per image, stacked in CSV row order, with the matching labels.\n",
    "features_multiclass = np.array(features)\n",
    "labels_multiclass = np.array(labels)\n",
    "\n",
    "print(\"Features shape:\", features_multiclass.shape)\n",
    "print(\"Labels shape:\", labels_multiclass.shape)\n",
    "print(labels_multiclass)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# B. Train Classification Model for Multiclass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Accuracy: 0.9666\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.97      0.95      0.96       167\n",
      "           1       0.95      0.98      0.96       253\n",
      "           2       0.99      0.97      0.98       149\n",
      "\n",
      "    accuracy                           0.97       569\n",
      "   macro avg       0.97      0.97      0.97       569\n",
      "weighted avg       0.97      0.97      0.97       569\n",
      "\n",
      "Confusion matrix:\n",
      " [[158   9   0]\n",
      " [  5 247   1]\n",
      " [  0   4 145]]\n"
     ]
    }
   ],
   "source": [
    "# train_svm_model returns three values; only the first (the model) is kept here.\n",
    "# The accuracy, report and confusion matrix shown below are printed by the helper.\n",
    "multiclass_model, _, _ = utils.train_svm_model(\n",
    "    features_multiclass,\n",
    "    labels_multiclass,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pipeline(steps=[('scaler', StandardScaler()), ('select', SelectKBest(k=500)),\n",
      "                ('pca', PCA(n_components=100)),\n",
      "                ('svc',\n",
      "                 SVC(class_weight='balanced', kernel='linear', probability=True,\n",
      "                     random_state=42))])\n"
     ]
    }
   ],
   "source": [
    "# Show the trained pipeline's steps and their hyperparameters.\n",
    "print(multiclass_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "SAVE_PATH = \"C:/Users/sharv/Documents/TUHH/sem-3/intelligent systems in medicine/project/baselines/phase_1a/submission\"\n",
    "model_file = os.path.join(SAVE_PATH, \"multiclass_model.pkl\")\n",
    "\n",
    "# Pickle the whole multiclass_model object (not just its weights) into SAVE_PATH.\n",
    "with open(model_file, \"wb\") as model_out:\n",
    "    pickle.dump(multiclass_model, model_out)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ism",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.25"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}