{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# A. Extract Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# All imports live in this cell so the notebook survives Restart & Run All.\n",
    "import importlib\n",
    "import os\n",
    "import pickle\n",
    "\n",
    "import cv2\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import submission.utils.utils as utils\n",
    "\n",
    "# Reload so edits to the helper module are picked up without restarting the kernel.\n",
    "importlib.reload(utils)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A.1. Extract Features for Multiclass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Features shape: (2845, 2013)\n",
      "Labels shape: (2845,)\n",
      "[1 1 1 ... 1 2 1]\n"
     ]
    }
   ],
   "source": [
    "# Dataset location. Set the PHASE_1A_BASE_PATH environment variable to run on\n",
    "# another machine; the default is the original author's local checkout.\n",
    "BASE_PATH = os.environ.get(\n",
    "    \"PHASE_1A_BASE_PATH\",\n",
    "    \"C:/Users/sharv/Documents/TUHH/sem-3/intelligent systems in medicine/project/baselines/phase_1a\",\n",
    ")\n",
    "PATH_TO_GT = os.path.join(BASE_PATH, \"gt_for_classification_multiclass_from_filenames_0_index.csv\")\n",
    "PATH_TO_IMAGES = os.path.join(BASE_PATH, \"images\")\n",
    "\n",
    "# Ground-truth table: one row per image with its file name and class id.\n",
    "df = pd.read_csv(PATH_TO_GT)\n",
    "images = df[\"file_name\"].tolist()\n",
    "\n",
    "features = []\n",
    "for image_name in images:\n",
    "    path_to_image = os.path.join(PATH_TO_IMAGES, image_name)\n",
    "    image = cv2.imread(path_to_image)\n",
    "    # cv2.imread returns None instead of raising when a file is missing or\n",
    "    # unreadable; stop here so the offending path is reported.\n",
    "    if image is None:\n",
    "        raise FileNotFoundError(f\"Could not read image: {path_to_image}\")\n",
    "    features.append(utils.extract_features_from_image(image))\n",
    "\n",
    "features_multiclass = np.array(features)\n",
    "# Labels come straight from the column, in the same row order as `images`.\n",
    "labels_multiclass = df[\"category_id\"].to_numpy(copy=True)\n",
    "\n",
    "print(\"Features shape:\", features_multiclass.shape)\n",
    "print(\"Labels shape:\", labels_multiclass.shape)\n",
    "print(labels_multiclass)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# B. Train Classification Model for Multiclass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Accuracy: 0.9666\n",
      " precision recall f1-score support\n",
      "\n",
      " 0 0.97 0.95 0.96 167\n",
      " 1 0.95 0.98 0.96 253\n",
      " 2 0.99 0.97 0.98 149\n",
      "\n",
      " accuracy 0.97 569\n",
      " macro avg 0.97 0.97 0.97 569\n",
      "weighted avg 0.97 0.97 0.97 569\n",
      "\n",
      "Confusion matrix:\n",
      " [[158 9 0]\n",
      " [ 5 247 1]\n",
      " [ 0 4 145]]\n"
     ]
    }
   ],
   "source": [
    "# The helper prints the test metrics shown below; only the first of its three\n",
    "# return values (the model) is used in this notebook.\n",
    "multiclass_model, _, _ = utils.train_svm_model(features_multiclass, labels_multiclass)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pipeline(steps=[('scaler', StandardScaler()), ('select', SelectKBest(k=500)),\n",
      " ('pca', PCA(n_components=100)),\n",
      " ('svc',\n",
      " SVC(class_weight='balanced', kernel='linear', probability=True,\n",
      " random_state=42))])\n"
     ]
    }
   ],
   "source": [
    "print(multiclass_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the whole pipeline object (scaler, feature selection, PCA and SVC),\n",
    "# not just the classifier weights, next to the submission code.\n",
    "SAVE_PATH = os.path.join(BASE_PATH, \"submission\")\n",
    "\n",
    "with open(os.path.join(SAVE_PATH, \"multiclass_model.pkl\"), \"wb\") as f:\n",
    "    pickle.dump(multiclass_model, f)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ism",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.25"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}