Spaces:

Md-Hakim
/

image-classification-using-mlops

Sleeping

App Files Files Community

hakim committed on Jul 28, 2024

Commit

05f7b3b

1 Parent(s): 00004c2

dvc and pipeline added

Browse files

Files changed (29) hide show

.github/workflows/.gitkeep +0 -0
.github/workflows/main.yaml +20 -0
config/config.yaml +26 -0
research/01_data_integation.ipynb +249 -0
research/02_prepare_base_model.ipynb +339 -0
research/03_stage_callbacks.ipynb +194 -0
research/04_train.ipynb +327 -0
research/05_model_evaluation.ipynb +209 -0
research/tails.ipynb +0 -0
src/cnnClassfier/__init__.py +23 -0
src/cnnClassfier/components/__init__.py +0 -0
src/cnnClassfier/components/base_model.py +68 -0
src/cnnClassfier/components/callbacks.py +33 -0
src/cnnClassfier/components/data_ingestion.py +38 -0
src/cnnClassfier/components/evaluation.py +45 -0
src/cnnClassfier/components/train.py +87 -0
src/cnnClassfier/config/__init__.py +0 -0
src/cnnClassfier/config/configuration.py +107 -0
src/cnnClassfier/constants/__init__.py +4 -0
src/cnnClassfier/entity/__init__.py +0 -0
src/cnnClassfier/entity/config_entity.py +54 -0
src/cnnClassfier/pipeline/__init__.py +0 -0
src/cnnClassfier/pipeline/predict.py +30 -0
src/cnnClassfier/pipeline/stage02_base_model.py +30 -0
src/cnnClassfier/pipeline/stage_01_data_ingestion.py +29 -0
src/cnnClassfier/pipeline/stage_03_train.py +43 -0
src/cnnClassfier/pipeline/stage_04_evaluation.py +30 -0
src/cnnClassfier/utils/__init__.py +0 -0
src/cnnClassfier/utils/common.py +144 -0

.github/workflows/.gitkeep ADDED Viewed

File without changes

.github/workflows/main.yaml ADDED Viewed

	@@ -0,0 +1,20 @@

+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      # Use a maintained major version of the checkout action (v2 is outdated).
+      - uses: actions/checkout@v4
+        with:
+          # Fetch the complete history; a shallow clone cannot be pushed to the Space remote.
+          fetch-depth: 0
+          # Download real LFS objects so large files are pushed instead of pointer stubs.
+          lfs: true
+      - name: Push to hub
+        env:
+          # Write token for the Space, stored as a repository secret.
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push --force https://Md-Hakim:$HF_TOKEN@huggingface.co/spaces/Md-Hakim/image-classification-using-mlops main

config/config.yaml ADDED Viewed

	@@ -0,0 +1,26 @@

+artifacts_root: artifacts
+data_ingestion:
+  root_dir: artifacts/data_ingestion
+  source_URL: https://github.com/entbappy/Branching-tutorial/raw/master/Chicken-fecal-images.zip
+  local_data_file: artifacts/data_ingestion/data.zip
+  unzip_dir: artifacts/data_ingestion
+prepare_base_model:
+  root_dir: artifacts/prepare_base_model
+  base_model_path: artifacts/prepare_base_model/base_model.h5
+  updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
+prepare_callbacks:
+  root_dir: artifacts/prepare_callbacks
+  tensorboard_root_log_dir: artifacts/prepare_callbacks/tensorboard_log_dir
+  checkpoint_model_filepath: artifacts/prepare_callbacks/checkpoint_dir/model.h5
+training:
+  root_dir: artifacts/training
+  trained_model_path: artifacts/training/model.h5

research/01_data_integation.ipynb ADDED Viewed

	@@ -0,0 +1,249 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops\\\\research'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "%pwd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "os.chdir('../')\n",
+    "%pwd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass\n",
+    "from pathlib import Path\n",
+    "\n",
+    "\n",
+    "@dataclass(frozen=True)\n",
+    "class DataIngestionConfig:\n",
+    "    root_dir: Path\n",
+    "    source_URL: str\n",
+    "    local_data_file: Path\n",
+    "    unzip_dir: Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cnnClassfier.constants import *\n",
+    "from cnnClassfier.utils.common import read_yaml, create_directories"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ConfigurationManager:\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        config_filepath = CONFIG_FILE_PATH,\n",
+    "        params_filepath = PARAMS_FILE_PATH):\n",
+    "\n",
+    "        self.config = read_yaml(config_filepath)\n",
+    "        self.params = read_yaml(params_filepath)\n",
+    "\n",
+    "        create_directories([self.config.artifacts_root])\n",
+    "\n",
+    "\n",
+    "    \n",
+    "    def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
+    "        config = self.config.data_ingestion\n",
+    "\n",
+    "        create_directories([config.root_dir])\n",
+    "\n",
+    "        data_ingestion_config = DataIngestionConfig(\n",
+    "            root_dir=config.root_dir,\n",
+    "            source_URL=config.source_URL,\n",
+    "            local_data_file=config.local_data_file,\n",
+    "            unzip_dir=config.unzip_dir \n",
+    "        )\n",
+    "\n",
+    "        return data_ingestion_config\n",
+    "      "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from tqdm.notebook import tqdm\n",
+    "import urllib.request as request\n",
+    "import zipfile\n",
+    "from cnnClassfier import logger\n",
+    "from cnnClassfier.utils.common import get_size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class DataIngestion:\n",
+    "    def __init__(self, config: DataIngestionConfig):\n",
+    "        self.config = config\n",
+    "\n",
+    "\n",
+    "    \n",
+    "    def download_file(self):\n",
+    "        if not os.path.exists(self.config.local_data_file):\n",
+    "            filename, headers = request.urlretrieve(\n",
+    "                url = self.config.source_URL,\n",
+    "                filename = self.config.local_data_file\n",
+    "            )\n",
+    "            logger.info(f\"{filename} download! with following info: \\n{headers}\")\n",
+    "        else:\n",
+    "            logger.info(f\"File already exists of size: {get_size(Path(self.config.local_data_file))}\")  \n",
+    "\n",
+    "\n",
+    "    \n",
+    "    def extract_zip_file(self):\n",
+    "        \"\"\"\n",
+    "        zip_file_path: str\n",
+    "        Extracts the zip file into the data directory\n",
+    "        Function returns None\n",
+    "        \"\"\"\n",
+    "        unzip_path = self.config.unzip_dir\n",
+    "        os.makedirs(unzip_path, exist_ok=True)\n",
+    "        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
+    "            zip_ref.extractall(unzip_path)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-07-26 23:35:21,072: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
+      "[2024-07-26 23:35:21,074: INFO: common: yaml file: params.yaml loaded successfully]\n",
+      "[2024-07-26 23:35:21,075: INFO: common: Created directory  at: artifacts]\n",
+      "[2024-07-26 23:35:21,077: INFO: common: Created directory  at: artifacts/data_ingestion]\n",
+      "[2024-07-26 23:43:29,635: INFO: 1170291011: artifacts/data_ingestion/data.zip download! with following info: \n",
+      "Connection: close\n",
+      "Content-Length: 11616915\n",
+      "Cache-Control: max-age=300\n",
+      "Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox\n",
+      "Content-Type: application/zip\n",
+      "ETag: \"adf745abc03891fe493c3be264ec012691fe3fa21d861f35a27edbe6d86a76b1\"\n",
+      "Strict-Transport-Security: max-age=31536000\n",
+      "X-Content-Type-Options: nosniff\n",
+      "X-Frame-Options: deny\n",
+      "X-XSS-Protection: 1; mode=block\n",
+      "X-GitHub-Request-Id: 38AD:28F2BB:3FC6CA:4CD38D:66A3DDDF\n",
+      "Accept-Ranges: bytes\n",
+      "Date: Fri, 26 Jul 2024 17:36:12 GMT\n",
+      "Via: 1.1 varnish\n",
+      "X-Served-By: cache-qpg1222-QPG\n",
+      "X-Cache: HIT\n",
+      "X-Cache-Hits: 0\n",
+      "X-Timer: S1722015372.265290,VS0,VE1\n",
+      "Vary: Authorization,Accept-Encoding,Origin\n",
+      "Access-Control-Allow-Origin: *\n",
+      "Cross-Origin-Resource-Policy: cross-origin\n",
+      "X-Fastly-Request-ID: 493e408e866b628ee5d1f857060d32bbaf38002f\n",
+      "Expires: Fri, 26 Jul 2024 17:41:12 GMT\n",
+      "Source-Age: 172\n",
+      "\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "try:\n",
+    "    config = ConfigurationManager()\n",
+    "    data_ingestion_config = config.get_data_ingestion_config()\n",
+    "    data_ingestion = DataIngestion(config=data_ingestion_config)\n",
+    "    data_ingestion.download_file()\n",
+    "    data_ingestion.extract_zip_file()\n",
+    "except Exception as e:\n",
+    "    raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

research/02_prepare_base_model.ipynb ADDED Viewed

	@@ -0,0 +1,339 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops\\\\research'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "%pwd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.chdir('../')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%pwd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass\n",
+    "from pathlib import Path\n",
+    "\n",
+    "\n",
+    "@dataclass(frozen=True)\n",
+    "class PrepareBaseModelConfig:\n",
+    "    root_dir: Path\n",
+    "    base_model_path: Path\n",
+    "    updated_base_model_path: Path\n",
+    "    params_image_size: list\n",
+    "    params_learning_rate: float\n",
+    "    params_include_top: bool\n",
+    "    params_weights: str\n",
+    "    params_classes: int"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cnnClassfier.constants import *\n",
+    "from cnnClassfier.utils.common import read_yaml, create_directories\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ConfigarationManager:\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        config_filepath = CONFIG_FILE_PATH,\n",
+    "        params_filepath = PARAMS_FILE_PATH\n",
+    "    ):\n",
+    "        self.config = read_yaml(config_filepath)\n",
+    "        self.params = read_yaml(params_filepath)\n",
+    "        \n",
+    "        create_directories([self.config.artifacts_root])\n",
+    "        \n",
+    "        \n",
+    "        \n",
+    "    def get_prepare_base_model(self) -> PrepareBaseModelConfig:\n",
+    "        config = self.config.prepare_base_model\n",
+    "        \n",
+    "        create_directories([config.root_dir])\n",
+    "        \n",
+    "        prepare_base_model_config = PrepareBaseModelConfig(\n",
+    "            root_dir=Path(config.root_dir),\n",
+    "            base_model_path= Path(config.base_model_path),\n",
+    "            updated_base_model_path= Path(config.updated_base_model_path),\n",
+    "            params_image_size=self.params.IMAZE_SIZE,\n",
+    "            params_learning_rate=self.params.LEARNING_RATE,\n",
+    "            params_include_top=self.params.INCLUDE_TOP,\n",
+    "            params_weights=self.params.WEIGHTS,\n",
+    "            params_classes=self.params.CLASSES\n",
+    "        )\n",
+    "        \n",
+    "        return prepare_base_model_config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import urllib.request as request\n",
+    "from zipfile import ZipFile\n",
+    "import tensorflow as tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PrepareBaseModel:\n",
+    "    def __init__(self, config: PrepareBaseModelConfig):\n",
+    "        self.config = config\n",
+    "        \n",
+    "    def get_base_model(self):\n",
+    "        self.model = tf.keras.applications.vgg16.VGG16(\n",
+    "            input_shape = self.config.params_image_size,\n",
+    "            weights = self.config.params_weights,\n",
+    "            include_top = self.config.params_include_top\n",
+    "        )\n",
+    "        \n",
+    "        self.save_model(path = self.config.base_model_path, model = self.model)\n",
+    "        \n",
+    "        \n",
+    "    @staticmethod\n",
+    "    def prepare_full_model(model, classes, freeze_all, freeze_till, learinig_rate):\n",
+    "        \"\"\"Attach a softmax classification head to `model` and compile the result.\n",
+    "\n",
+    "        Args:\n",
+    "            model: base Keras model whose output is flattened and fed to the new head.\n",
+    "            classes: number of units in the final Dense (softmax) layer.\n",
+    "            freeze_all: when truthy, every layer of `model` is made non-trainable.\n",
+    "            freeze_till: used only when `freeze_all` is falsy; if a positive integer,\n",
+    "                all layers except the last `freeze_till` are made non-trainable.\n",
+    "            learinig_rate: learning rate passed to the SGD optimizer (the misspelt\n",
+    "                name is kept so existing keyword callers keep working).\n",
+    "\n",
+    "        Returns:\n",
+    "            The compiled model mapping `model.input` to the new softmax output.\n",
+    "        \"\"\"\n",
+    "        if freeze_all:\n",
+    "            # Freeze per layer: the flag must be set on the loop variable, not the model.\n",
+    "            for layer in model.layers:\n",
+    "                layer.trainable = False\n",
+    "\n",
+    "        elif (freeze_till is not None) and (freeze_till > 0):\n",
+    "            # Only the layers before the last `freeze_till` are frozen; setting\n",
+    "            # model.trainable here would freeze the whole network instead.\n",
+    "            for layer in model.layers[:-freeze_till]:\n",
+    "                layer.trainable = False\n",
+    "\n",
+    "        flatten_in = tf.keras.layers.Flatten()(model.output)\n",
+    "        prediction = tf.keras.layers.Dense(\n",
+    "            units = classes,\n",
+    "            activation = 'softmax'\n",
+    "        )(flatten_in)\n",
+    "        \n",
+    "        full_model = tf.keras.models.Model(\n",
+    "            inputs = model.input,\n",
+    "            outputs = prediction\n",
+    "        )\n",
+    "        \n",
+    "        full_model.compile(\n",
+    "            # `lr` is deprecated in Keras optimizers; `learning_rate` is the supported keyword.\n",
+    "            optimizer = tf.keras.optimizers.SGD(learning_rate = learinig_rate),\n",
+    "            loss = tf.keras.losses.CategoricalCrossentropy(),\n",
+    "            metrics = ['accuracy']\n",
+    "        )\n",
+    "        \n",
+    "        full_model.summary()\n",
+    "        return full_model\n",
+    "    \n",
+    "    def update_base_model(self):\n",
+    "        self.full_model = self.prepare_full_model(\n",
+    "            model = self.model,\n",
+    "            classes = self.config.params_classes,\n",
+    "            freeze_all=True,\n",
+    "            freeze_till=None,\n",
+    "            learinig_rate=self.config.params_learning_rate\n",
+    "        )\n",
+    "        self.save_model(path = self.config.updated_base_model_path, model = self.full_model)\n",
+    "    \n",
+    "    \n",
+    "    @staticmethod\n",
+    "    def save_model(path: Path, model: tf.keras.Model):\n",
+    "        model.save(path)\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-07-27 01:03:25,207: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
+      "[2024-07-27 01:03:25,210: INFO: common: yaml file: params.yaml loaded successfully]\n",
+      "[2024-07-27 01:03:25,212: INFO: common: Created directory  at: artifacts]\n",
+      "[2024-07-27 01:03:25,214: INFO: common: Created directory  at: artifacts/prepare_base_model]\n",
+      "[2024-07-27 01:03:25,540: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n",
+      "[2024-07-27 01:03:25,680: WARNING: optimizer: `lr` is deprecated in Keras optimizer, please use `learning_rate` or use the legacy optimizer, e.g.,tf.keras.optimizers.legacy.SGD.]\n",
+      "Model: \"model_3\"\n",
+      "_________________________________________________________________\n",
+      " Layer (type)                Output Shape              Param #   \n",
+      "=================================================================\n",
+      " input_4 (InputLayer)        [(None, 224, 224, 3)]     0         \n",
+      "                                                                 \n",
+      " block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      \n",
+      "                                                                 \n",
+      " block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     \n",
+      "                                                                 \n",
+      " block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         \n",
+      "                                                                 \n",
+      " block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     \n",
+      "                                                                 \n",
+      " block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    \n",
+      "                                                                 \n",
+      " block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         \n",
+      "                                                                 \n",
+      " block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    \n",
+      "                                                                 \n",
+      " block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080    \n",
+      "                                                                 \n",
+      " block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080    \n",
+      "                                                                 \n",
+      " block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0         \n",
+      "                                                                 \n",
+      " block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160   \n",
+      "                                                                 \n",
+      " block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808   \n",
+      "                                                                 \n",
+      " block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808   \n",
+      "                                                                 \n",
+      " block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         \n",
+      "                                                                 \n",
+      " block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   \n",
+      "                                                                 \n",
+      " block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   \n",
+      "                                                                 \n",
+      " block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   \n",
+      "                                                                 \n",
+      " block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\engine\\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
+      "  saving_api.save_model(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                                                                 \n",
+      " flatten_3 (Flatten)         (None, 25088)             0         \n",
+      "                                                                 \n",
+      " dense_3 (Dense)             (None, 2)                 50178     \n",
+      "                                                                 \n",
+      "=================================================================\n",
+      "Total params: 14764866 (56.32 MB)\n",
+      "Trainable params: 50178 (196.01 KB)\n",
+      "Non-trainable params: 14714688 (56.13 MB)\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "try:\n",
+    "    config = ConfigarationManager()\n",
+    "    prepare_base_model_config = config.get_prepare_base_model()\n",
+    "    prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n",
+    "    prepare_base_model.get_base_model()\n",
+    "    prepare_base_model.update_base_model()\n",
+    "except Exception as e:\n",
+    "    raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

research/03_stage_callbacks.ipynb ADDED Viewed

	@@ -0,0 +1,194 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.chdir('../')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass\n",
+    "from pathlib import Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@dataclass(frozen=True)\n",
+    "class PrepareCallbacksConfig:\n",
+    "    root_dir : Path\n",
+    "    tensorboard_root_log_dir : Path\n",
+    "    checkpoint_model_filepath : Path\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cnnClassfier.constants import *\n",
+    "from cnnClassfier.utils.common import read_yaml, create_directories"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ConfigurationManager:\n",
+    "    def __init__(\n",
+    "        self, \n",
+    "        config_filepath = CONFIG_FILE_PATH,\n",
+    "        params_filepath = PARAMS_FILE_PATH):\n",
+    "        self.config = read_yaml(config_filepath)\n",
+    "        self.params = read_yaml(params_filepath)\n",
+    "        create_directories([self.config.artifacts_root])\n",
+    "        \n",
+    "        \n",
+    "    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n",
+    "        config = self.config.prepare_callbacks\n",
+    "        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n",
+    "        create_directories([\n",
+    "            Path(model_ckpt_dir),\n",
+    "            Path(config.tensorboard_root_log_dir)\n",
+    "        ])\n",
+    "\n",
+    "        prepare_callback_config = PrepareCallbacksConfig(\n",
+    "            root_dir=Path(config.root_dir),\n",
+    "            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n",
+    "            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n",
+    "        )\n",
+    "\n",
+    "        return prepare_callback_config\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-07-27 21:24:17,721: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import urllib.request as request\n",
+    "from zipfile import ZipFile\n",
+    "import tensorflow as tf\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PrepareCallback:\n",
+    "    def __init__(self, config: PrepareCallbacksConfig):\n",
+    "        self.config = config\n",
+    "        \n",
+    "    @property\n",
+    "    def _create_tb_callbacks(self):\n",
+    "        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S')\n",
+    "    \n",
+    "        tb_running_log_dir = os.path.join(\n",
+    "            str(self.config.tensorboard_root_log_dir),  # Convert to string\n",
+    "            f\"tb_logs_at_{timestamp}\",\n",
+    "        )\n",
+    "        return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n",
+    "\n",
+    "    @property\n",
+    "    def _create_ckpt_callbacks(self):\n",
+    "        return tf.keras.callbacks.ModelCheckpoint(\n",
+    "            filepath=str(self.config.checkpoint_model_filepath),  # Convert to string\n",
+    "            save_best_only=True\n",
+    "        )\n",
+    "\n",
+    "    def get_tb_callbacks(self):\n",
+    "        return [\n",
+    "            self._create_tb_callbacks,\n",
+    "            self._create_ckpt_callbacks\n",
+    "        ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-07-27 21:35:22,818: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
+      "[2024-07-27 21:35:22,820: INFO: common: yaml file: params.yaml loaded successfully]\n",
+      "[2024-07-27 21:35:22,821: INFO: common: Created directory  at: artifacts]\n",
+      "[2024-07-27 21:35:22,823: INFO: common: Created directory  at: artifacts\\prepare_callbacks\\checkpoint_dir]\n",
+      "[2024-07-27 21:35:22,824: INFO: common: Created directory  at: artifacts\\prepare_callbacks\\tensorboard_log_dir]\n"
+     ]
+    }
+   ],
+   "source": [
+    "try:\n",
+    "    config = ConfigurationManager()\n",
+    "    prepare_callbacks_config = config.get_prepare_callback_config()\n",
+    "    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n",
+    "    callback_list = prepare_callbacks.get_tb_callbacks()\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

research/04_train.ipynb ADDED Viewed

	@@ -0,0 +1,327 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.chdir('../')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass\n",
+    "from pathlib import Path\n",
+    "\n",
+    "\n",
+    "@dataclass(frozen=True)\n",
+    "class TrainingConfig:\n",
+    "    root_dir: Path\n",
+    "    trained_model_path: Path\n",
+    "    updated_base_model_path: Path\n",
+    "    training_data: Path\n",
+    "    params_epochs: int\n",
+    "    params_batch_size: int\n",
+    "    params_is_augmentation: bool\n",
+    "    params_image_size: list\n",
+    "\n",
+    "\n",
+    "\n",
+    "@dataclass(frozen=True)\n",
+    "class PrepareCallbacksConfig:\n",
+    "    root_dir: Path\n",
+    "    tensorboard_root_log_dir: Path\n",
+    "    checkpoint_model_filepath: Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cnnClassfier.constants import *\n",
+    "from cnnClassfier.utils.common import read_yaml, create_directories\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "\n",
+    "class ConfigurationManager:\n",
+    "    def __init__(\n",
+    "        self, \n",
+    "        config_filepath = CONFIG_FILE_PATH,\n",
+    "        params_filepath = PARAMS_FILE_PATH):\n",
+    "        self.config = read_yaml(config_filepath)\n",
+    "        self.params = read_yaml(params_filepath)\n",
+    "        create_directories([self.config.artifacts_root])\n",
+    "\n",
+    "\n",
+    "    \n",
+    "    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n",
+    "        config = self.config.prepare_callbacks\n",
+    "        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n",
+    "        create_directories([\n",
+    "            Path(model_ckpt_dir),\n",
+    "            Path(config.tensorboard_root_log_dir)\n",
+    "        ])\n",
+    "\n",
+    "        prepare_callback_config = PrepareCallbacksConfig(\n",
+    "            root_dir=Path(config.root_dir),\n",
+    "            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n",
+    "            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n",
+    "        )\n",
+    "\n",
+    "        return prepare_callback_config\n",
+    "    \n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "    def get_training_config(self) -> TrainingConfig:\n",
+    "        training = self.config.training\n",
+    "        prepare_base_model = self.config.prepare_base_model\n",
+    "        params = self.params\n",
+    "        training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"Chicken-fecal-images\")\n",
+    "        create_directories([\n",
+    "            Path(training.root_dir)\n",
+    "        ])\n",
+    "\n",
+    "        training_config = TrainingConfig(\n",
+    "            root_dir=Path(training.root_dir),\n",
+    "            trained_model_path=Path(training.trained_model_path),\n",
+    "            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n",
+    "            training_data=Path(training_data),\n",
+    "            params_epochs=params.EPOCHS,\n",
+    "            params_batch_size=params.BATCH_SIZE,\n",
+    "            params_is_augmentation=params.AUGMENTATION,\n",
+    "            params_image_size=params.IMAZE_SIZE\n",
+    "        )\n",
+    "\n",
+    "        return training_config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "class PrepareCallback:\n",
+    "    def __init__(self, config: PrepareCallbacksConfig):\n",
+    "        self.config = config\n",
+    "        \n",
+    "    @property\n",
+    "    def _create_tb_callbacks(self):\n",
+    "        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S')\n",
+    "    \n",
+    "        tb_running_log_dir = os.path.join(\n",
+    "            str(self.config.tensorboard_root_log_dir),  # Convert to string\n",
+    "            f\"tb_logs_at_{timestamp}\",\n",
+    "        )\n",
+    "        return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n",
+    "\n",
+    "    @property\n",
+    "    def _create_ckpt_callbacks(self):\n",
+    "        return tf.keras.callbacks.ModelCheckpoint(\n",
+    "            filepath=str(self.config.checkpoint_model_filepath),  # Convert to string\n",
+    "            save_best_only=True\n",
+    "        )\n",
+    "\n",
+    "    def get_tb_callbacks(self):\n",
+    "        return [\n",
+    "            self._create_tb_callbacks,\n",
+    "            self._create_ckpt_callbacks\n",
+    "        ]\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import urllib.request as request\n",
+    "from zipfile import ZipFile\n",
+    "import tensorflow as tf\n",
+    "import time\n",
+    "\n",
+    "\n",
+    "class Training:\n",
+    "    def __init__(self, config: TrainingConfig):\n",
+    "        self.config = config\n",
+    "    \n",
+    "    def get_base_model(self):\n",
+    "        self.model = tf.keras.models.load_model(\n",
+    "            self.config.updated_base_model_path\n",
+    "        )\n",
+    "    \n",
+    "    def train_valid_generator(self):\n",
+    "\n",
+    "        datagenerator_kwargs = dict(\n",
+    "            rescale = 1./255,\n",
+    "            validation_split=0.20\n",
+    "        )\n",
+    "\n",
+    "        dataflow_kwargs = dict(\n",
+    "            target_size=self.config.params_image_size[:-1],\n",
+    "            batch_size=self.config.params_batch_size,\n",
+    "            interpolation=\"bilinear\"\n",
+    "        )\n",
+    "\n",
+    "        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
+    "            **datagenerator_kwargs\n",
+    "        )\n",
+    "\n",
+    "        self.valid_generator = valid_datagenerator.flow_from_directory(\n",
+    "            directory=self.config.training_data,\n",
+    "            subset=\"validation\",\n",
+    "            shuffle=False,\n",
+    "            **dataflow_kwargs\n",
+    "        )\n",
+    "\n",
+    "        if self.config.params_is_augmentation:\n",
+    "            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
+    "                rotation_range=40,\n",
+    "                horizontal_flip=True,\n",
+    "                width_shift_range=0.2,\n",
+    "                height_shift_range=0.2,\n",
+    "                shear_range=0.2,\n",
+    "                zoom_range=0.2,\n",
+    "                **datagenerator_kwargs\n",
+    "            )\n",
+    "        else:\n",
+    "            train_datagenerator = valid_datagenerator\n",
+    "\n",
+    "        self.train_generator = train_datagenerator.flow_from_directory(\n",
+    "            directory=self.config.training_data,\n",
+    "            subset=\"training\",\n",
+    "            shuffle=True,\n",
+    "            **dataflow_kwargs\n",
+    "        )\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def save_model(path: Path, model: tf.keras.Model):\n",
+    "        model.save(path)\n",
+    "\n",
+    "\n",
+    "    def train(self, callback_list: list):\n",
+    "        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n",
+    "        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n",
+    "\n",
+    "        self.model.fit(\n",
+    "            self.train_generator,\n",
+    "            epochs=self.config.params_epochs,\n",
+    "            steps_per_epoch=self.steps_per_epoch,\n",
+    "            validation_steps=self.validation_steps,\n",
+    "            validation_data=self.valid_generator,\n",
+    "            callbacks=callback_list\n",
+    "        )\n",
+    "\n",
+    "        self.save_model(\n",
+    "            path=self.config.trained_model_path,\n",
+    "            model=self.model\n",
+    "        )\n",
+    "\n",
+    "\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-07-27 23:47:55,531: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
+      "[2024-07-27 23:47:55,534: INFO: common: yaml file: params.yaml loaded successfully]\n",
+      "[2024-07-27 23:47:55,536: INFO: common: Created directory  at: artifacts]\n",
+      "[2024-07-27 23:47:55,537: INFO: common: Created directory  at: artifacts\\prepare_callbacks\\checkpoint_dir]\n",
+      "[2024-07-27 23:47:55,538: INFO: common: Created directory  at: artifacts\\prepare_callbacks\\tensorboard_log_dir]\n",
+      "[2024-07-27 23:47:55,539: INFO: common: Created directory  at: artifacts\\training]\n",
+      "[2024-07-27 23:47:56,129: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n",
+      "]\n",
+      "[2024-07-27 23:47:56,313: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n",
+      "]\n",
+      "Found 78 images belonging to 2 classes.\n",
+      "Found 312 images belonging to 2 classes.\n",
+      "[2024-07-27 23:47:58,334: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
+      "]\n",
+      "19/19 [==============================] - 53s 3s/step - loss: 13.0395 - accuracy: 0.5270 - val_loss: 24.1360 - val_accuracy: 0.3906\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\engine\\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
+      "  saving_api.save_model(\n"
+     ]
+    }
+   ],
+   "source": [
+    "try:\n",
+    "    config = ConfigurationManager()\n",
+    "    prepare_callbacks_config = config.get_prepare_callback_config()\n",
+    "    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n",
+    "    callback_list = prepare_callbacks.get_tb_callbacks()\n",
+    "\n",
+    "    training_config = config.get_training_config()\n",
+    "    training = Training(config=training_config)\n",
+    "    training.get_base_model()\n",
+    "    training.train_valid_generator()\n",
+    "    training.train(\n",
+    "        callback_list=callback_list\n",
+    "    )\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

research/05_model_evaluation.ipynb ADDED Viewed

	@@ -0,0 +1,209 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.chdir('../')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n",
+      "\n",
+      "WARNING:tensorflow:From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = tf.keras.models.load_model('artifacts/training/model.h5')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass\n",
+    "from pathlib import Path\n",
+    "\n",
+    "@dataclass(frozen=True)\n",
+    "class EvaluationConfig:\n",
+    "    path_of_model : Path\n",
+    "    training_data : Path\n",
+    "    all_params : dict\n",
+    "    params_image_size : list\n",
+    "    params_batch_size: int"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cnnClassfier.constants import *\n",
+    "from cnnClassfier.utils.common import read_yaml, create_directories, save_json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ConfigurationManager:\n",
+    "    def __init__(\n",
+    "        self, \n",
+    "        config_filepath = CONFIG_FILE_PATH,\n",
+    "        params_filepath = PARAMS_FILE_PATH):\n",
+    "        self.config = read_yaml(config_filepath)\n",
+    "        self.params = read_yaml(params_filepath)\n",
+    "        create_directories([self.config.artifacts_root])\n",
+    "        \n",
+    "        \n",
+    "    def get_validation_config(self) -> EvaluationConfig:\n",
+    "        eval_config = EvaluationConfig(\n",
+    "            path_of_model=\"artifacts/training/model.h5\",\n",
+    "            training_data=\"artifacts/data_ingestion/Chicken-fecal-images\",\n",
+    "            all_params=self.params,\n",
+    "            params_image_size=self.params.IMAZE_SIZE,\n",
+    "            params_batch_size=self.params.BATCH_SIZE\n",
+    "        )\n",
+    "        return eval_config\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from urllib.parse import urlparse\n",
+    "\n",
+    "class Evaluation:\n",
+    "    \"\"\"Score the trained model on the validation subset and save the result as JSON.\"\"\"\n",
+    "    def __init__(self, config: EvaluationConfig):\n",
+    "        self.config = config\n",
+    "        \n",
+    "    def _valid_generator(self):\n",
+    "        \"\"\"Build ``self.valid_generator`` over the validation subset of ``training_data``.\"\"\"\n",
+    "        datagenerator_kwargs = dict(\n",
+    "            rescale = 1./255,\n",
+    "            validation_split = 0.30\n",
+    "        )\n",
+    "        \n",
+    "        dataflow_kwargs = dict(\n",
+    "            # params_image_size carries a trailing channel entry; target_size wants only the leading dims\n",
+    "            target_size = self.config.params_image_size[:-1],\n",
+    "            batch_size= self.config.params_batch_size,\n",
+    "            interpolation = 'bilinear'\n",
+    "        )\n",
+    "        \n",
+    "        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
+    "            **datagenerator_kwargs\n",
+    "        )\n",
+    "        \n",
+    "        self.valid_generator = valid_datagenerator.flow_from_directory(\n",
+    "            directory = self.config.training_data,\n",
+    "            subset = 'validation',\n",
+    "            shuffle = True,\n",
+    "            **dataflow_kwargs\n",
+    "        )\n",
+    "        \n",
+    "    @staticmethod\n",
+    "    def load_model(path: Path) -> tf.keras.Model:\n",
+    "        \"\"\"Load and return the Keras model stored at ``path``.\"\"\"\n",
+    "        return tf.keras.models.load_model(path)\n",
+    "    \n",
+    "    def evaluation(self):\n",
+    "        \"\"\"Load the model, build the validation generator and keep ``evaluate``'s result in ``self.score``.\"\"\"\n",
+    "        self.model = self.load_model(self.config.path_of_model)\n",
+    "        self._valid_generator()\n",
+    "        # Evaluate the model loaded above; the bare name `model` only resolved to a global left by an earlier cell.\n",
+    "        self.score = self.model.evaluate(self.valid_generator)\n",
+    "        \n",
+    "    def save_score(self):\n",
+    "        \"\"\"Write the loss and accuracy from ``self.score`` to ``scores.json``.\"\"\"\n",
+    "        scores = {'loss' : self.score[0], 'accuracy' : self.score[1]}\n",
+    "        save_json(path = Path('scores.json'), data = scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-07-28 02:01:54,885: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
+      "[2024-07-28 02:01:54,889: INFO: common: yaml file: params.yaml loaded successfully]\n",
+      "[2024-07-28 02:01:54,890: INFO: common: Created directory  at: artifacts]\n",
+      "Found 116 images belonging to 2 classes.\n",
+      "WARNING:tensorflow:From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
+      "\n",
+      "[2024-07-28 02:01:56,004: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
+      "]\n",
+      "8/8 [==============================] - 11s 1s/step - loss: 0.3306 - accuracy: 0.9569\n",
+      "[2024-07-28 02:02:06,982: INFO: common: Json file saved at: scores.json]\n"
+     ]
+    }
+   ],
+   "source": [
+    "try:\n",
+    "    config = ConfigurationManager()\n",
+    "    val_config = config.get_validation_config()\n",
+    "    evaluation = Evaluation(val_config)\n",
+    "    evaluation.evaluation()\n",
+    "    evaluation.save_score()\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    raise e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

research/tails.ipynb ADDED Viewed

File without changes

src/cnnClassfier/__init__.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import os
+import sys
+import logging
+logging_str = logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"
+log_dir =  'logs'
+log_filepath = os.path.join(log_dir, 'running_logs.log')
+os.makedirs(log_dir, exist_ok=True)
+logging.basicConfig(
+    level = logging.INFO,
+    format= logging_str,
+    handlers= [
+        logging.FileHandler(log_filepath),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+logger = logging.getLogger('cnnClassifierLogger')

src/cnnClassfier/components/__init__.py ADDED Viewed

File without changes

src/cnnClassfier/components/base_model.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os
+import urllib.request as request
+from zipfile import ZipFile
+import tensorflow as tf
+from cnnClassfier.entity.config_entity import PrepareBaseModelConfig
+from pathlib import Path
+class PrepareBaseModel:
+    def __init__(self, config: PrepareBaseModelConfig):
+        self.config = config
+    def get_base_model(self):
+        self.model = tf.keras.applications.vgg16.VGG16(
+            input_shape = self.config.params_image_size,
+            weights = self.config.params_weights,
+            include_top = self.config.params_include_top
+        )
+        self.save_model(path = self.config.base_model_path, model = self.model)
+    @staticmethod
+    def prepare_full_model(model, classes, freeze_all, freeze_till, learinig_rate):
+        if freeze_all:
+            for layer in model.layers:
+                model.trainable = False
+        elif (freeze_till is not None) and (freeze_till > 0):
+            for layer in model.layers[:-freeze_till]:
+                model.trainable = False
+        flatten_in = tf.keras.layers.Flatten()(model.output)
+        prediction = tf.keras.layers.Dense(
+            units = classes,
+            activation = 'softmax'
+        )(flatten_in)
+        full_model = tf.keras.models.Model(
+            inputs = model.input,
+            outputs = prediction
+        )
+        full_model.compile(
+            optimizer = tf.keras.optimizers.SGD(lr = learinig_rate),
+            loss = tf.keras.losses.CategoricalCrossentropy(),
+            metrics = ['accuracy']
+        )
+        full_model.summary()
+        return full_model
+    def update_base_model(self):
+        self.full_model = self.prepare_full_model(
+            model = self.model,
+            classes = self.config.params_classes,
+            freeze_all=True,
+            freeze_till=None,
+            learinig_rate=self.config.params_learning_rate
+        )
+        self.save_model(path = self.config.updated_base_model_path, model = self.full_model)
+    @staticmethod
+    def save_model(path: Path, model: tf.keras.Model):
+        model.save(path)

src/cnnClassfier/components/callbacks.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from cnnClassfier.config.configuration import PrepareCallbacksConfig
+import time
+import os
+import tensorflow as tf
+class PrepareCallback:
+    def __init__(self, config: PrepareCallbacksConfig):
+        self.config = config
+    @property
+    def _create_tb_callbacks(self):
+        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S')
+        tb_running_log_dir = os.path.join(
+            str(self.config.tensorboard_root_log_dir),  # Convert to string
+            f"tb_logs_at_{timestamp}",
+        )
+        return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)
+    @property
+    def _create_ckpt_callbacks(self):
+        return tf.keras.callbacks.ModelCheckpoint(
+            filepath=str(self.config.checkpoint_model_filepath),  # Convert to string
+            save_best_only=True
+        )
+    def get_tb_callbacks(self):
+        return [
+            self._create_tb_callbacks,
+            self._create_ckpt_callbacks
+        ]

src/cnnClassfier/components/data_ingestion.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os
+from tqdm.notebook import tqdm
+import urllib.request as request
+import zipfile
+from cnnClassfier import logger
+from cnnClassfier.utils.common import get_size
+from cnnClassfier.entity.config_entity import DataIngestionConfig
+from pathlib import Path
+class DataIngestion:
+    def __init__(self, config: DataIngestionConfig):
+        self.config = config
+    def download_file(self):
+        if not os.path.exists(self.config.local_data_file):
+            filename, headers = request.urlretrieve(
+                url = self.config.source_URL,
+                filename = self.config.local_data_file
+            )
+            logger.info(f"{filename} download! with following info: \n{headers}")
+        else:
+            logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}")
+    def extract_zip_file(self):
+        """
+        zip_file_path: str
+        Extracts the zip file into the data directory
+        Function returns None
+        """
+        unzip_path = self.config.unzip_dir
+        os.makedirs(unzip_path, exist_ok=True)
+        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
+            zip_ref.extractall(unzip_path)

src/cnnClassfier/components/evaluation.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from urllib.parse import urlparse
+from cnnClassfier.entity.config_entity import EvaluationConfig
+from pathlib import Path
+import tensorflow as tf
+from cnnClassfier.utils.common import save_json
+class Evaluation:
+    def __init__(self, config: EvaluationConfig):
+        self.config = config
+    def _valid_generator(self):
+        datagenerator_kwargs = dict(
+            rescale = 1./255,
+            validation_split = 0.30
+        )
+        dataflow_kwargs = dict(
+            target_size = self.config.params_image_size[:-1],
+            batch_size= self.config.params_batch_size,
+            interpolation = 'bilinear'
+        )
+        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
+            **datagenerator_kwargs
+        )
+        self.valid_generator = valid_datagenerator.flow_from_directory(
+            directory = self.config.training_data,
+            subset = 'validation',
+            shuffle = True,
+            **dataflow_kwargs
+        )
+    @staticmethod
+    def load_model(path: Path) -> tf.keras.Model:
+        return tf.keras.models.load_model(path)
+    def evaluation(self):
+        model = self.load_model(self.config.path_of_model)
+        self._valid_generator()
+        self.score = model.evaluate(self.valid_generator)
+    def save_score(self):
+        scores = {'loss' : self.score[0], 'accuracy' : self.score[1]}
+        save_json(path = Path('scores.json'), data = scores)

src/cnnClassfier/components/train.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import os
+import urllib.request as request
+from zipfile import ZipFile
+import tensorflow as tf
+import time
+from cnnClassfier.entity.config_entity import TrainingConfig
+from pathlib import Path
+class Training:
+    def __init__(self, config: TrainingConfig):
+        self.config = config
+    def get_base_model(self):
+        self.model = tf.keras.models.load_model(
+            self.config.updated_base_model_path
+        )
+    def train_valid_generator(self):
+        datagenerator_kwargs = dict(
+            rescale = 1./255,
+            validation_split=0.20
+        )
+        dataflow_kwargs = dict(
+            target_size=self.config.params_image_size[:-1],
+            batch_size=self.config.params_batch_size,
+            interpolation="bilinear"
+        )
+        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
+            **datagenerator_kwargs
+        )
+        self.valid_generator = valid_datagenerator.flow_from_directory(
+            directory=self.config.training_data,
+            subset="validation",
+            shuffle=False,
+            **dataflow_kwargs
+        )
+        if self.config.params_is_augmentation:
+            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
+                rotation_range=40,
+                horizontal_flip=True,
+                width_shift_range=0.2,
+                height_shift_range=0.2,
+                shear_range=0.2,
+                zoom_range=0.2,
+                **datagenerator_kwargs
+            )
+        else:
+            train_datagenerator = valid_datagenerator
+        self.train_generator = train_datagenerator.flow_from_directory(
+            directory=self.config.training_data,
+            subset="training",
+            shuffle=True,
+            **dataflow_kwargs
+        )
+    @staticmethod
+    def save_model(path: Path, model: tf.keras.Model):
+        model.save(path)
+    def train(self, callback_list: list):
+        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
+        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size
+        self.model.fit(
+            self.train_generator,
+            epochs=self.config.params_epochs,
+            steps_per_epoch=self.steps_per_epoch,
+            validation_steps=self.validation_steps,
+            validation_data=self.valid_generator,
+            callbacks=callback_list
+        )
+        self.save_model(
+            path=self.config.trained_model_path,
+            model=self.model
+        )

src/cnnClassfier/config/__init__.py ADDED Viewed

File without changes

src/cnnClassfier/config/configuration.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import os
+from cnnClassfier.constants import *
+from cnnClassfier.utils.common import read_yaml, create_directories
+from cnnClassfier.entity.config_entity import (DataIngestionConfig,
+                                               PrepareBaseModelConfig,
+                                               PrepareCallbacksConfig,
+                                               TrainingConfig,
+                                               EvaluationConfig)
+class ConfigurationManager:
+    def __init__(
+        self,
+        config_filepath = CONFIG_FILE_PATH,
+        params_filepath = PARAMS_FILE_PATH):
+        self.config = read_yaml(config_filepath)
+        self.params = read_yaml(params_filepath)
+        create_directories([self.config.artifacts_root])
+    def get_data_ingestion_config(self) -> DataIngestionConfig:
+        config = self.config.data_ingestion
+        create_directories([config.root_dir])
+        data_ingestion_config = DataIngestionConfig(
+            root_dir=config.root_dir,
+            source_URL=config.source_URL,
+            local_data_file=config.local_data_file,
+            unzip_dir=config.unzip_dir
+        )
+        return data_ingestion_config
+    def get_prepare_base_model(self) -> PrepareBaseModelConfig:
+        config = self.config.prepare_base_model
+        create_directories([config.root_dir])
+        prepare_base_model_config = PrepareBaseModelConfig(
+            root_dir=Path(config.root_dir),
+            base_model_path= Path(config.base_model_path),
+            updated_base_model_path= Path(config.updated_base_model_path),
+            params_image_size=self.params.IMAZE_SIZE,
+            params_learning_rate=self.params.LEARNING_RATE,
+            params_include_top=self.params.INCLUDE_TOP,
+            params_weights=self.params.WEIGHTS,
+            params_classes=self.params.CLASSES
+        )
+        return prepare_base_model_config
+    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
+        config = self.config.prepare_callbacks
+        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
+        create_directories([
+            Path(model_ckpt_dir),
+            Path(config.tensorboard_root_log_dir)
+        ])
+        prepare_callback_config = PrepareCallbacksConfig(
+            root_dir=Path(config.root_dir),
+            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
+            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
+        )
+        return prepare_callback_config
+    def get_training_config(self) -> TrainingConfig:
+        training = self.config.training
+        prepare_base_model = self.config.prepare_base_model
+        params = self.params
+        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chicken-fecal-images")
+        create_directories([
+            Path(training.root_dir)
+        ])
+        training_config = TrainingConfig(
+            root_dir=Path(training.root_dir),
+            trained_model_path=Path(training.trained_model_path),
+            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
+            training_data=Path(training_data),
+            params_epochs=params.EPOCHS,
+            params_batch_size=params.BATCH_SIZE,
+            params_is_augmentation=params.AUGMENTATION,
+            params_image_size=params.IMAZE_SIZE
+        )
+        return training_config
+    def get_validation_config(self) -> EvaluationConfig:
+        eval_config = EvaluationConfig(
+            path_of_model="artifacts/training/model.h5",
+            training_data="artifacts/data_ingestion/Chicken-fecal-images",
+            all_params=self.params,
+            params_image_size=self.params.IMAZE_SIZE,
+            params_batch_size=self.params.BATCH_SIZE
+        )
+        return eval_config

src/cnnClassfier/constants/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from pathlib import Path
+CONFIG_FILE_PATH = Path("config/config.yaml")
+PARAMS_FILE_PATH = Path("params.yaml")

src/cnnClassfier/entity/__init__.py ADDED Viewed

File without changes

src/cnnClassfier/entity/config_entity.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from dataclasses import dataclass
+from pathlib import Path
+@dataclass(frozen=True)
+class DataIngestionConfig:
+    """Immutable settings for the data-ingestion stage."""
+    root_dir: Path  # stage directory, created when the config is built
+    source_URL: str  # URL the dataset archive is downloaded from
+    local_data_file: Path  # where the downloaded archive is stored
+    unzip_dir: Path  # directory the archive is extracted into
+@dataclass(frozen=True)
+class PrepareBaseModelConfig:
+    """Immutable settings for building the base model and its classification head."""
+    root_dir: Path  # stage directory, created when the config is built
+    base_model_path: Path  # where the plain VGG16 model is saved
+    updated_base_model_path: Path  # where the model with the new head is saved
+    params_image_size: list  # passed to VGG16 as input_shape
+    params_learning_rate: float  # learning rate handed to the optimizer
+    params_include_top: bool  # passed to VGG16 as include_top
+    params_weights: str  # passed to VGG16 as weights
+    params_classes: int  # number of units in the softmax output layer
+@dataclass(frozen=True)
+class PrepareCallbacksConfig:
+    root_dir : Path
+    tensorboard_root_log_dir : Path
+    checkpoint_model_filepath : Path
+@dataclass(frozen=True)
+class TrainingConfig:
+    """Immutable settings for the training stage."""
+    root_dir: Path  # stage directory, created when the config is built
+    trained_model_path: Path  # where the fitted model is saved
+    updated_base_model_path: Path  # model file loaded as the starting point
+    training_data: Path  # image directory read with flow_from_directory
+    params_epochs: int  # number of epochs passed to fit
+    params_batch_size: int  # batch size of both data generators
+    params_is_augmentation: bool  # whether the training generator applies augmentation
+    params_image_size: list  # all but its last entry is used as the generators' target_size
+@dataclass(frozen=True)
+class EvaluationConfig:
+    path_of_model : Path
+    training_data : Path
+    all_params : dict
+    params_image_size : list
+    params_batch_size: int

src/cnnClassfier/pipeline/__init__.py ADDED Viewed

File without changes

src/cnnClassfier/pipeline/predict.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import numpy as np
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing import image
+import os
class Prediction:
    """Classify one image file with the Keras model stored in ``model.h5``."""

    def __init__(self, filename):
        # Path of the image that predict() will load.
        self.filename = filename

    def predict(self):
        """Return the predicted label for ``self.filename``.

        The model is loaded from ``model.h5`` (a relative path, so it is
        resolved against the working directory) on every call. The image is
        loaded at 224x224, converted to an array and given a leading batch
        axis before being passed to the model.

        Returns:
            str: ``'Healthy'`` when the highest-scoring class index is 1,
            ``'Coccidiosis'`` for any other index.
        """
        classifier = load_model("model.h5")
        loaded = image.load_img(self.filename, target_size=(224, 224))
        batch = np.expand_dims(image.img_to_array(loaded), axis=0)
        result = np.argmax(classifier.predict(batch), axis=1)
        # The predicted class index array is echoed to stdout.
        print(result)
        return 'Healthy' if result[0] == 1 else 'Coccidiosis'

src/cnnClassfier/pipeline/stage02_base_model.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from cnnClassfier.config.configuration import ConfigurationManager
+from cnnClassfier.components.base_model import PrepareBaseModel
+from cnnClassfier import logger
# Label used in this stage's start/completion log lines.
STAGE_NAME = 'Pepare Base Model Stage'


class PrepareBaseModelTrainigPipeline:
    """Pipeline wrapper that runs the prepare-base-model component."""

    def main(self):
        """Run the stage.

        Creates a ``ConfigurationManager``, asks it for the
        prepare-base-model configuration, builds a ``PrepareBaseModel``
        from it and calls ``get_base_model()`` followed by
        ``update_base_model()``.
        """
        manager = ConfigurationManager()
        component = PrepareBaseModel(config=manager.get_prepare_base_model())
        component.get_base_model()
        component.update_base_model()
if __name__ == '__main__':
    # Script entry point: log the stage boundaries around the run; on
    # failure, log the exception with its traceback and let it propagate.
    try:
        logger.info(f'>>>>>> Stage {STAGE_NAME} Started <<<<<<<')
        PrepareBaseModelTrainigPipeline().main()
        logger.info(f">>>>>> Stage {STAGE_NAME} Completed <<<<<<\n\nx======x")
    except Exception as exc:
        logger.exception(exc)
        raise

src/cnnClassfier/pipeline/stage_01_data_ingestion.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from cnnClassfier.config.configuration import ConfigurationManager
+from cnnClassfier.components.data_ingestion import DataIngestion
+from cnnClassfier import logger
# Label used in this stage's start/completion log lines.
STAGE_NAME = 'Data Ingestion Stage'


class DataIngestionTrainingPipeline:
    """Pipeline wrapper that runs the data-ingestion component."""

    def main(self):
        """Run the stage.

        Creates a ``ConfigurationManager``, asks it for the data-ingestion
        configuration, builds a ``DataIngestion`` from it and calls
        ``download_file()`` followed by ``extract_zip_file()``.
        """
        manager = ConfigurationManager()
        ingestion = DataIngestion(config=manager.get_data_ingestion_config())
        ingestion.download_file()
        ingestion.extract_zip_file()
if __name__ == '__main__':
    # Script entry point: log the stage boundaries around the run; on
    # failure, log the exception with its traceback and let it propagate.
    try:
        logger.info(f'>>>>>> Stage {STAGE_NAME} Started <<<<<<<')
        DataIngestionTrainingPipeline().main()
        logger.info(f">>>>>> Stage {STAGE_NAME} Completed <<<<<<\n\nx======x")
    except Exception as exc:
        logger.exception(exc)
        raise

src/cnnClassfier/pipeline/stage_03_train.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from cnnClassfier.config.configuration import ConfigurationManager
+from cnnClassfier.components.callbacks import PrepareCallback
+from cnnClassfier.components.train import Training
+from cnnClassfier import logger
# Label used in this stage's start/completion log lines.
STAGE_NAME = "Training"


class ModelTrainingPipeline:
    """Pipeline wrapper that prepares callbacks and runs the training component."""

    def main(self):
        """Run the stage.

        Creates a ``ConfigurationManager``, builds a ``PrepareCallback``
        from its callback configuration and collects the callbacks via
        ``get_tb_callbacks()``. It then builds a ``Training`` from the
        training configuration and calls ``get_base_model()``,
        ``train_valid_generator()`` and ``train(callback_list=...)`` in
        that order.
        """
        manager = ConfigurationManager()

        callback_builder = PrepareCallback(
            config=manager.get_prepare_callback_config()
        )
        callback_list = callback_builder.get_tb_callbacks()

        trainer = Training(config=manager.get_training_config())
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train(callback_list=callback_list)
if __name__ == '__main__':
    # Script entry point: log a separator and the stage boundaries around
    # the run; on failure, log the exception with its traceback and let it
    # propagate.
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        ModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as exc:
        logger.exception(exc)
        raise

src/cnnClassfier/pipeline/stage_04_evaluation.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from cnnClassfier.config.configuration import ConfigurationManager
+from cnnClassfier.components.evaluation import Evaluation
+from cnnClassfier import logger
# Label used in this stage's start/completion log lines.
STAGE_NAME = 'Evaluation Stage'


class EvaluationTrainigPipeline:
    """Pipeline wrapper that runs the evaluation component."""

    def main(self):
        """Run the stage.

        Creates a ``ConfigurationManager``, asks it for the validation
        configuration, builds an ``Evaluation`` from it and calls
        ``evaluation()`` followed by ``save_score()``.
        """
        manager = ConfigurationManager()
        evaluator = Evaluation(manager.get_validation_config())
        evaluator.evaluation()
        evaluator.save_score()
if __name__ == '__main__':
    # Script entry point: log the stage boundaries around the run; on
    # failure, log the exception with its traceback and let it propagate.
    try:
        logger.info(f'>>>>>> Stage {STAGE_NAME} Started <<<<<<<')
        EvaluationTrainigPipeline().main()
        logger.info(f">>>>>> Stage {STAGE_NAME} Completed <<<<<<\n\nx======x")
    except Exception as exc:
        logger.exception(exc)
        raise

src/cnnClassfier/utils/__init__.py ADDED Viewed

File without changes

src/cnnClassfier/utils/common.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import os
+from box.exceptions import BoxValueError
+import yaml
+from cnnClassfier import logger
+import json
+import joblib
+from ensure import ensure_annotations
+from box import ConfigBox
+from pathlib import Path
+from typing import Any
+import base64
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Read a YAML file and return its content as a ``ConfigBox``.

    Args:
        path_to_yaml (Path): location of the YAML file.

    Raises:
        ValueError: if ``ConfigBox`` rejects the parsed content with
            ``BoxValueError`` (reported as an empty YAML file).
        Exception: errors from opening or parsing the file propagate
            unchanged.

    Returns:
        ConfigBox: the parsed content wrapped in a ``ConfigBox``.
    """
    with open(path_to_yaml) as yaml_file:
        content = yaml.safe_load(yaml_file)

    try:
        config = ConfigBox(content)
    except BoxValueError as exc:
        # Keep the original error as the cause so the traceback shows why
        # the content was rejected.
        raise ValueError("yaml file is empty") from exc

    # Log success only once the content has actually been accepted.
    logger.info(f"yaml file: {path_to_yaml} loaded successfully")
    return config
@ensure_annotations
def create_directories(path_to_directories: list, verbose=True):
    """Create every directory named in the given list.

    Args:
        path_to_directories (list): directory paths to create. Missing
            parents are created too, and existing directories are left
            untouched.
        verbose (bool, optional): log a message for each path processed.
            Defaults to True.
    """
    for directory in path_to_directories:
        os.makedirs(directory, exist_ok=True)
        if not verbose:
            continue
        logger.info(f'Created directory  at: {directory}')
@ensure_annotations
def save_json(path: Path, data: dict):
    """Write a dict to a JSON file, indented by four spaces.

    Args:
        path (Path): destination file; it is overwritten if it exists.
        data (dict): content to serialise.
    """
    with open(path, 'w') as json_file:
        json.dump(data, json_file, indent=4)

    logger.info(f'Json file saved at: {path}')
@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Load a JSON file and return its content as a ``ConfigBox``.

    Args:
        path (Path): path to the JSON file.

    Returns:
        ConfigBox: the parsed data, accessible as attributes instead of
        dict keys.
    """
    with open(path, 'r') as f:
        content = json.load(f)

    logger.info(f"Json file loaded successfully from: {path}")
    # Wrap the parsed data; returning the bare ConfigBox class would throw
    # the loaded content away.
    return ConfigBox(content)
@ensure_annotations
def save_bin(data: Any, path: Path):
    """Serialise an object to a binary file with joblib.

    Args:
        data (Any): object to store.
        path (Path): destination file.
    """
    joblib.dump(data, path)
    logger.info(f'binary file saved at: {path}')
@ensure_annotations
def load_bin(path: Path) -> Any:
    """Load an object from a binary file with joblib.

    The return annotation is ``Any`` because the file may hold any object
    (``save_bin`` accepts ``Any``), not only a ``ConfigBox``.

    Args:
        path (Path): path to the binary file.

    Returns:
        Any: the object stored in the file.
    """
    data = joblib.load(path)
    logger.info(f'binary file has been loaded successfully from : {path}')
    return data
@ensure_annotations
def get_size(path: Path) -> str:
    """Report a file's size, rounded to whole kilobytes.

    Args:
        path (Path): path of the file.

    Returns:
        str: the size formatted as ``"~ <n> KB"``.
    """
    kilobytes = round(path.stat().st_size / 1024)
    return f"~ {kilobytes} KB"
def decodeImage(imgstring, fileName):
    """Decode a Base64 payload and write the resulting bytes to a file.

    Args:
        imgstring: Base64-encoded data (bytes or ASCII string).
        fileName: path of the file to write; it is overwritten if it exists.
    """
    imgdata = base64.b64decode(imgstring)
    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(fileName, 'wb') as f:
        f.write(imgdata)
def encodeImageIntoBase64(croppedImagePath):
    """Return the Base64 encoding of a file's raw bytes.

    Args:
        croppedImagePath: path of the file to read.

    Returns:
        bytes: the file content encoded as Base64.
    """
    with open(croppedImagePath, 'rb') as f:
        # Encode, not decode: the file holds raw bytes, not Base64 text.
        return base64.b64encode(f.read())