hakim committed on
Commit
05f7b3b
·
1 Parent(s): 00004c2

dvc and pipeline added

Browse files
.github/workflows/.gitkeep ADDED
File without changes
.github/workflows/main.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ # to run this workflow manually from the Actions tab
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ sync-to-hub:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ with:
16
+ fetch-depth: 0
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push --force https://Md-Hakim:$HF_TOKEN@huggingface.co/spaces/Md-Hakim/image-classification-using-mlops main
config/config.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifacts_root: artifacts
2
+
3
+
4
+ data_ingestion:
5
+ root_dir: artifacts/data_ingestion
6
+ source_URL: https://github.com/entbappy/Branching-tutorial/raw/master/Chicken-fecal-images.zip
7
+ local_data_file: artifacts/data_ingestion/data.zip
8
+ unzip_dir: artifacts/data_ingestion
9
+
10
+
11
+ prepare_base_model:
12
+ root_dir: artifacts/prepare_base_model
13
+ base_model_path: artifacts/prepare_base_model/base_model.h5
14
+ updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
15
+
16
+ prepare_callbacks:
17
+ root_dir: artifacts/prepare_callbacks
18
+ tensorboard_root_log_dir: artifacts/prepare_callbacks/tensorboard_log_dir
19
+ checkpoint_model_filepath: artifacts/prepare_callbacks/checkpoint_dir/model.h5
20
+
21
+
22
+
23
+
24
+ training:
25
+ root_dir: artifacts/training
26
+ trained_model_path: artifacts/training/model.h5
research/01_data_integation.ipynb ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops\\\\research'"
12
+ ]
13
+ },
14
+ "execution_count": 1,
15
+ "metadata": {},
16
+ "output_type": "execute_result"
17
+ }
18
+ ],
19
+ "source": [
20
+ "import os\n",
21
+ "%pwd"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/plain": [
32
+ "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops'"
33
+ ]
34
+ },
35
+ "execution_count": 2,
36
+ "metadata": {},
37
+ "output_type": "execute_result"
38
+ }
39
+ ],
40
+ "source": [
41
+ "os.chdir('../')\n",
42
+ "%pwd"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 3,
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "from dataclasses import dataclass\n",
52
+ "from pathlib import Path\n",
53
+ "\n",
54
+ "\n",
55
+ "@dataclass(frozen=True)\n",
56
+ "class DataIngestionConfig:\n",
57
+ " root_dir: Path\n",
58
+ " source_URL: str\n",
59
+ " local_data_file: Path\n",
60
+ " unzip_dir: Path"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 4,
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "from cnnClassfier.constants import *\n",
70
+ "from cnnClassfier.utils.common import read_yaml, create_directories"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": 5,
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "class ConfigurationManager:\n",
80
+ " def __init__(\n",
81
+ " self,\n",
82
+ " config_filepath = CONFIG_FILE_PATH,\n",
83
+ " params_filepath = PARAMS_FILE_PATH):\n",
84
+ "\n",
85
+ " self.config = read_yaml(config_filepath)\n",
86
+ " self.params = read_yaml(params_filepath)\n",
87
+ "\n",
88
+ " create_directories([self.config.artifacts_root])\n",
89
+ "\n",
90
+ "\n",
91
+ " \n",
92
+ " def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
93
+ " config = self.config.data_ingestion\n",
94
+ "\n",
95
+ " create_directories([config.root_dir])\n",
96
+ "\n",
97
+ " data_ingestion_config = DataIngestionConfig(\n",
98
+ " root_dir=config.root_dir,\n",
99
+ " source_URL=config.source_URL,\n",
100
+ " local_data_file=config.local_data_file,\n",
101
+ " unzip_dir=config.unzip_dir \n",
102
+ " )\n",
103
+ "\n",
104
+ " return data_ingestion_config\n",
105
+ " "
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 6,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "import os\n",
115
+ "from tqdm.notebook import tqdm\n",
116
+ "import urllib.request as request\n",
117
+ "import zipfile\n",
118
+ "from cnnClassfier import logger\n",
119
+ "from cnnClassfier.utils.common import get_size"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 10,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "class DataIngestion:\n",
129
+ " def __init__(self, config: DataIngestionConfig):\n",
130
+ " self.config = config\n",
131
+ "\n",
132
+ "\n",
133
+ " \n",
134
+ " def download_file(self):\n",
135
+ " if not os.path.exists(self.config.local_data_file):\n",
136
+ " filename, headers = request.urlretrieve(\n",
137
+ " url = self.config.source_URL,\n",
138
+ " filename = self.config.local_data_file\n",
139
+ " )\n",
140
+ " logger.info(f\"{filename} download! with following info: \\n{headers}\")\n",
141
+ " else:\n",
142
+ " logger.info(f\"File already exists of size: {get_size(Path(self.config.local_data_file))}\") \n",
143
+ "\n",
144
+ "\n",
145
+ " \n",
146
+ " def extract_zip_file(self):\n",
147
+ " \"\"\"\n",
148
+ " Reads the archive at self.config.local_data_file.\n",
149
+ " Extracts the zip file into the data directory\n",
150
+ " Function returns None\n",
151
+ " \"\"\"\n",
152
+ " unzip_path = self.config.unzip_dir\n",
153
+ " os.makedirs(unzip_path, exist_ok=True)\n",
154
+ " with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
155
+ " zip_ref.extractall(unzip_path)\n",
156
+ "\n"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 11,
162
+ "metadata": {},
163
+ "outputs": [
164
+ {
165
+ "name": "stdout",
166
+ "output_type": "stream",
167
+ "text": [
168
+ "[2024-07-26 23:35:21,072: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
169
+ "[2024-07-26 23:35:21,074: INFO: common: yaml file: params.yaml loaded successfully]\n",
170
+ "[2024-07-26 23:35:21,075: INFO: common: Created directory at: artifacts]\n",
171
+ "[2024-07-26 23:35:21,077: INFO: common: Created directory at: artifacts/data_ingestion]\n",
172
+ "[2024-07-26 23:43:29,635: INFO: 1170291011: artifacts/data_ingestion/data.zip download! with following info: \n",
173
+ "Connection: close\n",
174
+ "Content-Length: 11616915\n",
175
+ "Cache-Control: max-age=300\n",
176
+ "Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox\n",
177
+ "Content-Type: application/zip\n",
178
+ "ETag: \"adf745abc03891fe493c3be264ec012691fe3fa21d861f35a27edbe6d86a76b1\"\n",
179
+ "Strict-Transport-Security: max-age=31536000\n",
180
+ "X-Content-Type-Options: nosniff\n",
181
+ "X-Frame-Options: deny\n",
182
+ "X-XSS-Protection: 1; mode=block\n",
183
+ "X-GitHub-Request-Id: 38AD:28F2BB:3FC6CA:4CD38D:66A3DDDF\n",
184
+ "Accept-Ranges: bytes\n",
185
+ "Date: Fri, 26 Jul 2024 17:36:12 GMT\n",
186
+ "Via: 1.1 varnish\n",
187
+ "X-Served-By: cache-qpg1222-QPG\n",
188
+ "X-Cache: HIT\n",
189
+ "X-Cache-Hits: 0\n",
190
+ "X-Timer: S1722015372.265290,VS0,VE1\n",
191
+ "Vary: Authorization,Accept-Encoding,Origin\n",
192
+ "Access-Control-Allow-Origin: *\n",
193
+ "Cross-Origin-Resource-Policy: cross-origin\n",
194
+ "X-Fastly-Request-ID: 493e408e866b628ee5d1f857060d32bbaf38002f\n",
195
+ "Expires: Fri, 26 Jul 2024 17:41:12 GMT\n",
196
+ "Source-Age: 172\n",
197
+ "\n",
198
+ "]\n"
199
+ ]
200
+ }
201
+ ],
202
+ "source": [
203
+ "try:\n",
204
+ " config = ConfigurationManager()\n",
205
+ " data_ingestion_config = config.get_data_ingestion_config()\n",
206
+ " data_ingestion = DataIngestion(config=data_ingestion_config)\n",
207
+ " data_ingestion.download_file()\n",
208
+ " data_ingestion.extract_zip_file()\n",
209
+ "except Exception as e:\n",
210
+ " raise e"
211
+ ]
212
+ },
213
+ {
214
+ "cell_type": "code",
215
+ "execution_count": null,
216
+ "metadata": {},
217
+ "outputs": [],
218
+ "source": []
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": null,
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": []
226
+ }
227
+ ],
228
+ "metadata": {
229
+ "kernelspec": {
230
+ "display_name": "Python 3",
231
+ "language": "python",
232
+ "name": "python3"
233
+ },
234
+ "language_info": {
235
+ "codemirror_mode": {
236
+ "name": "ipython",
237
+ "version": 3
238
+ },
239
+ "file_extension": ".py",
240
+ "mimetype": "text/x-python",
241
+ "name": "python",
242
+ "nbconvert_exporter": "python",
243
+ "pygments_lexer": "ipython3",
244
+ "version": "3.11.0"
245
+ }
246
+ },
247
+ "nbformat": 4,
248
+ "nbformat_minor": 2
249
+ }
research/02_prepare_base_model.ipynb ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops\\\\research'"
12
+ ]
13
+ },
14
+ "execution_count": 1,
15
+ "metadata": {},
16
+ "output_type": "execute_result"
17
+ }
18
+ ],
19
+ "source": [
20
+ "import os\n",
21
+ "%pwd"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "os.chdir('../')"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 3,
36
+ "metadata": {},
37
+ "outputs": [
38
+ {
39
+ "data": {
40
+ "text/plain": [
41
+ "'d:\\\\MLOps-Project\\\\Chicken-Disease-Classification-Using-Mlops'"
42
+ ]
43
+ },
44
+ "execution_count": 3,
45
+ "metadata": {},
46
+ "output_type": "execute_result"
47
+ }
48
+ ],
49
+ "source": [
50
+ "%pwd"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 4,
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": [
59
+ "from dataclasses import dataclass\n",
60
+ "from pathlib import Path\n",
61
+ "\n",
62
+ "\n",
63
+ "@dataclass(frozen=True)\n",
64
+ "class PrepareBaseModelConfig:\n",
65
+ " root_dir: Path\n",
66
+ " base_model_path: Path\n",
67
+ " updated_base_model_path: Path\n",
68
+ " params_image_size: list\n",
69
+ " params_learning_rate: float\n",
70
+ " params_include_top: bool\n",
71
+ " params_weights: str\n",
72
+ " params_classes: int"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 5,
78
+ "metadata": {},
79
+ "outputs": [],
80
+ "source": [
81
+ "from cnnClassfier.constants import *\n",
82
+ "from cnnClassfier.utils.common import read_yaml, create_directories\n"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 10,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "class ConfigarationManager:\n",
92
+ " def __init__(\n",
93
+ " self,\n",
94
+ " config_filepath = CONFIG_FILE_PATH,\n",
95
+ " params_filepath = PARAMS_FILE_PATH\n",
96
+ " ):\n",
97
+ " self.config = read_yaml(config_filepath)\n",
98
+ " self.params = read_yaml(params_filepath)\n",
99
+ " \n",
100
+ " create_directories([self.config.artifacts_root])\n",
101
+ " \n",
102
+ " \n",
103
+ " \n",
104
+ " def get_prepare_base_model(self) -> PrepareBaseModelConfig:\n",
105
+ " config = self.config.prepare_base_model\n",
106
+ " \n",
107
+ " create_directories([config.root_dir])\n",
108
+ " \n",
109
+ " prepare_base_model_config = PrepareBaseModelConfig(\n",
110
+ " root_dir=Path(config.root_dir),\n",
111
+ " base_model_path= Path(config.base_model_path),\n",
112
+ " updated_base_model_path= Path(config.updated_base_model_path),\n",
113
+ " params_image_size=self.params.IMAZE_SIZE,\n",
114
+ " params_learning_rate=self.params.LEARNING_RATE,\n",
115
+ " params_include_top=self.params.INCLUDE_TOP,\n",
116
+ " params_weights=self.params.WEIGHTS,\n",
117
+ " params_classes=self.params.CLASSES\n",
118
+ " )\n",
119
+ " \n",
120
+ " return prepare_base_model_config"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 11,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "import os\n",
130
+ "import urllib.request as request\n",
131
+ "from zipfile import ZipFile\n",
132
+ "import tensorflow as tf"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 18,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "class PrepareBaseModel:\n",
142
+ " def __init__(self, config: PrepareBaseModelConfig):\n",
143
+ " self.config = config\n",
144
+ " \n",
145
+ " def get_base_model(self):\n",
146
+ " self.model = tf.keras.applications.vgg16.VGG16(\n",
147
+ " input_shape = self.config.params_image_size,\n",
148
+ " weights = self.config.params_weights,\n",
149
+ " include_top = self.config.params_include_top\n",
150
+ " )\n",
151
+ " \n",
152
+ " self.save_model(path = self.config.base_model_path, model = self.model)\n",
153
+ " \n",
154
+ " \n",
155
+ " @staticmethod\n",
156
+ " def prepare_full_model(model, classes, freeze_all, freeze_till, learinig_rate):\n",
157
+ " if freeze_all:\n",
158
+ " for layer in model.layers:\n",
159
+ " model.trainable = False\n",
160
+ " \n",
161
+ " elif (freeze_till is not None) and (freeze_till > 0):\n",
162
+ " for layer in model.layers[:-freeze_till]:\n",
163
+ " model.trainable = False\n",
164
+ " \n",
165
+ " flatten_in = tf.keras.layers.Flatten()(model.output)\n",
166
+ " prediction = tf.keras.layers.Dense(\n",
167
+ " units = classes,\n",
168
+ " activation = 'softmax'\n",
169
+ " )(flatten_in)\n",
170
+ " \n",
171
+ " full_model = tf.keras.models.Model(\n",
172
+ " inputs = model.input,\n",
173
+ " outputs = prediction\n",
174
+ " )\n",
175
+ " \n",
176
+ " full_model.compile(\n",
177
+ " optimizer = tf.keras.optimizers.SGD(lr = learinig_rate),\n",
178
+ " loss = tf.keras.losses.CategoricalCrossentropy(),\n",
179
+ " metrics = ['accuracy']\n",
180
+ " )\n",
181
+ " \n",
182
+ " full_model.summary()\n",
183
+ " return full_model\n",
184
+ " \n",
185
+ " def update_base_model(self):\n",
186
+ " self.full_model = self.prepare_full_model(\n",
187
+ " model = self.model,\n",
188
+ " classes = self.config.params_classes,\n",
189
+ " freeze_all=True,\n",
190
+ " freeze_till=None,\n",
191
+ " learinig_rate=self.config.params_learning_rate\n",
192
+ " )\n",
193
+ " self.save_model(path = self.config.updated_base_model_path, model = self.full_model)\n",
194
+ " \n",
195
+ " \n",
196
+ " @staticmethod\n",
197
+ " def save_model(path: Path, model: tf.keras.Model):\n",
198
+ " model.save(path)\n",
199
+ " "
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": 19,
205
+ "metadata": {},
206
+ "outputs": [
207
+ {
208
+ "name": "stdout",
209
+ "output_type": "stream",
210
+ "text": [
211
+ "[2024-07-27 01:03:25,207: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
212
+ "[2024-07-27 01:03:25,210: INFO: common: yaml file: params.yaml loaded successfully]\n",
213
+ "[2024-07-27 01:03:25,212: INFO: common: Created directory at: artifacts]\n",
214
+ "[2024-07-27 01:03:25,214: INFO: common: Created directory at: artifacts/prepare_base_model]\n",
215
+ "[2024-07-27 01:03:25,540: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n",
216
+ "[2024-07-27 01:03:25,680: WARNING: optimizer: `lr` is deprecated in Keras optimizer, please use `learning_rate` or use the legacy optimizer, e.g.,tf.keras.optimizers.legacy.SGD.]\n",
217
+ "Model: \"model_3\"\n",
218
+ "_________________________________________________________________\n",
219
+ " Layer (type) Output Shape Param # \n",
220
+ "=================================================================\n",
221
+ " input_4 (InputLayer) [(None, 224, 224, 3)] 0 \n",
222
+ " \n",
223
+ " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
224
+ " \n",
225
+ " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
226
+ " \n",
227
+ " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
228
+ " \n",
229
+ " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
230
+ " \n",
231
+ " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
232
+ " \n",
233
+ " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
234
+ " \n",
235
+ " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
236
+ " \n",
237
+ " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
238
+ " \n",
239
+ " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
240
+ " \n",
241
+ " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
242
+ " \n",
243
+ " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
244
+ " \n",
245
+ " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
246
+ " \n",
247
+ " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
248
+ " \n",
249
+ " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",
250
+ " \n",
251
+ " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
252
+ " \n",
253
+ " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
254
+ " \n",
255
+ " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
256
+ " \n",
257
+ " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n"
258
+ ]
259
+ },
260
+ {
261
+ "name": "stderr",
262
+ "output_type": "stream",
263
+ "text": [
264
+ "c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\engine\\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
265
+ " saving_api.save_model(\n"
266
+ ]
267
+ },
268
+ {
269
+ "name": "stdout",
270
+ "output_type": "stream",
271
+ "text": [
272
+ " \n",
273
+ " flatten_3 (Flatten) (None, 25088) 0 \n",
274
+ " \n",
275
+ " dense_3 (Dense) (None, 2) 50178 \n",
276
+ " \n",
277
+ "=================================================================\n",
278
+ "Total params: 14764866 (56.32 MB)\n",
279
+ "Trainable params: 50178 (196.01 KB)\n",
280
+ "Non-trainable params: 14714688 (56.13 MB)\n",
281
+ "_________________________________________________________________\n"
282
+ ]
283
+ }
284
+ ],
285
+ "source": [
286
+ "try:\n",
287
+ " config = ConfigarationManager()\n",
288
+ " prepare_base_model_config = config.get_prepare_base_model()\n",
289
+ " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n",
290
+ " prepare_base_model.get_base_model()\n",
291
+ " prepare_base_model.update_base_model()\n",
292
+ "except Exception as e:\n",
293
+ " raise e"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": []
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": null,
306
+ "metadata": {},
307
+ "outputs": [],
308
+ "source": []
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": null,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": []
316
+ }
317
+ ],
318
+ "metadata": {
319
+ "kernelspec": {
320
+ "display_name": "Python 3",
321
+ "language": "python",
322
+ "name": "python3"
323
+ },
324
+ "language_info": {
325
+ "codemirror_mode": {
326
+ "name": "ipython",
327
+ "version": 3
328
+ },
329
+ "file_extension": ".py",
330
+ "mimetype": "text/x-python",
331
+ "name": "python",
332
+ "nbconvert_exporter": "python",
333
+ "pygments_lexer": "ipython3",
334
+ "version": "3.11.0"
335
+ }
336
+ },
337
+ "nbformat": 4,
338
+ "nbformat_minor": 2
339
+ }
research/03_stage_callbacks.ipynb ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "from dataclasses import dataclass\n",
20
+ "from pathlib import Path"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 3,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "@dataclass(frozen=True)\n",
30
+ "class PrepareCallbacksConfig:\n",
31
+ " root_dir : Path\n",
32
+ " tensorboard_root_log_dir : Path\n",
33
+ " checkpoint_model_filepath : Path\n",
34
+ " "
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 4,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "from cnnClassfier.constants import *\n",
44
+ "from cnnClassfier.utils.common import read_yaml, create_directories"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 5,
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "class ConfigurationManager:\n",
54
+ " def __init__(\n",
55
+ " self, \n",
56
+ " config_filepath = CONFIG_FILE_PATH,\n",
57
+ " params_filepath = PARAMS_FILE_PATH):\n",
58
+ " self.config = read_yaml(config_filepath)\n",
59
+ " self.params = read_yaml(params_filepath)\n",
60
+ " create_directories([self.config.artifacts_root])\n",
61
+ " \n",
62
+ " \n",
63
+ " def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n",
64
+ " config = self.config.prepare_callbacks\n",
65
+ " model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n",
66
+ " create_directories([\n",
67
+ " Path(model_ckpt_dir),\n",
68
+ " Path(config.tensorboard_root_log_dir)\n",
69
+ " ])\n",
70
+ "\n",
71
+ " prepare_callback_config = PrepareCallbacksConfig(\n",
72
+ " root_dir=Path(config.root_dir),\n",
73
+ " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n",
74
+ " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n",
75
+ " )\n",
76
+ "\n",
77
+ " return prepare_callback_config\n",
78
+ " "
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 6,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "name": "stdout",
88
+ "output_type": "stream",
89
+ "text": [
90
+ "[2024-07-27 21:24:17,721: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
91
+ "]\n"
92
+ ]
93
+ }
94
+ ],
95
+ "source": [
96
+ "import os\n",
97
+ "import urllib.request as request\n",
98
+ "from zipfile import ZipFile\n",
99
+ "import tensorflow as tf\n",
100
+ "import time"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 19,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "class PrepareCallback:\n",
110
+ " def __init__(self, config: PrepareCallbacksConfig):\n",
111
+ " self.config = config\n",
112
+ " \n",
113
+ " @property\n",
114
+ " def _create_tb_callbacks(self):\n",
115
+ " timestamp = time.strftime('%Y-%m-%d-%H-%M-%S')\n",
116
+ " \n",
117
+ " tb_running_log_dir = os.path.join(\n",
118
+ " str(self.config.tensorboard_root_log_dir), # Convert to string\n",
119
+ " f\"tb_logs_at_{timestamp}\",\n",
120
+ " )\n",
121
+ " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n",
122
+ "\n",
123
+ " @property\n",
124
+ " def _create_ckpt_callbacks(self):\n",
125
+ " return tf.keras.callbacks.ModelCheckpoint(\n",
126
+ " filepath=str(self.config.checkpoint_model_filepath), # Convert to string\n",
127
+ " save_best_only=True\n",
128
+ " )\n",
129
+ "\n",
130
+ " def get_tb_callbacks(self):\n",
131
+ " return [\n",
132
+ " self._create_tb_callbacks,\n",
133
+ " self._create_ckpt_callbacks\n",
134
+ " ]"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": 20,
140
+ "metadata": {},
141
+ "outputs": [
142
+ {
143
+ "name": "stdout",
144
+ "output_type": "stream",
145
+ "text": [
146
+ "[2024-07-27 21:35:22,818: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
147
+ "[2024-07-27 21:35:22,820: INFO: common: yaml file: params.yaml loaded successfully]\n",
148
+ "[2024-07-27 21:35:22,821: INFO: common: Created directory at: artifacts]\n",
149
+ "[2024-07-27 21:35:22,823: INFO: common: Created directory at: artifacts\\prepare_callbacks\\checkpoint_dir]\n",
150
+ "[2024-07-27 21:35:22,824: INFO: common: Created directory at: artifacts\\prepare_callbacks\\tensorboard_log_dir]\n"
151
+ ]
152
+ }
153
+ ],
154
+ "source": [
155
+ "try:\n",
156
+ " config = ConfigurationManager()\n",
157
+ " prepare_callbacks_config = config.get_prepare_callback_config()\n",
158
+ " prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n",
159
+ " callback_list = prepare_callbacks.get_tb_callbacks()\n",
160
+ " \n",
161
+ "except Exception as e:\n",
162
+ " raise e"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": []
171
+ }
172
+ ],
173
+ "metadata": {
174
+ "kernelspec": {
175
+ "display_name": "Python 3",
176
+ "language": "python",
177
+ "name": "python3"
178
+ },
179
+ "language_info": {
180
+ "codemirror_mode": {
181
+ "name": "ipython",
182
+ "version": 3
183
+ },
184
+ "file_extension": ".py",
185
+ "mimetype": "text/x-python",
186
+ "name": "python",
187
+ "nbconvert_exporter": "python",
188
+ "pygments_lexer": "ipython3",
189
+ "version": "3.11.0"
190
+ }
191
+ },
192
+ "nbformat": 4,
193
+ "nbformat_minor": 2
194
+ }
research/04_train.ipynb ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "from dataclasses import dataclass\n",
20
+ "from pathlib import Path\n",
21
+ "\n",
22
+ "\n",
23
+ "@dataclass(frozen=True)\n",
24
+ "class TrainingConfig:\n",
25
+ " root_dir: Path\n",
26
+ " trained_model_path: Path\n",
27
+ " updated_base_model_path: Path\n",
28
+ " training_data: Path\n",
29
+ " params_epochs: int\n",
30
+ " params_batch_size: int\n",
31
+ " params_is_augmentation: bool\n",
32
+ " params_image_size: list\n",
33
+ "\n",
34
+ "\n",
35
+ "\n",
36
+ "@dataclass(frozen=True)\n",
37
+ "class PrepareCallbacksConfig:\n",
38
+ " root_dir: Path\n",
39
+ " tensorboard_root_log_dir: Path\n",
40
+ " checkpoint_model_filepath: Path"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 7,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "from cnnClassfier.constants import *\n",
50
+ "from cnnClassfier.utils.common import read_yaml, create_directories\n",
51
+ "import tensorflow as tf\n",
52
+ "\n",
53
+ "\n",
54
+ "class ConfigurationManager:\n",
55
+ " def __init__(\n",
56
+ " self, \n",
57
+ " config_filepath = CONFIG_FILE_PATH,\n",
58
+ " params_filepath = PARAMS_FILE_PATH):\n",
59
+ " self.config = read_yaml(config_filepath)\n",
60
+ " self.params = read_yaml(params_filepath)\n",
61
+ " create_directories([self.config.artifacts_root])\n",
62
+ "\n",
63
+ "\n",
64
+ " \n",
65
+ " def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n",
66
+ " config = self.config.prepare_callbacks\n",
67
+ " model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n",
68
+ " create_directories([\n",
69
+ " Path(model_ckpt_dir),\n",
70
+ " Path(config.tensorboard_root_log_dir)\n",
71
+ " ])\n",
72
+ "\n",
73
+ " prepare_callback_config = PrepareCallbacksConfig(\n",
74
+ " root_dir=Path(config.root_dir),\n",
75
+ " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n",
76
+ " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n",
77
+ " )\n",
78
+ "\n",
79
+ " return prepare_callback_config\n",
80
+ " \n",
81
+ "\n",
82
+ "\n",
83
+ "\n",
84
+ "\n",
85
+ " def get_training_config(self) -> TrainingConfig:\n",
86
+ " training = self.config.training\n",
87
+ " prepare_base_model = self.config.prepare_base_model\n",
88
+ " params = self.params\n",
89
+ " training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"Chicken-fecal-images\")\n",
90
+ " create_directories([\n",
91
+ " Path(training.root_dir)\n",
92
+ " ])\n",
93
+ "\n",
94
+ " training_config = TrainingConfig(\n",
95
+ " root_dir=Path(training.root_dir),\n",
96
+ " trained_model_path=Path(training.trained_model_path),\n",
97
+ " updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n",
98
+ " training_data=Path(training_data),\n",
99
+ " params_epochs=params.EPOCHS,\n",
100
+ " params_batch_size=params.BATCH_SIZE,\n",
101
+ " params_is_augmentation=params.AUGMENTATION,\n",
102
+ " params_image_size=params.IMAZE_SIZE\n",
103
+ " )\n",
104
+ "\n",
105
+ " return training_config"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 8,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "import time\n",
115
+ "class PrepareCallback:\n",
116
+ " def __init__(self, config: PrepareCallbacksConfig):\n",
117
+ " self.config = config\n",
118
+ " \n",
119
+ " @property\n",
120
+ " def _create_tb_callbacks(self):\n",
121
+ " timestamp = time.strftime('%Y-%m-%d-%H-%M-%S')\n",
122
+ " \n",
123
+ " tb_running_log_dir = os.path.join(\n",
124
+ " str(self.config.tensorboard_root_log_dir), # Convert to string\n",
125
+ " f\"tb_logs_at_{timestamp}\",\n",
126
+ " )\n",
127
+ " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n",
128
+ "\n",
129
+ " @property\n",
130
+ " def _create_ckpt_callbacks(self):\n",
131
+ " return tf.keras.callbacks.ModelCheckpoint(\n",
132
+ " filepath=str(self.config.checkpoint_model_filepath), # Convert to string\n",
133
+ " save_best_only=True\n",
134
+ " )\n",
135
+ "\n",
136
+ " def get_tb_callbacks(self):\n",
137
+ " return [\n",
138
+ " self._create_tb_callbacks,\n",
139
+ " self._create_ckpt_callbacks\n",
140
+ " ]\n",
141
+ " "
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 9,
147
+ "metadata": {},
148
+ "outputs": [],
149
+ "source": [
150
+ "import os\n",
151
+ "import urllib.request as request\n",
152
+ "from zipfile import ZipFile\n",
153
+ "import tensorflow as tf\n",
154
+ "import time\n",
155
+ "\n",
156
+ "\n",
157
+ "class Training:\n",
158
+ " def __init__(self, config: TrainingConfig):\n",
159
+ " self.config = config\n",
160
+ " \n",
161
+ " def get_base_model(self):\n",
162
+ " self.model = tf.keras.models.load_model(\n",
163
+ " self.config.updated_base_model_path\n",
164
+ " )\n",
165
+ " \n",
166
+ " def train_valid_generator(self):\n",
167
+ "\n",
168
+ " datagenerator_kwargs = dict(\n",
169
+ " rescale = 1./255,\n",
170
+ " validation_split=0.20\n",
171
+ " )\n",
172
+ "\n",
173
+ " dataflow_kwargs = dict(\n",
174
+ " target_size=self.config.params_image_size[:-1],\n",
175
+ " batch_size=self.config.params_batch_size,\n",
176
+ " interpolation=\"bilinear\"\n",
177
+ " )\n",
178
+ "\n",
179
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
180
+ " **datagenerator_kwargs\n",
181
+ " )\n",
182
+ "\n",
183
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
184
+ " directory=self.config.training_data,\n",
185
+ " subset=\"validation\",\n",
186
+ " shuffle=False,\n",
187
+ " **dataflow_kwargs\n",
188
+ " )\n",
189
+ "\n",
190
+ " if self.config.params_is_augmentation:\n",
191
+ " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
192
+ " rotation_range=40,\n",
193
+ " horizontal_flip=True,\n",
194
+ " width_shift_range=0.2,\n",
195
+ " height_shift_range=0.2,\n",
196
+ " shear_range=0.2,\n",
197
+ " zoom_range=0.2,\n",
198
+ " **datagenerator_kwargs\n",
199
+ " )\n",
200
+ " else:\n",
201
+ " train_datagenerator = valid_datagenerator\n",
202
+ "\n",
203
+ " self.train_generator = train_datagenerator.flow_from_directory(\n",
204
+ " directory=self.config.training_data,\n",
205
+ " subset=\"training\",\n",
206
+ " shuffle=True,\n",
207
+ " **dataflow_kwargs\n",
208
+ " )\n",
209
+ "\n",
210
+ " @staticmethod\n",
211
+ " def save_model(path: Path, model: tf.keras.Model):\n",
212
+ " model.save(path)\n",
213
+ "\n",
214
+ "\n",
215
+ " def train(self, callback_list: list):\n",
216
+ " self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n",
217
+ " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n",
218
+ "\n",
219
+ " self.model.fit(\n",
220
+ " self.train_generator,\n",
221
+ " epochs=self.config.params_epochs,\n",
222
+ " steps_per_epoch=self.steps_per_epoch,\n",
223
+ " validation_steps=self.validation_steps,\n",
224
+ " validation_data=self.valid_generator,\n",
225
+ " callbacks=callback_list\n",
226
+ " )\n",
227
+ "\n",
228
+ " self.save_model(\n",
229
+ " path=self.config.trained_model_path,\n",
230
+ " model=self.model\n",
231
+ " )\n",
232
+ "\n",
233
+ "\n",
234
+ " "
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 10,
240
+ "metadata": {},
241
+ "outputs": [
242
+ {
243
+ "name": "stdout",
244
+ "output_type": "stream",
245
+ "text": [
246
+ "[2024-07-27 23:47:55,531: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
247
+ "[2024-07-27 23:47:55,534: INFO: common: yaml file: params.yaml loaded successfully]\n",
248
+ "[2024-07-27 23:47:55,536: INFO: common: Created directory at: artifacts]\n",
249
+ "[2024-07-27 23:47:55,537: INFO: common: Created directory at: artifacts\\prepare_callbacks\\checkpoint_dir]\n",
250
+ "[2024-07-27 23:47:55,538: INFO: common: Created directory at: artifacts\\prepare_callbacks\\tensorboard_log_dir]\n",
251
+ "[2024-07-27 23:47:55,539: INFO: common: Created directory at: artifacts\\training]\n",
252
+ "[2024-07-27 23:47:56,129: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n",
253
+ "]\n",
254
+ "[2024-07-27 23:47:56,313: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n",
255
+ "]\n",
256
+ "Found 78 images belonging to 2 classes.\n",
257
+ "Found 312 images belonging to 2 classes.\n",
258
+ "[2024-07-27 23:47:58,334: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
259
+ "]\n",
260
+ "19/19 [==============================] - 53s 3s/step - loss: 13.0395 - accuracy: 0.5270 - val_loss: 24.1360 - val_accuracy: 0.3906\n"
261
+ ]
262
+ },
263
+ {
264
+ "name": "stderr",
265
+ "output_type": "stream",
266
+ "text": [
267
+ "c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\engine\\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
268
+ " saving_api.save_model(\n"
269
+ ]
270
+ }
271
+ ],
272
+ "source": [
273
+ "try:\n",
274
+ " config = ConfigurationManager()\n",
275
+ " prepare_callbacks_config = config.get_prepare_callback_config()\n",
276
+ " prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n",
277
+ " callback_list = prepare_callbacks.get_tb_callbacks()\n",
278
+ "\n",
279
+ " training_config = config.get_training_config()\n",
280
+ " training = Training(config=training_config)\n",
281
+ " training.get_base_model()\n",
282
+ " training.train_valid_generator()\n",
283
+ " training.train(\n",
284
+ " callback_list=callback_list\n",
285
+ " )\n",
286
+ " \n",
287
+ "except Exception as e:\n",
288
+ " raise e"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": null,
294
+ "metadata": {},
295
+ "outputs": [],
296
+ "source": []
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": null,
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": []
304
+ }
305
+ ],
306
+ "metadata": {
307
+ "kernelspec": {
308
+ "display_name": "Python 3",
309
+ "language": "python",
310
+ "name": "python3"
311
+ },
312
+ "language_info": {
313
+ "codemirror_mode": {
314
+ "name": "ipython",
315
+ "version": 3
316
+ },
317
+ "file_extension": ".py",
318
+ "mimetype": "text/x-python",
319
+ "name": "python",
320
+ "nbconvert_exporter": "python",
321
+ "pygments_lexer": "ipython3",
322
+ "version": "3.11.0"
323
+ }
324
+ },
325
+ "nbformat": 4,
326
+ "nbformat_minor": 2
327
+ }
research/05_model_evaluation.ipynb ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 3,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "import tensorflow as tf"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 4,
25
+ "metadata": {},
26
+ "outputs": [
27
+ {
28
+ "name": "stdout",
29
+ "output_type": "stream",
30
+ "text": [
31
+ "WARNING:tensorflow:From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n",
32
+ "\n",
33
+ "WARNING:tensorflow:From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n",
34
+ "\n"
35
+ ]
36
+ }
37
+ ],
38
+ "source": [
39
+ "model = tf.keras.models.load_model('artifacts/training/model.h5')"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 5,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "from dataclasses import dataclass\n",
49
+ "from pathlib import Path\n",
50
+ "\n",
51
+ "@dataclass(frozen=True)\n",
52
+ "class EvaluationConfig:\n",
53
+ " path_of_model : Path\n",
54
+ " training_data : Path\n",
55
+ " all_params : dict\n",
56
+ " params_image_size : list\n",
57
+ " params_batch_size: int"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": 8,
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "from cnnClassfier.constants import *\n",
67
+ "from cnnClassfier.utils.common import read_yaml, create_directories, save_json"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 14,
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "class ConfigurationManager:\n",
77
+ " def __init__(\n",
78
+ " self, \n",
79
+ " config_filepath = CONFIG_FILE_PATH,\n",
80
+ " params_filepath = PARAMS_FILE_PATH):\n",
81
+ " self.config = read_yaml(config_filepath)\n",
82
+ " self.params = read_yaml(params_filepath)\n",
83
+ " create_directories([self.config.artifacts_root])\n",
84
+ " \n",
85
+ " \n",
86
+ " def get_validation_config(self) -> EvaluationConfig:\n",
87
+ " eval_config = EvaluationConfig(\n",
88
+ " path_of_model=\"artifacts/training/model.h5\",\n",
89
+ " training_data=\"artifacts/data_ingestion/Chicken-fecal-images\",\n",
90
+ " all_params=self.params,\n",
91
+ " params_image_size=self.params.IMAZE_SIZE,\n",
92
+ " params_batch_size=self.params.BATCH_SIZE\n",
93
+ " )\n",
94
+ " return eval_config\n"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 15,
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": [
103
+ "from urllib.parse import urlparse\n",
104
+ "\n",
105
+ "class Evaluation:\n",
106
+ " def __init__(self, config: EvaluationConfig):\n",
107
+ " self.config = config\n",
108
+ " \n",
109
+ " def _valid_generator(self):\n",
110
+ " datagenerator_kwargs = dict(\n",
111
+ " rescale = 1./255,\n",
112
+ " validation_split = 0.30\n",
113
+ " )\n",
114
+ " \n",
115
+ " dataflow_kwargs = dict(\n",
116
+ " target_size = self.config.params_image_size[:-1],\n",
117
+ " batch_size= self.config.params_batch_size,\n",
118
+ " interpolation = 'bilinear'\n",
119
+ " )\n",
120
+ " \n",
121
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
122
+ " **datagenerator_kwargs\n",
123
+ " )\n",
124
+ " \n",
125
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
126
+ " directory = self.config.training_data,\n",
127
+ " subset = 'validation',\n",
128
+ " shuffle = True,\n",
129
+ " **dataflow_kwargs\n",
130
+ " )\n",
131
+ " \n",
132
+ " @staticmethod\n",
133
+ " def load_model(path: Path) -> tf.keras.Model:\n",
134
+ " return tf.keras.models.load_model(path)\n",
135
+ " \n",
136
+ " def evaluation(self):\n",
137
+ " self.model = self.load_model(self.config.path_of_model)\n",
138
+ " self._valid_generator()\n",
139
+ " self.score = model.evaluate(self.valid_generator)\n",
140
+ " \n",
141
+ " def save_score(self):\n",
142
+ " scores = {'loss' : self.score[0], 'accuracy' : self.score[1]}\n",
143
+ " save_json(path = Path('scores.json'), data = scores)"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": 16,
149
+ "metadata": {},
150
+ "outputs": [
151
+ {
152
+ "name": "stdout",
153
+ "output_type": "stream",
154
+ "text": [
155
+ "[2024-07-28 02:01:54,885: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
156
+ "[2024-07-28 02:01:54,889: INFO: common: yaml file: params.yaml loaded successfully]\n",
157
+ "[2024-07-28 02:01:54,890: INFO: common: Created directory at: artifacts]\n",
158
+ "Found 116 images belonging to 2 classes.\n",
159
+ "WARNING:tensorflow:From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
160
+ "\n",
161
+ "[2024-07-28 02:01:56,004: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
162
+ "]\n",
163
+ "8/8 [==============================] - 11s 1s/step - loss: 0.3306 - accuracy: 0.9569\n",
164
+ "[2024-07-28 02:02:06,982: INFO: common: Json file saved at: scores.json]\n"
165
+ ]
166
+ }
167
+ ],
168
+ "source": [
169
+ "try:\n",
170
+ " config = ConfigurationManager()\n",
171
+ " val_config = config.get_validation_config()\n",
172
+ " evaluation = Evaluation(val_config)\n",
173
+ " evaluation.evaluation()\n",
174
+ " evaluation.save_score()\n",
175
+ " \n",
176
+ "except Exception as e:\n",
177
+ " raise e"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": []
186
+ }
187
+ ],
188
+ "metadata": {
189
+ "kernelspec": {
190
+ "display_name": "Python 3",
191
+ "language": "python",
192
+ "name": "python3"
193
+ },
194
+ "language_info": {
195
+ "codemirror_mode": {
196
+ "name": "ipython",
197
+ "version": 3
198
+ },
199
+ "file_extension": ".py",
200
+ "mimetype": "text/x-python",
201
+ "name": "python",
202
+ "nbconvert_exporter": "python",
203
+ "pygments_lexer": "ipython3",
204
+ "version": "3.11.0"
205
+ }
206
+ },
207
+ "nbformat": 4,
208
+ "nbformat_minor": 2
209
+ }
research/tails.ipynb ADDED
File without changes
src/cnnClassfier/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+
5
# Record layout shared by the file handler and the console handler.
logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"

# Log file location; the directory is created on import so the
# FileHandler below can open its file.
log_dir = 'logs'
log_filepath = os.path.join(log_dir, 'running_logs.log')
os.makedirs(log_dir, exist_ok=True)

# Send INFO-and-above records both to the log file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[
        logging.FileHandler(log_filepath),
        logging.StreamHandler(sys.stdout),
    ],
)

# Package-wide logger imported by the other modules as ``from cnnClassfier import logger``.
logger = logging.getLogger('cnnClassifierLogger')
23
+
src/cnnClassfier/components/__init__.py ADDED
File without changes
src/cnnClassfier/components/base_model.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request as request
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ from cnnClassfier.entity.config_entity import PrepareBaseModelConfig
6
+ from pathlib import Path
7
+
8
+
9
+
10
class PrepareBaseModel:
    """Create the VGG16 base model and a variant with a new classification head.

    Both models are saved to the paths given in the configuration.
    """

    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config

    def get_base_model(self):
        """Instantiate VGG16 from the configured parameters and save it.

        The model is kept on ``self.model`` and written to
        ``config.base_model_path``.
        """
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top,
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def prepare_full_model(model, classes, freeze_all, freeze_till, learinig_rate):
        """Freeze base layers, attach a softmax head and compile the result.

        Args:
            model: Base Keras model whose output feeds the new head.
            classes: Number of units in the final softmax layer.
            freeze_all: When true, every layer of ``model`` is frozen.
            freeze_till: Used only when ``freeze_all`` is false. If it is a
                positive integer, every layer except the last ``freeze_till``
                layers is frozen.
            learinig_rate: Learning rate passed to the SGD optimizer (the
                parameter name is kept as-is for caller compatibility).

        Returns:
            The compiled model (flatten + dense softmax on top of ``model``).
        """
        if freeze_all:
            layers_to_freeze = model.layers
        elif (freeze_till is not None) and (freeze_till > 0):
            layers_to_freeze = model.layers[:-freeze_till]
        else:
            layers_to_freeze = []

        # Freeze per layer: assigning ``model.trainable`` inside the loop
        # would freeze the whole model and ignore ``freeze_till``.
        for layer in layers_to_freeze:
            layer.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation='softmax',
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction,
        )

        full_model.compile(
            # ``learning_rate`` is the supported keyword; ``lr`` is deprecated.
            optimizer=tf.keras.optimizers.SGD(learning_rate=learinig_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=['accuracy'],
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Build the full model from ``self.model`` and save it.

        Requires ``get_base_model`` to have been called first so that
        ``self.model`` exists. All base layers are frozen. The result is kept
        on ``self.full_model`` and written to ``config.updated_base_model_path``.
        """
        self.full_model = self.prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learinig_rate=self.config.params_learning_rate,
        )
        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` using ``model.save``."""
        model.save(path)
68
+
src/cnnClassfier/components/callbacks.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from cnnClassfier.config.configuration import PrepareCallbacksConfig
3
+ import time
4
+ import os
5
+ import tensorflow as tf
6
+
7
+
8
class PrepareCallback:
    """Builds the Keras callbacks (TensorBoard and checkpoint) from the config."""

    def __init__(self, config: PrepareCallbacksConfig):
        self.config = config

    @property
    def _create_tb_callbacks(self):
        """Return a new TensorBoard callback writing to a timestamped run directory."""
        run_stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
        # os.path.join is given a plain string rather than the configured value directly.
        log_root = str(self.config.tensorboard_root_log_dir)
        run_log_dir = os.path.join(log_root, f"tb_logs_at_{run_stamp}")
        return tf.keras.callbacks.TensorBoard(log_dir=run_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        """Return a new ModelCheckpoint callback that keeps only the best model."""
        checkpoint_path = str(self.config.checkpoint_model_filepath)
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_path,
            save_best_only=True,
        )

    def get_tb_callbacks(self):
        """Return ``[tensorboard_callback, checkpoint_callback]``, freshly created."""
        tensorboard_cb = self._create_tb_callbacks
        checkpoint_cb = self._create_ckpt_callbacks
        return [tensorboard_cb, checkpoint_cb]
src/cnnClassfier/components/data_ingestion.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from tqdm.notebook import tqdm
3
+ import urllib.request as request
4
+ import zipfile
5
+ from cnnClassfier import logger
6
+ from cnnClassfier.utils.common import get_size
7
+ from cnnClassfier.entity.config_entity import DataIngestionConfig
8
+ from pathlib import Path
9
+
10
class DataIngestion:
    """Downloads the dataset archive and unpacks it, driven by the config."""

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self):
        """Download ``source_URL`` to ``local_data_file`` unless that file already exists."""
        target = self.config.local_data_file

        if os.path.exists(target):
            # Nothing to fetch; just report the size of the file already on disk.
            logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}")
            return

        filename, headers = request.urlretrieve(
            url=self.config.source_URL,
            filename=target,
        )
        logger.info(f"{filename} download! with following info: \n{headers}")

    def extract_zip_file(self):
        """Extract ``local_data_file`` into ``unzip_dir``, creating the directory if needed.

        Returns None.
        """
        destination = self.config.unzip_dir
        os.makedirs(destination, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as archive:
            archive.extractall(destination)
38
+
src/cnnClassfier/components/evaluation.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urllib.parse import urlparse
2
+ from cnnClassfier.entity.config_entity import EvaluationConfig
3
+ from pathlib import Path
4
+ import tensorflow as tf
5
+ from cnnClassfier.utils.common import save_json
6
+
7
class Evaluation:
    """Evaluates a saved model on a validation split and stores the scores."""

    def __init__(self, config: EvaluationConfig):
        self.config = config

    def _valid_generator(self):
        """Create ``self.valid_generator`` over the validation subset of the data directory."""
        # NOTE(review): Training.train_valid_generator uses validation_split=0.20;
        # with 0.30 here the evaluated subset may include images used for
        # training - confirm this is intended.
        datagenerator_kwargs = {
            "rescale": 1. / 255,
            "validation_split": 0.30,
        }

        # params_image_size carries a trailing entry that is dropped for target_size.
        dataflow_kwargs = {
            "target_size": self.config.params_image_size[:-1],
            "batch_size": self.config.params_batch_size,
            "interpolation": 'bilinear',
        }

        image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = image_generator.flow_from_directory(
            directory=self.config.training_data,
            subset='validation',
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the configured model, build the generator and store the result in ``self.score``."""
        loaded_model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = loaded_model.evaluate(self.valid_generator)

    def save_score(self):
        """Write loss and accuracy from ``self.score`` to ``scores.json``."""
        loss_value = self.score[0]
        accuracy_value = self.score[1]
        scores = {'loss': loss_value, 'accuracy': accuracy_value}
        save_json(path=Path('scores.json'), data=scores)
src/cnnClassfier/components/train.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request as request
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ import time
6
+ from cnnClassfier.entity.config_entity import TrainingConfig
7
+ from pathlib import Path
8
+
9
+
10
class Training:
    """Loads the prepared model, builds data generators, trains and saves the result."""

    def __init__(self, config: TrainingConfig):
        self.config = config

    def get_base_model(self):
        """Load the model stored at ``updated_base_model_path`` into ``self.model``."""
        model_path = self.config.updated_base_model_path
        self.model = tf.keras.models.load_model(model_path)

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator`` from the data directory.

        Both generators rescale pixel values by 1/255 and share a 20%
        validation split. Augmentation is applied to the training generator
        only when ``params_is_augmentation`` is true.
        """
        make_generator = tf.keras.preprocessing.image.ImageDataGenerator

        datagenerator_kwargs = {
            "rescale": 1. / 255,
            "validation_split": 0.20,
        }

        # params_image_size carries a trailing entry that is dropped for target_size.
        dataflow_kwargs = {
            "target_size": self.config.params_image_size[:-1],
            "batch_size": self.config.params_batch_size,
            "interpolation": "bilinear",
        }

        valid_datagenerator = make_generator(**datagenerator_kwargs)

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if not self.config.params_is_augmentation:
            # Without augmentation the validation generator's settings are reused.
            train_datagenerator = valid_datagenerator
        else:
            train_datagenerator = make_generator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` using ``model.save``."""
        model.save(path)

    def train(self, callback_list: list):
        """Fit ``self.model`` on the generators, then save it to ``trained_model_path``.

        Requires ``get_base_model`` and ``train_valid_generator`` to have been
        called first. Step counts are the number of full batches in each
        generator (integer division of samples by batch size).
        """
        train_flow = self.train_generator
        valid_flow = self.valid_generator

        self.steps_per_epoch = train_flow.samples // train_flow.batch_size
        self.validation_steps = valid_flow.samples // valid_flow.batch_size

        self.model.fit(
            train_flow,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=valid_flow,
            callbacks=callback_list,
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model,
        )
84
+ )
85
+
86
+
87
+
src/cnnClassfier/config/__init__.py ADDED
File without changes
src/cnnClassfier/config/configuration.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from cnnClassfier.constants import *
3
+ from cnnClassfier.utils.common import read_yaml, create_directories
4
+ from cnnClassfier.entity.config_entity import (DataIngestionConfig,
5
+ PrepareBaseModelConfig,
6
+ PrepareCallbacksConfig,
7
+ TrainingConfig,
8
+ EvaluationConfig)
9
+
10
+
11
class ConfigurationManager:
    """Reads the config and params YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the pipeline configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the data-ingestion root directory and return its configuration."""
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_URL=config.source_URL,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir
        )

        return data_ingestion_config

    def get_prepare_base_model(self) -> PrepareBaseModelConfig:
        """Create the base-model root directory and return its configuration."""
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            updated_base_model_path=Path(config.updated_base_model_path),
            params_image_size=self.params.IMAZE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES
        )

        return prepare_base_model_config

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Create the checkpoint and TensorBoard directories and return the callback configuration."""
        config = self.config.prepare_callbacks
        # The checkpoint path names a file; only its parent directory is created.
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callback_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
        )

        return prepare_callback_config

    def get_training_config(self) -> TrainingConfig:
        """Create the training root directory and return the training configuration."""
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        # The images live in this sub-directory of the unzip directory.
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chicken-fecal-images")
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAZE_SIZE
        )

        return training_config

    def get_validation_config(self) -> EvaluationConfig:
        """Return the evaluation configuration.

        The model and data paths are derived from the same config entries
        that ``get_training_config`` uses, so evaluation follows any change
        made in the configuration file instead of relying on hard-coded paths.
        """
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chicken-fecal-images")

        eval_config = EvaluationConfig(
            path_of_model=Path(self.config.training.trained_model_path),
            training_data=Path(training_data),
            all_params=self.params,
            params_image_size=self.params.IMAZE_SIZE,
            params_batch_size=self.params.BATCH_SIZE
        )
        return eval_config
src/cnnClassfier/constants/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ CONFIG_FILE_PATH = Path("config/config.yaml")
4
+ PARAMS_FILE_PATH = Path("params.yaml")
src/cnnClassfier/entity/__init__.py ADDED
File without changes
src/cnnClassfier/entity/config_entity.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+
5
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage."""

    # Working directory of the stage.
    root_dir: Path
    # URL the archive is downloaded from.
    source_URL: str
    # Local path the downloaded archive is stored at.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path
11
+
12
+
13
+
14
+
15
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the base-model preparation stage."""

    # Working directory of the stage.
    root_dir: Path
    # Where the base model is saved.
    base_model_path: Path
    # Where the model with the new classification head is saved.
    updated_base_model_path: Path
    # Values taken from the params file.
    params_image_size: list
    params_learning_rate: float
    params_include_top: bool
    params_weights: str
    params_classes: int
25
+
26
+
27
+
28
@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable settings for the training callbacks."""

    # Working directory of the stage.
    root_dir: Path
    # Directory under which per-run TensorBoard log directories are created.
    tensorboard_root_log_dir: Path
    # File path the checkpoint callback saves the model to.
    checkpoint_model_filepath: Path
33
+
34
+
35
+
36
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the training stage."""

    # Working directory of the stage.
    root_dir: Path
    # Where the trained model is saved.
    trained_model_path: Path
    # Model loaded as the starting point for training.
    updated_base_model_path: Path
    # Directory the image generators read from.
    training_data: Path
    # Values taken from the params file.
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list
46
+
47
+
48
@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable settings for the evaluation stage."""

    # Saved model to evaluate.
    path_of_model: Path
    # Directory the validation generator reads from.
    training_data: Path
    # The full params mapping, kept alongside the individual values below.
    all_params: dict
    params_image_size: list
    params_batch_size: int
src/cnnClassfier/pipeline/__init__.py ADDED
File without changes
src/cnnClassfier/pipeline/predict.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from tensorflow.keras.models import load_model
3
+ from tensorflow.keras.preprocessing import image
4
+ import os
5
+
6
+
7
+
8
class Prediction:
    """Classifies a single image file with a saved Keras model."""

    def __init__(self, filename, model_path="model.h5"):
        """Store the image path and the model location.

        Args:
            filename: Path of the image to classify.
            model_path: Path of the saved model; defaults to ``model.h5`` in
                the working directory, which was the previous fixed location.
        """
        self.filename = filename
        self.model_path = model_path

    def predict(self):
        """Load the model, classify the image and return the label.

        Returns:
            ``'Healthy'`` when the predicted class index is 1, otherwise
            ``'Coccidiosis'``.
        """
        # load model
        model = load_model(self.model_path)

        test_image = image.load_img(self.filename, target_size=(224, 224))
        test_image = image.img_to_array(test_image)
        # The training generators rescale pixels by 1/255; apply the same
        # scaling here so inference inputs match the training inputs.
        # NOTE(review): assumes the loaded model was produced by that training stage - confirm.
        test_image = test_image / 255.0
        test_image = np.expand_dims(test_image, axis=0)  # add the batch dimension
        result = np.argmax(model.predict(test_image), axis=1)
        print(result)

        if result[0] == 1:
            prediction = 'Healthy'
        else:
            prediction = 'Coccidiosis'

        return prediction
src/cnnClassfier/pipeline/stage02_base_model.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassfier.config.configuration import ConfigurationManager
2
+ from cnnClassfier.components.base_model import PrepareBaseModel
3
+ from cnnClassfier import logger
4
+
5
+
6
# Human-readable stage name used in the start/completion log messages.
STAGE_NAME = 'Prepare Base Model Stage'
7
+
8
class PrepareBaseModelTrainigPipeline:
    """Pipeline stage that creates and saves the base and updated models."""

    def __init__(self):
        pass

    def main(self):
        """Build the stage configuration, then create the base model and its updated variant."""
        manager = ConfigurationManager()
        stage_config = manager.get_prepare_base_model()
        builder = PrepareBaseModel(config=stage_config)
        builder.get_base_model()
        builder.update_base_model()
18
+
19
+
20
+
21
+
22
if __name__ == '__main__':
    # Script entry point: run the stage, logging its start and completion.
    try:
        logger.info(f'>>>>>> Stage {STAGE_NAME} Started <<<<<<<')
        PrepareBaseModelTrainigPipeline().main()
        logger.info(f">>>>>> Stage {STAGE_NAME} Completed <<<<<<\n\nx======x")
    except Exception as err:
        # Record the traceback in the log before propagating the failure.
        logger.exception(err)
        raise err
src/cnnClassfier/pipeline/stage_01_data_ingestion.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassfier.config.configuration import ConfigurationManager
2
+ from cnnClassfier.components.data_ingestion import DataIngestion
3
+ from cnnClassfier import logger
4
+
5
+ STAGE_NAME = 'Data Ingestion Stage'
6
+
7
class DataIngestionTrainingPipeline:
    """Pipeline stage that downloads and extracts the dataset."""

    def __init__(self):
        pass

    def main(self):
        """Build the stage configuration, then download the archive and extract it."""
        manager = ConfigurationManager()
        stage_config = manager.get_data_ingestion_config()
        ingestion = DataIngestion(config=stage_config)
        ingestion.download_file()
        ingestion.extract_zip_file()
17
+
18
+
19
+
20
+
21
if __name__ == '__main__':
    # Script entry point: run the stage, logging its start and completion.
    try:
        logger.info(f'>>>>>> Stage {STAGE_NAME} Started <<<<<<<')
        DataIngestionTrainingPipeline().main()
        logger.info(f">>>>>> Stage {STAGE_NAME} Completed <<<<<<\n\nx======x")
    except Exception as err:
        # Record the traceback in the log before propagating the failure.
        logger.exception(err)
        raise err
src/cnnClassfier/pipeline/stage_03_train.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassfier.config.configuration import ConfigurationManager
2
+ from cnnClassfier.components.callbacks import PrepareCallback
3
+ from cnnClassfier.components.train import Training
4
+ from cnnClassfier import logger
5
+
6
+
7
+
8
# Human-readable label used in this stage's log messages.
STAGE_NAME = "Training"


class ModelTrainingPipeline:
    """Pipeline stage that drives the PrepareCallback and Training components."""

    def __init__(self):
        """No state is needed; everything is built inside main()."""

    def main(self):
        """Run the stage.

        Builds the callback list via PrepareCallback.get_tb_callbacks(),
        then builds a Training object from the training settings and calls
        get_base_model(), train_valid_generator() and train() in that order,
        handing the callback list to train().
        """
        config_manager = ConfigurationManager()

        # Callbacks are prepared first so they can be passed to training.
        callback_builder = PrepareCallback(
            config=config_manager.get_prepare_callback_config()
        )
        callback_list = callback_builder.get_tb_callbacks()

        trainer = Training(config=config_manager.get_training_config())
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train(callback_list=callback_list)
29
+
30
+
31
+
32
+
33
if __name__ == '__main__':
    # Script entry point: run the stage between start/completion log markers.
    try:
        logger.info("*******************")
        logger.info(f'>>>>>> stage {STAGE_NAME} started <<<<<<')
        ModelTrainingPipeline().main()
        logger.info(f'>>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x')
    except Exception as err:
        # Log the traceback, then propagate so the process exits with an error.
        logger.exception(err)
        raise err
43
+
src/cnnClassfier/pipeline/stage_04_evaluation.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassfier.config.configuration import ConfigurationManager
2
+ from cnnClassfier.components.evaluation import Evaluation
3
+ from cnnClassfier import logger
4
+
5
+
6
# Human-readable label used in this stage's log messages.
STAGE_NAME = 'Evaluation Stage'


class EvaluationTrainigPipeline:
    """Pipeline stage that drives the Evaluation component."""

    def __init__(self):
        """No state is needed; everything is built inside main()."""

    def main(self):
        """Run the stage.

        Reads the validation settings from ConfigurationManager, builds an
        Evaluation from them, then calls evaluation() followed by
        save_score().
        """
        config_manager = ConfigurationManager()
        evaluator = Evaluation(config_manager.get_validation_config())
        evaluator.evaluation()
        evaluator.save_score()
18
+
19
+
20
+
21
+
22
if __name__ == '__main__':
    # Script entry point: run the stage between start/completion log markers.
    try:
        logger.info(f'>>>>>> Stage {STAGE_NAME} Started <<<<<<<')
        EvaluationTrainigPipeline().main()
        logger.info(f'>>>>>> Stage {STAGE_NAME} Completed <<<<<<\n\nx======x')
    except Exception as err:
        # Log the traceback, then propagate so the process exits with an error.
        logger.exception(err)
        raise err
src/cnnClassfier/utils/__init__.py ADDED
File without changes
src/cnnClassfier/utils/common.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from box.exceptions import BoxValueError
3
+ import yaml
4
+ from cnnClassfier import logger
5
+ import json
6
+ import joblib
7
+ from ensure import ensure_annotations
8
+ from box import ConfigBox
9
+ from pathlib import Path
10
+ from typing import Any
11
+ import base64
12
+
13
+
14
+
15
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Load a YAML file and wrap its content in a ConfigBox.

    Args:
        path_to_yaml (Path): location of the YAML file to read

    Raises:
        ValueError: when ConfigBox rejects the parsed content with a
            BoxValueError (reported as an empty yaml file)

    Returns:
        ConfigBox: the parsed YAML content
    """
    try:
        with open(path_to_yaml) as stream:
            parsed = yaml.safe_load(stream)
        logger.info(f"yaml file: {path_to_yaml} loaded successfully")
        return ConfigBox(parsed)
    except BoxValueError:
        raise ValueError("yaml file is empty")
    # Any other exception propagates to the caller unchanged.
38
+
39
+
40
@ensure_annotations
def create_directories(path_to_directories: list, verbose=True):
    """Create every directory named in the list.

    Args:
        path_to_directories (list): directory paths to create; paths that
            already exist are accepted (exist_ok=True)
        verbose (bool, optional): log one message per path when truthy.
            Defaults to True.
    """
    for directory in path_to_directories:
        os.makedirs(directory, exist_ok=True)
        if not verbose:
            continue
        logger.info(f'Created directory at: {directory}')
53
+
54
@ensure_annotations
def save_json(path: Path, data: dict):
    """Write a dictionary to a JSON file with 4-space indentation.

    Args:
        path (Path): destination JSON file (overwritten if present)
        data (dict): content to serialise
    """
    with open(path, 'w') as out_file:
        json.dump(data, fp=out_file, indent=4)

    logger.info(f'Json file saved at: {path}')
68
+
69
+
70
+
71
+
72
@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Load a JSON file and wrap its content in a ConfigBox.

    Args:
        path (Path): path to json file

    Returns:
        ConfigBox: data as class attributes instead of dict
    """
    with open(path, 'r') as f:
        content = json.load(f)

    logger.info(f"Json file loaded successfully from: {path}")
    # Return an instance built from the parsed data, not the ConfigBox class.
    return ConfigBox(content)
88
+
89
+
90
@ensure_annotations
def save_bin(data: Any, path: Path):
    """Persist an object to disk with joblib.

    Args:
        data (Any): object to serialise
        path (Path): destination file
    """
    joblib.dump(filename=path, value=data)
    logger.info(f'binary file saved at: {path}')
100
+
101
+
102
+
103
@ensure_annotations
def load_bin(path: Path) -> Any:
    """Load an object previously stored with joblib.

    The return annotation is ``Any`` (matching ``save_bin``'s ``data``
    parameter) because the file may hold any object, not only a ConfigBox.

    Args:
        path (Path): path to binary file

    Returns:
        Any: object stored in the file
    """
    # NOTE: joblib.load is pickle-based; only load files from trusted sources.
    data = joblib.load(path)
    logger.info(f'binary file has been loaded successfully from : {path}')
    return data
117
+
118
+
119
@ensure_annotations
def get_size(path: Path) -> str:
    """Report a file's size in kilobytes.

    Args:
        path (Path): path of the file

    Returns:
        str: the size formatted as "~ <n> KB", where n is the byte count
            divided by 1024 and rounded to an integer
    """
    byte_count = os.path.getsize(path)
    kilobytes = round(byte_count / 1024)
    return f"~ {kilobytes} KB"
132
+
133
+
134
def decodeImage(imgstring, fileName):
    """Decode a base64 payload and write the resulting bytes to a file.

    Args:
        imgstring: base64-encoded data (anything base64.b64decode accepts)
        fileName: path of the file to create or overwrite
    """
    raw_bytes = base64.b64decode(imgstring)
    # The context manager closes the file; no explicit close() is needed.
    with open(fileName, 'wb') as out_file:
        out_file.write(raw_bytes)
139
+
140
+
141
+
142
def encodeImageIntoBase64(croppedImagePath):
    """Read a file and return its content encoded as base64.

    Args:
        croppedImagePath: path of the file to read in binary mode

    Returns:
        bytes: base64 encoding of the file's raw bytes
    """
    with open(croppedImagePath, 'rb') as f:
        # Encode, not decode: the file holds raw bytes, not base64 text.
        return base64.b64encode(f.read())