ALYYAN committed on
Commit
d576da9
·
0 Parent(s):

Initial commit of clean, working project

Browse files
Files changed (49) hide show
  1. .dvc/.gitignore +3 -0
  2. .dvc/config +0 -0
  3. .dvcignore +3 -0
  4. .gitattributes +2 -0
  5. .github/workflows/main.yaml +57 -0
  6. .gitignore +214 -0
  7. LICENSE +21 -0
  8. README.md +2 -0
  9. app.py +55 -0
  10. class_check.py +66 -0
  11. config/config.yaml +17 -0
  12. confusion_matrix.png +0 -0
  13. dockerfile +13 -0
  14. dvc.lock +113 -0
  15. dvc.yaml +54 -0
  16. main.py +54 -0
  17. params.yaml +18 -0
  18. requirements.txt +0 -0
  19. research/01_data_ingestion.ipynb +230 -0
  20. research/02_prepare_base_model.ipynb +290 -0
  21. research/03_model_trainer.ipynb +303 -0
  22. research/04_model_evaluation_with_mlflow.ipynb +328 -0
  23. research/trials.ipynb +43 -0
  24. scores.json +4 -0
  25. setup.py +33 -0
  26. src/cnnClassifier/__init__.py +22 -0
  27. src/cnnClassifier/components/__init__.py +0 -0
  28. src/cnnClassifier/components/data_ingestion.py +46 -0
  29. src/cnnClassifier/components/model_evaluation_mlflow.py +124 -0
  30. src/cnnClassifier/components/model_trainer.py +127 -0
  31. src/cnnClassifier/components/prepare_base_model.py +74 -0
  32. src/cnnClassifier/config/__init__.py +0 -0
  33. src/cnnClassifier/config/configuration.py +82 -0
  34. src/cnnClassifier/constants/__init__.py +4 -0
  35. src/cnnClassifier/entity/__init__.py +0 -0
  36. src/cnnClassifier/entity/config_entity.py +43 -0
  37. src/cnnClassifier/pipeline/__init__.py +0 -0
  38. src/cnnClassifier/pipeline/prediction.py +36 -0
  39. src/cnnClassifier/pipeline/stage_01_data_ingestion.py +31 -0
  40. src/cnnClassifier/pipeline/stage_02_prepare_base_model.py +32 -0
  41. src/cnnClassifier/pipeline/stage_03_model_trainer.py +35 -0
  42. src/cnnClassifier/pipeline/stage_04_model_evaluation.py +37 -0
  43. src/cnnClassifier/utils/__init__.py +0 -0
  44. src/cnnClassifier/utils/common.py +137 -0
  45. static/script.js +159 -0
  46. static/style.css +116 -0
  47. template.py +47 -0
  48. templates/index.html +98 -0
  49. training_history.csv +11 -0
.dvc/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ /config.local
2
+ /tmp
3
+ /cache
.dvc/config ADDED
File without changes
.dvcignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Add patterns of files dvc should ignore, which could improve
2
+ # the performance. Learn more at
3
+ # https://dvc.org/doc/user-guide/dvcignore
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.h5 filter=lfs diff=lfs merge=lfs -text
2
+ artifacts/training/model/variables/variables.data-*-of-* filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI-CD Pipeline for Chest Cancer Classifier
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ # Job 1: Continuous Integration (Test the application)
10
+ ci-test:
11
+ name: Continuous Integration - Test Application
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - name: Checkout Code
16
+ uses: actions/checkout@v3
17
+
18
+ - name: Set up Python 3.8
19
+ uses: actions/setup-python@v4
20
+ with:
21
+ python-version: '3.8'
22
+
23
+ - name: Install Dependencies
24
+ run: pip install -r requirements.txt
25
+
26
+ - name: Run a simple health check
27
+ run: echo "Placeholder for future tests. For now, we just check if dependencies install."
28
+
29
+ # Job 2: Continuous Deployment (Deploy to Hugging Face with Manual Git Push)
30
+ cd-deploy:
31
+ name: Continuous Deployment - Deploy to Hugging Face
32
+ needs: ci-test # This job will only run if the 'ci-test' job succeeds
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+ - name: Checkout Code
37
+ uses: actions/checkout@v3
38
+ with:
39
+ # We need to fetch all history and tags for the push to work correctly
40
+ fetch-depth: 0
41
+ lfs: true
42
+
43
+ - name: Push to Hugging Face Hub
44
+ env:
45
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
46
+ HF_SPACE_REPO: ${{ secrets.HF_SPACE_REPO }}
47
+ run: |
48
+ echo "Setting up git repository for Hugging Face push"
49
+ # Add the Hugging Face Space as a new remote repository named "hf"
50
+ # Use your canonical (lowercase) HF username here
51
+ git remote add hf "https://alyyanahmed21:${HF_TOKEN}@huggingface.co/spaces/${HF_SPACE_REPO}"
52
+
53
+ echo "Pushing to Hugging Face..."
54
+ # Force-push the main branch from your GitHub repo to the main branch on the HF remote
55
+ git push --force hf main
56
+
57
+ echo "✅ Deployment successful!"
.gitignore ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ artifacts/*
210
+
211
+ mlruns/
212
+ .env
213
+ model/
214
+ cnn_env/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 ALYYAN
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, request, jsonify, render_template
import os
from flask_cors import CORS, cross_origin
from cnnClassifier.utils.common import decodeImage
from cnnClassifier.pipeline.prediction import PredictionPipeline

# Set environment variables for consistent encoding
os.putenv('LANG', 'en_US.UTF-8')
os.putenv('LC_ALL', 'en_US.UTF-8')

app = Flask(__name__)
CORS(app)


class ClientApp:
    """Holds the per-process prediction pipeline and its fixed input-image path."""

    def __init__(self):
        # Every incoming base64 image is decoded to this one filename
        # before inference.
        self.filename = "inputImage.jpg"
        self.classifier = PredictionPipeline(self.filename)


# BUG FIX: create the client at import time. The original instantiated
# `clApp` only under `if __name__ == "__main__":`, but this project's
# dockerfile serves the app with `gunicorn app:app`, which imports the
# module without running that guard — so /predict raised NameError in
# the deployed container. Module-level creation works for both the
# dev server and gunicorn.
clApp = ClientApp()


@app.route("/", methods=['GET'])
@cross_origin()
def home():
    """Renders the main user interface."""
    return render_template('index.html')


@app.route("/train", methods=['GET', 'POST'])
@cross_origin()
def trainRoute():
    """Triggers the DVC pipeline to retrain the model."""
    # os.system("python main.py") # You can use this if you have a main orchestrator
    os.system("dvc repro")
    return "Training done successfully!"


@app.route("/predict", methods=['POST'])
@cross_origin()
def predictRoute():
    """Decodes the posted base64 image, runs inference, returns the label."""
    image = request.json['image']
    decodeImage(image, clApp.filename)

    # predict() returns just the class index; the confirmed mapping is
    # {'adenocarcinoma': 0, 'normal': 1}.
    prediction_value = clApp.classifier.predict()
    prediction_text = "Normal" if prediction_value == 1 else "Cancer"

    # The front-end expects the key "prediction".
    return jsonify([{"prediction": prediction_text}])


if __name__ == "__main__":
    # Run the app on all available interfaces (for Docker/deployment) and port 8080
    app.run(host='0.0.0.0', port=8080)
class_check.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# check_data_balance.py

import os
from pathlib import Path


def check_dataset_balance(data_path: Path):
    """
    Print a per-class image count report for a dataset directory.

    The expected layout is one subdirectory per class, each containing
    the image files for that class:

        data_path/
            class_A/  image1.jpg, image2.jpg, ...
            class_B/  image1.jpg, image2.jpg, ...

    Args:
        data_path (Path): The path to the main dataset directory.
    """
    print(f"--- Checking Dataset Balance at: {data_path} ---\n")

    if not data_path.is_dir():
        print(f"❌ ERROR: The provided path is not a valid directory.")
        return

    class_names = [entry.name for entry in data_path.iterdir() if entry.is_dir()]

    if not class_names:
        print("❌ ERROR: No class subdirectories found in the dataset folder.")
        return

    print(f"Found {len(class_names)} classes: {', '.join(class_names)}\n")

    # Count only regular files per class, so stray subdirectories
    # (like .ipynb_checkpoints) are ignored.
    class_counts = {
        name: sum(1 for item in (data_path / name).iterdir() if item.is_file())
        for name in class_names
    }
    total_images = sum(class_counts.values())

    print("--- Image Counts per Class ---")
    for name, count in class_counts.items():
        percentage = (count / total_images) * 100 if total_images > 0 else 0
        print(f"- {name:<20}: {count:>5} images ({percentage:.2f}%)")

    print("-" * 35)
    print(f"- {'Total':<20}: {total_images:>5} images\n")

    print("--- For your training script ---")
    print("Use these counts to calculate your class_weight dictionary.")


if __name__ == "__main__":
    # --- IMPORTANT ---
    # Update this path to point to your actual dataset folder.
    # This is the folder that contains the 'Normal' and 'adenocarcinoma' subfolders.
    dataset_directory = Path("artifacts/data_ingestion/Chest-CT-Scan-data")

    check_dataset_balance(dataset_directory)
config/config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifacts_root: artifacts
2
+
3
+
4
+ data_ingestion:
5
+ root_dir: artifacts/data_ingestion
6
+ source_URL: https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing
7
+ local_data_file: artifacts/data_ingestion/data.zip
8
+ unzip_dir: artifacts/data_ingestion
9
+
10
+ prepare_base_model:
11
+ root_dir: artifacts/prepare_base_model
12
+ base_model_path: artifacts/prepare_base_model/base_model.h5
13
+ updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
14
+
15
+ training:
16
+ root_dir: artifacts/training
17
+ trained_model_path: artifacts/training/model.h5
confusion_matrix.png ADDED
dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.8-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 8080
12
+
13
+ CMD ["gunicorn", "--bind", "0.0.0.0:8080", "app:app"]
dvc.lock ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ schema: '2.0'
2
+ stages:
3
+ data_ingestion:
4
+ cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
5
+ deps:
6
+ - path: config/config.yaml
7
+ hash: md5
8
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
9
+ size: 578
10
+ - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
11
+ hash: md5
12
+ md5: bad788253475f50d44fdaa7237967b49
13
+ size: 883
14
+ outs:
15
+ - path: artifacts/data_ingestion/Chest-CT-Scan-data
16
+ hash: md5
17
+ md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
18
+ size: 49247431
19
+ nfiles: 343
20
+ prepare_base_model:
21
+ cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
22
+ deps:
23
+ - path: config/config.yaml
24
+ hash: md5
25
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
26
+ size: 578
27
+ - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
28
+ hash: md5
29
+ md5: 30c63470719d961e32045908b7c0772d
30
+ size: 966
31
+ params:
32
+ params.yaml:
33
+ CLASSES: 2
34
+ IMAGE_SIZE:
35
+ - 224
36
+ - 224
37
+ - 3
38
+ INCLUDE_TOP: false
39
+ LEARNING_RATE: 0.001
40
+ WEIGHTS: imagenet
41
+ outs:
42
+ - path: artifacts/prepare_base_model
43
+ hash: md5
44
+ md5: 4aa2611cd37984c188512d3a19c6942b.dir
45
+ size: 118054560
46
+ nfiles: 2
47
+ training:
48
+ cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
49
+ deps:
50
+ - path: artifacts/data_ingestion/Chest-CT-Scan-data
51
+ hash: md5
52
+ md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
53
+ size: 49247431
54
+ nfiles: 343
55
+ - path: artifacts/prepare_base_model
56
+ hash: md5
57
+ md5: 4aa2611cd37984c188512d3a19c6942b.dir
58
+ size: 118054560
59
+ nfiles: 2
60
+ - path: config/config.yaml
61
+ hash: md5
62
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
63
+ size: 578
64
+ - path: src/cnnClassifier/pipeline/stage_03_model_trainer.py
65
+ hash: md5
66
+ md5: c33e23d2c123f157b2ab007c8e9d938f
67
+ size: 893
68
+ params:
69
+ params.yaml:
70
+ AUGMENTATION: true
71
+ BATCH_SIZE: 16
72
+ EPOCHS: 10
73
+ IMAGE_SIZE:
74
+ - 224
75
+ - 224
76
+ - 3
77
+ outs:
78
+ - path: artifacts/training/model.h5
79
+ hash: md5
80
+ md5: 233944d4fbed7856cf28be27c602014d
81
+ size: 59337520
82
+ evaluation:
83
+ cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
84
+ deps:
85
+ - path: artifacts/data_ingestion/Chest-CT-Scan-data
86
+ hash: md5
87
+ md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
88
+ size: 49247431
89
+ nfiles: 343
90
+ - path: artifacts/training/model.h5
91
+ hash: md5
92
+ md5: 233944d4fbed7856cf28be27c602014d
93
+ size: 59337520
94
+ - path: config/config.yaml
95
+ hash: md5
96
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
97
+ size: 578
98
+ - path: src/cnnClassifier/pipeline/stage_04_model_evaluation.py
99
+ hash: md5
100
+ md5: d20a1645fd93cae9c7c0cecd8a0d4a2a
101
+ size: 1188
102
+ params:
103
+ params.yaml:
104
+ BATCH_SIZE: 16
105
+ IMAGE_SIZE:
106
+ - 224
107
+ - 224
108
+ - 3
109
+ outs:
110
+ - path: scores.json
111
+ hash: md5
112
+ md5: 2c6b298a9827df6c174dc1bbbf40d040
113
+ size: 59
dvc.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ data_ingestion:
3
+ cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
4
+ deps:
5
+ - src/cnnClassifier/pipeline/stage_01_data_ingestion.py
6
+ - config/config.yaml
7
+ outs:
8
+ - artifacts/data_ingestion/Chest-CT-Scan-data
9
+
10
+
11
+ prepare_base_model:
12
+ cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
13
+ deps:
14
+ - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
15
+ - config/config.yaml
16
+ params:
17
+ - IMAGE_SIZE
18
+ - INCLUDE_TOP
19
+ - CLASSES
20
+ - WEIGHTS
21
+ - LEARNING_RATE
22
+ outs:
23
+ - artifacts/prepare_base_model
24
+
25
+
26
+ training:
27
+ cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
28
+ deps:
29
+ - src/cnnClassifier/pipeline/stage_03_model_trainer.py
30
+ - config/config.yaml
31
+ - artifacts/data_ingestion/Chest-CT-Scan-data
32
+ - artifacts/prepare_base_model
33
+ params:
34
+ - IMAGE_SIZE
35
+ - EPOCHS
36
+ - BATCH_SIZE
37
+ - AUGMENTATION
38
+ outs:
39
+ - artifacts/training/model.h5
40
+
41
+
42
+ evaluation:
43
+ cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
44
+ deps:
45
+ - src/cnnClassifier/pipeline/stage_04_model_evaluation.py
46
+ - config/config.yaml
47
+ - artifacts/data_ingestion/Chest-CT-Scan-data
48
+ - artifacts/training/model.h5
49
+ params:
50
+ - IMAGE_SIZE
51
+ - BATCH_SIZE
52
+ metrics:
53
+ - scores.json:
54
+ cache: false
main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from cnnClassifier import logger
from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline
from cnnClassifier.pipeline.stage_03_model_trainer import ModelTrainingPipeline
from cnnClassifier.pipeline.stage_04_model_evaluation import EvaluationPipeline


def run_stage(stage_name, pipeline_cls):
    """Run one pipeline stage with standard start/finish log banners.

    Args:
        stage_name: Human-readable stage name used in the log messages.
        pipeline_cls: Pipeline class with a no-argument constructor and a
            ``main()`` method (the four stage classes imported above).

    Raises:
        Exception: whatever the stage raised, re-raised after logging so a
            failed run exits non-zero and aborts the remaining stages.
    """
    try:
        # The original file repeated this try/log boilerplate four times
        # (and omitted the separator line for the first stage); factoring
        # it out makes every stage log consistently.
        logger.info("*******************")
        logger.info(f">>>>>> stage {stage_name} started <<<<<<")
        pipeline_cls().main()
        logger.info(f">>>>>> stage {stage_name} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e


if __name__ == "__main__":
    # Stages run in dependency order; a failure in any stage stops the run.
    for name, cls in (
        ("Data Ingestion stage", DataIngestionTrainingPipeline),
        ("Prepare base model", PrepareBaseModelTrainingPipeline),
        ("Training", ModelTrainingPipeline),
        ("Evaluation stage", EvaluationPipeline),
    ):
        run_stage(name, cls)
params.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #AUGMENTATION: True
2
+ #IMAGE_SIZE: [224, 224, 3] # as per VGG 16 model
3
+ #BATCH_SIZE: 16
4
+ #INCLUDE_TOP: False
5
+ #EPOCHS: 1
6
+ #CLASSES: 2
7
+ #WEIGHTS: imagenet
8
+ #LEARNING_RATE: 0.01
9
+
10
+
11
+ AUGMENTATION: True
12
+ IMAGE_SIZE: [224, 224, 3]
13
+ BATCH_SIZE: 16 # Use 16 if 32 gives you memory errors, otherwise 32 is fine
14
+ INCLUDE_TOP: False
15
+ EPOCHS: 10 # Give the model enough time to learn
16
+ CLASSES: 2
17
+ WEIGHTS: imagenet
18
+ LEARNING_RATE: 0.001 # CRUCIAL: A small learning rate for fine-tuning
requirements.txt ADDED
Binary file (526 Bytes). View file
 
research/01_data_ingestion.ipynb ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "8f33ab85",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "5b55e660",
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "data": {
21
+ "text/plain": [
22
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
23
+ ]
24
+ },
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "output_type": "execute_result"
28
+ }
29
+ ],
30
+ "source": [
31
+ "%pwd"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 3,
37
+ "id": "b7338c82",
38
+ "metadata": {},
39
+ "outputs": [
40
+ {
41
+ "data": {
42
+ "text/plain": [
43
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
44
+ ]
45
+ },
46
+ "execution_count": 3,
47
+ "metadata": {},
48
+ "output_type": "execute_result"
49
+ }
50
+ ],
51
+ "source": [
52
+ "os.chdir(\"../\")\n",
53
+ "%pwd"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 4,
59
+ "id": "a770b8df",
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "from dataclasses import dataclass\n",
64
+ "from pathlib import Path\n",
65
+ "\n",
66
+ "\n",
67
+ "@dataclass(frozen=True)\n",
68
+ "class DataIngestionConfig:\n",
69
+ " root_dir: Path\n",
70
+ " source_URL: str\n",
71
+ " local_data_file: Path\n",
72
+ " unzip_dir: Path"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 6,
78
+ "id": "979add90",
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "from cnnClassifier.constants import *\n",
83
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
84
+ "class ConfigurationManager:\n",
85
+ " def __init__(\n",
86
+ " self,\n",
87
+ " config_filepath = CONFIG_FILE_PATH,\n",
88
+ " params_filepath = PARAMS_FILE_PATH):\n",
89
+ "\n",
90
+ " self.config = read_yaml(config_filepath)\n",
91
+ " self.params = read_yaml(params_filepath)\n",
92
+ "\n",
93
+ " create_directories([self.config.artifacts_root])\n",
94
+ "\n",
95
+ " def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
96
+ " config = self.config.data_ingestion\n",
97
+ "\n",
98
+ " create_directories([config.root_dir])\n",
99
+ "\n",
100
+ " data_ingestion_config = DataIngestionConfig(\n",
101
+ " root_dir=config.root_dir,\n",
102
+ " source_URL=config.source_URL,\n",
103
+ " local_data_file=config.local_data_file,\n",
104
+ " unzip_dir=config.unzip_dir \n",
105
+ " )\n",
106
+ "\n",
107
+ " return data_ingestion_config"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 9,
113
+ "id": "e4fd8f68",
114
+ "metadata": {},
115
+ "outputs": [
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "[2025-08-18 00:24:08,669: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
121
+ "[2025-08-18 00:24:08,684: INFO: common: yaml file: params.yaml loaded successfully]\n",
122
+ "[2025-08-18 00:24:08,686: INFO: common: created directory at: artifacts]\n",
123
+ "[2025-08-18 00:24:08,688: INFO: common: created directory at: artifacts/data_ingestion]\n",
124
+ "[2025-08-18 00:24:08,692: INFO: 78466947: Downloading data from https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n"
125
+ ]
126
+ },
127
+ {
128
+ "name": "stderr",
129
+ "output_type": "stream",
130
+ "text": [
131
+ "Downloading...\n",
132
+ "From (original): https://drive.google.com/uc?/export=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY\n",
133
+ "From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY&confirm=t&uuid=954f5f66-c0d6-4c40-a993-933880515813\n",
134
+ "To: f:\\Projects\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\artifacts\\data_ingestion\\data.zip\n",
135
+ "100%|██████████| 49.0M/49.0M [00:24<00:00, 2.03MB/s]"
136
+ ]
137
+ },
138
+ {
139
+ "name": "stdout",
140
+ "output_type": "stream",
141
+ "text": [
142
+ "[2025-08-18 00:24:36,267: INFO: 78466947: Downloaded data from https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n"
143
+ ]
144
+ },
145
+ {
146
+ "name": "stderr",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "\n"
150
+ ]
151
+ }
152
+ ],
153
+ "source": [
154
+ "import os\n",
155
+ "import zipfile\n",
156
+ "import gdown\n",
157
+ "from cnnClassifier import logger\n",
158
+ "from cnnClassifier.utils.common import get_size\n",
159
+ "\n",
160
+ "class DataIngestion:\n",
161
+ " def __init__(self, config: DataIngestionConfig):\n",
162
+ " self.config = config\n",
163
+ "\n",
164
+ "\n",
165
+ " \n",
166
+ " \n",
167
+ " def download_file(self)-> str:\n",
168
+ " '''\n",
169
+ " Fetch data from the url\n",
170
+ " '''\n",
171
+ "\n",
172
+ " try: \n",
173
+ " dataset_url = self.config.source_URL\n",
174
+ " zip_download_dir = self.config.local_data_file\n",
175
+ " os.makedirs(\"artifacts/data_ingestion\", exist_ok=True)\n",
176
+ " logger.info(f\"Downloading data from {dataset_url} into file {zip_download_dir}\")\n",
177
+ "\n",
178
+ " file_id = dataset_url.split(\"/\")[-2]\n",
179
+ " prefix = 'https://drive.google.com/uc?/export=download&id='\n",
180
+ " gdown.download(prefix+file_id,zip_download_dir)\n",
181
+ "\n",
182
+ " logger.info(f\"Downloaded data from {dataset_url} into file {zip_download_dir}\")\n",
183
+ "\n",
184
+ " except Exception as e:\n",
185
+ " raise e\n",
186
+ " \n",
187
+ " \n",
188
+ " def extract_zip_file(self):\n",
189
+ " \"\"\"\n",
190
+ " zip_file_path: str\n",
191
+ " Extracts the zip file into the data directory\n",
192
+ " Function returns None\n",
193
+ " \"\"\"\n",
194
+ " unzip_path = self.config.unzip_dir\n",
195
+ " os.makedirs(unzip_path, exist_ok=True)\n",
196
+ " with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
197
+ " zip_ref.extractall(unzip_path)\n",
198
+ "try:\n",
199
+ " config = ConfigurationManager()\n",
200
+ " data_ingestion_config = config.get_data_ingestion_config()\n",
201
+ " data_ingestion = DataIngestion(config=data_ingestion_config)\n",
202
+ " data_ingestion.download_file()\n",
203
+ " data_ingestion.extract_zip_file()\n",
204
+ "except Exception as e:\n",
205
+ " raise e"
206
+ ]
207
+ }
208
+ ],
209
+ "metadata": {
210
+ "kernelspec": {
211
+ "display_name": "cnn_env",
212
+ "language": "python",
213
+ "name": "python3"
214
+ },
215
+ "language_info": {
216
+ "codemirror_mode": {
217
+ "name": "ipython",
218
+ "version": 3
219
+ },
220
+ "file_extension": ".py",
221
+ "mimetype": "text/x-python",
222
+ "name": "python",
223
+ "nbconvert_exporter": "python",
224
+ "pygments_lexer": "ipython3",
225
+ "version": "3.11.3"
226
+ }
227
+ },
228
+ "nbformat": 4,
229
+ "nbformat_minor": 5
230
+ }
research/02_prepare_base_model.ipynb ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "29206888",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
13
+ ]
14
+ },
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "import os\n",
22
+ "%pwd"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "id": "7dce8d4e",
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/plain": [
34
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
35
+ ]
36
+ },
37
+ "execution_count": 2,
38
+ "metadata": {},
39
+ "output_type": "execute_result"
40
+ }
41
+ ],
42
+ "source": [
43
+ "os.chdir(\"../\")\n",
44
+ "%pwd"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 4,
50
+ "id": "c4d0c484",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "from dataclasses import dataclass\n",
55
+ "from pathlib import Path\n",
56
+ "\n",
57
+ "@dataclass(frozen=True)\n",
58
+ "class PrepareBaseModelConfig:\n",
59
+ " root_dir: Path\n",
60
+ " base_model_path: Path\n",
61
+ " updated_base_model_path: Path\n",
62
+ " params_image_size: list\n",
63
+ " params_learning_rate: float\n",
64
+ " params_include_top: bool\n",
65
+ " params_weights: str\n",
66
+ " params_classes: int"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 5,
72
+ "id": "26921811",
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "from cnnClassifier.constants import *\n",
77
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
78
+ "\n",
79
+ "class ConfigurationManager:\n",
80
+ " def __init__(\n",
81
+ " self,\n",
82
+ " config_filepath = CONFIG_FILE_PATH,\n",
83
+ " params_filepath = PARAMS_FILE_PATH):\n",
84
+ "\n",
85
+ " self.config = read_yaml(config_filepath)\n",
86
+ " self.params = read_yaml(params_filepath)\n",
87
+ "\n",
88
+ " create_directories([self.config.artifacts_root])\n",
89
+ "\n",
90
+ "\n",
91
+ " def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:\n",
92
+ " config = self.config.prepare_base_model\n",
93
+ " \n",
94
+ " create_directories([config.root_dir])\n",
95
+ "\n",
96
+ " prepare_base_model_config = PrepareBaseModelConfig(\n",
97
+ " root_dir=Path(config.root_dir),\n",
98
+ " base_model_path=Path(config.base_model_path),\n",
99
+ " updated_base_model_path=Path(config.updated_base_model_path),\n",
100
+ " params_image_size=self.params.IMAGE_SIZE,\n",
101
+ " params_learning_rate=self.params.LEARNING_RATE,\n",
102
+ " params_include_top=self.params.INCLUDE_TOP,\n",
103
+ " params_weights=self.params.WEIGHTS,\n",
104
+ " params_classes=self.params.CLASSES\n",
105
+ " )\n",
106
+ "\n",
107
+ " return prepare_base_model_config"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 6,
113
+ "id": "0442bc6f",
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "import os\n",
118
+ "import urllib.request as request\n",
119
+ "from zipfile import ZipFile\n",
120
+ "import tensorflow as tf\n",
121
+ "\n",
122
+ "class PrepareBaseModel:\n",
123
+ " def __init__(self, config: PrepareBaseModelConfig):\n",
124
+ " self.config = config\n",
125
+ "\n",
126
+ " \n",
127
+ " def get_base_model(self):\n",
128
+ " self.model = tf.keras.applications.vgg16.VGG16(\n",
129
+ " input_shape=self.config.params_image_size,\n",
130
+ " weights=self.config.params_weights,\n",
131
+ " include_top=self.config.params_include_top\n",
132
+ " )\n",
133
+ "\n",
134
+ " self.save_model(path=self.config.base_model_path, model=self.model)\n",
135
+ "\n",
136
+ "\n",
137
+ " \n",
138
+ " @staticmethod\n",
139
+ " def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n",
140
+ " if freeze_all:\n",
141
+ " for layer in model.layers:\n",
142
+ " model.trainable = False\n",
143
+ " elif (freeze_till is not None) and (freeze_till > 0):\n",
144
+ " for layer in model.layers[:-freeze_till]:\n",
145
+ " model.trainable = False\n",
146
+ "\n",
147
+ " flatten_in = tf.keras.layers.Flatten()(model.output)\n",
148
+ " prediction = tf.keras.layers.Dense(\n",
149
+ " units=classes,\n",
150
+ " activation=\"softmax\"\n",
151
+ " )(flatten_in)\n",
152
+ "\n",
153
+ " full_model = tf.keras.models.Model(\n",
154
+ " inputs=model.input,\n",
155
+ " outputs=prediction\n",
156
+ " )\n",
157
+ "\n",
158
+ " full_model.compile(\n",
159
+ " optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n",
160
+ " loss=tf.keras.losses.CategoricalCrossentropy(),\n",
161
+ " metrics=[\"accuracy\"]\n",
162
+ " )\n",
163
+ "\n",
164
+ " full_model.summary()\n",
165
+ " return full_model\n",
166
+ " \n",
167
+ "\n",
168
+ " def update_base_model(self):\n",
169
+ " self.full_model = self._prepare_full_model(\n",
170
+ " model=self.model,\n",
171
+ " classes=self.config.params_classes,\n",
172
+ " freeze_all=True,\n",
173
+ " freeze_till=None,\n",
174
+ " learning_rate=self.config.params_learning_rate\n",
175
+ " )\n",
176
+ "\n",
177
+ " self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n",
178
+ " \n",
179
+ "\n",
180
+ "\n",
181
+ " @staticmethod\n",
182
+ " def save_model(path: Path, model: tf.keras.Model):\n",
183
+ " model.save(path)"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "code",
188
+ "execution_count": 7,
189
+ "id": "b21b58b5",
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "name": "stdout",
194
+ "output_type": "stream",
195
+ "text": [
196
+ "[2025-08-20 01:44:50,956: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
197
+ "[2025-08-20 01:44:50,982: INFO: common: yaml file: params.yaml loaded successfully]\n",
198
+ "[2025-08-20 01:44:50,984: INFO: common: created directory at: artifacts]\n",
199
+ "[2025-08-20 01:44:50,986: INFO: common: created directory at: artifacts/prepare_base_model]\n",
200
+ "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5\n",
201
+ "58889256/58889256 [==============================] - 15s 0us/step\n",
202
+ "[2025-08-20 01:45:09,603: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n",
203
+ "Model: \"model\"\n",
204
+ "_________________________________________________________________\n",
205
+ " Layer (type) Output Shape Param # \n",
206
+ "=================================================================\n",
207
+ " input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n",
208
+ " \n",
209
+ " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
210
+ " \n",
211
+ " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
212
+ " \n",
213
+ " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
214
+ " \n",
215
+ " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
216
+ " \n",
217
+ " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
218
+ " \n",
219
+ " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
220
+ " \n",
221
+ " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
222
+ " \n",
223
+ " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
224
+ " \n",
225
+ " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
226
+ " \n",
227
+ " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
228
+ " \n",
229
+ " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
230
+ " \n",
231
+ " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
232
+ " \n",
233
+ " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
234
+ " \n",
235
+ " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",
236
+ " \n",
237
+ " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
238
+ " \n",
239
+ " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
240
+ " \n",
241
+ " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
242
+ " \n",
243
+ " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",
244
+ " \n",
245
+ " flatten (Flatten) (None, 25088) 0 \n",
246
+ " \n",
247
+ " dense (Dense) (None, 2) 50178 \n",
248
+ " \n",
249
+ "=================================================================\n",
250
+ "Total params: 14,764,866\n",
251
+ "Trainable params: 50,178\n",
252
+ "Non-trainable params: 14,714,688\n",
253
+ "_________________________________________________________________\n"
254
+ ]
255
+ }
256
+ ],
257
+ "source": [
258
+ "try:\n",
259
+ " config = ConfigurationManager()\n",
260
+ " prepare_base_model_config = config.get_prepare_base_model_config()\n",
261
+ " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n",
262
+ " prepare_base_model.get_base_model()\n",
263
+ " prepare_base_model.update_base_model()\n",
264
+ "except Exception as e:\n",
265
+ " raise e"
266
+ ]
267
+ }
268
+ ],
269
+ "metadata": {
270
+ "kernelspec": {
271
+ "display_name": "cnn_env",
272
+ "language": "python",
273
+ "name": "python3"
274
+ },
275
+ "language_info": {
276
+ "codemirror_mode": {
277
+ "name": "ipython",
278
+ "version": 3
279
+ },
280
+ "file_extension": ".py",
281
+ "mimetype": "text/x-python",
282
+ "name": "python",
283
+ "nbconvert_exporter": "python",
284
+ "pygments_lexer": "ipython3",
285
+ "version": "3.11.3"
286
+ }
287
+ },
288
+ "nbformat": 4,
289
+ "nbformat_minor": 5
290
+ }
research/03_model_trainer.ipynb ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
21
+ ]
22
+ },
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "%pwd"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 3,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "os.chdir(\"../\")"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
50
+ ]
51
+ },
52
+ "execution_count": 4,
53
+ "metadata": {},
54
+ "output_type": "execute_result"
55
+ }
56
+ ],
57
+ "source": [
58
+ "%pwd"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 7,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "from dataclasses import dataclass\n",
68
+ "from pathlib import Path\n",
69
+ "\n",
70
+ "\n",
71
+ "@dataclass(frozen=True)\n",
72
+ "class TrainingConfig:\n",
73
+ " root_dir: Path\n",
74
+ " trained_model_path: Path\n",
75
+ " updated_base_model_path: Path\n",
76
+ " training_data: Path\n",
77
+ " params_epochs: int\n",
78
+ " params_batch_size: int\n",
79
+ " params_is_augmentation: bool\n",
80
+ " params_image_size: list"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 6,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "from cnnClassifier.constants import *\n",
90
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
91
+ "import tensorflow as tf"
92
+ ]
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "execution_count": 8,
97
+ "metadata": {},
98
+ "outputs": [],
99
+ "source": [
100
+ "class ConfigurationManager:\n",
101
+ " def __init__(\n",
102
+ " self,\n",
103
+ " config_filepath = CONFIG_FILE_PATH,\n",
104
+ " params_filepath = PARAMS_FILE_PATH):\n",
105
+ "\n",
106
+ " self.config = read_yaml(config_filepath)\n",
107
+ " self.params = read_yaml(params_filepath)\n",
108
+ "\n",
109
+ " create_directories([self.config.artifacts_root])\n",
110
+ "\n",
111
+ " \n",
112
+ "\n",
113
+ " def get_training_config(self) -> TrainingConfig:\n",
114
+ " training = self.config.training\n",
115
+ " prepare_base_model = self.config.prepare_base_model\n",
116
+ " params = self.params\n",
117
+ " training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"Chest-CT-Scan-data\")\n",
118
+ " create_directories([\n",
119
+ " Path(training.root_dir)\n",
120
+ " ])\n",
121
+ "\n",
122
+ " training_config = TrainingConfig(\n",
123
+ " root_dir=Path(training.root_dir),\n",
124
+ " trained_model_path=Path(training.trained_model_path),\n",
125
+ " updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n",
126
+ " training_data=Path(training_data),\n",
127
+ " params_epochs=params.EPOCHS,\n",
128
+ " params_batch_size=params.BATCH_SIZE,\n",
129
+ " params_is_augmentation=params.AUGMENTATION,\n",
130
+ " params_image_size=params.IMAGE_SIZE\n",
131
+ " )\n",
132
+ "\n",
133
+ " return training_config"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 9,
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "import os\n",
143
+ "import urllib.request as request\n",
144
+ "from zipfile import ZipFile\n",
145
+ "import tensorflow as tf\n",
146
+ "import time"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 10,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "class Training:\n",
156
+ " def __init__(self, config: TrainingConfig):\n",
157
+ " self.config = config\n",
158
+ "\n",
159
+ " \n",
160
+ " def get_base_model(self):\n",
161
+ " self.model = tf.keras.models.load_model(\n",
162
+ " self.config.updated_base_model_path\n",
163
+ " )\n",
164
+ "\n",
165
+ " def train_valid_generator(self):\n",
166
+ "\n",
167
+ " datagenerator_kwargs = dict(\n",
168
+ " rescale = 1./255,\n",
169
+ " validation_split=0.20\n",
170
+ " )\n",
171
+ "\n",
172
+ " dataflow_kwargs = dict(\n",
173
+ " target_size=self.config.params_image_size[:-1],\n",
174
+ " batch_size=self.config.params_batch_size,\n",
175
+ " interpolation=\"bilinear\"\n",
176
+ " )\n",
177
+ "\n",
178
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
179
+ " **datagenerator_kwargs\n",
180
+ " )\n",
181
+ "\n",
182
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
183
+ " directory=self.config.training_data,\n",
184
+ " subset=\"validation\",\n",
185
+ " shuffle=False,\n",
186
+ " **dataflow_kwargs\n",
187
+ " )\n",
188
+ "\n",
189
+ " if self.config.params_is_augmentation:\n",
190
+ " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
191
+ " rotation_range=40,\n",
192
+ " horizontal_flip=True,\n",
193
+ " width_shift_range=0.2,\n",
194
+ " height_shift_range=0.2,\n",
195
+ " shear_range=0.2,\n",
196
+ " zoom_range=0.2,\n",
197
+ " **datagenerator_kwargs\n",
198
+ " )\n",
199
+ " else:\n",
200
+ " train_datagenerator = valid_datagenerator\n",
201
+ "\n",
202
+ " self.train_generator = train_datagenerator.flow_from_directory(\n",
203
+ " directory=self.config.training_data,\n",
204
+ " subset=\"training\",\n",
205
+ " shuffle=True,\n",
206
+ " **dataflow_kwargs\n",
207
+ " )\n",
208
+ "\n",
209
+ " \n",
210
+ " @staticmethod\n",
211
+ " def save_model(path: Path, model: tf.keras.Model):\n",
212
+ " model.save(path)\n",
213
+ "\n",
214
+ "\n",
215
+ "\n",
216
+ " \n",
217
+ " def train(self):\n",
218
+ " self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n",
219
+ " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n",
220
+ "\n",
221
+ " self.model.fit(\n",
222
+ " self.train_generator,\n",
223
+ " epochs=self.config.params_epochs,\n",
224
+ " steps_per_epoch=self.steps_per_epoch,\n",
225
+ " validation_steps=self.validation_steps,\n",
226
+ " validation_data=self.valid_generator\n",
227
+ " )\n",
228
+ "\n",
229
+ " self.save_model(\n",
230
+ " path=self.config.trained_model_path,\n",
231
+ " model=self.model\n",
232
+ " )\n",
233
+ "\n"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": 14,
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "name": "stdout",
243
+ "output_type": "stream",
244
+ "text": [
245
+ "[2025-08-20 02:03:39,280: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
246
+ "[2025-08-20 02:03:39,284: INFO: common: yaml file: params.yaml loaded successfully]\n",
247
+ "[2025-08-20 02:03:39,286: INFO: common: created directory at: artifacts]\n",
248
+ "[2025-08-20 02:03:39,288: INFO: common: created directory at: artifacts\\training]\n"
249
+ ]
250
+ },
251
+ {
252
+ "name": "stdout",
253
+ "output_type": "stream",
254
+ "text": [
255
+ "Found 68 images belonging to 2 classes.\n",
256
+ "Found 275 images belonging to 2 classes.\n",
257
+ "17/17 [==============================] - 63s 4s/step - loss: 14.4131 - accuracy: 0.5560 - val_loss: 0.2376 - val_accuracy: 0.8750\n"
258
+ ]
259
+ }
260
+ ],
261
+ "source": [
262
+ "try:\n",
263
+ " config = ConfigurationManager()\n",
264
+ " training_config = config.get_training_config()\n",
265
+ " training = Training(config=training_config)\n",
266
+ " training.get_base_model()\n",
267
+ " training.train_valid_generator()\n",
268
+ " training.train()\n",
269
+ " \n",
270
+ "except Exception as e:\n",
271
+ " raise e"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "metadata": {},
278
+ "outputs": [],
279
+ "source": []
280
+ }
281
+ ],
282
+ "metadata": {
283
+ "kernelspec": {
284
+ "display_name": "cnn_env",
285
+ "language": "python",
286
+ "name": "python3"
287
+ },
288
+ "language_info": {
289
+ "codemirror_mode": {
290
+ "name": "ipython",
291
+ "version": 3
292
+ },
293
+ "file_extension": ".py",
294
+ "mimetype": "text/x-python",
295
+ "name": "python",
296
+ "nbconvert_exporter": "python",
297
+ "pygments_lexer": "ipython3",
298
+ "version": "3.11.3"
299
+ }
300
+ },
301
+ "nbformat": 4,
302
+ "nbformat_minor": 2
303
+ }
research/04_model_evaluation_with_mlflow.ipynb ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 3,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
21
+ ]
22
+ },
23
+ "execution_count": 3,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "%pwd"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 4,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "os.chdir(\"../\")"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 5,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
50
+ ]
51
+ },
52
+ "execution_count": 5,
53
+ "metadata": {},
54
+ "output_type": "execute_result"
55
+ }
56
+ ],
57
+ "source": [
58
+ "%pwd"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 6,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "os.environ[\"MLFLOW_TRACKING_URI\"]=\"https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow\"\n",
68
+ "os.environ[\"MLFLOW_TRACKING_USERNAME\"]=\"AlyyanAhmed21\"\n",
69
+ "os.environ[\"MLFLOW_TRACKING_PASSWORD\"]=\"776454e991d86ea3a96179a4dc1ef72fbc134642\""
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 7,
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": [
78
+ "import tensorflow as tf"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 8,
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
+ "model = tf.keras.models.load_model(\"artifacts/training/model.h5\")"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 9,
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "from dataclasses import dataclass\n",
97
+ "from pathlib import Path\n",
98
+ "\n",
99
+ "@dataclass(frozen=True)\n",
100
+ "class EvaluationConfig:\n",
101
+ " path_of_model: Path\n",
102
+ " training_data: Path\n",
103
+ " all_params: dict\n",
104
+ " mlflow_uri: str\n",
105
+ " params_image_size: list\n",
106
+ " params_batch_size: int"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": 10,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "from cnnClassifier.constants import *\n",
116
+ "from cnnClassifier.utils.common import read_yaml, create_directories, save_json"
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": 11,
122
+ "metadata": {},
123
+ "outputs": [],
124
+ "source": [
125
+ "class ConfigurationManager:\n",
126
+ " def __init__(\n",
127
+ " self, \n",
128
+ " config_filepath = CONFIG_FILE_PATH,\n",
129
+ " params_filepath = PARAMS_FILE_PATH):\n",
130
+ " self.config = read_yaml(config_filepath)\n",
131
+ " self.params = read_yaml(params_filepath)\n",
132
+ " create_directories([self.config.artifacts_root])\n",
133
+ "\n",
134
+ " \n",
135
+ " def get_evaluation_config(self) -> EvaluationConfig:\n",
136
+ " eval_config = EvaluationConfig(\n",
137
+ " path_of_model=\"artifacts/training/model.h5\",\n",
138
+ " training_data=\"artifacts/data_ingestion/Chest-CT-Scan-data\",\n",
139
+ " mlflow_uri=\"https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow\",\n",
140
+ " all_params=self.params,\n",
141
+ " params_image_size=self.params.IMAGE_SIZE,\n",
142
+ " params_batch_size=self.params.BATCH_SIZE\n",
143
+ " )\n",
144
+ " return eval_config\n",
145
+ "\n",
146
+ "\n"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 12,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "import tensorflow as tf\n",
156
+ "from pathlib import Path\n",
157
+ "import mlflow\n",
158
+ "import mlflow.keras\n",
159
+ "from urllib.parse import urlparse"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 13,
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "class Evaluation:\n",
169
+ " def __init__(self, config: EvaluationConfig):\n",
170
+ " self.config = config\n",
171
+ "\n",
172
+ " \n",
173
+ " def _valid_generator(self):\n",
174
+ "\n",
175
+ " datagenerator_kwargs = dict(\n",
176
+ " rescale = 1./255,\n",
177
+ " validation_split=0.30\n",
178
+ " )\n",
179
+ "\n",
180
+ " dataflow_kwargs = dict(\n",
181
+ " target_size=self.config.params_image_size[:-1],\n",
182
+ " batch_size=self.config.params_batch_size,\n",
183
+ " interpolation=\"bilinear\"\n",
184
+ " )\n",
185
+ "\n",
186
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
187
+ " **datagenerator_kwargs\n",
188
+ " )\n",
189
+ "\n",
190
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
191
+ " directory=self.config.training_data,\n",
192
+ " subset=\"validation\",\n",
193
+ " shuffle=False,\n",
194
+ " **dataflow_kwargs\n",
195
+ " )\n",
196
+ "\n",
197
+ "\n",
198
+ " @staticmethod\n",
199
+ " def load_model(path: Path) -> tf.keras.Model:\n",
200
+ " return tf.keras.models.load_model(path)\n",
201
+ " \n",
202
+ "\n",
203
+ " def evaluation(self):\n",
204
+ " self.model = self.load_model(self.config.path_of_model)\n",
205
+ " self._valid_generator()\n",
206
+ " self.score = model.evaluate(self.valid_generator)\n",
207
+ " self.save_score()\n",
208
+ "\n",
209
+ " def save_score(self):\n",
210
+ " scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
211
+ " save_json(path=Path(\"scores.json\"), data=scores)\n",
212
+ "\n",
213
+ " \n",
214
+ " def log_into_mlflow(self):\n",
215
+ " mlflow.set_registry_uri(self.config.mlflow_uri)\n",
216
+ " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n",
217
+ " \n",
218
+ " with mlflow.start_run():\n",
219
+ " mlflow.log_params(self.config.all_params)\n",
220
+ " mlflow.log_metrics(\n",
221
+ " {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
222
+ " )\n",
223
+ " # Model registry does not work with file store\n",
224
+ " if tracking_url_type_store != \"file\":\n",
225
+ "\n",
226
+ " # Register the model\n",
227
+ " # There are other ways to use the Model Registry, which depends on the use case,\n",
228
+ " # please refer to the doc for more information:\n",
229
+ " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n",
230
+ " mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n",
231
+ " else:\n",
232
+ " mlflow.keras.log_model(self.model, \"model\")\n"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 14,
238
+ "metadata": {},
239
+ "outputs": [
240
+ {
241
+ "name": "stdout",
242
+ "output_type": "stream",
243
+ "text": [
244
+ "[2025-08-20 04:01:28,984: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
245
+ "[2025-08-20 04:01:28,988: INFO: common: yaml file: params.yaml loaded successfully]\n",
246
+ "[2025-08-20 04:01:28,991: INFO: common: created directory at: artifacts]\n"
247
+ ]
248
+ },
249
+ {
250
+ "name": "stdout",
251
+ "output_type": "stream",
252
+ "text": [
253
+ "Found 102 images belonging to 2 classes.\n",
254
+ "7/7 [==============================] - 16s 2s/step - loss: 57.2713 - accuracy: 0.4314\n",
255
+ "[2025-08-20 04:01:45,268: INFO: common: json file saved at: scores.json]\n"
256
+ ]
257
+ },
258
+ {
259
+ "name": "stderr",
260
+ "output_type": "stream",
261
+ "text": [
262
+ "2025/08/20 04:01:47 WARNING mlflow.tensorflow: You are saving a TensorFlow Core model or Keras model without a signature. Inference with mlflow.pyfunc.spark_udf() will not work unless the model's pyfunc representation accepts pandas DataFrames as inference inputs.\n"
263
+ ]
264
+ },
265
+ {
266
+ "name": "stdout",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "[2025-08-20 04:01:48,249: WARNING: save: Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 14). These functions will not be directly callable after loading.]\n",
270
+ "INFO:tensorflow:Assets written to: E:\\Temp\\tmp32wvm7sm\\model\\data\\model\\assets\n",
271
+ "[2025-08-20 04:01:49,538: INFO: builder_impl: Assets written to: E:\\Temp\\tmp32wvm7sm\\model\\data\\model\\assets]\n"
272
+ ]
273
+ },
274
+ {
275
+ "name": "stderr",
276
+ "output_type": "stream",
277
+ "text": [
278
+ "f:\\Projects\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\cnn_env\\Lib\\site-packages\\_distutils_hack\\__init__.py:33: UserWarning: Setuptools is replacing distutils.\n",
279
+ " warnings.warn(\"Setuptools is replacing distutils.\")\n",
280
+ "Registered model 'VGG16Model' already exists. Creating a new version of this model...\n",
281
+ "2025/08/20 04:02:45 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n",
282
+ "Created version '2' of model 'VGG16Model'.\n"
283
+ ]
284
+ }
285
+ ],
286
+ "source": [
287
+ "try:\n",
288
+ " config = ConfigurationManager()\n",
289
+ " eval_config = config.get_evaluation_config()\n",
290
+ " evaluation = Evaluation(eval_config)\n",
291
+ " evaluation.evaluation()\n",
292
+ " evaluation.log_into_mlflow()\n",
293
+ "\n",
294
+ "except Exception as e:\n",
295
+ " raise e"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": null,
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": []
304
+ }
305
+ ],
306
+ "metadata": {
307
+ "kernelspec": {
308
+ "display_name": "cnn_env",
309
+ "language": "python",
310
+ "name": "python3"
311
+ },
312
+ "language_info": {
313
+ "codemirror_mode": {
314
+ "name": "ipython",
315
+ "version": 3
316
+ },
317
+ "file_extension": ".py",
318
+ "mimetype": "text/x-python",
319
+ "name": "python",
320
+ "nbconvert_exporter": "python",
321
+ "pygments_lexer": "ipython3",
322
+ "version": "3.11.3"
323
+ },
324
+ "orig_nbformat": 4
325
+ },
326
+ "nbformat": 4,
327
+ "nbformat_minor": 2
328
+ }
research/trials.ipynb ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "981d0e26",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Hello, World!\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "print('Hello, World!')"
19
+ ]
20
+ }
21
+ ],
22
+ "metadata": {
23
+ "kernelspec": {
24
+ "display_name": "cnn_env",
25
+ "language": "python",
26
+ "name": "python3"
27
+ },
28
+ "language_info": {
29
+ "codemirror_mode": {
30
+ "name": "ipython",
31
+ "version": 3
32
+ },
33
+ "file_extension": ".py",
34
+ "mimetype": "text/x-python",
35
+ "name": "python",
36
+ "nbconvert_exporter": "python",
37
+ "pygments_lexer": "ipython3",
38
+ "version": "3.11.3"
39
+ }
40
+ },
41
+ "nbformat": 4,
42
+ "nbformat_minor": 5
43
+ }
scores.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "loss": 0.13162432610988617,
3
+ "accuracy": 1.0
4
+ }
setup.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# setup.py

import setuptools

# The PyPI long description comes straight from the README.
with open("README.md", "r", encoding="utf-8") as readme:
    long_description = readme.read()

__version__ = "0.0.0"

REPO_NAME = "End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC"
AUTHOR_USER_NAME = "AlyyanAhmed21"  # GitHub username
SRC_REPO = "cnnClassifier"  # Name of the main source folder under src/
AUTHOR_EMAIL = "alyyanawan19@gmail.com"  # Contact email

_github_url = f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}"

setuptools.setup(
    name=SRC_REPO,
    version=__version__,
    author=AUTHOR_USER_NAME,
    author_email=AUTHOR_EMAIL,
    description="A small python package for CNN app",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=_github_url,
    project_urls={
        "Bug Tracker": f"{_github_url}/issues",
    },
    # src layout: packages live under src/, discovered automatically.
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
)
src/cnnClassifier/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
import logging

# One shared record format: timestamp, level, originating module, message.
logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"

log_dir = "logs"
log_filepath = os.path.join(log_dir, "running_logs.log")
os.makedirs(log_dir, exist_ok=True)

# Mirror every INFO+ record both to logs/running_logs.log and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[
        logging.FileHandler(log_filepath),
        logging.StreamHandler(sys.stdout),
    ],
)

# Package-wide logger, used as `from cnnClassifier import logger`.
logger = logging.getLogger("cnnClassifierLogger")
src/cnnClassifier/components/__init__.py ADDED
File without changes
src/cnnClassifier/components/data_ingestion.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ import gdown
4
+ from cnnClassifier import logger
5
+ from cnnClassifier.utils.common import get_size
6
+ from cnnClassifier.entity.config_entity import DataIngestionConfig
7
+
8
+
9
class DataIngestion:
    """Downloads the dataset zip from Google Drive and unpacks it locally."""

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self) -> str:
        """Fetch the dataset zip from the configured Google Drive URL.

        Returns:
            str: local path the archive was written to.
        """
        try:
            dataset_url = self.config.source_URL
            zip_download_dir = self.config.local_data_file
            os.makedirs("artifacts/data_ingestion", exist_ok=True)
            logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

            # Drive share links look like .../d/<file_id>/view — grab the id
            # and build a direct-download URL for gdown.
            file_id = dataset_url.split("/")[-2]
            # BUGFIX: the prefix previously contained a stray '/' after '?'
            # ("uc?/export=download"), producing a malformed query string.
            prefix = 'https://drive.google.com/uc?export=download&id='
            gdown.download(prefix + file_id, zip_download_dir)

            logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")
            # BUGFIX: the signature promises `-> str` but nothing was returned.
            return zip_download_dir

        except Exception as e:
            raise e

    def extract_zip_file(self):
        """
        Extracts the downloaded zip file into the configured unzip directory.
        Function returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
src/cnnClassifier/components/model_evaluation_mlflow.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from pathlib import Path
3
+ import mlflow
4
+ import mlflow.keras
5
+ from urllib.parse import urlparse
6
+ from cnnClassifier.entity.config_entity import EvaluationConfig
7
+ from cnnClassifier.utils.common import save_json
8
+
9
+ # --- NEW IMPORTS for advanced evaluation ---
10
+ from sklearn.metrics import confusion_matrix, classification_report
11
+ import numpy as np
12
+ import seaborn as sns
13
+ import matplotlib.pyplot as plt
14
+ # -------------------------------------------
15
+
16
class Evaluation:
    """Evaluates the trained model on a validation split and logs
    metrics, a classification report, a confusion-matrix plot, and the
    model itself to MLflow."""

    def __init__(self, config: EvaluationConfig):
        self.config = config
        self.model = None            # set by evaluation()
        self.valid_generator = None  # set by _valid_generator()
        self.score = None            # [loss, accuracy] from model.evaluate
        self.y_true = None           # ground-truth class indices
        self.y_pred = None           # argmax of predicted probabilities

    def _valid_generator(self):
        """Build the 30%-split validation generator (rescaled, unshuffled)."""
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.30
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(**datagenerator_kwargs)

        # shuffle=False keeps generator order aligned with `.classes`,
        # which _get_predictions() relies on.
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load a saved Keras model from disk."""
        return tf.keras.models.load_model(path)

    def _get_predictions(self):
        """Gets ground truth labels and model's predicted labels."""
        self.y_true = self.valid_generator.classes
        y_pred_probs = self.model.predict(self.valid_generator)
        self.y_pred = np.argmax(y_pred_probs, axis=1)

    def evaluation(self):
        """Loads model, evaluates basic metrics, and gets detailed predictions."""
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = self.model.evaluate(self.valid_generator)
        self._get_predictions()
        self.save_score()

    def save_score(self):
        """Write {loss, accuracy} to scores.json; NaN-safe so the file
        always exists for downstream DVC stages."""
        # If self.score is None or contains NaN, create a default file
        if self.score is None or np.isnan(self.score).any():
            print("⚠️ Warning: Invalid scores detected (NaN). Saving default scores file.")
            scores = {"loss": float('nan'), "accuracy": float('nan')}
        else:
            # BUGFIX: cast to built-in float so numpy scalar types can
            # never break json serialization inside save_json.
            scores = {"loss": float(self.score[0]), "accuracy": float(self.score[1])}

        # This will now always create the file
        save_json(path=Path("scores.json"), data=scores)
        print(f"Scores saved to scores.json: {scores}")

    def log_confusion_matrix(self):
        """Generates, saves, and logs the confusion matrix plot to MLflow."""
        cm = confusion_matrix(self.y_true, self.y_pred)
        class_names = list(self.valid_generator.class_indices.keys())

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')

        matrix_path = Path("confusion_matrix.png")
        plt.savefig(matrix_path)
        # BUGFIX: close the figure so repeated evaluations do not leak
        # matplotlib figures/memory.
        plt.close()

        # str() for compatibility with mlflow versions expecting a plain path.
        mlflow.log_artifact(str(matrix_path), "plots")
        print("Confusion Matrix plot saved and logged to MLflow.")

    def log_into_mlflow(self):
        """Log params, metrics, per-class report, the confusion matrix and
        the model into the configured MLflow tracking server."""
        mlflow.set_tracking_uri(self.config.mlflow_uri)

        with mlflow.start_run():
            print("Logging basic parameters and metrics to MLflow...")
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics({"loss": self.score[0], "accuracy": self.score[1]})

            # --- Log detailed classification report metrics ---
            print("\n--- Classification Report ---")
            class_names = list(self.valid_generator.class_indices.keys())
            report = classification_report(self.y_true, self.y_pred,
                                           target_names=class_names,
                                           output_dict=True)
            print(classification_report(self.y_true, self.y_pred,
                                        target_names=class_names))

            # Flatten the nested report dict into scalar MLflow metrics
            # (e.g. "<class>_precision"). Non-dict entries ("accuracy")
            # are skipped — overall accuracy is logged above.
            for className, metrics in report.items():
                if isinstance(metrics, dict):
                    for metricName, value in metrics.items():
                        mlflow.log_metric(f"{className}_{metricName}", value)

            # --- Log the confusion matrix plot ---
            self.log_confusion_matrix()

            # --- Log the model as an artifact ---
            print("Logging model as an artifact...")
            mlflow.keras.log_model(self.model, "model")

            print("MLflow logging complete.")
src/cnnClassifier/components/model_trainer.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request as request
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ import time
6
+ from cnnClassifier.entity.config_entity import TrainingConfig
7
+ from pathlib import Path
8
+
9
+ # --- NEW IMPORTS ---
10
+ import pandas as pd
11
+ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
12
+ # --------------------
13
+
14
class Training:
    """Fine-tunes the prepared (frozen VGG16 + head) model on the
    ingested image dataset, with checkpointing and early stopping."""

    def __init__(self, config: TrainingConfig):
        self.config = config
        self.model = None            # set by get_base_model()
        self.train_generator = None  # set by train_valid_generator()
        self.valid_generator = None  # set by train_valid_generator()

    def get_base_model(self):
        """Load the updated base model produced by the prepare stage."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Build the train/validation generators (80/20 split,
        optional mild augmentation on the training side only)."""
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            # Deliberately mild transforms (reduced for stability).
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=20,
                horizontal_flip=True,
                width_shift_range=0.1,
                height_shift_range=0.1,
                shear_range=0.1,
                zoom_range=0.1,
                **datagenerator_kwargs
            )
        else:
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

        # Print class indices to be 100% sure of the label mapping.
        print(f"Discovered class indices: {self.train_generator.class_indices}")

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist a Keras model to disk."""
        model.save(path)

    def train(self):
        """Run model.fit with best-checkpoint saving and early stopping,
        then dump the per-epoch history to training_history.csv."""
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        # Saves the BEST model (by validation accuracy) to the configured path.
        # BUGFIX: pass the filepath as str — some TF/Keras versions reject
        # a pathlib.Path here.
        best_model_checkpoint = ModelCheckpoint(
            filepath=str(self.config.trained_model_path),
            save_best_only=True,
            monitor='val_accuracy',
            mode='max',
            verbose=1
        )

        # Stop once val_accuracy plateaus and roll back to the best weights.
        early_stopping = EarlyStopping(
            monitor='val_accuracy',
            patience=5,  # epochs with no improvement to wait
            restore_best_weights=True,
            verbose=1
        )

        callbacks_list = [best_model_checkpoint, early_stopping]

        history = self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator,
            callbacks=callbacks_list
        )

        # Keep per-epoch metrics for offline analysis/plotting.
        history_df = pd.DataFrame(history.history)
        history_path = "training_history.csv"  # saved in the repo root
        history_df.to_csv(history_path, index=False)
        print(f"✅ Training history saved to {history_path}")

        # NOTE: no final save_model() call — ModelCheckpoint has already
        # persisted the best model to trained_model_path.
src/cnnClassifier/components/prepare_base_model.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request as request
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ from pathlib import Path
6
+ from cnnClassifier.entity.config_entity import PrepareBaseModelConfig
7
+
8
+
9
+
10
+
11
+
12
class PrepareBaseModel:
    """Instantiates VGG16, freezes the requested layers, attaches a
    softmax classifier head, and persists both models to disk."""

    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config

    def get_base_model(self):
        """Download/instantiate VGG16 per config and save it."""
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Freeze backbone layers, add a Flatten+Dense head, and compile.

        Args:
            model: backbone to extend.
            classes (int): number of output classes.
            freeze_all (bool): freeze every backbone layer.
            freeze_till (int | None): if set (>0), freeze all but the last
                `freeze_till` layers.
            learning_rate (float): SGD learning rate.
        """
        # BUGFIX: the original assigned `model.trainable` inside these loops
        # instead of `layer.trainable`, which froze the WHOLE model even in
        # the partial-freeze (freeze_till) branch.
        if freeze_all:
            for layer in model.layers:
                layer.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for layer in model.layers[:-freeze_till]:
                layer.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Build the fully-frozen classifier model and persist it."""
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist a Keras model to disk."""
        model.save(path)
74
+
src/cnnClassifier/config/__init__.py ADDED
File without changes
src/cnnClassifier/config/configuration.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from cnnClassifier.constants import *
3
+ from cnnClassifier.utils.common import read_yaml, create_directories , save_json
4
+ from cnnClassifier.entity.config_entity import (DataIngestionConfig, PrepareBaseModelConfig, TrainingConfig, EvaluationConfig)
5
+
6
class ConfigurationManager:
    """Reads config.yaml / params.yaml and builds the typed config
    objects each pipeline stage consumes."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        # read_yaml returns ConfigBox objects, so values below are reached
        # with attribute access (self.config.data_ingestion, ...).
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion config from the `data_ingestion` YAML block."""
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_URL=config.source_URL,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir
        )

        return data_ingestion_config

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Build the base-model config from YAML paths + training params."""
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            updated_base_model_path=Path(config.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES
        )

        return prepare_base_model_config

    def get_training_config(self) -> TrainingConfig:
        """Build the training config; dataset dir is derived from the
        ingestion unzip dir."""
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        # NOTE(review): "Chest-CT-Scan-data" is hard-coded — it must match
        # the folder name inside the downloaded archive; confirm.
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chest-CT-Scan-data")
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE
        )

        return training_config


    def get_evaluation_config(self) -> EvaluationConfig:
        """Build the evaluation config (paths and MLflow URI are
        hard-coded here rather than read from YAML)."""
        # NOTE(review): evaluation loads "model.h5" while the
        # PredictionPipeline loads "best_model.h5" — confirm these are
        # intentionally different artifacts.
        eval_config = EvaluationConfig(
            path_of_model="artifacts/training/model.h5",
            training_data="artifacts/data_ingestion/Chest-CT-Scan-data",
            mlflow_uri="https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow",
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE
        )
        return eval_config
82
+
src/cnnClassifier/constants/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from pathlib import Path

# Single source of truth for the YAML files ConfigurationManager reads.
CONFIG_FILE_PATH = Path("config/config.yaml")
PARAMS_FILE_PATH = Path("params.yaml")
src/cnnClassifier/entity/__init__.py ADDED
File without changes
src/cnnClassifier/entity/config_entity.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+
5
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage."""
    root_dir: Path          # stage working directory
    source_URL: str         # share URL the dataset zip is downloaded from
    local_data_file: Path   # where the downloaded zip is written
    unzip_dir: Path         # where the archive is extracted
11
+
12
+
13
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for building/saving the base model."""
    root_dir: Path                 # stage working directory
    base_model_path: Path          # raw backbone save location
    updated_base_model_path: Path  # backbone + classifier head save location
    params_image_size: list        # model input shape, e.g. [H, W, C]
    params_learning_rate: float    # optimizer learning rate
    params_include_top: bool       # keep the backbone's own classifier head?
    params_weights: str            # pretrained weight set identifier
    params_classes: int            # number of output classes
23
+
24
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the model-training stage."""
    root_dir: Path                 # stage working directory
    trained_model_path: Path       # where the trained model is saved
    updated_base_model_path: Path  # model to fine-tune (from prepare stage)
    training_data: Path            # image dataset directory
    params_epochs: int             # number of training epochs
    params_batch_size: int         # generator batch size
    params_is_augmentation: bool   # enable training-time augmentation?
    params_image_size: list        # model input shape, e.g. [H, W, C]
34
+
35
+
36
@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable settings for the evaluation/MLflow-logging stage."""
    path_of_model: Path     # trained model to evaluate
    training_data: Path     # dataset the validation split is taken from
    all_params: dict        # full params.yaml contents, logged to MLflow
    mlflow_uri: str         # MLflow tracking server URI
    params_image_size: list # model input shape, e.g. [H, W, C]
    params_batch_size: int  # generator batch size
src/cnnClassifier/pipeline/__init__.py ADDED
File without changes
src/cnnClassifier/pipeline/prediction.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import tensorflow as tf
3
+ from tensorflow.keras.preprocessing import image
4
+ import os
5
+
6
class PredictionPipeline:
    """Runs a single-image inference with the best trained model."""

    def __init__(self, filename):
        self.filename = filename

    def predict(self):
        """Classify the image at self.filename; return the class index."""
        # Load the best checkpoint produced by the DVC training stage.
        model_path = os.path.join("artifacts", "training", "best_model.h5")
        model = tf.keras.models.load_model(model_path)

        # Preprocess exactly like training: resize to 224x224, then
        # rescale pixel values into [0, 1].
        img = image.load_img(self.filename, target_size=(224, 224))
        arr = image.img_to_array(img) / 255.0

        # Add the leading batch dimension expected by the model.
        batch = np.expand_dims(arr, axis=0)

        # argmax over class probabilities -> integer class index.
        result_index = np.argmax(model.predict(batch), axis=1)[0]
        print(f"Model predicted index: {result_index}")

        # The caller (app.py) translates this raw index into a
        # human-readable label such as "Cancer"/"Normal".
        return result_index
src/cnnClassifier/pipeline/stage_01_data_ingestion.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.data_ingestion import DataIngestion
3
+ from cnnClassifier import logger
4
+
5
+
6
+
7
STAGE_NAME = "Data Ingestion stage"


class DataIngestionTrainingPipeline:
    """Pipeline stage: download and extract the raw dataset."""

    def main(self):
        """Wire the ingestion config into the component and run both steps."""
        config_manager = ConfigurationManager()
        ingestion_config = config_manager.get_data_ingestion_config()
        ingestion = DataIngestion(config=ingestion_config)
        ingestion.download_file()
        ingestion.extract_zip_file()


if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        DataIngestionTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
src/cnnClassifier/pipeline/stage_02_prepare_base_model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.prepare_base_model import PrepareBaseModel
3
+ from cnnClassifier import logger
4
+
5
+
6
+
7
STAGE_NAME = "Prepare base model"


class PrepareBaseModelTrainingPipeline:
    """Pipeline stage: build, freeze, and save the base model."""

    def main(self):
        """Instantiate the backbone, then attach and save the full model."""
        config_manager = ConfigurationManager()
        base_model_config = config_manager.get_prepare_base_model_config()
        preparer = PrepareBaseModel(config=base_model_config)
        preparer.get_base_model()
        preparer.update_base_model()


if __name__ == '__main__':
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        PrepareBaseModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
src/cnnClassifier/pipeline/stage_03_model_trainer.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.model_trainer import Training
3
+ from cnnClassifier import logger
4
+
5
+
6
+
7
STAGE_NAME = "Training"


class ModelTrainingPipeline:
    """Pipeline stage: fine-tune the prepared base model."""

    def main(self):
        """Load the model, build data generators, and run training."""
        config_manager = ConfigurationManager()
        training_config = config_manager.get_training_config()
        trainer = Training(config=training_config)
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train()


if __name__ == '__main__':
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        ModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
35
+
src/cnnClassifier/pipeline/stage_04_model_evaluation.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.model_evaluation_mlflow import Evaluation
3
+ from cnnClassifier import logger
4
+ from dotenv import load_dotenv
5
+
6
# Load environment variables from .env file for MLflow credentials
load_dotenv()

STAGE_NAME = "Evaluation stage"


class EvaluationPipeline:
    """Pipeline stage: evaluate the trained model and log to MLflow."""

    def main(self):
        """Run evaluation (which also writes scores.json) and push
        metrics/artifacts to MLflow."""
        config_manager = ConfigurationManager()
        eval_config = config_manager.get_evaluation_config()
        evaluator = Evaluation(eval_config)
        evaluator.evaluation()  # save_score() is called inside evaluation()
        evaluator.log_into_mlflow()


if __name__ == '__main__':
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        EvaluationPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
src/cnnClassifier/utils/__init__.py ADDED
File without changes
src/cnnClassifier/utils/common.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from box.exceptions import BoxValueError
3
+ import yaml
4
+ from cnnClassifier import logger
5
+ import json
6
+ import joblib
7
+ from ensure import ensure_annotations
8
+ from box import ConfigBox
9
+ from pathlib import Path
10
+ from typing import Any
11
+ import base64
12
+
13
+
14
+
15
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Read a yaml file and return its contents with attribute access.

    Args:
        path_to_yaml (Path): path to the yaml file to load

    Raises:
        ValueError: if yaml file is empty
        e: any other failure while opening/parsing the file

    Returns:
        ConfigBox: ConfigBox type
    """
    try:
        with open(path_to_yaml) as yaml_file:
            content = yaml.safe_load(yaml_file)
            logger.info(f"yaml file: {path_to_yaml} loaded successfully")
            return ConfigBox(content)
    except BoxValueError:
        # ConfigBox(None) raises BoxValueError for an empty file.
        raise ValueError("yaml file is empty")
    except Exception as e:
        raise e
38
+
39
+
40
+
41
@ensure_annotations
def create_directories(path_to_directories: list, verbose=True):
    """Create every directory in the list (idempotent via exist_ok).

    Args:
        path_to_directories (list): list of directory paths to create
        verbose (bool, optional): log each created directory. Defaults to True.
    """
    for path in path_to_directories:
        os.makedirs(path, exist_ok=True)
        if verbose:
            logger.info(f"created directory at: {path}")
53
+
54
+
55
@ensure_annotations
def save_json(path: Path, data: dict):
    """Serialize *data* to *path* as pretty-printed (indent=4) JSON.

    Args:
        path (Path): destination json file
        data (dict): data to be written
    """
    with open(path, "w") as out_file:
        json.dump(data, out_file, indent=4)

    logger.info(f"json file saved at: {path}")
67
+
68
+
69
+
70
+
71
@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Load a json file's data with attribute-style access.

    Args:
        path (Path): path to json file

    Returns:
        ConfigBox: data as class attributes instead of dict
    """
    with open(path) as f:
        content = json.load(f)

    # BUGFIX: corrected "succesfully" typo in the log message.
    logger.info(f"json file loaded successfully from: {path}")
    return ConfigBox(content)
86
+
87
+
88
@ensure_annotations
def save_bin(data: Any, path: Path):
    """Save an arbitrary object as a joblib binary file.

    Args:
        data (Any): data to be saved as binary
        path (Path): path to binary file
    """
    joblib.dump(value=data, filename=path)
    logger.info(f"binary file saved at: {path}")
98
+
99
+
100
@ensure_annotations
def load_bin(path: Path) -> Any:
    """Load a joblib binary file.

    Args:
        path (Path): path to binary file

    Returns:
        Any: object stored in the file
    """
    data = joblib.load(path)
    logger.info(f"binary file loaded from: {path}")
    return data
113
+
114
@ensure_annotations
def get_size(path: Path) -> str:
    """Return the size of the file at *path*, rounded to the nearest KB.

    Args:
        path (Path): path of the file

    Returns:
        str: human-readable size, e.g. "~ 12 KB"
    """
    kb = round(os.path.getsize(path) / 1024)
    return f"~ {kb} KB"
126
+
127
+
128
def decodeImage(imgstring, fileName):
    """Decode a base64 image payload and write the raw bytes to fileName.

    Args:
        imgstring: base64-encoded image data (str or bytes).
        fileName: destination file path.
    """
    imgdata = base64.b64decode(imgstring)
    # BUGFIX: removed the redundant f.close() inside the with-block;
    # the context manager already closes the file.
    with open(fileName, 'wb') as f:
        f.write(imgdata)
133
+
134
+
135
def encodeImageIntoBase64(croppedImagePath):
    """Read the file at croppedImagePath and return its base64-encoded bytes."""
    with open(croppedImagePath, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw)
static/script.js ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Client-side controller for the scan-upload UI: previews the chosen image,
// POSTs it (base64) to the /predict endpoint, and renders the JSON result.
document.addEventListener('DOMContentLoaded', function () {
    // --- DOM Elements ---
    const fileInput = document.getElementById('fileInput');
    const uploadLabel = document.querySelector('.upload-label');
    const imagePreviewContainer = document.querySelector('.image-preview-container');
    const imagePreview = document.getElementById('imagePreview');
    const removeImageBtn = document.getElementById('removeImageBtn');
    const predictBtn = document.getElementById('predictBtn');
    const resultContainer = document.getElementById('result-container');
    const jsonResponse = document.getElementById('jsonResponse').querySelector('code');

    // Base64 payload of the currently selected image (null when none).
    let base64Image = null;

    // --- Event Listeners ---
    fileInput.addEventListener('change', handleFileSelect);
    removeImageBtn.addEventListener('click', resetUploader);
    predictBtn.addEventListener('click', handlePrediction);

    // --- Functions ---

    /**
     * Handles the file selection, reads the file as a Base64 string,
     * and updates the UI to show the preview.
     */
    function handleFileSelect(event) {
        const file = event.target.files[0];
        if (file) {
            const reader = new FileReader();
            reader.onload = function(e) {
                // Display the image preview
                imagePreview.src = e.target.result;
                uploadLabel.style.display = 'none';
                imagePreviewContainer.style.display = 'block';

                // Store the Base64 string (without the data URI prefix)
                base64Image = e.target.result.split(',')[1];

                // Enable the predict button
                predictBtn.disabled = false;
                resultContainer.innerHTML = '<p class="text-muted">Ready to predict.</p>';
                jsonResponse.textContent = 'Waiting for response...';
            };
            reader.readAsDataURL(file);
        }
    }

    /**
     * Resets the uploader to its initial state.
     */
    function resetUploader() {
        fileInput.value = ''; // Clear the file input
        base64Image = null;
        imagePreview.src = '#';
        uploadLabel.style.display = 'flex';
        imagePreviewContainer.style.display = 'none';
        predictBtn.disabled = true;
        resultContainer.innerHTML = '<p class="text-muted">Results will be displayed here after prediction.</p>';
        jsonResponse.textContent = 'Waiting for response...';
    }

    /**
     * Handles the prediction API call.
     */
    async function handlePrediction() {
        if (!base64Image) {
            alert('Please upload an image first.');
            return;
        }

        setLoadingState(true);

        // !! IMPORTANT: Change this URL to your actual API endpoint !!
        const apiUrl = '/predict'; // Example for a local Flask app

        try {
            const response = await fetch(apiUrl, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ image: base64Image }),
            });

            if (!response.ok) {
                throw new Error(`Server error: ${response.statusText}`);
            }

            const data = await response.json();
            displayResults(data);

        } catch (error) {
            console.error('Prediction Error:', error);
            displayError(error.message);
        } finally {
            // Always clear the spinner, success or failure.
            setLoadingState(false);
        }
    }

    /**
     * Displays the prediction results in a user-friendly format.
     */
    function displayResults(data) {
        // Assuming the response is like: [{"prediction": "Normal"}]
        const prediction = data[0]?.prediction; // Safely access the prediction

        let resultHtml = '';
        if (prediction) {
            if (prediction.toLowerCase() === 'normal') {
                resultHtml = `
                    <div class="result-normal">
                        <i class="fas fa-check-circle result-icon"></i>
                        <h3>Prediction: Normal</h3>
                        <p>The model predicts that the scan is not cancerous.</p>
                    </div>`;
            } else {
                resultHtml = `
                    <div class="result-cancer">
                        <i class="fas fa-exclamation-triangle result-icon"></i>
                        <h3>Prediction: Cancer Detected</h3>
                        <p>The model predicts a high probability of malignancy. Please consult a medical professional.</p>
                    </div>`;
            }
        } else {
            resultHtml = `<p>Could not determine prediction from the response.</p>`;
        }

        resultContainer.innerHTML = resultHtml;
        jsonResponse.textContent = JSON.stringify(data, null, 2);
    }

    /**
     * Displays an error message in the UI.
     */
    function displayError(errorMessage) {
        resultContainer.innerHTML = `
            <div class="text-danger">
                <i class="fas fa-times-circle result-icon"></i>
                <h3>Prediction Failed</h3>
                <p>${errorMessage}</p>
            </div>`;
        jsonResponse.textContent = `Error: ${errorMessage}`;
    }

    /**
     * Manages the loading state of the predict button.
     */
    function setLoadingState(isLoading) {
        const spinner = predictBtn.querySelector('.spinner-border');
        const btnText = predictBtn.querySelector('.btn-text');

        if (isLoading) {
            predictBtn.disabled = true;
            spinner.style.display = 'inline-block';
            btnText.style.display = 'none';
        } else {
            predictBtn.disabled = false;
            spinner.style.display = 'none';
            btnText.style.display = 'inline-block';
        }
    }
});
static/style.css ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Using CSS Variables for easy theme changes */
2
+ :root {
3
+ --primary-color: #007bff;
4
+ --secondary-color: #6c757d;
5
+ --background-color: #f8f9fa;
6
+ --card-bg-color: #ffffff;
7
+ --font-family: 'Poppins', sans-serif;
8
+ --success-color: #28a745;
9
+ --danger-color: #dc3545;
10
+ }
11
+
12
+ body {
13
+ font-family: var(--font-family);
14
+ background-color: var(--background-color);
15
+ }
16
+
17
+ header h1 {
18
+ color: var(--primary-color);
19
+ font-weight: 600;
20
+ }
21
+
22
+ .card {
23
+ border-radius: 15px;
24
+ transition: transform 0.2s ease-in-out;
25
+ }
26
+
27
+ .card:hover {
28
+ transform: translateY(-5px);
29
+ }
30
+
31
+ .card-header {
32
+ border-top-left-radius: 15px;
33
+ border-top-right-radius: 15px;
34
+ }
35
+
36
+ .image-upload-wrapper {
37
+ position: relative;
38
+ width: 100%;
39
+ height: 350px;
40
+ border: 2px dashed var(--primary-color);
41
+ border-radius: 10px;
42
+ display: flex;
43
+ align-items: center;
44
+ justify-content: center;
45
+ overflow: hidden;
46
+ background-color: #f0f6ff;
47
+ }
48
+
49
+ .upload-label {
50
+ cursor: pointer;
51
+ text-align: center;
52
+ color: var(--primary-color);
53
+ }
54
+
55
+ .upload-label:hover .upload-icon {
56
+ transform: scale(1.1);
57
+ color: #0056b3;
58
+ }
59
+
60
+ .upload-icon {
61
+ transition: transform 0.2s ease-in-out;
62
+ }
63
+
64
+ .image-preview-container {
65
+ position: absolute;
66
+ top: 0;
67
+ left: 0;
68
+ width: 100%;
69
+ height: 100%;
70
+ display: none; /* Hidden by default */
71
+ }
72
+
73
+ #imagePreview {
74
+ width: 100%;
75
+ height: 100%;
76
+ object-fit: contain; /* Use contain to see the whole image */
77
+ padding: 10px;
78
+ }
79
+
80
+ .remove-btn {
81
+ position: absolute;
82
+ top: 10px;
83
+ right: 10px;
84
+ border-radius: 50%;
85
+ width: 30px;
86
+ height: 30px;
87
+ display: flex;
88
+ align-items: center;
89
+ justify-content: center;
90
+ font-size: 1.2rem;
91
+ line-height: 1;
92
+ }
93
+
94
+ #result-container h3 {
95
+ font-weight: 600;
96
+ }
97
+
98
+ .result-normal {
99
+ color: var(--success-color);
100
+ }
101
+
102
+ .result-cancer {
103
+ color: var(--danger-color);
104
+ }
105
+
106
+ .result-icon {
107
+ font-size: 4rem;
108
+ margin-bottom: 1rem;
109
+ }
110
+
111
+ #jsonResponse {
112
+ max-height: 200px;
113
+ overflow-y: auto;
114
+ white-space: pre-wrap;
115
+ word-break: break-all;
116
+ }
template.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import logging
from pathlib import Path

# Set up basic logging to see the script's output
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')

# Define the project name
project_name = "cnnClassifier"

# List of files and directories to be created
list_of_files = [
    ".github/workflows/.gitkeep",
    f"src/{project_name}/__init__.py",
    f"src/{project_name}/components/__init__.py",
    f"src/{project_name}/utils/__init__.py",
    f"src/{project_name}/config/__init__.py",
    f"src/{project_name}/config/configuration.py",
    f"src/{project_name}/pipeline/__init__.py",
    f"src/{project_name}/entity/__init__.py",
    f"src/{project_name}/constants/__init__.py",
    "config/config.yaml",
    "dvc.yaml",
    "params.yaml",
    "requirements.txt",
    "setup.py",
    "research/trials.ipynb",
    "templates/index.html"
]


def create_project_structure(files):
    """Create every path in *files*, making parent directories as needed.

    A file is (re)created only when it is missing or empty, so re-running
    the scaffold never clobbers files that already have content.

    Args:
        files: iterable of path strings, relative to the current directory.
    """
    for filepath_str in files:
        filepath = Path(filepath_str)  # Path object for robust, portable handling

        # 1. Create the parent directory if the path has one (Path(".") means none).
        if filepath.parent != Path("."):
            filepath.parent.mkdir(parents=True, exist_ok=True)
            logging.info(f"Creating directory: {filepath.parent} for the file {filepath.name}")

        # 2. Create the file if it doesn't exist or is empty.
        if (not filepath.exists()) or filepath.stat().st_size == 0:
            filepath.touch()  # creates an empty file (or updates mtime of an empty one)
            logging.info(f"Creating empty file: {filepath}")
        else:
            logging.info(f"{filepath.name} already exists")


if __name__ == "__main__":
    # Guarded so importing this module has no side effects;
    # `python template.py` still scaffolds the project as before.
    create_project_structure(list_of_files)
templates/index.html ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Chest Cancer Detection AI</title>
7
+
8
+ <!-- Bootstrap 5 CSS -->
9
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
10
+
11
+ <!-- Font Awesome for Icons -->
12
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">
13
+
14
+ <!-- Google Fonts (Poppins) -->
15
+ <link rel="preconnect" href="https://fonts.googleapis.com">
16
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
17
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap" rel="stylesheet">
18
+
19
+ <!-- Your Custom CSS -->
20
+ <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
21
+ </head>
22
+ <body>
23
+
24
+ <header class="text-center py-4 shadow-sm">
25
+ <div class="container">
26
+ <h1><i class="fas fa-lungs-virus"></i> Chest Cancer Detection AI</h1>
27
+ <p class="lead text-muted">Upload a Chest CT Scan to classify it as Normal or Cancerous</p>
28
+ </div>
29
+ </header>
30
+
31
+ <main class="container my-5">
32
+ <div class="row g-4">
33
+ <!-- Left Column: Uploader -->
34
+ <div class="col-lg-6">
35
+ <div class="card h-100 shadow-lg border-0">
36
+ <div class="card-body text-center d-flex flex-column justify-content-center">
37
+ <div class="image-upload-wrapper">
38
+ <input type="file" id="fileInput" accept="image/png, image/jpeg" style="display: none;">
39
+ <label for="fileInput" class="upload-label">
40
+ <div class="upload-icon">
41
+ <i class="fas fa-cloud-upload-alt fa-3x"></i>
42
+ </div>
43
+ <p class="upload-text"><strong>Click to browse</strong> or drag and drop an image here.</p>
44
+ </label>
45
+ <div class="image-preview-container">
46
+ <img id="imagePreview" src="#" alt="Image Preview" class="img-fluid rounded"/>
47
+ <button id="removeImageBtn" class="btn btn-sm btn-danger remove-btn">&times;</button>
48
+ </div>
49
+ </div>
50
+ <button id="predictBtn" class="btn btn-primary btn-lg mt-4 w-100" disabled>
51
+ <span class="spinner-border spinner-border-sm" role="status" aria-hidden="true" style="display: none;"></span>
52
+ <span class="btn-text"><i class="fas fa-microscope"></i> Predict</span>
53
+ </button>
54
+ </div>
55
+ </div>
56
+ </div>
57
+
58
+ <!-- Right Column: Results -->
59
+ <div class="col-lg-6">
60
+ <div class="card h-100 shadow-lg border-0">
61
+ <div class="card-header bg-primary text-white">
62
+ <h5 class="mb-0"><i class="fas fa-poll"></i> Prediction Results</h5>
63
+ </div>
64
+ <div class="card-body">
65
+ <div id="result-container" class="text-center">
66
+ <p class="text-muted">Results will be displayed here after prediction.</p>
67
+ </div>
68
+ <hr>
69
+ <div class="accordion" id="jsonAccordion">
70
+ <div class="accordion-item">
71
+ <h2 class="accordion-header" id="headingOne">
72
+ <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOne" aria-expanded="false" aria-controls="collapseOne">
73
+ Raw JSON Response
74
+ </button>
75
+ </h2>
76
+ <div id="collapseOne" class="accordion-collapse collapse" aria-labelledby="headingOne" data-bs-parent="#jsonAccordion">
77
+ <div class="accordion-body">
78
+ <pre id="jsonResponse" class="bg-light p-3 rounded"><code>Waiting for response...</code></pre>
79
+ </div>
80
+ </div>
81
+ </div>
82
+ </div>
83
+ </div>
84
+ </div>
85
+ </div>
86
+ </div>
87
+ </main>
88
+
89
+ <footer class="text-center text-muted py-3 mt-4">
90
+ <p>&copy; 2024 Your Name. Powered by AI.</p>
91
+ </footer>
92
+
93
+ <!-- Bootstrap 5 JS -->
94
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
95
+ <!-- Your Custom JS -->
96
+ <script src="{{ url_for('static', filename='script.js') }}"></script>
97
+ </body>
98
+ </html>
training_history.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loss,accuracy,val_loss,val_accuracy
2
+ 1.0304151773452759,0.5675675868988037,0.6861137747764587,0.421875
3
+ 1.2392491102218628,0.5057914853096008,0.2789396345615387,0.9375
4
+ 0.521758496761322,0.7953668236732483,0.277998149394989,0.9375
5
+ 0.46904969215393066,0.760617733001709,0.2396804541349411,0.9375
6
+ 0.2891399562358856,0.8648648858070374,0.13092049956321716,0.96875
7
+ 0.2712053954601288,0.8823529481887817,0.10788409411907196,1.0
8
+ 0.23328891396522522,0.9305019378662109,0.09912744164466858,0.984375
9
+ 0.22442513704299927,0.92277991771698,0.14693066477775574,0.984375
10
+ 0.19375579059123993,0.9189189076423645,0.08046227693557739,0.984375
11
+ 0.20040491223335266,0.9189189076423645,0.2098347544670105,0.9375