Spaces:
Sleeping
Sleeping
Commit
·
d576da9
0
Parent(s):
Initial commit of clean, working project
Browse files- .dvc/.gitignore +3 -0
- .dvc/config +0 -0
- .dvcignore +3 -0
- .gitattributes +2 -0
- .github/workflows/main.yaml +57 -0
- .gitignore +214 -0
- LICENSE +21 -0
- README.md +2 -0
- app.py +55 -0
- class_check.py +66 -0
- config/config.yaml +17 -0
- confusion_matrix.png +0 -0
- dockerfile +13 -0
- dvc.lock +113 -0
- dvc.yaml +54 -0
- main.py +54 -0
- params.yaml +18 -0
- requirements.txt +0 -0
- research/01_data_ingestion.ipynb +230 -0
- research/02_prepare_base_model.ipynb +290 -0
- research/03_model_trainer.ipynb +303 -0
- research/04_model_evaluation_with_mlflow.ipynb +328 -0
- research/trials.ipynb +43 -0
- scores.json +4 -0
- setup.py +33 -0
- src/cnnClassifier/__init__.py +22 -0
- src/cnnClassifier/components/__init__.py +0 -0
- src/cnnClassifier/components/data_ingestion.py +46 -0
- src/cnnClassifier/components/model_evaluation_mlflow.py +124 -0
- src/cnnClassifier/components/model_trainer.py +127 -0
- src/cnnClassifier/components/prepare_base_model.py +74 -0
- src/cnnClassifier/config/__init__.py +0 -0
- src/cnnClassifier/config/configuration.py +82 -0
- src/cnnClassifier/constants/__init__.py +4 -0
- src/cnnClassifier/entity/__init__.py +0 -0
- src/cnnClassifier/entity/config_entity.py +43 -0
- src/cnnClassifier/pipeline/__init__.py +0 -0
- src/cnnClassifier/pipeline/prediction.py +36 -0
- src/cnnClassifier/pipeline/stage_01_data_ingestion.py +31 -0
- src/cnnClassifier/pipeline/stage_02_prepare_base_model.py +32 -0
- src/cnnClassifier/pipeline/stage_03_model_trainer.py +35 -0
- src/cnnClassifier/pipeline/stage_04_model_evaluation.py +37 -0
- src/cnnClassifier/utils/__init__.py +0 -0
- src/cnnClassifier/utils/common.py +137 -0
- static/script.js +159 -0
- static/style.css +116 -0
- template.py +47 -0
- templates/index.html +98 -0
- training_history.csv +11 -0
.dvc/.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/config.local
|
| 2 |
+
/tmp
|
| 3 |
+
/cache
|
.dvc/config
ADDED
|
File without changes
|
.dvcignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Add patterns of files dvc should ignore, which could improve
|
| 2 |
+
# the performance. Learn more at
|
| 3 |
+
# https://dvc.org/doc/user-guide/dvcignore
|
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
artifacts/training/model/variables/variables.data-*-of-* filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/main.yaml
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: CI-CD Pipeline for Chest Cancer Classifier
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
# Job 1: Continuous Integration (Test the application)
|
| 10 |
+
ci-test:
|
| 11 |
+
name: Continuous Integration - Test Application
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
|
| 14 |
+
steps:
|
| 15 |
+
- name: Checkout Code
|
| 16 |
+
uses: actions/checkout@v3
|
| 17 |
+
|
| 18 |
+
- name: Set up Python 3.8
|
| 19 |
+
uses: actions/setup-python@v4
|
| 20 |
+
with:
|
| 21 |
+
python-version: '3.8'
|
| 22 |
+
|
| 23 |
+
- name: Install Dependencies
|
| 24 |
+
run: pip install -r requirements.txt
|
| 25 |
+
|
| 26 |
+
- name: Run a simple health check
|
| 27 |
+
run: echo "Placeholder for future tests. For now, we just check if dependencies install."
|
| 28 |
+
|
| 29 |
+
# Job 2: Continuous Deployment (Deploy to Hugging Face with Manual Git Push)
|
| 30 |
+
cd-deploy:
|
| 31 |
+
name: Continuous Deployment - Deploy to Hugging Face
|
| 32 |
+
needs: ci-test # This job will only run if the 'ci-test' job succeeds
|
| 33 |
+
runs-on: ubuntu-latest
|
| 34 |
+
|
| 35 |
+
steps:
|
| 36 |
+
- name: Checkout Code
|
| 37 |
+
uses: actions/checkout@v3
|
| 38 |
+
with:
|
| 39 |
+
# We need to fetch all history and tags for the push to work correctly
|
| 40 |
+
fetch-depth: 0
|
| 41 |
+
lfs: true
|
| 42 |
+
|
| 43 |
+
- name: Push to Hugging Face Hub
|
| 44 |
+
env:
|
| 45 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 46 |
+
HF_SPACE_REPO: ${{ secrets.HF_SPACE_REPO }}
|
| 47 |
+
run: |
|
| 48 |
+
echo "Setting up git repository for Hugging Face push"
|
| 49 |
+
# Add the Hugging Face Space as a new remote repository named "hf"
|
| 50 |
+
# Use your canonical (lowercase) HF username here
|
| 51 |
+
git remote add hf "https://alyyanahmed21:${HF_TOKEN}@huggingface.co/spaces/${HF_SPACE_REPO}"
|
| 52 |
+
|
| 53 |
+
echo "Pushing to Hugging Face..."
|
| 54 |
+
# Force-push the main branch from your GitHub repo to the main branch on the HF remote
|
| 55 |
+
git push --force hf main
|
| 56 |
+
|
| 57 |
+
echo "✅ Deployment successful!"
|
.gitignore
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[codz]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py.cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# UV
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
#uv.lock
|
| 102 |
+
|
| 103 |
+
# poetry
|
| 104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
+
# commonly ignored for libraries.
|
| 107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
+
#poetry.lock
|
| 109 |
+
#poetry.toml
|
| 110 |
+
|
| 111 |
+
# pdm
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 113 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 114 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 115 |
+
#pdm.lock
|
| 116 |
+
#pdm.toml
|
| 117 |
+
.pdm-python
|
| 118 |
+
.pdm-build/
|
| 119 |
+
|
| 120 |
+
# pixi
|
| 121 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 122 |
+
#pixi.lock
|
| 123 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 124 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 125 |
+
.pixi
|
| 126 |
+
|
| 127 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 128 |
+
__pypackages__/
|
| 129 |
+
|
| 130 |
+
# Celery stuff
|
| 131 |
+
celerybeat-schedule
|
| 132 |
+
celerybeat.pid
|
| 133 |
+
|
| 134 |
+
# SageMath parsed files
|
| 135 |
+
*.sage.py
|
| 136 |
+
|
| 137 |
+
# Environments
|
| 138 |
+
.env
|
| 139 |
+
.envrc
|
| 140 |
+
.venv
|
| 141 |
+
env/
|
| 142 |
+
venv/
|
| 143 |
+
ENV/
|
| 144 |
+
env.bak/
|
| 145 |
+
venv.bak/
|
| 146 |
+
|
| 147 |
+
# Spyder project settings
|
| 148 |
+
.spyderproject
|
| 149 |
+
.spyproject
|
| 150 |
+
|
| 151 |
+
# Rope project settings
|
| 152 |
+
.ropeproject
|
| 153 |
+
|
| 154 |
+
# mkdocs documentation
|
| 155 |
+
/site
|
| 156 |
+
|
| 157 |
+
# mypy
|
| 158 |
+
.mypy_cache/
|
| 159 |
+
.dmypy.json
|
| 160 |
+
dmypy.json
|
| 161 |
+
|
| 162 |
+
# Pyre type checker
|
| 163 |
+
.pyre/
|
| 164 |
+
|
| 165 |
+
# pytype static type analyzer
|
| 166 |
+
.pytype/
|
| 167 |
+
|
| 168 |
+
# Cython debug symbols
|
| 169 |
+
cython_debug/
|
| 170 |
+
|
| 171 |
+
# PyCharm
|
| 172 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 173 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 174 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 175 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 176 |
+
#.idea/
|
| 177 |
+
|
| 178 |
+
# Abstra
|
| 179 |
+
# Abstra is an AI-powered process automation framework.
|
| 180 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 181 |
+
# Learn more at https://abstra.io/docs
|
| 182 |
+
.abstra/
|
| 183 |
+
|
| 184 |
+
# Visual Studio Code
|
| 185 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 186 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 187 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 188 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 189 |
+
# .vscode/
|
| 190 |
+
|
| 191 |
+
# Ruff stuff:
|
| 192 |
+
.ruff_cache/
|
| 193 |
+
|
| 194 |
+
# PyPI configuration file
|
| 195 |
+
.pypirc
|
| 196 |
+
|
| 197 |
+
# Cursor
|
| 198 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 199 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 200 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 201 |
+
.cursorignore
|
| 202 |
+
.cursorindexingignore
|
| 203 |
+
|
| 204 |
+
# Marimo
|
| 205 |
+
marimo/_static/
|
| 206 |
+
marimo/_lsp/
|
| 207 |
+
__marimo__/
|
| 208 |
+
|
| 209 |
+
artifacts/*
|
| 210 |
+
|
| 211 |
+
mlruns/
|
| 212 |
+
.env
|
| 213 |
+
model/
|
| 214 |
+
cnn_env/
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 ALYYAN
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
app.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, render_template
|
| 2 |
+
import os
|
| 3 |
+
from flask_cors import CORS, cross_origin
|
| 4 |
+
from cnnClassifier.utils.common import decodeImage
|
| 5 |
+
from cnnClassifier.pipeline.prediction import PredictionPipeline
|
| 6 |
+
|
| 7 |
+
# Set environment variables for consistent encoding
|
| 8 |
+
os.putenv('LANG', 'en_US.UTF-8')
|
| 9 |
+
os.putenv('LC_ALL', 'en_US.UTF-8')
|
| 10 |
+
|
| 11 |
+
app = Flask(__name__)
|
| 12 |
+
CORS(app)
|
| 13 |
+
|
| 14 |
+
class ClientApp:
|
| 15 |
+
def __init__(self):
|
| 16 |
+
self.filename = "inputImage.jpg"
|
| 17 |
+
self.classifier = PredictionPipeline(self.filename)
|
| 18 |
+
|
| 19 |
+
@app.route("/", methods=['GET'])
|
| 20 |
+
@cross_origin()
|
| 21 |
+
def home():
|
| 22 |
+
"""Renders the main user interface."""
|
| 23 |
+
return render_template('index.html')
|
| 24 |
+
|
| 25 |
+
@app.route("/train", methods=['GET','POST'])
|
| 26 |
+
@cross_origin()
|
| 27 |
+
def trainRoute():
|
| 28 |
+
"""Triggers the DVC pipeline to retrain the model."""
|
| 29 |
+
# os.system("python main.py") # You can use this if you have a main orchestrator
|
| 30 |
+
os.system("dvc repro")
|
| 31 |
+
return "Training done successfully!"
|
| 32 |
+
|
| 33 |
+
@app.route("/predict", methods=['POST'])
|
| 34 |
+
@cross_origin()
|
| 35 |
+
def predictRoute():
|
| 36 |
+
image = request.json['image']
|
| 37 |
+
decodeImage(image, clApp.filename)
|
| 38 |
+
|
| 39 |
+
# The predict() method now returns just the index (0 or 1)
|
| 40 |
+
prediction_value = clApp.classifier.predict()
|
| 41 |
+
|
| 42 |
+
# This logic is confirmed by your class indices: {'adenocarcinoma': 0, 'normal': 1}
|
| 43 |
+
if prediction_value == 1:
|
| 44 |
+
prediction_text = "Normal"
|
| 45 |
+
else: # The value was 0
|
| 46 |
+
prediction_text = "Cancer"
|
| 47 |
+
|
| 48 |
+
# The front-end expects the key "prediction"
|
| 49 |
+
return jsonify([{"prediction": prediction_text}])
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
if __name__ == "__main__":
|
| 53 |
+
clApp = ClientApp()
|
| 54 |
+
# Run the app on all available interfaces (for Docker/deployment) and port 8080
|
| 55 |
+
app.run(host='0.0.0.0', port=8080)
|
class_check.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# check_data_balance.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
def check_dataset_balance(data_path: Path):
|
| 7 |
+
"""
|
| 8 |
+
Checks and prints the balance of classes in a dataset directory.
|
| 9 |
+
|
| 10 |
+
The expected directory structure is:
|
| 11 |
+
- data_path/
|
| 12 |
+
- class_A/
|
| 13 |
+
- image1.jpg
|
| 14 |
+
- image2.jpg
|
| 15 |
+
...
|
| 16 |
+
- class_B/
|
| 17 |
+
- image1.jpg
|
| 18 |
+
- image2.jpg
|
| 19 |
+
...
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
data_path (Path): The path to the main dataset directory.
|
| 23 |
+
"""
|
| 24 |
+
print(f"--- Checking Dataset Balance at: {data_path} ---\n")
|
| 25 |
+
|
| 26 |
+
if not data_path.is_dir():
|
| 27 |
+
print(f"❌ ERROR: The provided path is not a valid directory.")
|
| 28 |
+
return
|
| 29 |
+
|
| 30 |
+
class_names = [d.name for d in data_path.iterdir() if d.is_dir()]
|
| 31 |
+
|
| 32 |
+
if not class_names:
|
| 33 |
+
print("❌ ERROR: No class subdirectories found in the dataset folder.")
|
| 34 |
+
return
|
| 35 |
+
|
| 36 |
+
print(f"Found {len(class_names)} classes: {', '.join(class_names)}\n")
|
| 37 |
+
|
| 38 |
+
class_counts = {}
|
| 39 |
+
total_images = 0
|
| 40 |
+
|
| 41 |
+
for class_name in class_names:
|
| 42 |
+
class_dir = data_path / class_name
|
| 43 |
+
# Count files, ignoring subdirectories (like .ipynb_checkpoints)
|
| 44 |
+
num_images = len([f for f in class_dir.iterdir() if f.is_file()])
|
| 45 |
+
class_counts[class_name] = num_images
|
| 46 |
+
total_images += num_images
|
| 47 |
+
|
| 48 |
+
print("--- Image Counts per Class ---")
|
| 49 |
+
for class_name, count in class_counts.items():
|
| 50 |
+
percentage = (count / total_images) * 100 if total_images > 0 else 0
|
| 51 |
+
print(f"- {class_name:<20}: {count:>5} images ({percentage:.2f}%)")
|
| 52 |
+
|
| 53 |
+
print("-" * 35)
|
| 54 |
+
print(f"- {'Total':<20}: {total_images:>5} images\n")
|
| 55 |
+
|
| 56 |
+
print("--- For your training script ---")
|
| 57 |
+
print("Use these counts to calculate your class_weight dictionary.")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
if __name__ == "__main__":
|
| 61 |
+
# --- IMPORTANT ---
|
| 62 |
+
# Update this path to point to your actual dataset folder.
|
| 63 |
+
# This is the folder that contains the 'Normal' and 'adenocarcinoma' subfolders.
|
| 64 |
+
dataset_directory = Path("artifacts/data_ingestion/Chest-CT-Scan-data")
|
| 65 |
+
|
| 66 |
+
check_dataset_balance(dataset_directory)
|
config/config.yaml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifacts_root: artifacts
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
data_ingestion:
|
| 5 |
+
root_dir: artifacts/data_ingestion
|
| 6 |
+
source_URL: https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing
|
| 7 |
+
local_data_file: artifacts/data_ingestion/data.zip
|
| 8 |
+
unzip_dir: artifacts/data_ingestion
|
| 9 |
+
|
| 10 |
+
prepare_base_model:
|
| 11 |
+
root_dir: artifacts/prepare_base_model
|
| 12 |
+
base_model_path: artifacts/prepare_base_model/base_model.h5
|
| 13 |
+
updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
|
| 14 |
+
|
| 15 |
+
training:
|
| 16 |
+
root_dir: artifacts/training
|
| 17 |
+
trained_model_path: artifacts/training/model.h5
|
confusion_matrix.png
ADDED
|
dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.8-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
|
| 7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 8 |
+
|
| 9 |
+
COPY . .
|
| 10 |
+
|
| 11 |
+
EXPOSE 8080
|
| 12 |
+
|
| 13 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:8080", "app:app"]
|
dvc.lock
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
schema: '2.0'
|
| 2 |
+
stages:
|
| 3 |
+
data_ingestion:
|
| 4 |
+
cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
|
| 5 |
+
deps:
|
| 6 |
+
- path: config/config.yaml
|
| 7 |
+
hash: md5
|
| 8 |
+
md5: d4c6e6a52ca35ea93094c3e1a421499e
|
| 9 |
+
size: 578
|
| 10 |
+
- path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
|
| 11 |
+
hash: md5
|
| 12 |
+
md5: bad788253475f50d44fdaa7237967b49
|
| 13 |
+
size: 883
|
| 14 |
+
outs:
|
| 15 |
+
- path: artifacts/data_ingestion/Chest-CT-Scan-data
|
| 16 |
+
hash: md5
|
| 17 |
+
md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
|
| 18 |
+
size: 49247431
|
| 19 |
+
nfiles: 343
|
| 20 |
+
prepare_base_model:
|
| 21 |
+
cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
|
| 22 |
+
deps:
|
| 23 |
+
- path: config/config.yaml
|
| 24 |
+
hash: md5
|
| 25 |
+
md5: d4c6e6a52ca35ea93094c3e1a421499e
|
| 26 |
+
size: 578
|
| 27 |
+
- path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
|
| 28 |
+
hash: md5
|
| 29 |
+
md5: 30c63470719d961e32045908b7c0772d
|
| 30 |
+
size: 966
|
| 31 |
+
params:
|
| 32 |
+
params.yaml:
|
| 33 |
+
CLASSES: 2
|
| 34 |
+
IMAGE_SIZE:
|
| 35 |
+
- 224
|
| 36 |
+
- 224
|
| 37 |
+
- 3
|
| 38 |
+
INCLUDE_TOP: false
|
| 39 |
+
LEARNING_RATE: 0.001
|
| 40 |
+
WEIGHTS: imagenet
|
| 41 |
+
outs:
|
| 42 |
+
- path: artifacts/prepare_base_model
|
| 43 |
+
hash: md5
|
| 44 |
+
md5: 4aa2611cd37984c188512d3a19c6942b.dir
|
| 45 |
+
size: 118054560
|
| 46 |
+
nfiles: 2
|
| 47 |
+
training:
|
| 48 |
+
cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
|
| 49 |
+
deps:
|
| 50 |
+
- path: artifacts/data_ingestion/Chest-CT-Scan-data
|
| 51 |
+
hash: md5
|
| 52 |
+
md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
|
| 53 |
+
size: 49247431
|
| 54 |
+
nfiles: 343
|
| 55 |
+
- path: artifacts/prepare_base_model
|
| 56 |
+
hash: md5
|
| 57 |
+
md5: 4aa2611cd37984c188512d3a19c6942b.dir
|
| 58 |
+
size: 118054560
|
| 59 |
+
nfiles: 2
|
| 60 |
+
- path: config/config.yaml
|
| 61 |
+
hash: md5
|
| 62 |
+
md5: d4c6e6a52ca35ea93094c3e1a421499e
|
| 63 |
+
size: 578
|
| 64 |
+
- path: src/cnnClassifier/pipeline/stage_03_model_trainer.py
|
| 65 |
+
hash: md5
|
| 66 |
+
md5: c33e23d2c123f157b2ab007c8e9d938f
|
| 67 |
+
size: 893
|
| 68 |
+
params:
|
| 69 |
+
params.yaml:
|
| 70 |
+
AUGMENTATION: true
|
| 71 |
+
BATCH_SIZE: 16
|
| 72 |
+
EPOCHS: 10
|
| 73 |
+
IMAGE_SIZE:
|
| 74 |
+
- 224
|
| 75 |
+
- 224
|
| 76 |
+
- 3
|
| 77 |
+
outs:
|
| 78 |
+
- path: artifacts/training/model.h5
|
| 79 |
+
hash: md5
|
| 80 |
+
md5: 233944d4fbed7856cf28be27c602014d
|
| 81 |
+
size: 59337520
|
| 82 |
+
evaluation:
|
| 83 |
+
cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
|
| 84 |
+
deps:
|
| 85 |
+
- path: artifacts/data_ingestion/Chest-CT-Scan-data
|
| 86 |
+
hash: md5
|
| 87 |
+
md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
|
| 88 |
+
size: 49247431
|
| 89 |
+
nfiles: 343
|
| 90 |
+
- path: artifacts/training/model.h5
|
| 91 |
+
hash: md5
|
| 92 |
+
md5: 233944d4fbed7856cf28be27c602014d
|
| 93 |
+
size: 59337520
|
| 94 |
+
- path: config/config.yaml
|
| 95 |
+
hash: md5
|
| 96 |
+
md5: d4c6e6a52ca35ea93094c3e1a421499e
|
| 97 |
+
size: 578
|
| 98 |
+
- path: src/cnnClassifier/pipeline/stage_04_model_evaluation.py
|
| 99 |
+
hash: md5
|
| 100 |
+
md5: d20a1645fd93cae9c7c0cecd8a0d4a2a
|
| 101 |
+
size: 1188
|
| 102 |
+
params:
|
| 103 |
+
params.yaml:
|
| 104 |
+
BATCH_SIZE: 16
|
| 105 |
+
IMAGE_SIZE:
|
| 106 |
+
- 224
|
| 107 |
+
- 224
|
| 108 |
+
- 3
|
| 109 |
+
outs:
|
| 110 |
+
- path: scores.json
|
| 111 |
+
hash: md5
|
| 112 |
+
md5: 2c6b298a9827df6c174dc1bbbf40d040
|
| 113 |
+
size: 59
|
dvc.yaml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
stages:
|
| 2 |
+
data_ingestion:
|
| 3 |
+
cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
|
| 4 |
+
deps:
|
| 5 |
+
- src/cnnClassifier/pipeline/stage_01_data_ingestion.py
|
| 6 |
+
- config/config.yaml
|
| 7 |
+
outs:
|
| 8 |
+
- artifacts/data_ingestion/Chest-CT-Scan-data
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
prepare_base_model:
|
| 12 |
+
cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
|
| 13 |
+
deps:
|
| 14 |
+
- src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
|
| 15 |
+
- config/config.yaml
|
| 16 |
+
params:
|
| 17 |
+
- IMAGE_SIZE
|
| 18 |
+
- INCLUDE_TOP
|
| 19 |
+
- CLASSES
|
| 20 |
+
- WEIGHTS
|
| 21 |
+
- LEARNING_RATE
|
| 22 |
+
outs:
|
| 23 |
+
- artifacts/prepare_base_model
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
training:
|
| 27 |
+
cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
|
| 28 |
+
deps:
|
| 29 |
+
- src/cnnClassifier/pipeline/stage_03_model_trainer.py
|
| 30 |
+
- config/config.yaml
|
| 31 |
+
- artifacts/data_ingestion/Chest-CT-Scan-data
|
| 32 |
+
- artifacts/prepare_base_model
|
| 33 |
+
params:
|
| 34 |
+
- IMAGE_SIZE
|
| 35 |
+
- EPOCHS
|
| 36 |
+
- BATCH_SIZE
|
| 37 |
+
- AUGMENTATION
|
| 38 |
+
outs:
|
| 39 |
+
- artifacts/training/model.h5
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
evaluation:
|
| 43 |
+
cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
|
| 44 |
+
deps:
|
| 45 |
+
- src/cnnClassifier/pipeline/stage_04_model_evaluation.py
|
| 46 |
+
- config/config.yaml
|
| 47 |
+
- artifacts/data_ingestion/Chest-CT-Scan-data
|
| 48 |
+
- artifacts/training/model.h5
|
| 49 |
+
params:
|
| 50 |
+
- IMAGE_SIZE
|
| 51 |
+
- BATCH_SIZE
|
| 52 |
+
metrics:
|
| 53 |
+
- scores.json:
|
| 54 |
+
cache: false
|
main.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from cnnClassifier import logger
|
| 2 |
+
from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
|
| 3 |
+
from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline
|
| 4 |
+
from cnnClassifier.pipeline.stage_03_model_trainer import ModelTrainingPipeline
|
| 5 |
+
from cnnClassifier.pipeline.stage_04_model_evaluation import EvaluationPipeline
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
STAGE_NAME = "Data Ingestion stage"
|
| 10 |
+
try:
|
| 11 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
| 12 |
+
obj = DataIngestionTrainingPipeline()
|
| 13 |
+
obj.main()
|
| 14 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
| 15 |
+
except Exception as e:
|
| 16 |
+
logger.exception(e)
|
| 17 |
+
raise e
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
STAGE_NAME = "Prepare base model"
|
| 21 |
+
try:
|
| 22 |
+
logger.info(f"*******************")
|
| 23 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
| 24 |
+
prepare_base_model = PrepareBaseModelTrainingPipeline()
|
| 25 |
+
prepare_base_model.main()
|
| 26 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
| 27 |
+
except Exception as e:
|
| 28 |
+
logger.exception(e)
|
| 29 |
+
raise e
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
STAGE_NAME = "Training"
|
| 33 |
+
try:
|
| 34 |
+
logger.info(f"*******************")
|
| 35 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
| 36 |
+
model_trainer = ModelTrainingPipeline()
|
| 37 |
+
model_trainer.main()
|
| 38 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
| 39 |
+
except Exception as e:
|
| 40 |
+
logger.exception(e)
|
| 41 |
+
raise e
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
STAGE_NAME = "Evaluation stage"
|
| 45 |
+
try:
|
| 46 |
+
logger.info(f"*******************")
|
| 47 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
| 48 |
+
model_evalution = EvaluationPipeline()
|
| 49 |
+
model_evalution.main()
|
| 50 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
| 51 |
+
|
| 52 |
+
except Exception as e:
|
| 53 |
+
logger.exception(e)
|
| 54 |
+
raise e
|
params.yaml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#AUGMENTATION: True
|
| 2 |
+
#IMAGE_SIZE: [224, 224, 3] # as per VGG 16 model
|
| 3 |
+
#BATCH_SIZE: 16
|
| 4 |
+
#INCLUDE_TOP: False
|
| 5 |
+
#EPOCHS: 1
|
| 6 |
+
#CLASSES: 2
|
| 7 |
+
#WEIGHTS: imagenet
|
| 8 |
+
#LEARNING_RATE: 0.01
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
AUGMENTATION: True
|
| 12 |
+
IMAGE_SIZE: [224, 224, 3]
|
| 13 |
+
BATCH_SIZE: 16 # Use 16 if 32 gives you memory errors, otherwise 32 is fine
|
| 14 |
+
INCLUDE_TOP: False
|
| 15 |
+
EPOCHS: 10 # Give the model enough time to learn
|
| 16 |
+
CLASSES: 2
|
| 17 |
+
WEIGHTS: imagenet
|
| 18 |
+
LEARNING_RATE: 0.001 # CRUCIAL: A small learning rate for fine-tuning
|
requirements.txt
ADDED
|
Binary file (526 Bytes). View file
|
|
|
research/01_data_ingestion.ipynb
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "8f33ab85",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import os"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 2,
|
| 16 |
+
"id": "5b55e660",
|
| 17 |
+
"metadata": {},
|
| 18 |
+
"outputs": [
|
| 19 |
+
{
|
| 20 |
+
"data": {
|
| 21 |
+
"text/plain": [
|
| 22 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
"execution_count": 2,
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"output_type": "execute_result"
|
| 28 |
+
}
|
| 29 |
+
],
|
| 30 |
+
"source": [
|
| 31 |
+
"%pwd"
|
| 32 |
+
]
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"cell_type": "code",
|
| 36 |
+
"execution_count": 3,
|
| 37 |
+
"id": "b7338c82",
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"outputs": [
|
| 40 |
+
{
|
| 41 |
+
"data": {
|
| 42 |
+
"text/plain": [
|
| 43 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
"execution_count": 3,
|
| 47 |
+
"metadata": {},
|
| 48 |
+
"output_type": "execute_result"
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"source": [
|
| 52 |
+
"os.chdir(\"../\")\n",
|
| 53 |
+
"%pwd"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": 4,
|
| 59 |
+
"id": "a770b8df",
|
| 60 |
+
"metadata": {},
|
| 61 |
+
"outputs": [],
|
| 62 |
+
"source": [
|
| 63 |
+
"from dataclasses import dataclass\n",
|
| 64 |
+
"from pathlib import Path\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"@dataclass(frozen=True)\n",
|
| 68 |
+
"class DataIngestionConfig:\n",
|
| 69 |
+
" root_dir: Path\n",
|
| 70 |
+
" source_URL: str\n",
|
| 71 |
+
" local_data_file: Path\n",
|
| 72 |
+
" unzip_dir: Path"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"cell_type": "code",
|
| 77 |
+
"execution_count": 6,
|
| 78 |
+
"id": "979add90",
|
| 79 |
+
"metadata": {},
|
| 80 |
+
"outputs": [],
|
| 81 |
+
"source": [
|
| 82 |
+
"from cnnClassifier.constants import *\n",
|
| 83 |
+
"from cnnClassifier.utils.common import read_yaml, create_directories\n",
|
| 84 |
+
"class ConfigurationManager:\n",
|
| 85 |
+
" def __init__(\n",
|
| 86 |
+
" self,\n",
|
| 87 |
+
" config_filepath = CONFIG_FILE_PATH,\n",
|
| 88 |
+
" params_filepath = PARAMS_FILE_PATH):\n",
|
| 89 |
+
"\n",
|
| 90 |
+
" self.config = read_yaml(config_filepath)\n",
|
| 91 |
+
" self.params = read_yaml(params_filepath)\n",
|
| 92 |
+
"\n",
|
| 93 |
+
" create_directories([self.config.artifacts_root])\n",
|
| 94 |
+
"\n",
|
| 95 |
+
" def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
|
| 96 |
+
" config = self.config.data_ingestion\n",
|
| 97 |
+
"\n",
|
| 98 |
+
" create_directories([config.root_dir])\n",
|
| 99 |
+
"\n",
|
| 100 |
+
" data_ingestion_config = DataIngestionConfig(\n",
|
| 101 |
+
" root_dir=config.root_dir,\n",
|
| 102 |
+
" source_URL=config.source_URL,\n",
|
| 103 |
+
" local_data_file=config.local_data_file,\n",
|
| 104 |
+
" unzip_dir=config.unzip_dir \n",
|
| 105 |
+
" )\n",
|
| 106 |
+
"\n",
|
| 107 |
+
" return data_ingestion_config"
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"cell_type": "code",
|
| 112 |
+
"execution_count": 9,
|
| 113 |
+
"id": "e4fd8f68",
|
| 114 |
+
"metadata": {},
|
| 115 |
+
"outputs": [
|
| 116 |
+
{
|
| 117 |
+
"name": "stdout",
|
| 118 |
+
"output_type": "stream",
|
| 119 |
+
"text": [
|
| 120 |
+
"[2025-08-18 00:24:08,669: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
|
| 121 |
+
"[2025-08-18 00:24:08,684: INFO: common: yaml file: params.yaml loaded successfully]\n",
|
| 122 |
+
"[2025-08-18 00:24:08,686: INFO: common: created directory at: artifacts]\n",
|
| 123 |
+
"[2025-08-18 00:24:08,688: INFO: common: created directory at: artifacts/data_ingestion]\n",
|
| 124 |
+
"[2025-08-18 00:24:08,692: INFO: 78466947: Downloading data from https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n"
|
| 125 |
+
]
|
| 126 |
+
},
|
| 127 |
+
{
|
| 128 |
+
"name": "stderr",
|
| 129 |
+
"output_type": "stream",
|
| 130 |
+
"text": [
|
| 131 |
+
"Downloading...\n",
|
| 132 |
+
"From (original): https://drive.google.com/uc?/export=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY\n",
|
| 133 |
+
"From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY&confirm=t&uuid=954f5f66-c0d6-4c40-a993-933880515813\n",
|
| 134 |
+
"To: f:\\Projects\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\artifacts\\data_ingestion\\data.zip\n",
|
| 135 |
+
"100%|██████████| 49.0M/49.0M [00:24<00:00, 2.03MB/s]"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"name": "stdout",
|
| 140 |
+
"output_type": "stream",
|
| 141 |
+
"text": [
|
| 142 |
+
"[2025-08-18 00:24:36,267: INFO: 78466947: Downloaded data from https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n"
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"name": "stderr",
|
| 147 |
+
"output_type": "stream",
|
| 148 |
+
"text": [
|
| 149 |
+
"\n"
|
| 150 |
+
]
|
| 151 |
+
}
|
| 152 |
+
],
|
| 153 |
+
"source": [
|
| 154 |
+
"import os\n",
|
| 155 |
+
"import zipfile\n",
|
| 156 |
+
"import gdown\n",
|
| 157 |
+
"from cnnClassifier import logger\n",
|
| 158 |
+
"from cnnClassifier.utils.common import get_size\n",
|
| 159 |
+
"\n",
|
| 160 |
+
"class DataIngestion:\n",
|
| 161 |
+
" def __init__(self, config: DataIngestionConfig):\n",
|
| 162 |
+
" self.config = config\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"\n",
|
| 165 |
+
" \n",
|
| 166 |
+
" \n",
|
| 167 |
+
" def download_file(self)-> str:\n",
|
| 168 |
+
" '''\n",
|
| 169 |
+
" Fetch data from the url\n",
|
| 170 |
+
" '''\n",
|
| 171 |
+
"\n",
|
| 172 |
+
" try: \n",
|
| 173 |
+
" dataset_url = self.config.source_URL\n",
|
| 174 |
+
" zip_download_dir = self.config.local_data_file\n",
|
| 175 |
+
" os.makedirs(\"artifacts/data_ingestion\", exist_ok=True)\n",
|
| 176 |
+
" logger.info(f\"Downloading data from {dataset_url} into file {zip_download_dir}\")\n",
|
| 177 |
+
"\n",
|
| 178 |
+
" file_id = dataset_url.split(\"/\")[-2]\n",
|
| 179 |
+
" prefix = 'https://drive.google.com/uc?/export=download&id='\n",
|
| 180 |
+
" gdown.download(prefix+file_id,zip_download_dir)\n",
|
| 181 |
+
"\n",
|
| 182 |
+
" logger.info(f\"Downloaded data from {dataset_url} into file {zip_download_dir}\")\n",
|
| 183 |
+
"\n",
|
| 184 |
+
" except Exception as e:\n",
|
| 185 |
+
" raise e\n",
|
| 186 |
+
" \n",
|
| 187 |
+
" \n",
|
| 188 |
+
" def extract_zip_file(self):\n",
|
| 189 |
+
" \"\"\"\n",
|
| 190 |
+
" zip_file_path: str\n",
|
| 191 |
+
" Extracts the zip file into the data directory\n",
|
| 192 |
+
" Function returns None\n",
|
| 193 |
+
" \"\"\"\n",
|
| 194 |
+
" unzip_path = self.config.unzip_dir\n",
|
| 195 |
+
" os.makedirs(unzip_path, exist_ok=True)\n",
|
| 196 |
+
" with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
|
| 197 |
+
" zip_ref.extractall(unzip_path)\n",
|
| 198 |
+
"try:\n",
|
| 199 |
+
" config = ConfigurationManager()\n",
|
| 200 |
+
" data_ingestion_config = config.get_data_ingestion_config()\n",
|
| 201 |
+
" data_ingestion = DataIngestion(config=data_ingestion_config)\n",
|
| 202 |
+
" data_ingestion.download_file()\n",
|
| 203 |
+
" data_ingestion.extract_zip_file()\n",
|
| 204 |
+
"except Exception as e:\n",
|
| 205 |
+
" raise e"
|
| 206 |
+
]
|
| 207 |
+
}
|
| 208 |
+
],
|
| 209 |
+
"metadata": {
|
| 210 |
+
"kernelspec": {
|
| 211 |
+
"display_name": "cnn_env",
|
| 212 |
+
"language": "python",
|
| 213 |
+
"name": "python3"
|
| 214 |
+
},
|
| 215 |
+
"language_info": {
|
| 216 |
+
"codemirror_mode": {
|
| 217 |
+
"name": "ipython",
|
| 218 |
+
"version": 3
|
| 219 |
+
},
|
| 220 |
+
"file_extension": ".py",
|
| 221 |
+
"mimetype": "text/x-python",
|
| 222 |
+
"name": "python",
|
| 223 |
+
"nbconvert_exporter": "python",
|
| 224 |
+
"pygments_lexer": "ipython3",
|
| 225 |
+
"version": "3.11.3"
|
| 226 |
+
}
|
| 227 |
+
},
|
| 228 |
+
"nbformat": 4,
|
| 229 |
+
"nbformat_minor": 5
|
| 230 |
+
}
|
research/02_prepare_base_model.ipynb
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "29206888",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"data": {
|
| 11 |
+
"text/plain": [
|
| 12 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
"execution_count": 1,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"output_type": "execute_result"
|
| 18 |
+
}
|
| 19 |
+
],
|
| 20 |
+
"source": [
|
| 21 |
+
"import os\n",
|
| 22 |
+
"%pwd"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"cell_type": "code",
|
| 27 |
+
"execution_count": 2,
|
| 28 |
+
"id": "7dce8d4e",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [
|
| 31 |
+
{
|
| 32 |
+
"data": {
|
| 33 |
+
"text/plain": [
|
| 34 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"execution_count": 2,
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"output_type": "execute_result"
|
| 40 |
+
}
|
| 41 |
+
],
|
| 42 |
+
"source": [
|
| 43 |
+
"os.chdir(\"../\")\n",
|
| 44 |
+
"%pwd"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"cell_type": "code",
|
| 49 |
+
"execution_count": 4,
|
| 50 |
+
"id": "c4d0c484",
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"from dataclasses import dataclass\n",
|
| 55 |
+
"from pathlib import Path\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"@dataclass(frozen=True)\n",
|
| 58 |
+
"class PrepareBaseModelConfig:\n",
|
| 59 |
+
" root_dir: Path\n",
|
| 60 |
+
" base_model_path: Path\n",
|
| 61 |
+
" updated_base_model_path: Path\n",
|
| 62 |
+
" params_image_size: list\n",
|
| 63 |
+
" params_learning_rate: float\n",
|
| 64 |
+
" params_include_top: bool\n",
|
| 65 |
+
" params_weights: str\n",
|
| 66 |
+
" params_classes: int"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": 5,
|
| 72 |
+
"id": "26921811",
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"outputs": [],
|
| 75 |
+
"source": [
|
| 76 |
+
"from cnnClassifier.constants import *\n",
|
| 77 |
+
"from cnnClassifier.utils.common import read_yaml, create_directories\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"class ConfigurationManager:\n",
|
| 80 |
+
" def __init__(\n",
|
| 81 |
+
" self,\n",
|
| 82 |
+
" config_filepath = CONFIG_FILE_PATH,\n",
|
| 83 |
+
" params_filepath = PARAMS_FILE_PATH):\n",
|
| 84 |
+
"\n",
|
| 85 |
+
" self.config = read_yaml(config_filepath)\n",
|
| 86 |
+
" self.params = read_yaml(params_filepath)\n",
|
| 87 |
+
"\n",
|
| 88 |
+
" create_directories([self.config.artifacts_root])\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"\n",
|
| 91 |
+
" def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:\n",
|
| 92 |
+
" config = self.config.prepare_base_model\n",
|
| 93 |
+
" \n",
|
| 94 |
+
" create_directories([config.root_dir])\n",
|
| 95 |
+
"\n",
|
| 96 |
+
" prepare_base_model_config = PrepareBaseModelConfig(\n",
|
| 97 |
+
" root_dir=Path(config.root_dir),\n",
|
| 98 |
+
" base_model_path=Path(config.base_model_path),\n",
|
| 99 |
+
" updated_base_model_path=Path(config.updated_base_model_path),\n",
|
| 100 |
+
" params_image_size=self.params.IMAGE_SIZE,\n",
|
| 101 |
+
" params_learning_rate=self.params.LEARNING_RATE,\n",
|
| 102 |
+
" params_include_top=self.params.INCLUDE_TOP,\n",
|
| 103 |
+
" params_weights=self.params.WEIGHTS,\n",
|
| 104 |
+
" params_classes=self.params.CLASSES\n",
|
| 105 |
+
" )\n",
|
| 106 |
+
"\n",
|
| 107 |
+
" return prepare_base_model_config"
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"cell_type": "code",
|
| 112 |
+
"execution_count": 6,
|
| 113 |
+
"id": "0442bc6f",
|
| 114 |
+
"metadata": {},
|
| 115 |
+
"outputs": [],
|
| 116 |
+
"source": [
|
| 117 |
+
"import os\n",
|
| 118 |
+
"import urllib.request as request\n",
|
| 119 |
+
"from zipfile import ZipFile\n",
|
| 120 |
+
"import tensorflow as tf\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"class PrepareBaseModel:\n",
|
| 123 |
+
" def __init__(self, config: PrepareBaseModelConfig):\n",
|
| 124 |
+
" self.config = config\n",
|
| 125 |
+
"\n",
|
| 126 |
+
" \n",
|
| 127 |
+
" def get_base_model(self):\n",
|
| 128 |
+
" self.model = tf.keras.applications.vgg16.VGG16(\n",
|
| 129 |
+
" input_shape=self.config.params_image_size,\n",
|
| 130 |
+
" weights=self.config.params_weights,\n",
|
| 131 |
+
" include_top=self.config.params_include_top\n",
|
| 132 |
+
" )\n",
|
| 133 |
+
"\n",
|
| 134 |
+
" self.save_model(path=self.config.base_model_path, model=self.model)\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"\n",
|
| 137 |
+
" \n",
|
| 138 |
+
" @staticmethod\n",
|
| 139 |
+
" def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n",
|
| 140 |
+
" if freeze_all:\n",
|
| 141 |
+
" for layer in model.layers:\n",
|
| 142 |
+
" model.trainable = False\n",
|
| 143 |
+
" elif (freeze_till is not None) and (freeze_till > 0):\n",
|
| 144 |
+
" for layer in model.layers[:-freeze_till]:\n",
|
| 145 |
+
" model.trainable = False\n",
|
| 146 |
+
"\n",
|
| 147 |
+
" flatten_in = tf.keras.layers.Flatten()(model.output)\n",
|
| 148 |
+
" prediction = tf.keras.layers.Dense(\n",
|
| 149 |
+
" units=classes,\n",
|
| 150 |
+
" activation=\"softmax\"\n",
|
| 151 |
+
" )(flatten_in)\n",
|
| 152 |
+
"\n",
|
| 153 |
+
" full_model = tf.keras.models.Model(\n",
|
| 154 |
+
" inputs=model.input,\n",
|
| 155 |
+
" outputs=prediction\n",
|
| 156 |
+
" )\n",
|
| 157 |
+
"\n",
|
| 158 |
+
" full_model.compile(\n",
|
| 159 |
+
" optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n",
|
| 160 |
+
" loss=tf.keras.losses.CategoricalCrossentropy(),\n",
|
| 161 |
+
" metrics=[\"accuracy\"]\n",
|
| 162 |
+
" )\n",
|
| 163 |
+
"\n",
|
| 164 |
+
" full_model.summary()\n",
|
| 165 |
+
" return full_model\n",
|
| 166 |
+
" \n",
|
| 167 |
+
"\n",
|
| 168 |
+
" def update_base_model(self):\n",
|
| 169 |
+
" self.full_model = self._prepare_full_model(\n",
|
| 170 |
+
" model=self.model,\n",
|
| 171 |
+
" classes=self.config.params_classes,\n",
|
| 172 |
+
" freeze_all=True,\n",
|
| 173 |
+
" freeze_till=None,\n",
|
| 174 |
+
" learning_rate=self.config.params_learning_rate\n",
|
| 175 |
+
" )\n",
|
| 176 |
+
"\n",
|
| 177 |
+
" self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n",
|
| 178 |
+
" \n",
|
| 179 |
+
"\n",
|
| 180 |
+
"\n",
|
| 181 |
+
" @staticmethod\n",
|
| 182 |
+
" def save_model(path: Path, model: tf.keras.Model):\n",
|
| 183 |
+
" model.save(path)"
|
| 184 |
+
]
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"cell_type": "code",
|
| 188 |
+
"execution_count": 7,
|
| 189 |
+
"id": "b21b58b5",
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [
|
| 192 |
+
{
|
| 193 |
+
"name": "stdout",
|
| 194 |
+
"output_type": "stream",
|
| 195 |
+
"text": [
|
| 196 |
+
"[2025-08-20 01:44:50,956: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
|
| 197 |
+
"[2025-08-20 01:44:50,982: INFO: common: yaml file: params.yaml loaded successfully]\n",
|
| 198 |
+
"[2025-08-20 01:44:50,984: INFO: common: created directory at: artifacts]\n",
|
| 199 |
+
"[2025-08-20 01:44:50,986: INFO: common: created directory at: artifacts/prepare_base_model]\n",
|
| 200 |
+
"Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5\n",
|
| 201 |
+
"58889256/58889256 [==============================] - 15s 0us/step\n",
|
| 202 |
+
"[2025-08-20 01:45:09,603: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n",
|
| 203 |
+
"Model: \"model\"\n",
|
| 204 |
+
"_________________________________________________________________\n",
|
| 205 |
+
" Layer (type) Output Shape Param # \n",
|
| 206 |
+
"=================================================================\n",
|
| 207 |
+
" input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n",
|
| 208 |
+
" \n",
|
| 209 |
+
" block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
|
| 210 |
+
" \n",
|
| 211 |
+
" block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
|
| 212 |
+
" \n",
|
| 213 |
+
" block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
|
| 214 |
+
" \n",
|
| 215 |
+
" block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
|
| 216 |
+
" \n",
|
| 217 |
+
" block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
|
| 218 |
+
" \n",
|
| 219 |
+
" block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
|
| 220 |
+
" \n",
|
| 221 |
+
" block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
|
| 222 |
+
" \n",
|
| 223 |
+
" block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
|
| 224 |
+
" \n",
|
| 225 |
+
" block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
|
| 226 |
+
" \n",
|
| 227 |
+
" block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
|
| 228 |
+
" \n",
|
| 229 |
+
" block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
|
| 230 |
+
" \n",
|
| 231 |
+
" block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
|
| 232 |
+
" \n",
|
| 233 |
+
" block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
|
| 234 |
+
" \n",
|
| 235 |
+
" block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",
|
| 236 |
+
" \n",
|
| 237 |
+
" block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
|
| 238 |
+
" \n",
|
| 239 |
+
" block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
|
| 240 |
+
" \n",
|
| 241 |
+
" block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
|
| 242 |
+
" \n",
|
| 243 |
+
" block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",
|
| 244 |
+
" \n",
|
| 245 |
+
" flatten (Flatten) (None, 25088) 0 \n",
|
| 246 |
+
" \n",
|
| 247 |
+
" dense (Dense) (None, 2) 50178 \n",
|
| 248 |
+
" \n",
|
| 249 |
+
"=================================================================\n",
|
| 250 |
+
"Total params: 14,764,866\n",
|
| 251 |
+
"Trainable params: 50,178\n",
|
| 252 |
+
"Non-trainable params: 14,714,688\n",
|
| 253 |
+
"_________________________________________________________________\n"
|
| 254 |
+
]
|
| 255 |
+
}
|
| 256 |
+
],
|
| 257 |
+
"source": [
|
| 258 |
+
"try:\n",
|
| 259 |
+
" config = ConfigurationManager()\n",
|
| 260 |
+
" prepare_base_model_config = config.get_prepare_base_model_config()\n",
|
| 261 |
+
" prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n",
|
| 262 |
+
" prepare_base_model.get_base_model()\n",
|
| 263 |
+
" prepare_base_model.update_base_model()\n",
|
| 264 |
+
"except Exception as e:\n",
|
| 265 |
+
" raise e"
|
| 266 |
+
]
|
| 267 |
+
}
|
| 268 |
+
],
|
| 269 |
+
"metadata": {
|
| 270 |
+
"kernelspec": {
|
| 271 |
+
"display_name": "cnn_env",
|
| 272 |
+
"language": "python",
|
| 273 |
+
"name": "python3"
|
| 274 |
+
},
|
| 275 |
+
"language_info": {
|
| 276 |
+
"codemirror_mode": {
|
| 277 |
+
"name": "ipython",
|
| 278 |
+
"version": 3
|
| 279 |
+
},
|
| 280 |
+
"file_extension": ".py",
|
| 281 |
+
"mimetype": "text/x-python",
|
| 282 |
+
"name": "python",
|
| 283 |
+
"nbconvert_exporter": "python",
|
| 284 |
+
"pygments_lexer": "ipython3",
|
| 285 |
+
"version": "3.11.3"
|
| 286 |
+
}
|
| 287 |
+
},
|
| 288 |
+
"nbformat": 4,
|
| 289 |
+
"nbformat_minor": 5
|
| 290 |
+
}
|
research/03_model_trainer.ipynb
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": 2,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [
|
| 17 |
+
{
|
| 18 |
+
"data": {
|
| 19 |
+
"text/plain": [
|
| 20 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
"execution_count": 2,
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"output_type": "execute_result"
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"source": [
|
| 29 |
+
"%pwd"
|
| 30 |
+
]
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "code",
|
| 34 |
+
"execution_count": 3,
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"outputs": [],
|
| 37 |
+
"source": [
|
| 38 |
+
"os.chdir(\"../\")"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": 4,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [
|
| 46 |
+
{
|
| 47 |
+
"data": {
|
| 48 |
+
"text/plain": [
|
| 49 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
"execution_count": 4,
|
| 53 |
+
"metadata": {},
|
| 54 |
+
"output_type": "execute_result"
|
| 55 |
+
}
|
| 56 |
+
],
|
| 57 |
+
"source": [
|
| 58 |
+
"%pwd"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"execution_count": 7,
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [],
|
| 66 |
+
"source": [
|
| 67 |
+
"from dataclasses import dataclass\n",
|
| 68 |
+
"from pathlib import Path\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"@dataclass(frozen=True)\n",
|
| 72 |
+
"class TrainingConfig:\n",
|
| 73 |
+
" root_dir: Path\n",
|
| 74 |
+
" trained_model_path: Path\n",
|
| 75 |
+
" updated_base_model_path: Path\n",
|
| 76 |
+
" training_data: Path\n",
|
| 77 |
+
" params_epochs: int\n",
|
| 78 |
+
" params_batch_size: int\n",
|
| 79 |
+
" params_is_augmentation: bool\n",
|
| 80 |
+
" params_image_size: list"
|
| 81 |
+
]
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"cell_type": "code",
|
| 85 |
+
"execution_count": 6,
|
| 86 |
+
"metadata": {},
|
| 87 |
+
"outputs": [],
|
| 88 |
+
"source": [
|
| 89 |
+
"from cnnClassifier.constants import *\n",
|
| 90 |
+
"from cnnClassifier.utils.common import read_yaml, create_directories\n",
|
| 91 |
+
"import tensorflow as tf"
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"cell_type": "code",
|
| 96 |
+
"execution_count": 8,
|
| 97 |
+
"metadata": {},
|
| 98 |
+
"outputs": [],
|
| 99 |
+
"source": [
|
| 100 |
+
"class ConfigurationManager:\n",
|
| 101 |
+
" def __init__(\n",
|
| 102 |
+
" self,\n",
|
| 103 |
+
" config_filepath = CONFIG_FILE_PATH,\n",
|
| 104 |
+
" params_filepath = PARAMS_FILE_PATH):\n",
|
| 105 |
+
"\n",
|
| 106 |
+
" self.config = read_yaml(config_filepath)\n",
|
| 107 |
+
" self.params = read_yaml(params_filepath)\n",
|
| 108 |
+
"\n",
|
| 109 |
+
" create_directories([self.config.artifacts_root])\n",
|
| 110 |
+
"\n",
|
| 111 |
+
" \n",
|
| 112 |
+
"\n",
|
| 113 |
+
" def get_training_config(self) -> TrainingConfig:\n",
|
| 114 |
+
" training = self.config.training\n",
|
| 115 |
+
" prepare_base_model = self.config.prepare_base_model\n",
|
| 116 |
+
" params = self.params\n",
|
| 117 |
+
" training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"Chest-CT-Scan-data\")\n",
|
| 118 |
+
" create_directories([\n",
|
| 119 |
+
" Path(training.root_dir)\n",
|
| 120 |
+
" ])\n",
|
| 121 |
+
"\n",
|
| 122 |
+
" training_config = TrainingConfig(\n",
|
| 123 |
+
" root_dir=Path(training.root_dir),\n",
|
| 124 |
+
" trained_model_path=Path(training.trained_model_path),\n",
|
| 125 |
+
" updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n",
|
| 126 |
+
" training_data=Path(training_data),\n",
|
| 127 |
+
" params_epochs=params.EPOCHS,\n",
|
| 128 |
+
" params_batch_size=params.BATCH_SIZE,\n",
|
| 129 |
+
" params_is_augmentation=params.AUGMENTATION,\n",
|
| 130 |
+
" params_image_size=params.IMAGE_SIZE\n",
|
| 131 |
+
" )\n",
|
| 132 |
+
"\n",
|
| 133 |
+
" return training_config"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": 9,
|
| 139 |
+
"metadata": {},
|
| 140 |
+
"outputs": [],
|
| 141 |
+
"source": [
|
| 142 |
+
"import os\n",
|
| 143 |
+
"import urllib.request as request\n",
|
| 144 |
+
"from zipfile import ZipFile\n",
|
| 145 |
+
"import tensorflow as tf\n",
|
| 146 |
+
"import time"
|
| 147 |
+
]
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"cell_type": "code",
|
| 151 |
+
"execution_count": 10,
|
| 152 |
+
"metadata": {},
|
| 153 |
+
"outputs": [],
|
| 154 |
+
"source": [
|
| 155 |
+
"class Training:\n",
|
| 156 |
+
" def __init__(self, config: TrainingConfig):\n",
|
| 157 |
+
" self.config = config\n",
|
| 158 |
+
"\n",
|
| 159 |
+
" \n",
|
| 160 |
+
" def get_base_model(self):\n",
|
| 161 |
+
" self.model = tf.keras.models.load_model(\n",
|
| 162 |
+
" self.config.updated_base_model_path\n",
|
| 163 |
+
" )\n",
|
| 164 |
+
"\n",
|
| 165 |
+
" def train_valid_generator(self):\n",
|
| 166 |
+
"\n",
|
| 167 |
+
" datagenerator_kwargs = dict(\n",
|
| 168 |
+
" rescale = 1./255,\n",
|
| 169 |
+
" validation_split=0.20\n",
|
| 170 |
+
" )\n",
|
| 171 |
+
"\n",
|
| 172 |
+
" dataflow_kwargs = dict(\n",
|
| 173 |
+
" target_size=self.config.params_image_size[:-1],\n",
|
| 174 |
+
" batch_size=self.config.params_batch_size,\n",
|
| 175 |
+
" interpolation=\"bilinear\"\n",
|
| 176 |
+
" )\n",
|
| 177 |
+
"\n",
|
| 178 |
+
" valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
|
| 179 |
+
" **datagenerator_kwargs\n",
|
| 180 |
+
" )\n",
|
| 181 |
+
"\n",
|
| 182 |
+
" self.valid_generator = valid_datagenerator.flow_from_directory(\n",
|
| 183 |
+
" directory=self.config.training_data,\n",
|
| 184 |
+
" subset=\"validation\",\n",
|
| 185 |
+
" shuffle=False,\n",
|
| 186 |
+
" **dataflow_kwargs\n",
|
| 187 |
+
" )\n",
|
| 188 |
+
"\n",
|
| 189 |
+
" if self.config.params_is_augmentation:\n",
|
| 190 |
+
" train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
|
| 191 |
+
" rotation_range=40,\n",
|
| 192 |
+
" horizontal_flip=True,\n",
|
| 193 |
+
" width_shift_range=0.2,\n",
|
| 194 |
+
" height_shift_range=0.2,\n",
|
| 195 |
+
" shear_range=0.2,\n",
|
| 196 |
+
" zoom_range=0.2,\n",
|
| 197 |
+
" **datagenerator_kwargs\n",
|
| 198 |
+
" )\n",
|
| 199 |
+
" else:\n",
|
| 200 |
+
" train_datagenerator = valid_datagenerator\n",
|
| 201 |
+
"\n",
|
| 202 |
+
" self.train_generator = train_datagenerator.flow_from_directory(\n",
|
| 203 |
+
" directory=self.config.training_data,\n",
|
| 204 |
+
" subset=\"training\",\n",
|
| 205 |
+
" shuffle=True,\n",
|
| 206 |
+
" **dataflow_kwargs\n",
|
| 207 |
+
" )\n",
|
| 208 |
+
"\n",
|
| 209 |
+
" \n",
|
| 210 |
+
" @staticmethod\n",
|
| 211 |
+
" def save_model(path: Path, model: tf.keras.Model):\n",
|
| 212 |
+
" model.save(path)\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"\n",
|
| 216 |
+
" \n",
|
| 217 |
+
" def train(self):\n",
|
| 218 |
+
" self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n",
|
| 219 |
+
" self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n",
|
| 220 |
+
"\n",
|
| 221 |
+
" self.model.fit(\n",
|
| 222 |
+
" self.train_generator,\n",
|
| 223 |
+
" epochs=self.config.params_epochs,\n",
|
| 224 |
+
" steps_per_epoch=self.steps_per_epoch,\n",
|
| 225 |
+
" validation_steps=self.validation_steps,\n",
|
| 226 |
+
" validation_data=self.valid_generator\n",
|
| 227 |
+
" )\n",
|
| 228 |
+
"\n",
|
| 229 |
+
" self.save_model(\n",
|
| 230 |
+
" path=self.config.trained_model_path,\n",
|
| 231 |
+
" model=self.model\n",
|
| 232 |
+
" )\n",
|
| 233 |
+
"\n"
|
| 234 |
+
]
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"cell_type": "code",
|
| 238 |
+
"execution_count": 14,
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"outputs": [
|
| 241 |
+
{
|
| 242 |
+
"name": "stdout",
|
| 243 |
+
"output_type": "stream",
|
| 244 |
+
"text": [
|
| 245 |
+
"[2025-08-20 02:03:39,280: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
|
| 246 |
+
"[2025-08-20 02:03:39,284: INFO: common: yaml file: params.yaml loaded successfully]\n",
|
| 247 |
+
"[2025-08-20 02:03:39,286: INFO: common: created directory at: artifacts]\n",
|
| 248 |
+
"[2025-08-20 02:03:39,288: INFO: common: created directory at: artifacts\\training]\n"
|
| 249 |
+
]
|
| 250 |
+
},
|
| 251 |
+
{
|
| 252 |
+
"name": "stdout",
|
| 253 |
+
"output_type": "stream",
|
| 254 |
+
"text": [
|
| 255 |
+
"Found 68 images belonging to 2 classes.\n",
|
| 256 |
+
"Found 275 images belonging to 2 classes.\n",
|
| 257 |
+
"17/17 [==============================] - 63s 4s/step - loss: 14.4131 - accuracy: 0.5560 - val_loss: 0.2376 - val_accuracy: 0.8750\n"
|
| 258 |
+
]
|
| 259 |
+
}
|
| 260 |
+
],
|
| 261 |
+
"source": [
|
| 262 |
+
"try:\n",
|
| 263 |
+
" config = ConfigurationManager()\n",
|
| 264 |
+
" training_config = config.get_training_config()\n",
|
| 265 |
+
" training = Training(config=training_config)\n",
|
| 266 |
+
" training.get_base_model()\n",
|
| 267 |
+
" training.train_valid_generator()\n",
|
| 268 |
+
" training.train()\n",
|
| 269 |
+
" \n",
|
| 270 |
+
"except Exception as e:\n",
|
| 271 |
+
" raise e"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": null,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [],
|
| 279 |
+
"source": []
|
| 280 |
+
}
|
| 281 |
+
],
|
| 282 |
+
"metadata": {
|
| 283 |
+
"kernelspec": {
|
| 284 |
+
"display_name": "cnn_env",
|
| 285 |
+
"language": "python",
|
| 286 |
+
"name": "python3"
|
| 287 |
+
},
|
| 288 |
+
"language_info": {
|
| 289 |
+
"codemirror_mode": {
|
| 290 |
+
"name": "ipython",
|
| 291 |
+
"version": 3
|
| 292 |
+
},
|
| 293 |
+
"file_extension": ".py",
|
| 294 |
+
"mimetype": "text/x-python",
|
| 295 |
+
"name": "python",
|
| 296 |
+
"nbconvert_exporter": "python",
|
| 297 |
+
"pygments_lexer": "ipython3",
|
| 298 |
+
"version": "3.11.3"
|
| 299 |
+
}
|
| 300 |
+
},
|
| 301 |
+
"nbformat": 4,
|
| 302 |
+
"nbformat_minor": 2
|
| 303 |
+
}
|
research/04_model_evaluation_with_mlflow.ipynb
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 2,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": 3,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [
|
| 17 |
+
{
|
| 18 |
+
"data": {
|
| 19 |
+
"text/plain": [
|
| 20 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
"execution_count": 3,
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"output_type": "execute_result"
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"source": [
|
| 29 |
+
"%pwd"
|
| 30 |
+
]
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "code",
|
| 34 |
+
"execution_count": 4,
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"outputs": [],
|
| 37 |
+
"source": [
|
| 38 |
+
"os.chdir(\"../\")"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": 5,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [
|
| 46 |
+
{
|
| 47 |
+
"data": {
|
| 48 |
+
"text/plain": [
|
| 49 |
+
"'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
"execution_count": 5,
|
| 53 |
+
"metadata": {},
|
| 54 |
+
"output_type": "execute_result"
|
| 55 |
+
}
|
| 56 |
+
],
|
| 57 |
+
"source": [
|
| 58 |
+
"%pwd"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"execution_count": 6,
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [],
|
| 66 |
+
"source": [
|
| 67 |
+
"os.environ[\"MLFLOW_TRACKING_URI\"]=\"https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow\"\n",
|
| 68 |
+
"os.environ[\"MLFLOW_TRACKING_USERNAME\"]=\"AlyyanAhmed21\"\n",
|
| 69 |
+
"os.environ[\"MLFLOW_TRACKING_PASSWORD\"]=\"<REDACTED-ROTATE-THIS-TOKEN>\""
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": 7,
|
| 75 |
+
"metadata": {},
|
| 76 |
+
"outputs": [],
|
| 77 |
+
"source": [
|
| 78 |
+
"import tensorflow as tf"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": 8,
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"outputs": [],
|
| 86 |
+
"source": [
|
| 87 |
+
"model = tf.keras.models.load_model(\"artifacts/training/model.h5\")"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"execution_count": 9,
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [],
|
| 95 |
+
"source": [
|
| 96 |
+
"from dataclasses import dataclass\n",
|
| 97 |
+
"from pathlib import Path\n",
|
| 98 |
+
"\n",
|
| 99 |
+
"@dataclass(frozen=True)\n",
|
| 100 |
+
"class EvaluationConfig:\n",
|
| 101 |
+
" path_of_model: Path\n",
|
| 102 |
+
" training_data: Path\n",
|
| 103 |
+
" all_params: dict\n",
|
| 104 |
+
" mlflow_uri: str\n",
|
| 105 |
+
" params_image_size: list\n",
|
| 106 |
+
" params_batch_size: int"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "code",
|
| 111 |
+
"execution_count": 10,
|
| 112 |
+
"metadata": {},
|
| 113 |
+
"outputs": [],
|
| 114 |
+
"source": [
|
| 115 |
+
"from cnnClassifier.constants import *\n",
|
| 116 |
+
"from cnnClassifier.utils.common import read_yaml, create_directories, save_json"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"cell_type": "code",
|
| 121 |
+
"execution_count": 11,
|
| 122 |
+
"metadata": {},
|
| 123 |
+
"outputs": [],
|
| 124 |
+
"source": [
|
| 125 |
+
"class ConfigurationManager:\n",
|
| 126 |
+
" def __init__(\n",
|
| 127 |
+
" self, \n",
|
| 128 |
+
" config_filepath = CONFIG_FILE_PATH,\n",
|
| 129 |
+
" params_filepath = PARAMS_FILE_PATH):\n",
|
| 130 |
+
" self.config = read_yaml(config_filepath)\n",
|
| 131 |
+
" self.params = read_yaml(params_filepath)\n",
|
| 132 |
+
" create_directories([self.config.artifacts_root])\n",
|
| 133 |
+
"\n",
|
| 134 |
+
" \n",
|
| 135 |
+
" def get_evaluation_config(self) -> EvaluationConfig:\n",
|
| 136 |
+
" eval_config = EvaluationConfig(\n",
|
| 137 |
+
" path_of_model=\"artifacts/training/model.h5\",\n",
|
| 138 |
+
" training_data=\"artifacts/data_ingestion/Chest-CT-Scan-data\",\n",
|
| 139 |
+
" mlflow_uri=\"https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow\",\n",
|
| 140 |
+
" all_params=self.params,\n",
|
| 141 |
+
" params_image_size=self.params.IMAGE_SIZE,\n",
|
| 142 |
+
" params_batch_size=self.params.BATCH_SIZE\n",
|
| 143 |
+
" )\n",
|
| 144 |
+
" return eval_config\n",
|
| 145 |
+
"\n",
|
| 146 |
+
"\n"
|
| 147 |
+
]
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"cell_type": "code",
|
| 151 |
+
"execution_count": 12,
|
| 152 |
+
"metadata": {},
|
| 153 |
+
"outputs": [],
|
| 154 |
+
"source": [
|
| 155 |
+
"import tensorflow as tf\n",
|
| 156 |
+
"from pathlib import Path\n",
|
| 157 |
+
"import mlflow\n",
|
| 158 |
+
"import mlflow.keras\n",
|
| 159 |
+
"from urllib.parse import urlparse"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 13,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [],
|
| 167 |
+
"source": [
|
| 168 |
+
"class Evaluation:\n",
|
| 169 |
+
" def __init__(self, config: EvaluationConfig):\n",
|
| 170 |
+
" self.config = config\n",
|
| 171 |
+
"\n",
|
| 172 |
+
" \n",
|
| 173 |
+
" def _valid_generator(self):\n",
|
| 174 |
+
"\n",
|
| 175 |
+
" datagenerator_kwargs = dict(\n",
|
| 176 |
+
" rescale = 1./255,\n",
|
| 177 |
+
" validation_split=0.30\n",
|
| 178 |
+
" )\n",
|
| 179 |
+
"\n",
|
| 180 |
+
" dataflow_kwargs = dict(\n",
|
| 181 |
+
" target_size=self.config.params_image_size[:-1],\n",
|
| 182 |
+
" batch_size=self.config.params_batch_size,\n",
|
| 183 |
+
" interpolation=\"bilinear\"\n",
|
| 184 |
+
" )\n",
|
| 185 |
+
"\n",
|
| 186 |
+
" valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
|
| 187 |
+
" **datagenerator_kwargs\n",
|
| 188 |
+
" )\n",
|
| 189 |
+
"\n",
|
| 190 |
+
" self.valid_generator = valid_datagenerator.flow_from_directory(\n",
|
| 191 |
+
" directory=self.config.training_data,\n",
|
| 192 |
+
" subset=\"validation\",\n",
|
| 193 |
+
" shuffle=False,\n",
|
| 194 |
+
" **dataflow_kwargs\n",
|
| 195 |
+
" )\n",
|
| 196 |
+
"\n",
|
| 197 |
+
"\n",
|
| 198 |
+
" @staticmethod\n",
|
| 199 |
+
" def load_model(path: Path) -> tf.keras.Model:\n",
|
| 200 |
+
" return tf.keras.models.load_model(path)\n",
|
| 201 |
+
" \n",
|
| 202 |
+
"\n",
|
| 203 |
+
" def evaluation(self):\n",
|
| 204 |
+
" self.model = self.load_model(self.config.path_of_model)\n",
|
| 205 |
+
" self._valid_generator()\n",
|
| 206 |
+
"        self.score = self.model.evaluate(self.valid_generator)\n",
|
| 207 |
+
" self.save_score()\n",
|
| 208 |
+
"\n",
|
| 209 |
+
" def save_score(self):\n",
|
| 210 |
+
" scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
|
| 211 |
+
" save_json(path=Path(\"scores.json\"), data=scores)\n",
|
| 212 |
+
"\n",
|
| 213 |
+
" \n",
|
| 214 |
+
" def log_into_mlflow(self):\n",
|
| 215 |
+
" mlflow.set_registry_uri(self.config.mlflow_uri)\n",
|
| 216 |
+
" tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n",
|
| 217 |
+
" \n",
|
| 218 |
+
" with mlflow.start_run():\n",
|
| 219 |
+
" mlflow.log_params(self.config.all_params)\n",
|
| 220 |
+
" mlflow.log_metrics(\n",
|
| 221 |
+
" {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
|
| 222 |
+
" )\n",
|
| 223 |
+
" # Model registry does not work with file store\n",
|
| 224 |
+
" if tracking_url_type_store != \"file\":\n",
|
| 225 |
+
"\n",
|
| 226 |
+
" # Register the model\n",
|
| 227 |
+
" # There are other ways to use the Model Registry, which depends on the use case,\n",
|
| 228 |
+
" # please refer to the doc for more information:\n",
|
| 229 |
+
" # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n",
|
| 230 |
+
" mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n",
|
| 231 |
+
" else:\n",
|
| 232 |
+
" mlflow.keras.log_model(self.model, \"model\")\n"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"execution_count": 14,
|
| 238 |
+
"metadata": {},
|
| 239 |
+
"outputs": [
|
| 240 |
+
{
|
| 241 |
+
"name": "stdout",
|
| 242 |
+
"output_type": "stream",
|
| 243 |
+
"text": [
|
| 244 |
+
"[2025-08-20 04:01:28,984: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
|
| 245 |
+
"[2025-08-20 04:01:28,988: INFO: common: yaml file: params.yaml loaded successfully]\n",
|
| 246 |
+
"[2025-08-20 04:01:28,991: INFO: common: created directory at: artifacts]\n"
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"name": "stdout",
|
| 251 |
+
"output_type": "stream",
|
| 252 |
+
"text": [
|
| 253 |
+
"Found 102 images belonging to 2 classes.\n",
|
| 254 |
+
"7/7 [==============================] - 16s 2s/step - loss: 57.2713 - accuracy: 0.4314\n",
|
| 255 |
+
"[2025-08-20 04:01:45,268: INFO: common: json file saved at: scores.json]\n"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"name": "stderr",
|
| 260 |
+
"output_type": "stream",
|
| 261 |
+
"text": [
|
| 262 |
+
"2025/08/20 04:01:47 WARNING mlflow.tensorflow: You are saving a TensorFlow Core model or Keras model without a signature. Inference with mlflow.pyfunc.spark_udf() will not work unless the model's pyfunc representation accepts pandas DataFrames as inference inputs.\n"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"name": "stdout",
|
| 267 |
+
"output_type": "stream",
|
| 268 |
+
"text": [
|
| 269 |
+
"[2025-08-20 04:01:48,249: WARNING: save: Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 14). These functions will not be directly callable after loading.]\n",
|
| 270 |
+
"INFO:tensorflow:Assets written to: E:\\Temp\\tmp32wvm7sm\\model\\data\\model\\assets\n",
|
| 271 |
+
"[2025-08-20 04:01:49,538: INFO: builder_impl: Assets written to: E:\\Temp\\tmp32wvm7sm\\model\\data\\model\\assets]\n"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"name": "stderr",
|
| 276 |
+
"output_type": "stream",
|
| 277 |
+
"text": [
|
| 278 |
+
"f:\\Projects\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\cnn_env\\Lib\\site-packages\\_distutils_hack\\__init__.py:33: UserWarning: Setuptools is replacing distutils.\n",
|
| 279 |
+
" warnings.warn(\"Setuptools is replacing distutils.\")\n",
|
| 280 |
+
"Registered model 'VGG16Model' already exists. Creating a new version of this model...\n",
|
| 281 |
+
"2025/08/20 04:02:45 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n",
|
| 282 |
+
"Created version '2' of model 'VGG16Model'.\n"
|
| 283 |
+
]
|
| 284 |
+
}
|
| 285 |
+
],
|
| 286 |
+
"source": [
|
| 287 |
+
"try:\n",
|
| 288 |
+
" config = ConfigurationManager()\n",
|
| 289 |
+
" eval_config = config.get_evaluation_config()\n",
|
| 290 |
+
" evaluation = Evaluation(eval_config)\n",
|
| 291 |
+
" evaluation.evaluation()\n",
|
| 292 |
+
" evaluation.log_into_mlflow()\n",
|
| 293 |
+
"\n",
|
| 294 |
+
"except Exception as e:\n",
|
| 295 |
+
" raise e"
|
| 296 |
+
]
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"cell_type": "code",
|
| 300 |
+
"execution_count": null,
|
| 301 |
+
"metadata": {},
|
| 302 |
+
"outputs": [],
|
| 303 |
+
"source": []
|
| 304 |
+
}
|
| 305 |
+
],
|
| 306 |
+
"metadata": {
|
| 307 |
+
"kernelspec": {
|
| 308 |
+
"display_name": "cnn_env",
|
| 309 |
+
"language": "python",
|
| 310 |
+
"name": "python3"
|
| 311 |
+
},
|
| 312 |
+
"language_info": {
|
| 313 |
+
"codemirror_mode": {
|
| 314 |
+
"name": "ipython",
|
| 315 |
+
"version": 3
|
| 316 |
+
},
|
| 317 |
+
"file_extension": ".py",
|
| 318 |
+
"mimetype": "text/x-python",
|
| 319 |
+
"name": "python",
|
| 320 |
+
"nbconvert_exporter": "python",
|
| 321 |
+
"pygments_lexer": "ipython3",
|
| 322 |
+
"version": "3.11.3"
|
| 323 |
+
},
|
| 324 |
+
"orig_nbformat": 4
|
| 325 |
+
},
|
| 326 |
+
"nbformat": 4,
|
| 327 |
+
"nbformat_minor": 2
|
| 328 |
+
}
|
research/trials.ipynb
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "981d0e26",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"name": "stdout",
|
| 11 |
+
"output_type": "stream",
|
| 12 |
+
"text": [
|
| 13 |
+
"Hello, World!\n"
|
| 14 |
+
]
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"source": [
|
| 18 |
+
"print('Hello, World!')"
|
| 19 |
+
]
|
| 20 |
+
}
|
| 21 |
+
],
|
| 22 |
+
"metadata": {
|
| 23 |
+
"kernelspec": {
|
| 24 |
+
"display_name": "cnn_env",
|
| 25 |
+
"language": "python",
|
| 26 |
+
"name": "python3"
|
| 27 |
+
},
|
| 28 |
+
"language_info": {
|
| 29 |
+
"codemirror_mode": {
|
| 30 |
+
"name": "ipython",
|
| 31 |
+
"version": 3
|
| 32 |
+
},
|
| 33 |
+
"file_extension": ".py",
|
| 34 |
+
"mimetype": "text/x-python",
|
| 35 |
+
"name": "python",
|
| 36 |
+
"nbconvert_exporter": "python",
|
| 37 |
+
"pygments_lexer": "ipython3",
|
| 38 |
+
"version": "3.11.3"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"nbformat": 4,
|
| 42 |
+
"nbformat_minor": 5
|
| 43 |
+
}
|
scores.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": 0.13162432610988617,
|
| 3 |
+
"accuracy": 1.0
|
| 4 |
+
}
|
setup.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# setup.py
#
# Packaging metadata for the cnnClassifier project. Uses the src-layout:
# all importable code lives under src/, so package discovery is rooted there.

import setuptools

# The README doubles as the PyPI-style long description.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

__version__ = "0.0.0"

REPO_NAME = "End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC"
AUTHOR_USER_NAME = "AlyyanAhmed21"  # Change to your GitHub username
SRC_REPO = "cnnClassifier"  # This is the name of your main source folder under src/
AUTHOR_EMAIL = "alyyanawan19@gmail.com"  # Change to your email

_repo_url = f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}"

setuptools.setup(
    name=SRC_REPO,
    version=__version__,
    author=AUTHOR_USER_NAME,
    author_email=AUTHOR_EMAIL,
    description="A small python package for CNN app",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=_repo_url,
    project_urls={"Bug Tracker": f"{_repo_url}/issues"},
    # src-layout: tell setuptools that packages live under src/ and let it
    # discover them automatically from there.
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
)
|
src/cnnClassifier/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import sys
import logging

# Log-line layout: [timestamp: LEVEL: module: message]
logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"

log_dir = "logs"
log_filepath = os.path.join(log_dir, "running_logs.log")
# The directory must exist before FileHandler tries to open the log file.
os.makedirs(log_dir, exist_ok=True)

# Emit to both a persistent file and stdout (so container/CI logs capture it).
_file_handler = logging.FileHandler(log_filepath)
_console_handler = logging.StreamHandler(sys.stdout)

logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[_file_handler, _console_handler],
)

# Shared project logger; import elsewhere as `from cnnClassifier import logger`.
logger = logging.getLogger("cnnClassifierLogger")
|
src/cnnClassifier/components/__init__.py
ADDED
|
File without changes
|
src/cnnClassifier/components/data_ingestion.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import zipfile
|
| 3 |
+
import gdown
|
| 4 |
+
from cnnClassifier import logger
|
| 5 |
+
from cnnClassifier.utils.common import get_size
|
| 6 |
+
from cnnClassifier.entity.config_entity import DataIngestionConfig
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DataIngestion:
    """Downloads the dataset zip from Google Drive and unpacks it locally.

    Paths and the source URL come from the injected DataIngestionConfig.
    """

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self) -> None:
        """Fetch the dataset zip from ``config.source_URL`` into
        ``config.local_data_file``.

        Returns None (the original ``-> str`` annotation was wrong: nothing
        is returned). Re-raises any download error so the pipeline stage
        fails loudly instead of continuing with no data.
        """
        try:
            dataset_url = self.config.source_URL
            zip_download_dir = self.config.local_data_file
            os.makedirs("artifacts/data_ingestion", exist_ok=True)
            logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

            # Drive share links look like .../file/d/<id>/view, so the file id
            # is the second-to-last path segment.
            file_id = dataset_url.split("/")[-2]
            # NOTE(review): 'uc?/export=download' looks malformed (canonical is
            # 'uc?export=download'), but gdown appears to only read the id
            # parameter — confirm before "fixing" the URL.
            prefix = 'https://drive.google.com/uc?/export=download&id='
            gdown.download(prefix + file_id, zip_download_dir)

            logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")

        except Exception:
            # Bare `raise` preserves the original traceback (unlike `raise e`).
            raise

    def extract_zip_file(self) -> None:
        """Extract ``config.local_data_file`` into ``config.unzip_dir``,
        creating the destination directory if needed. Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
|
src/cnnClassifier/components/model_evaluation_mlflow.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import mlflow
|
| 4 |
+
import mlflow.keras
|
| 5 |
+
from urllib.parse import urlparse
|
| 6 |
+
from cnnClassifier.entity.config_entity import EvaluationConfig
|
| 7 |
+
from cnnClassifier.utils.common import save_json
|
| 8 |
+
|
| 9 |
+
# --- NEW IMPORTS for advanced evaluation ---
|
| 10 |
+
from sklearn.metrics import confusion_matrix, classification_report
|
| 11 |
+
import numpy as np
|
| 12 |
+
import seaborn as sns
|
| 13 |
+
import matplotlib.pyplot as plt
|
| 14 |
+
# -------------------------------------------
|
| 15 |
+
|
| 16 |
+
class Evaluation:
    """Evaluates the trained model on the validation split and logs
    metrics, a per-class classification report, a confusion-matrix plot,
    and the model itself to MLflow.
    """

    def __init__(self, config: EvaluationConfig):
        self.config = config
        self.model = None            # set by evaluation()
        self.valid_generator = None  # set by _valid_generator()
        self.score = None            # [loss, accuracy] from model.evaluate
        self.y_true = None           # ground-truth class indices
        self.y_pred = None           # argmax of predicted probabilities

    def _valid_generator(self):
        """Build the validation data generator (30% split, deterministic order)."""
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.30
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(**datagenerator_kwargs)

        # shuffle=False so generator order stays aligned with .classes,
        # which _get_predictions relies on for y_true/y_pred pairing.
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load a saved Keras model from disk."""
        return tf.keras.models.load_model(path)

    def _get_predictions(self):
        """Gets ground truth labels and model's predicted labels."""
        self.y_true = self.valid_generator.classes
        y_pred_probs = self.model.predict(self.valid_generator)
        self.y_pred = np.argmax(y_pred_probs, axis=1)

    def evaluation(self):
        """Loads model, evaluates basic metrics, and gets detailed predictions."""
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = self.model.evaluate(self.valid_generator)
        self._get_predictions()
        self.save_score()

    def save_score(self):
        """Persist loss/accuracy to scores.json.

        Always writes the file — even with NaN scores — so downstream DVC
        stages that declare scores.json as an output never break.
        """
        if self.score is None or np.isnan(self.score).any():
            print("⚠️ Warning: Invalid scores detected (NaN). Saving default scores file.")
            scores = {"loss": float('nan'), "accuracy": float('nan')}
        else:
            scores = {"loss": self.score[0], "accuracy": self.score[1]}

        save_json(path=Path("scores.json"), data=scores)
        print(f"Scores saved to scores.json: {scores}")

    def log_confusion_matrix(self):
        """Generates, saves, and logs the confusion matrix plot to MLflow."""
        cm = confusion_matrix(self.y_true, self.y_pred)
        class_names = list(self.valid_generator.class_indices.keys())

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')

        matrix_path = Path("confusion_matrix.png")
        plt.savefig(matrix_path)
        # FIX: close the figure so repeated evaluations don't leak figures.
        plt.close()

        # str() for compatibility with mlflow versions expecting a string path.
        mlflow.log_artifact(str(matrix_path), "plots")
        print("Confusion Matrix plot saved and logged to MLflow.")

    def log_into_mlflow(self):
        """Log params, metrics, per-class report, plot, and model to MLflow."""
        mlflow.set_tracking_uri(self.config.mlflow_uri)

        with mlflow.start_run():
            print("Logging basic parameters and metrics to MLflow...")
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics({"loss": self.score[0], "accuracy": self.score[1]})

            # --- Log detailed classification report metrics ---
            print("\n--- Classification Report ---")
            target_names = list(self.valid_generator.class_indices.keys())
            report = classification_report(self.y_true, self.y_pred,
                                           target_names=target_names,
                                           output_dict=True)
            print(classification_report(self.y_true, self.y_pred,
                                        target_names=target_names))

            # Flatten each per-class dict into individual MLflow metrics,
            # skipping scalar entries such as overall 'accuracy'.
            for class_name, metrics in report.items():
                if isinstance(metrics, dict):
                    for metric_name, value in metrics.items():
                        mlflow.log_metric(f"{class_name}_{metric_name}", value)

            # --- Log the confusion matrix plot ---
            self.log_confusion_matrix()

            # --- Log the model as an artifact ---
            print("Logging model as an artifact...")
            mlflow.keras.log_model(self.model, "model")

            print("MLflow logging complete.")
|
src/cnnClassifier/components/model_trainer.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import urllib.request as request
|
| 3 |
+
from zipfile import ZipFile
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
import time
|
| 6 |
+
from cnnClassifier.entity.config_entity import TrainingConfig
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# --- NEW IMPORTS ---
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
|
| 12 |
+
# --------------------
|
| 13 |
+
|
| 14 |
+
class Training:
    """Fine-tunes the prepared base model on the ingested image dataset,
    with best-model checkpointing, early stopping, and history export."""

    def __init__(self, config: TrainingConfig):
        self.config = config
        self.model = None
        self.train_generator = None
        self.valid_generator = None

    def get_base_model(self):
        """Load the updated (head-attached) base model from disk."""
        self.model = tf.keras.models.load_model(self.config.updated_base_model_path)

    def train_valid_generator(self):
        """Build the training and validation generators (80/20 split)."""
        common_kwargs = dict(
            rescale=1./255,
            validation_split=0.20,
        )
        flow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear",
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(**common_kwargs)
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **flow_kwargs,
        )

        if self.config.params_is_augmentation:
            # Mild augmentation only (small ranges) for training stability.
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=20,
                horizontal_flip=True,
                width_shift_range=0.1,
                height_shift_range=0.1,
                shear_range=0.1,
                zoom_range=0.1,
                **common_kwargs,
            )
        else:
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **flow_kwargs,
        )

        # Surface the label -> index mapping so predictions can be traced back.
        print(f"Discovered class indices: {self.train_generator.class_indices}")

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist a Keras model to the given path."""
        model.save(path)

    def train(self):
        """Run model.fit with smart callbacks and save the epoch history."""
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        # Keep only the best weights (by validation accuracy) on disk.
        best_model_checkpoint = ModelCheckpoint(
            filepath=self.config.trained_model_path,
            save_best_only=True,
            monitor='val_accuracy',
            mode='max',
            verbose=1,
        )
        # Stop once validation accuracy plateaus for 5 epochs; roll back to best.
        early_stopping = EarlyStopping(
            monitor='val_accuracy',
            patience=5,
            restore_best_weights=True,
            verbose=1,
        )
        callbacks_list = [best_model_checkpoint, early_stopping]

        history = self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator,
            callbacks=callbacks_list,
        )

        # Dump per-epoch metrics to CSV for offline analysis/plotting.
        history_df = pd.DataFrame(history.history)
        history_path = "training_history.csv"
        history_df.to_csv(history_path, index=False)
        print(f"✅ Training history saved to {history_path}")

        # No explicit final save here: ModelCheckpoint has already written the
        # best model to trained_model_path, which is what downstream stages use.
|
src/cnnClassifier/components/prepare_base_model.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import urllib.request as request
|
| 3 |
+
from zipfile import ZipFile
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from cnnClassifier.entity.config_entity import PrepareBaseModelConfig
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class PrepareBaseModel:
    """Builds the VGG16 transfer-learning model.

    Downloads the pretrained base, freezes the requested layers, attaches a
    softmax classification head, and saves both the raw and the updated model.
    """

    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config

    def get_base_model(self):
        """Download the pretrained VGG16 base and save it to disk."""
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Freeze layers as requested, attach a softmax head, compile and return.

        Args:
            model: pretrained base model.
            classes (int): number of output classes.
            freeze_all (bool): freeze every base layer when True.
            freeze_till (int | None): if set (> 0), freeze all but the last
                `freeze_till` layers of the base.
            learning_rate (float): SGD learning rate.

        Returns:
            The compiled full model.
        """
        # BUG FIX: the original assigned `model.trainable = False` inside both
        # loops, which froze the ENTIRE model regardless of `freeze_till`.
        # Freezing must be applied to each layer individually.
        if freeze_all:
            for layer in model.layers:
                layer.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for layer in model.layers[:-freeze_till]:
                layer.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Freeze the base, add the classification head, and save the result."""
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist `model` to `path` via Keras' native save."""
        model.save(path)
|
| 74 |
+
|
src/cnnClassifier/config/__init__.py
ADDED
|
File without changes
|
src/cnnClassifier/config/configuration.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from cnnClassifier.constants import *
|
| 3 |
+
from cnnClassifier.utils.common import read_yaml, create_directories , save_json
|
| 4 |
+
from cnnClassifier.entity.config_entity import (DataIngestionConfig, PrepareBaseModelConfig, TrainingConfig, EvaluationConfig)
|
| 5 |
+
|
| 6 |
+
class ConfigurationManager:
    """Reads config.yaml / params.yaml and assembles the typed config
    objects consumed by each pipeline stage."""

    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH):
        # Both files come back as ConfigBox objects, so nested keys are
        # reachable via attribute access.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Assemble the stage-01 (data ingestion) configuration."""
        ingestion = self.config.data_ingestion
        create_directories([ingestion.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion.root_dir,
            source_URL=ingestion.source_URL,
            local_data_file=ingestion.local_data_file,
            unzip_dir=ingestion.unzip_dir,
        )

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Assemble the stage-02 (base model preparation) configuration."""
        base_model = self.config.prepare_base_model
        create_directories([base_model.root_dir])

        return PrepareBaseModelConfig(
            root_dir=Path(base_model.root_dir),
            base_model_path=Path(base_model.base_model_path),
            updated_base_model_path=Path(base_model.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES,
        )

    def get_training_config(self) -> TrainingConfig:
        """Assemble the stage-03 (training) configuration."""
        training = self.config.training
        base_model = self.config.prepare_base_model
        # The dataset lives inside the ingestion stage's unzip directory.
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chest-CT-Scan-data")
        create_directories([Path(training.root_dir)])

        return TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=self.params.EPOCHS,
            params_batch_size=self.params.BATCH_SIZE,
            params_is_augmentation=self.params.AUGMENTATION,
            params_image_size=self.params.IMAGE_SIZE,
        )

    def get_evaluation_config(self) -> EvaluationConfig:
        """Assemble the stage-04 (evaluation + MLflow) configuration.

        NOTE(review): the model path, data path and tracking URI are
        hard-coded here rather than read from config.yaml — confirm this
        is intentional.
        """
        return EvaluationConfig(
            path_of_model="artifacts/training/model.h5",
            training_data="artifacts/data_ingestion/Chest-CT-Scan-data",
            mlflow_uri="https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow",
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE,
        )
|
| 82 |
+
|
src/cnnClassifier/constants/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path

# Project-wide configuration file locations, resolved relative to the
# repository root (the working directory the pipeline is launched from).
CONFIG_FILE_PATH = Path("config/config.yaml")
PARAMS_FILE_PATH = Path("params.yaml")
|
src/cnnClassifier/entity/__init__.py
ADDED
|
File without changes
|
src/cnnClassifier/entity/config_entity.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage (stage 01)."""
    root_dir: Path          # artifacts directory for this stage
    source_URL: str         # URL the dataset archive is downloaded from
    local_data_file: Path   # where the downloaded archive is stored
    unzip_dir: Path         # directory the archive is extracted into
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the base-model preparation stage (stage 02)."""
    root_dir: Path                  # artifacts directory for this stage
    base_model_path: Path           # where the raw pretrained model is saved
    updated_base_model_path: Path   # where the model with the new head is saved
    params_image_size: list         # network input shape from params.yaml
    params_learning_rate: float     # optimizer learning rate
    params_include_top: bool        # keep the pretrained classifier head when True
    params_weights: str             # pretrained weight set identifier
    params_classes: int             # number of output classes
|
| 23 |
+
|
| 24 |
+
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the model training stage (stage 03)."""
    root_dir: Path                  # artifacts directory for this stage
    trained_model_path: Path        # destination of the trained model
    updated_base_model_path: Path   # prepared base model training starts from
    training_data: Path             # root directory of the training images
    params_epochs: int              # number of training epochs
    params_batch_size: int          # generator batch size
    params_is_augmentation: bool    # enable data augmentation when True
    params_image_size: list         # network input shape from params.yaml
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable settings for the evaluation / MLflow-logging stage (stage 04)."""
    path_of_model: Path       # trained model file to evaluate
    training_data: Path       # dataset directory used for evaluation
    all_params: dict          # full params.yaml contents (logged with the run)
    mlflow_uri: str           # MLflow tracking server URI
    params_image_size: list   # model input size from params.yaml
    params_batch_size: int    # batch size for the evaluation generator
|
src/cnnClassifier/pipeline/__init__.py
ADDED
|
File without changes
|
src/cnnClassifier/pipeline/prediction.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import tensorflow as tf
|
| 3 |
+
from tensorflow.keras.preprocessing import image
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
class PredictionPipeline:
    """Loads the trained classifier and predicts the class index of one image."""

    def __init__(self, filename):
        # Path of the image file to classify.
        self.filename = filename

    def predict(self):
        """Return the predicted class index for `self.filename`.

        The caller (app.py) is responsible for translating the raw index
        (0/1) into a human-readable label.
        """
        # Load the BEST checkpoint produced by the DVC training stage.
        best_model = tf.keras.models.load_model(
            os.path.join("artifacts", "training", "best_model.h5")
        )

        # Read the image at the network's input resolution.
        loaded = image.load_img(self.filename, target_size=(224, 224))
        pixels = image.img_to_array(loaded)

        # Rescale pixels to [0, 1] to match the training-time preprocessing.
        pixels = pixels / 255.0

        # Add the leading batch dimension expected by the model.
        batch = np.expand_dims(pixels, axis=0)

        result_index = np.argmax(best_model.predict(batch), axis=1)[0]
        print(f"Model predicted index: {result_index}")

        return result_index
|
src/cnnClassifier/pipeline/stage_01_data_ingestion.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
| 2 |
+
from cnnClassifier.components.data_ingestion import DataIngestion
|
| 3 |
+
from cnnClassifier import logger
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
STAGE_NAME = "Data Ingestion stage"

class DataIngestionTrainingPipeline:
    """Runs stage 01: download the dataset archive and extract it."""

    def __init__(self):
        # No state is needed; configuration is resolved inside main().
        pass

    def main(self):
        """Resolve the ingestion config, then download and unzip the data."""
        ingestion_config = ConfigurationManager().get_data_ingestion_config()
        ingestion = DataIngestion(config=ingestion_config)
        ingestion.download_file()
        ingestion.extract_zip_file()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Entry point: allows this stage to be run standalone (e.g. by DVC).
if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        obj = DataIngestionTrainingPipeline()
        obj.main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        # Log the full traceback before re-raising so the pipeline fails loudly.
        logger.exception(e)
        raise e
|
src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
| 2 |
+
from cnnClassifier.components.prepare_base_model import PrepareBaseModel
|
| 3 |
+
from cnnClassifier import logger
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
STAGE_NAME = "Prepare base model"


class PrepareBaseModelTrainingPipeline:
    """Runs stage 02: download the pretrained base and attach the new head."""

    def __init__(self):
        # No state is needed; configuration is resolved inside main().
        pass

    def main(self):
        """Build, update and persist the base model per configuration."""
        base_model_config = ConfigurationManager().get_prepare_base_model_config()
        preparer = PrepareBaseModel(config=base_model_config)
        preparer.get_base_model()
        preparer.update_base_model()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Entry point: allows this stage to be run standalone (e.g. by DVC).
if __name__ == '__main__':
    try:
        logger.info(f"*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        obj = PrepareBaseModelTrainingPipeline()
        obj.main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        # Log the full traceback before re-raising so the pipeline fails loudly.
        logger.exception(e)
        raise e
|
src/cnnClassifier/pipeline/stage_03_model_trainer.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
| 2 |
+
from cnnClassifier.components.model_trainer import Training
|
| 3 |
+
from cnnClassifier import logger
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
STAGE_NAME = "Training"


class ModelTrainingPipeline:
    """Runs stage 03: fine-tune the prepared model on the ingested dataset."""

    def __init__(self):
        # No state is needed; configuration is resolved inside main().
        pass

    def main(self):
        """Load the base model, build the data generators, and train."""
        trainer_config = ConfigurationManager().get_training_config()
        trainer = Training(config=trainer_config)
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Entry point: allows this stage to be run standalone (e.g. by DVC).
if __name__ == '__main__':
    try:
        logger.info(f"*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        obj = ModelTrainingPipeline()
        obj.main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        # Log the full traceback before re-raising so the pipeline fails loudly.
        logger.exception(e)
        raise e
|
| 35 |
+
|
src/cnnClassifier/pipeline/stage_04_model_evaluation.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
| 2 |
+
from cnnClassifier.components.model_evaluation_mlflow import Evaluation
|
| 3 |
+
from cnnClassifier import logger
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
# Load .env so MLflow/DagsHub credentials are available before the
# evaluation stage connects to the tracking server.
load_dotenv()

STAGE_NAME = "Evaluation stage"

class EvaluationPipeline:
    """Runs stage 04: evaluate the trained model and log results to MLflow."""

    def __init__(self):
        # No state is needed; configuration is resolved inside main().
        pass

    def main(self):
        """Evaluate the model (scores are saved inside evaluation()) and
        push metrics/params to MLflow."""
        evaluation_config = ConfigurationManager().get_evaluation_config()
        evaluator = Evaluation(evaluation_config)
        evaluator.evaluation()
        evaluator.log_into_mlflow()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# Entry point: allows this stage to be run standalone (e.g. by DVC).
if __name__ == '__main__':
    try:
        logger.info(f"*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")

        # Create the pipeline object and run it.
        pipeline = EvaluationPipeline()
        pipeline.main()

        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        # Log the full traceback before re-raising so the pipeline fails loudly.
        logger.exception(e)
        raise e
|
src/cnnClassifier/utils/__init__.py
ADDED
|
File without changes
|
src/cnnClassifier/utils/common.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from box.exceptions import BoxValueError
|
| 3 |
+
import yaml
|
| 4 |
+
from cnnClassifier import logger
|
| 5 |
+
import json
|
| 6 |
+
import joblib
|
| 7 |
+
from ensure import ensure_annotations
|
| 8 |
+
from box import ConfigBox
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any
|
| 11 |
+
import base64
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Parse a YAML file into a ConfigBox (attribute-style access).

    Args:
        path_to_yaml (Path): location of the YAML file.

    Raises:
        ValueError: if the YAML file is empty.
        Exception: any other error is re-raised unchanged.

    Returns:
        ConfigBox: parsed contents with attribute access.
    """
    try:
        with open(path_to_yaml) as yaml_file:
            parsed = yaml.safe_load(yaml_file)
        logger.info(f"yaml file: {path_to_yaml} loaded successfully")
        return ConfigBox(parsed)
    except BoxValueError:
        # ConfigBox raises BoxValueError when the file parses to nothing.
        raise ValueError("yaml file is empty")
    except Exception as e:
        raise e
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@ensure_annotations
def create_directories(path_to_directories: list, verbose=True):
    """Create every directory in the list (no error if it already exists).

    Args:
        path_to_directories (list): directory paths to create.
        verbose (bool, optional): log each created directory. Defaults to True.
    """
    for directory in path_to_directories:
        os.makedirs(directory, exist_ok=True)
        if verbose:
            logger.info(f"created directory at: {directory}")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@ensure_annotations
def save_json(path: Path, data: dict):
    """Write `data` to `path` as pretty-printed JSON.

    Args:
        path (Path): destination json file.
        data (dict): payload to serialise.
    """
    with open(path, "w") as out_file:
        json.dump(data, out_file, indent=4)
    logger.info(f"json file saved at: {path}")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Read a JSON file and wrap the result in a ConfigBox.

    Args:
        path (Path): json file to read.

    Returns:
        ConfigBox: contents as class attributes instead of a plain dict.
    """
    with open(path) as in_file:
        payload = json.load(in_file)
    logger.info(f"json file loaded succesfully from: {path}")
    return ConfigBox(payload)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@ensure_annotations
def save_bin(data: Any, path: Path):
    """Serialise an arbitrary object to disk with joblib.

    Args:
        data (Any): object to persist.
        path (Path): destination file.
    """
    joblib.dump(value=data, filename=path)
    logger.info(f"binary file saved at: {path}")
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@ensure_annotations
def load_bin(path: Path) -> Any:
    """Deserialise a joblib file from disk.

    Args:
        path (Path): binary file to read.

    Returns:
        Any: the object stored in the file.
    """
    loaded = joblib.load(path)
    logger.info(f"binary file loaded from: {path}")
    return loaded
|
| 113 |
+
|
| 114 |
+
@ensure_annotations
def get_size(path: Path) -> str:
    """Return the file size as a human-readable string in kilobytes.

    Args:
        path (Path): file to measure.

    Returns:
        str: size rounded to whole KB, e.g. "~ 12 KB".
    """
    kilobytes = round(os.path.getsize(path) / 1024)
    return f"~ {kilobytes} KB"
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def decodeImage(imgstring, fileName):
    """Decode a base64 string and write the raw bytes to `fileName`.

    Args:
        imgstring (str | bytes): base64-encoded image payload.
        fileName (str): destination path for the decoded bytes.
    """
    imgdata = base64.b64decode(imgstring)
    # FIX: the original also called f.close() inside the `with` block,
    # which is redundant — the context manager already closes the file.
    with open(fileName, 'wb') as f:
        f.write(imgdata)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def encodeImageIntoBase64(croppedImagePath):
    """Read a file and return its contents base64-encoded.

    Args:
        croppedImagePath (str): path of the image file to encode.

    Returns:
        bytes: base64 encoding of the file contents.
    """
    with open(croppedImagePath, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes)
|
static/script.js
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Front-end controller for the classifier UI: wires up the image uploader,
// sends the Base64 payload to the /predict endpoint, and renders both the
// friendly result panel and the raw JSON response.
document.addEventListener('DOMContentLoaded', function () {
    // --- DOM Elements ---
    const fileInput = document.getElementById('fileInput');
    const uploadLabel = document.querySelector('.upload-label');
    const imagePreviewContainer = document.querySelector('.image-preview-container');
    const imagePreview = document.getElementById('imagePreview');
    const removeImageBtn = document.getElementById('removeImageBtn');
    const predictBtn = document.getElementById('predictBtn');
    const resultContainer = document.getElementById('result-container');
    const jsonResponse = document.getElementById('jsonResponse').querySelector('code');

    // Base64 payload of the currently selected image (null when none selected).
    let base64Image = null;

    // --- Event Listeners ---
    fileInput.addEventListener('change', handleFileSelect);
    removeImageBtn.addEventListener('click', resetUploader);
    predictBtn.addEventListener('click', handlePrediction);

    // --- Functions ---

    /**
     * Handles the file selection, reads the file as a Base64 string,
     * and updates the UI to show the preview.
     */
    function handleFileSelect(event) {
        const file = event.target.files[0];
        if (file) {
            const reader = new FileReader();
            reader.onload = function(e) {
                // Display the image preview
                imagePreview.src = e.target.result;
                uploadLabel.style.display = 'none';
                imagePreviewContainer.style.display = 'block';

                // Store the Base64 string (without the data URI prefix)
                base64Image = e.target.result.split(',')[1];

                // Enable the predict button
                predictBtn.disabled = false;
                resultContainer.innerHTML = '<p class="text-muted">Ready to predict.</p>';
                jsonResponse.textContent = 'Waiting for response...';
            };
            reader.readAsDataURL(file);
        }
    }

    /**
     * Resets the uploader to its initial state.
     */
    function resetUploader() {
        fileInput.value = ''; // Clear the file input
        base64Image = null;
        imagePreview.src = '#';
        uploadLabel.style.display = 'flex';
        imagePreviewContainer.style.display = 'none';
        predictBtn.disabled = true;
        resultContainer.innerHTML = '<p class="text-muted">Results will be displayed here after prediction.</p>';
        jsonResponse.textContent = 'Waiting for response...';
    }

    /**
     * Handles the prediction API call.
     */
    async function handlePrediction() {
        if (!base64Image) {
            alert('Please upload an image first.');
            return;
        }

        setLoadingState(true);

        // !! IMPORTANT: Change this URL to your actual API endpoint !!
        const apiUrl = '/predict'; // Example for a local Flask app

        try {
            const response = await fetch(apiUrl, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ image: base64Image }),
            });

            if (!response.ok) {
                throw new Error(`Server error: ${response.statusText}`);
            }

            const data = await response.json();
            displayResults(data);

        } catch (error) {
            console.error('Prediction Error:', error);
            displayError(error.message);
        } finally {
            // Always restore the button, whether the call succeeded or failed.
            setLoadingState(false);
        }
    }

    /**
     * Displays the prediction results in a user-friendly format.
     */
    function displayResults(data) {
        // Assuming the response is like: [{"prediction": "Normal"}]
        const prediction = data[0]?.prediction; // Safely access the prediction

        let resultHtml = '';
        if (prediction) {
            if (prediction.toLowerCase() === 'normal') {
                resultHtml = `
                    <div class="result-normal">
                        <i class="fas fa-check-circle result-icon"></i>
                        <h3>Prediction: Normal</h3>
                        <p>The model predicts that the scan is not cancerous.</p>
                    </div>`;
            } else {
                resultHtml = `
                    <div class="result-cancer">
                        <i class="fas fa-exclamation-triangle result-icon"></i>
                        <h3>Prediction: Cancer Detected</h3>
                        <p>The model predicts a high probability of malignancy. Please consult a medical professional.</p>
                    </div>`;
            }
        } else {
            resultHtml = `<p>Could not determine prediction from the response.</p>`;
        }

        resultContainer.innerHTML = resultHtml;
        jsonResponse.textContent = JSON.stringify(data, null, 2);
    }

    /**
     * Displays an error message in the UI.
     */
    function displayError(errorMessage) {
        resultContainer.innerHTML = `
            <div class="text-danger">
                <i class="fas fa-times-circle result-icon"></i>
                <h3>Prediction Failed</h3>
                <p>${errorMessage}</p>
            </div>`;
        jsonResponse.textContent = `Error: ${errorMessage}`;
    }

    /**
     * Manages the loading state of the predict button.
     */
    function setLoadingState(isLoading) {
        const spinner = predictBtn.querySelector('.spinner-border');
        const btnText = predictBtn.querySelector('.btn-text');

        if (isLoading) {
            predictBtn.disabled = true;
            spinner.style.display = 'inline-block';
            btnText.style.display = 'none';
        } else {
            predictBtn.disabled = false;
            spinner.style.display = 'none';
            btnText.style.display = 'inline-block';
        }
    }
});
|
static/style.css
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Stylesheet for the classifier web UI (uploader, cards, result panel). */

/* Using CSS Variables for easy theme changes */
:root {
    --primary-color: #007bff;
    --secondary-color: #6c757d;
    --background-color: #f8f9fa;
    --card-bg-color: #ffffff;
    --font-family: 'Poppins', sans-serif;
    --success-color: #28a745;
    --danger-color: #dc3545;
}

body {
    font-family: var(--font-family);
    background-color: var(--background-color);
}

header h1 {
    color: var(--primary-color);
    font-weight: 600;
}

.card {
    border-radius: 15px;
    transition: transform 0.2s ease-in-out;
}

.card:hover {
    transform: translateY(-5px);
}

.card-header {
    border-top-left-radius: 15px;
    border-top-right-radius: 15px;
}

/* Image uploader: dashed drop zone with a centered clickable label. */
.image-upload-wrapper {
    position: relative;
    width: 100%;
    height: 350px;
    border: 2px dashed var(--primary-color);
    border-radius: 10px;
    display: flex;
    align-items: center;
    justify-content: center;
    overflow: hidden;
    background-color: #f0f6ff;
}

.upload-label {
    cursor: pointer;
    text-align: center;
    color: var(--primary-color);
}

.upload-label:hover .upload-icon {
    transform: scale(1.1);
    color: #0056b3;
}

.upload-icon {
    transition: transform 0.2s ease-in-out;
}

.image-preview-container {
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    display: none; /* Hidden by default */
}

#imagePreview {
    width: 100%;
    height: 100%;
    object-fit: contain; /* Use contain to see the whole image */
    padding: 10px;
}

.remove-btn {
    position: absolute;
    top: 10px;
    right: 10px;
    border-radius: 50%;
    width: 30px;
    height: 30px;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 1.2rem;
    line-height: 1;
}

/* Result panel: green styling for normal, red for cancer. */
#result-container h3 {
    font-weight: 600;
}

.result-normal {
    color: var(--success-color);
}

.result-cancer {
    color: var(--danger-color);
}

.result-icon {
    font-size: 4rem;
    margin-bottom: 1rem;
}

#jsonResponse {
    max-height: 200px;
    overflow-y: auto;
    white-space: pre-wrap;
    word-break: break-all;
}
|
template.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from pathlib import Path
import logging

# Set up basic logging to see the script's output
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')

# Name of the top-level Python package the skeleton is built around.
project_name = "cnnClassifier"

# List of files (with their parent directories) to be created.
list_of_files = [
    ".github/workflows/.gitkeep",
    f"src/{project_name}/__init__.py",
    f"src/{project_name}/components/__init__.py",
    f"src/{project_name}/utils/__init__.py",
    f"src/{project_name}/config/__init__.py",
    f"src/{project_name}/config/configuration.py",
    f"src/{project_name}/pipeline/__init__.py",
    f"src/{project_name}/entity/__init__.py",
    f"src/{project_name}/constants/__init__.py",
    "config/config.yaml",
    "dvc.yaml",
    "params.yaml",
    "requirements.txt",
    "setup.py",
    "research/trials.ipynb",
    "templates/index.html"
]

# Loop through the list of files to create them
for filepath_str in list_of_files:
    filepath = Path(filepath_str)  # Convert string path to a Path object for robustness
    filedir, filename = os.path.split(filepath)

    # 1. Create the directory if it doesn't exist
    if filedir != "":
        os.makedirs(filedir, exist_ok=True)
        # BUGFIX: log message previously contained the literal placeholder
        # "(unknown)" instead of the actual file name.
        logging.info(f"Creating directory: {filedir} for the file: {filename}")

    # 2. Create the file if it doesn't exist or is empty — never overwrites
    #    a file that already has content.
    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
        with open(filepath, "w") as f:
            pass  # Creates an empty file
        logging.info(f"Creating empty file: {filepath}")
    else:
        # BUGFIX: same placeholder defect — report which file already exists.
        logging.info(f"{filename} already exists")
|
templates/index.html
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- Single-page UI for the chest-cancer classifier: upload panel (left),
     prediction results + raw JSON accordion (right). Served by Flask;
     script.js posts the image and fills #result-container / #jsonResponse. -->
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Chest Cancer Detection AI</title>

    <!-- Bootstrap 5 CSS -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">

    <!-- Font Awesome for Icons -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">

    <!-- Google Fonts (Poppins) -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap" rel="stylesheet">

    <!-- Your Custom CSS -->
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>

    <header class="text-center py-4 shadow-sm">
        <div class="container">
            <h1><i class="fas fa-lungs-virus"></i> Chest Cancer Detection AI</h1>
            <p class="lead text-muted">Upload a Chest CT Scan to classify it as Normal or Cancerous</p>
        </div>
    </header>

    <main class="container my-5">
        <div class="row g-4">
            <!-- Left Column: Uploader -->
            <div class="col-lg-6">
                <div class="card h-100 shadow-lg border-0">
                    <div class="card-body text-center d-flex flex-column justify-content-center">
                        <div class="image-upload-wrapper">
                            <!-- Hidden file input; the label below acts as the click target -->
                            <input type="file" id="fileInput" accept="image/png, image/jpeg" style="display: none;">
                            <label for="fileInput" class="upload-label">
                                <div class="upload-icon">
                                    <i class="fas fa-cloud-upload-alt fa-3x"></i>
                                </div>
                                <p class="upload-text"><strong>Click to browse</strong> or drag and drop an image here.</p>
                            </label>
                            <!-- Preview overlay; shown by script.js once an image is selected -->
                            <div class="image-preview-container">
                                <img id="imagePreview" src="#" alt="Image Preview" class="img-fluid rounded"/>
                                <button id="removeImageBtn" class="btn btn-sm btn-danger remove-btn">&times;</button>
                            </div>
                        </div>
                        <!-- Disabled until an image is chosen; spinner toggled while predicting -->
                        <button id="predictBtn" class="btn btn-primary btn-lg mt-4 w-100" disabled>
                            <span class="spinner-border spinner-border-sm" role="status" aria-hidden="true" style="display: none;"></span>
                            <span class="btn-text"><i class="fas fa-microscope"></i> Predict</span>
                        </button>
                    </div>
                </div>
            </div>

            <!-- Right Column: Results -->
            <div class="col-lg-6">
                <div class="card h-100 shadow-lg border-0">
                    <div class="card-header bg-primary text-white">
                        <h5 class="mb-0"><i class="fas fa-poll"></i> Prediction Results</h5>
                    </div>
                    <div class="card-body">
                        <div id="result-container" class="text-center">
                            <p class="text-muted">Results will be displayed here after prediction.</p>
                        </div>
                        <hr>
                        <!-- Collapsible panel exposing the raw API response for debugging -->
                        <div class="accordion" id="jsonAccordion">
                            <div class="accordion-item">
                                <h2 class="accordion-header" id="headingOne">
                                    <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOne" aria-expanded="false" aria-controls="collapseOne">
                                        Raw JSON Response
                                    </button>
                                </h2>
                                <div id="collapseOne" class="accordion-collapse collapse" aria-labelledby="headingOne" data-bs-parent="#jsonAccordion">
                                    <div class="accordion-body">
                                        <pre id="jsonResponse" class="bg-light p-3 rounded"><code>Waiting for response...</code></pre>
                                    </div>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </main>

    <footer class="text-center text-muted py-3 mt-4">
        <p>&copy; 2024 Your Name. Powered by AI.</p>
    </footer>

    <!-- Bootstrap 5 JS -->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
    <!-- Your Custom JS -->
    <script src="{{ url_for('static', filename='script.js') }}"></script>
</body>
</html>
|
training_history.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
loss,accuracy,val_loss,val_accuracy
|
| 2 |
+
1.0304151773452759,0.5675675868988037,0.6861137747764587,0.421875
|
| 3 |
+
1.2392491102218628,0.5057914853096008,0.2789396345615387,0.9375
|
| 4 |
+
0.521758496761322,0.7953668236732483,0.277998149394989,0.9375
|
| 5 |
+
0.46904969215393066,0.760617733001709,0.2396804541349411,0.9375
|
| 6 |
+
0.2891399562358856,0.8648648858070374,0.13092049956321716,0.96875
|
| 7 |
+
0.2712053954601288,0.8823529481887817,0.10788409411907196,1.0
|
| 8 |
+
0.23328891396522522,0.9305019378662109,0.09912744164466858,0.984375
|
| 9 |
+
0.22442513704299927,0.92277991771698,0.14693066477775574,0.984375
|
| 10 |
+
0.19375579059123993,0.9189189076423645,0.08046227693557739,0.984375
|
| 11 |
+
0.20040491223335266,0.9189189076423645,0.2098347544670105,0.9375
|