ALYYAN committed on
Commit
d576da9
·
0 Parent(s):

Initial commit of clean, working project

Browse files
Files changed (49) hide show
  1. .dvc/.gitignore +3 -0
  2. .dvc/config +0 -0
  3. .dvcignore +3 -0
  4. .gitattributes +2 -0
  5. .github/workflows/main.yaml +57 -0
  6. .gitignore +214 -0
  7. LICENSE +21 -0
  8. README.md +2 -0
  9. app.py +55 -0
  10. class_check.py +66 -0
  11. config/config.yaml +17 -0
  12. confusion_matrix.png +0 -0
  13. dockerfile +13 -0
  14. dvc.lock +113 -0
  15. dvc.yaml +54 -0
  16. main.py +54 -0
  17. params.yaml +18 -0
  18. requirements.txt +0 -0
  19. research/01_data_ingestion.ipynb +230 -0
  20. research/02_prepare_base_model.ipynb +290 -0
  21. research/03_model_trainer.ipynb +303 -0
  22. research/04_model_evaluation_with_mlflow.ipynb +328 -0
  23. research/trials.ipynb +43 -0
  24. scores.json +4 -0
  25. setup.py +33 -0
  26. src/cnnClassifier/__init__.py +22 -0
  27. src/cnnClassifier/components/__init__.py +0 -0
  28. src/cnnClassifier/components/data_ingestion.py +46 -0
  29. src/cnnClassifier/components/model_evaluation_mlflow.py +124 -0
  30. src/cnnClassifier/components/model_trainer.py +127 -0
  31. src/cnnClassifier/components/prepare_base_model.py +74 -0
  32. src/cnnClassifier/config/__init__.py +0 -0
  33. src/cnnClassifier/config/configuration.py +82 -0
  34. src/cnnClassifier/constants/__init__.py +4 -0
  35. src/cnnClassifier/entity/__init__.py +0 -0
  36. src/cnnClassifier/entity/config_entity.py +43 -0
  37. src/cnnClassifier/pipeline/__init__.py +0 -0
  38. src/cnnClassifier/pipeline/prediction.py +36 -0
  39. src/cnnClassifier/pipeline/stage_01_data_ingestion.py +31 -0
  40. src/cnnClassifier/pipeline/stage_02_prepare_base_model.py +32 -0
  41. src/cnnClassifier/pipeline/stage_03_model_trainer.py +35 -0
  42. src/cnnClassifier/pipeline/stage_04_model_evaluation.py +37 -0
  43. src/cnnClassifier/utils/__init__.py +0 -0
  44. src/cnnClassifier/utils/common.py +137 -0
  45. static/script.js +159 -0
  46. static/style.css +116 -0
  47. template.py +47 -0
  48. templates/index.html +98 -0
  49. training_history.csv +11 -0
.dvc/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ /config.local
2
+ /tmp
3
+ /cache
.dvc/config ADDED
File without changes
.dvcignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Add patterns of files dvc should ignore, which could improve
2
+ # the performance. Learn more at
3
+ # https://dvc.org/doc/user-guide/dvcignore
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.h5 filter=lfs diff=lfs merge=lfs -text
2
+ artifacts/training/model/variables/variables.data-*-of-* filter=lfs diff=lfs merge=lfs -text
.github/workflows/main.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI-CD Pipeline for Chest Cancer Classifier
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ # Job 1: Continuous Integration (Test the application)
10
+ ci-test:
11
+ name: Continuous Integration - Test Application
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - name: Checkout Code
16
+ uses: actions/checkout@v3
17
+
18
+ - name: Set up Python 3.8
19
+ uses: actions/setup-python@v4
20
+ with:
21
+ python-version: '3.8'
22
+
23
+ - name: Install Dependencies
24
+ run: pip install -r requirements.txt
25
+
26
+ - name: Run a simple health check
27
+ run: echo "Placeholder for future tests. For now, we just check if dependencies install."
28
+
29
+ # Job 2: Continuous Deployment (Deploy to Hugging Face with Manual Git Push)
30
+ cd-deploy:
31
+ name: Continuous Deployment - Deploy to Hugging Face
32
+ needs: ci-test # This job will only run if the 'ci-test' job succeeds
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+ - name: Checkout Code
37
+ uses: actions/checkout@v3
38
+ with:
39
+ # We need to fetch all history and tags for the push to work correctly
40
+ fetch-depth: 0
41
+ lfs: true
42
+
43
+ - name: Push to Hugging Face Hub
44
+ env:
45
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
46
+ HF_SPACE_REPO: ${{ secrets.HF_SPACE_REPO }}
47
+ run: |
48
+ echo "Setting up git repository for Hugging Face push"
49
+ # Add the Hugging Face Space as a new remote repository named "hf"
50
+ # Use your canonical (lowercase) HF username here
51
+ git remote add hf "https://alyyanahmed21:${HF_TOKEN}@huggingface.co/spaces/${HF_SPACE_REPO}"
52
+
53
+ echo "Pushing to Hugging Face..."
54
+ # Force-push the main branch from your GitHub repo to the main branch on the HF remote
55
+ git push --force hf main
56
+
57
+ echo "✅ Deployment successful!"
.gitignore ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ artifacts/*
210
+
211
+ mlruns/
212
+ .env
213
+ model/
214
+ cnn_env/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 ALYYAN
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, request, jsonify, render_template
import os
from flask_cors import CORS, cross_origin
from cnnClassifier.utils.common import decodeImage
from cnnClassifier.pipeline.prediction import PredictionPipeline

# Set environment variables for consistent encoding
os.putenv('LANG', 'en_US.UTF-8')
os.putenv('LC_ALL', 'en_US.UTF-8')

app = Flask(__name__)
CORS(app)


class ClientApp:
    """Holds the per-process prediction pipeline and its fixed input-image path."""

    def __init__(self):
        # Every incoming base64 image is decoded to this one filename
        # before inference.
        self.filename = "inputImage.jpg"
        self.classifier = PredictionPipeline(self.filename)


# BUG FIX: create the client at import time. The original instantiated
# `clApp` only under `if __name__ == "__main__":`, but this project's
# dockerfile serves the app with `gunicorn app:app`, which imports the
# module without running that guard — so /predict raised NameError in
# the deployed container. Module-level creation works for both the
# dev server and gunicorn.
clApp = ClientApp()


@app.route("/", methods=['GET'])
@cross_origin()
def home():
    """Renders the main user interface."""
    return render_template('index.html')


@app.route("/train", methods=['GET', 'POST'])
@cross_origin()
def trainRoute():
    """Triggers the DVC pipeline to retrain the model."""
    # os.system("python main.py") # You can use this if you have a main orchestrator
    os.system("dvc repro")
    return "Training done successfully!"


@app.route("/predict", methods=['POST'])
@cross_origin()
def predictRoute():
    """Decodes the posted base64 image, runs inference, returns the label."""
    image = request.json['image']
    decodeImage(image, clApp.filename)

    # predict() returns just the class index; the confirmed mapping is
    # {'adenocarcinoma': 0, 'normal': 1}.
    prediction_value = clApp.classifier.predict()
    prediction_text = "Normal" if prediction_value == 1 else "Cancer"

    # The front-end expects the key "prediction".
    return jsonify([{"prediction": prediction_text}])


if __name__ == "__main__":
    # Run the app on all available interfaces (for Docker/deployment) and port 8080
    app.run(host='0.0.0.0', port=8080)
class_check.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# check_data_balance.py

import os
from pathlib import Path


def check_dataset_balance(data_path: Path):
    """
    Print a per-class image count report for a dataset directory.

    The expected layout is one subdirectory per class, each containing
    the image files for that class:

        data_path/
            class_A/  image1.jpg, image2.jpg, ...
            class_B/  image1.jpg, image2.jpg, ...

    Args:
        data_path (Path): The path to the main dataset directory.
    """
    print(f"--- Checking Dataset Balance at: {data_path} ---\n")

    if not data_path.is_dir():
        print(f"❌ ERROR: The provided path is not a valid directory.")
        return

    class_names = [entry.name for entry in data_path.iterdir() if entry.is_dir()]

    if not class_names:
        print("❌ ERROR: No class subdirectories found in the dataset folder.")
        return

    print(f"Found {len(class_names)} classes: {', '.join(class_names)}\n")

    # Count only regular files per class, so stray subdirectories
    # (like .ipynb_checkpoints) are ignored.
    class_counts = {
        name: sum(1 for item in (data_path / name).iterdir() if item.is_file())
        for name in class_names
    }
    total_images = sum(class_counts.values())

    print("--- Image Counts per Class ---")
    for name, count in class_counts.items():
        percentage = (count / total_images) * 100 if total_images > 0 else 0
        print(f"- {name:<20}: {count:>5} images ({percentage:.2f}%)")

    print("-" * 35)
    print(f"- {'Total':<20}: {total_images:>5} images\n")

    print("--- For your training script ---")
    print("Use these counts to calculate your class_weight dictionary.")


if __name__ == "__main__":
    # --- IMPORTANT ---
    # Update this path to point to your actual dataset folder.
    # This is the folder that contains the 'Normal' and 'adenocarcinoma' subfolders.
    dataset_directory = Path("artifacts/data_ingestion/Chest-CT-Scan-data")

    check_dataset_balance(dataset_directory)
config/config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifacts_root: artifacts
2
+
3
+
4
+ data_ingestion:
5
+ root_dir: artifacts/data_ingestion
6
+ source_URL: https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing
7
+ local_data_file: artifacts/data_ingestion/data.zip
8
+ unzip_dir: artifacts/data_ingestion
9
+
10
+ prepare_base_model:
11
+ root_dir: artifacts/prepare_base_model
12
+ base_model_path: artifacts/prepare_base_model/base_model.h5
13
+ updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
14
+
15
+ training:
16
+ root_dir: artifacts/training
17
+ trained_model_path: artifacts/training/model.h5
confusion_matrix.png ADDED
dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.8-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 8080
12
+
13
+ CMD ["gunicorn", "--bind", "0.0.0.0:8080", "app:app"]
dvc.lock ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ schema: '2.0'
2
+ stages:
3
+ data_ingestion:
4
+ cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
5
+ deps:
6
+ - path: config/config.yaml
7
+ hash: md5
8
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
9
+ size: 578
10
+ - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
11
+ hash: md5
12
+ md5: bad788253475f50d44fdaa7237967b49
13
+ size: 883
14
+ outs:
15
+ - path: artifacts/data_ingestion/Chest-CT-Scan-data
16
+ hash: md5
17
+ md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
18
+ size: 49247431
19
+ nfiles: 343
20
+ prepare_base_model:
21
+ cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
22
+ deps:
23
+ - path: config/config.yaml
24
+ hash: md5
25
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
26
+ size: 578
27
+ - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
28
+ hash: md5
29
+ md5: 30c63470719d961e32045908b7c0772d
30
+ size: 966
31
+ params:
32
+ params.yaml:
33
+ CLASSES: 2
34
+ IMAGE_SIZE:
35
+ - 224
36
+ - 224
37
+ - 3
38
+ INCLUDE_TOP: false
39
+ LEARNING_RATE: 0.001
40
+ WEIGHTS: imagenet
41
+ outs:
42
+ - path: artifacts/prepare_base_model
43
+ hash: md5
44
+ md5: 4aa2611cd37984c188512d3a19c6942b.dir
45
+ size: 118054560
46
+ nfiles: 2
47
+ training:
48
+ cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
49
+ deps:
50
+ - path: artifacts/data_ingestion/Chest-CT-Scan-data
51
+ hash: md5
52
+ md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
53
+ size: 49247431
54
+ nfiles: 343
55
+ - path: artifacts/prepare_base_model
56
+ hash: md5
57
+ md5: 4aa2611cd37984c188512d3a19c6942b.dir
58
+ size: 118054560
59
+ nfiles: 2
60
+ - path: config/config.yaml
61
+ hash: md5
62
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
63
+ size: 578
64
+ - path: src/cnnClassifier/pipeline/stage_03_model_trainer.py
65
+ hash: md5
66
+ md5: c33e23d2c123f157b2ab007c8e9d938f
67
+ size: 893
68
+ params:
69
+ params.yaml:
70
+ AUGMENTATION: true
71
+ BATCH_SIZE: 16
72
+ EPOCHS: 10
73
+ IMAGE_SIZE:
74
+ - 224
75
+ - 224
76
+ - 3
77
+ outs:
78
+ - path: artifacts/training/model.h5
79
+ hash: md5
80
+ md5: 233944d4fbed7856cf28be27c602014d
81
+ size: 59337520
82
+ evaluation:
83
+ cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
84
+ deps:
85
+ - path: artifacts/data_ingestion/Chest-CT-Scan-data
86
+ hash: md5
87
+ md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
88
+ size: 49247431
89
+ nfiles: 343
90
+ - path: artifacts/training/model.h5
91
+ hash: md5
92
+ md5: 233944d4fbed7856cf28be27c602014d
93
+ size: 59337520
94
+ - path: config/config.yaml
95
+ hash: md5
96
+ md5: d4c6e6a52ca35ea93094c3e1a421499e
97
+ size: 578
98
+ - path: src/cnnClassifier/pipeline/stage_04_model_evaluation.py
99
+ hash: md5
100
+ md5: d20a1645fd93cae9c7c0cecd8a0d4a2a
101
+ size: 1188
102
+ params:
103
+ params.yaml:
104
+ BATCH_SIZE: 16
105
+ IMAGE_SIZE:
106
+ - 224
107
+ - 224
108
+ - 3
109
+ outs:
110
+ - path: scores.json
111
+ hash: md5
112
+ md5: 2c6b298a9827df6c174dc1bbbf40d040
113
+ size: 59
dvc.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ data_ingestion:
3
+ cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
4
+ deps:
5
+ - src/cnnClassifier/pipeline/stage_01_data_ingestion.py
6
+ - config/config.yaml
7
+ outs:
8
+ - artifacts/data_ingestion/Chest-CT-Scan-data
9
+
10
+
11
+ prepare_base_model:
12
+ cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
13
+ deps:
14
+ - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
15
+ - config/config.yaml
16
+ params:
17
+ - IMAGE_SIZE
18
+ - INCLUDE_TOP
19
+ - CLASSES
20
+ - WEIGHTS
21
+ - LEARNING_RATE
22
+ outs:
23
+ - artifacts/prepare_base_model
24
+
25
+
26
+ training:
27
+ cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
28
+ deps:
29
+ - src/cnnClassifier/pipeline/stage_03_model_trainer.py
30
+ - config/config.yaml
31
+ - artifacts/data_ingestion/Chest-CT-Scan-data
32
+ - artifacts/prepare_base_model
33
+ params:
34
+ - IMAGE_SIZE
35
+ - EPOCHS
36
+ - BATCH_SIZE
37
+ - AUGMENTATION
38
+ outs:
39
+ - artifacts/training/model.h5
40
+
41
+
42
+ evaluation:
43
+ cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
44
+ deps:
45
+ - src/cnnClassifier/pipeline/stage_04_model_evaluation.py
46
+ - config/config.yaml
47
+ - artifacts/data_ingestion/Chest-CT-Scan-data
48
+ - artifacts/training/model.h5
49
+ params:
50
+ - IMAGE_SIZE
51
+ - BATCH_SIZE
52
+ metrics:
53
+ - scores.json:
54
+ cache: false
main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from cnnClassifier import logger
from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline
from cnnClassifier.pipeline.stage_03_model_trainer import ModelTrainingPipeline
from cnnClassifier.pipeline.stage_04_model_evaluation import EvaluationPipeline


def run_stage(stage_name, pipeline_cls):
    """Run one pipeline stage with standard start/finish log banners.

    Args:
        stage_name: Human-readable stage name used in the log messages.
        pipeline_cls: Pipeline class with a no-argument constructor and a
            ``main()`` method (the four stage classes imported above).

    Raises:
        Exception: whatever the stage raised, re-raised after logging so a
            failed run exits non-zero and aborts the remaining stages.
    """
    try:
        # The original file repeated this try/log boilerplate four times
        # (and omitted the separator line for the first stage); factoring
        # it out makes every stage log consistently.
        logger.info("*******************")
        logger.info(f">>>>>> stage {stage_name} started <<<<<<")
        pipeline_cls().main()
        logger.info(f">>>>>> stage {stage_name} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e


if __name__ == "__main__":
    # Stages run in dependency order; a failure in any stage stops the run.
    for name, cls in (
        ("Data Ingestion stage", DataIngestionTrainingPipeline),
        ("Prepare base model", PrepareBaseModelTrainingPipeline),
        ("Training", ModelTrainingPipeline),
        ("Evaluation stage", EvaluationPipeline),
    ):
        run_stage(name, cls)
params.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #AUGMENTATION: True
2
+ #IMAGE_SIZE: [224, 224, 3] # as per VGG 16 model
3
+ #BATCH_SIZE: 16
4
+ #INCLUDE_TOP: False
5
+ #EPOCHS: 1
6
+ #CLASSES: 2
7
+ #WEIGHTS: imagenet
8
+ #LEARNING_RATE: 0.01
9
+
10
+
11
+ AUGMENTATION: True
12
+ IMAGE_SIZE: [224, 224, 3]
13
+ BATCH_SIZE: 16 # Use 16 if 32 gives you memory errors, otherwise 32 is fine
14
+ INCLUDE_TOP: False
15
+ EPOCHS: 10 # Give the model enough time to learn
16
+ CLASSES: 2
17
+ WEIGHTS: imagenet
18
+ LEARNING_RATE: 0.001 # CRUCIAL: A small learning rate for fine-tuning
requirements.txt ADDED
Binary file (526 Bytes). View file
 
research/01_data_ingestion.ipynb ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "8f33ab85",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "5b55e660",
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "data": {
21
+ "text/plain": [
22
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
23
+ ]
24
+ },
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "output_type": "execute_result"
28
+ }
29
+ ],
30
+ "source": [
31
+ "%pwd"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 3,
37
+ "id": "b7338c82",
38
+ "metadata": {},
39
+ "outputs": [
40
+ {
41
+ "data": {
42
+ "text/plain": [
43
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
44
+ ]
45
+ },
46
+ "execution_count": 3,
47
+ "metadata": {},
48
+ "output_type": "execute_result"
49
+ }
50
+ ],
51
+ "source": [
52
+ "os.chdir(\"../\")\n",
53
+ "%pwd"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 4,
59
+ "id": "a770b8df",
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "from dataclasses import dataclass\n",
64
+ "from pathlib import Path\n",
65
+ "\n",
66
+ "\n",
67
+ "@dataclass(frozen=True)\n",
68
+ "class DataIngestionConfig:\n",
69
+ " root_dir: Path\n",
70
+ " source_URL: str\n",
71
+ " local_data_file: Path\n",
72
+ " unzip_dir: Path"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 6,
78
+ "id": "979add90",
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "from cnnClassifier.constants import *\n",
83
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
84
+ "class ConfigurationManager:\n",
85
+ " def __init__(\n",
86
+ " self,\n",
87
+ " config_filepath = CONFIG_FILE_PATH,\n",
88
+ " params_filepath = PARAMS_FILE_PATH):\n",
89
+ "\n",
90
+ " self.config = read_yaml(config_filepath)\n",
91
+ " self.params = read_yaml(params_filepath)\n",
92
+ "\n",
93
+ " create_directories([self.config.artifacts_root])\n",
94
+ "\n",
95
+ " def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
96
+ " config = self.config.data_ingestion\n",
97
+ "\n",
98
+ " create_directories([config.root_dir])\n",
99
+ "\n",
100
+ " data_ingestion_config = DataIngestionConfig(\n",
101
+ " root_dir=config.root_dir,\n",
102
+ " source_URL=config.source_URL,\n",
103
+ " local_data_file=config.local_data_file,\n",
104
+ " unzip_dir=config.unzip_dir \n",
105
+ " )\n",
106
+ "\n",
107
+ " return data_ingestion_config"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 9,
113
+ "id": "e4fd8f68",
114
+ "metadata": {},
115
+ "outputs": [
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "[2025-08-18 00:24:08,669: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
121
+ "[2025-08-18 00:24:08,684: INFO: common: yaml file: params.yaml loaded successfully]\n",
122
+ "[2025-08-18 00:24:08,686: INFO: common: created directory at: artifacts]\n",
123
+ "[2025-08-18 00:24:08,688: INFO: common: created directory at: artifacts/data_ingestion]\n",
124
+ "[2025-08-18 00:24:08,692: INFO: 78466947: Downloading data from https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n"
125
+ ]
126
+ },
127
+ {
128
+ "name": "stderr",
129
+ "output_type": "stream",
130
+ "text": [
131
+ "Downloading...\n",
132
+ "From (original): https://drive.google.com/uc?/export=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY\n",
133
+ "From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1z0mreUtRmR-P-magILsDR3T7M6IkGXtY&confirm=t&uuid=954f5f66-c0d6-4c40-a993-933880515813\n",
134
+ "To: f:\\Projects\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\artifacts\\data_ingestion\\data.zip\n",
135
+ "100%|██████████| 49.0M/49.0M [00:24<00:00, 2.03MB/s]"
136
+ ]
137
+ },
138
+ {
139
+ "name": "stdout",
140
+ "output_type": "stream",
141
+ "text": [
142
+ "[2025-08-18 00:24:36,267: INFO: 78466947: Downloaded data from https://drive.google.com/file/d/1z0mreUtRmR-P-magILsDR3T7M6IkGXtY/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n"
143
+ ]
144
+ },
145
+ {
146
+ "name": "stderr",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "\n"
150
+ ]
151
+ }
152
+ ],
153
+ "source": [
154
+ "import os\n",
155
+ "import zipfile\n",
156
+ "import gdown\n",
157
+ "from cnnClassifier import logger\n",
158
+ "from cnnClassifier.utils.common import get_size\n",
159
+ "\n",
160
+ "class DataIngestion:\n",
161
+ " def __init__(self, config: DataIngestionConfig):\n",
162
+ " self.config = config\n",
163
+ "\n",
164
+ "\n",
165
+ " \n",
166
+ " \n",
167
+ " def download_file(self)-> str:\n",
168
+ " '''\n",
169
+ " Fetch data from the url\n",
170
+ " '''\n",
171
+ "\n",
172
+ " try: \n",
173
+ " dataset_url = self.config.source_URL\n",
174
+ " zip_download_dir = self.config.local_data_file\n",
175
+ " os.makedirs(\"artifacts/data_ingestion\", exist_ok=True)\n",
176
+ " logger.info(f\"Downloading data from {dataset_url} into file {zip_download_dir}\")\n",
177
+ "\n",
178
+ " file_id = dataset_url.split(\"/\")[-2]\n",
179
+ " prefix = 'https://drive.google.com/uc?/export=download&id='\n",
180
+ " gdown.download(prefix+file_id,zip_download_dir)\n",
181
+ "\n",
182
+ " logger.info(f\"Downloaded data from {dataset_url} into file {zip_download_dir}\")\n",
183
+ "\n",
184
+ " except Exception as e:\n",
185
+ " raise e\n",
186
+ " \n",
187
+ " \n",
188
+ " def extract_zip_file(self):\n",
189
+ " \"\"\"\n",
190
+ " zip_file_path: str\n",
191
+ " Extracts the zip file into the data directory\n",
192
+ " Function returns None\n",
193
+ " \"\"\"\n",
194
+ " unzip_path = self.config.unzip_dir\n",
195
+ " os.makedirs(unzip_path, exist_ok=True)\n",
196
+ " with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
197
+ " zip_ref.extractall(unzip_path)\n",
198
+ "try:\n",
199
+ " config = ConfigurationManager()\n",
200
+ " data_ingestion_config = config.get_data_ingestion_config()\n",
201
+ " data_ingestion = DataIngestion(config=data_ingestion_config)\n",
202
+ " data_ingestion.download_file()\n",
203
+ " data_ingestion.extract_zip_file()\n",
204
+ "except Exception as e:\n",
205
+ " raise e"
206
+ ]
207
+ }
208
+ ],
209
+ "metadata": {
210
+ "kernelspec": {
211
+ "display_name": "cnn_env",
212
+ "language": "python",
213
+ "name": "python3"
214
+ },
215
+ "language_info": {
216
+ "codemirror_mode": {
217
+ "name": "ipython",
218
+ "version": 3
219
+ },
220
+ "file_extension": ".py",
221
+ "mimetype": "text/x-python",
222
+ "name": "python",
223
+ "nbconvert_exporter": "python",
224
+ "pygments_lexer": "ipython3",
225
+ "version": "3.11.3"
226
+ }
227
+ },
228
+ "nbformat": 4,
229
+ "nbformat_minor": 5
230
+ }
research/02_prepare_base_model.ipynb ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "29206888",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
13
+ ]
14
+ },
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "import os\n",
22
+ "%pwd"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "id": "7dce8d4e",
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/plain": [
34
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
35
+ ]
36
+ },
37
+ "execution_count": 2,
38
+ "metadata": {},
39
+ "output_type": "execute_result"
40
+ }
41
+ ],
42
+ "source": [
43
+ "os.chdir(\"../\")\n",
44
+ "%pwd"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 4,
50
+ "id": "c4d0c484",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "from dataclasses import dataclass\n",
55
+ "from pathlib import Path\n",
56
+ "\n",
57
+ "@dataclass(frozen=True)\n",
58
+ "class PrepareBaseModelConfig:\n",
59
+ " root_dir: Path\n",
60
+ " base_model_path: Path\n",
61
+ " updated_base_model_path: Path\n",
62
+ " params_image_size: list\n",
63
+ " params_learning_rate: float\n",
64
+ " params_include_top: bool\n",
65
+ " params_weights: str\n",
66
+ " params_classes: int"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 5,
72
+ "id": "26921811",
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "from cnnClassifier.constants import *\n",
77
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
78
+ "\n",
79
+ "class ConfigurationManager:\n",
80
+ " def __init__(\n",
81
+ " self,\n",
82
+ " config_filepath = CONFIG_FILE_PATH,\n",
83
+ " params_filepath = PARAMS_FILE_PATH):\n",
84
+ "\n",
85
+ " self.config = read_yaml(config_filepath)\n",
86
+ " self.params = read_yaml(params_filepath)\n",
87
+ "\n",
88
+ " create_directories([self.config.artifacts_root])\n",
89
+ "\n",
90
+ "\n",
91
+ " def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:\n",
92
+ " config = self.config.prepare_base_model\n",
93
+ " \n",
94
+ " create_directories([config.root_dir])\n",
95
+ "\n",
96
+ " prepare_base_model_config = PrepareBaseModelConfig(\n",
97
+ " root_dir=Path(config.root_dir),\n",
98
+ " base_model_path=Path(config.base_model_path),\n",
99
+ " updated_base_model_path=Path(config.updated_base_model_path),\n",
100
+ " params_image_size=self.params.IMAGE_SIZE,\n",
101
+ " params_learning_rate=self.params.LEARNING_RATE,\n",
102
+ " params_include_top=self.params.INCLUDE_TOP,\n",
103
+ " params_weights=self.params.WEIGHTS,\n",
104
+ " params_classes=self.params.CLASSES\n",
105
+ " )\n",
106
+ "\n",
107
+ " return prepare_base_model_config"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 6,
113
+ "id": "0442bc6f",
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "import os\n",
118
+ "import urllib.request as request\n",
119
+ "from zipfile import ZipFile\n",
120
+ "import tensorflow as tf\n",
121
+ "\n",
122
+ "class PrepareBaseModel:\n",
123
+ " def __init__(self, config: PrepareBaseModelConfig):\n",
124
+ " self.config = config\n",
125
+ "\n",
126
+ " \n",
127
+ " def get_base_model(self):\n",
128
+ " self.model = tf.keras.applications.vgg16.VGG16(\n",
129
+ " input_shape=self.config.params_image_size,\n",
130
+ " weights=self.config.params_weights,\n",
131
+ " include_top=self.config.params_include_top\n",
132
+ " )\n",
133
+ "\n",
134
+ " self.save_model(path=self.config.base_model_path, model=self.model)\n",
135
+ "\n",
136
+ "\n",
137
+ " \n",
138
+ " @staticmethod\n",
139
+ " def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n",
140
+ " if freeze_all:\n",
141
+ " for layer in model.layers:\n",
142
+ " model.trainable = False\n",
143
+ " elif (freeze_till is not None) and (freeze_till > 0):\n",
144
+ " for layer in model.layers[:-freeze_till]:\n",
145
+ " model.trainable = False\n",
146
+ "\n",
147
+ " flatten_in = tf.keras.layers.Flatten()(model.output)\n",
148
+ " prediction = tf.keras.layers.Dense(\n",
149
+ " units=classes,\n",
150
+ " activation=\"softmax\"\n",
151
+ " )(flatten_in)\n",
152
+ "\n",
153
+ " full_model = tf.keras.models.Model(\n",
154
+ " inputs=model.input,\n",
155
+ " outputs=prediction\n",
156
+ " )\n",
157
+ "\n",
158
+ " full_model.compile(\n",
159
+ " optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n",
160
+ " loss=tf.keras.losses.CategoricalCrossentropy(),\n",
161
+ " metrics=[\"accuracy\"]\n",
162
+ " )\n",
163
+ "\n",
164
+ " full_model.summary()\n",
165
+ " return full_model\n",
166
+ " \n",
167
+ "\n",
168
+ " def update_base_model(self):\n",
169
+ " self.full_model = self._prepare_full_model(\n",
170
+ " model=self.model,\n",
171
+ " classes=self.config.params_classes,\n",
172
+ " freeze_all=True,\n",
173
+ " freeze_till=None,\n",
174
+ " learning_rate=self.config.params_learning_rate\n",
175
+ " )\n",
176
+ "\n",
177
+ " self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n",
178
+ " \n",
179
+ "\n",
180
+ "\n",
181
+ " @staticmethod\n",
182
+ " def save_model(path: Path, model: tf.keras.Model):\n",
183
+ " model.save(path)"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "code",
188
+ "execution_count": 7,
189
+ "id": "b21b58b5",
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "name": "stdout",
194
+ "output_type": "stream",
195
+ "text": [
196
+ "[2025-08-20 01:44:50,956: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
197
+ "[2025-08-20 01:44:50,982: INFO: common: yaml file: params.yaml loaded successfully]\n",
198
+ "[2025-08-20 01:44:50,984: INFO: common: created directory at: artifacts]\n",
199
+ "[2025-08-20 01:44:50,986: INFO: common: created directory at: artifacts/prepare_base_model]\n",
200
+ "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5\n",
201
+ "58889256/58889256 [==============================] - 15s 0us/step\n",
202
+ "[2025-08-20 01:45:09,603: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n",
203
+ "Model: \"model\"\n",
204
+ "_________________________________________________________________\n",
205
+ " Layer (type) Output Shape Param # \n",
206
+ "=================================================================\n",
207
+ " input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n",
208
+ " \n",
209
+ " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
210
+ " \n",
211
+ " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
212
+ " \n",
213
+ " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
214
+ " \n",
215
+ " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
216
+ " \n",
217
+ " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
218
+ " \n",
219
+ " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
220
+ " \n",
221
+ " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
222
+ " \n",
223
+ " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
224
+ " \n",
225
+ " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
226
+ " \n",
227
+ " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
228
+ " \n",
229
+ " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
230
+ " \n",
231
+ " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
232
+ " \n",
233
+ " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
234
+ " \n",
235
+ " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",
236
+ " \n",
237
+ " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
238
+ " \n",
239
+ " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
240
+ " \n",
241
+ " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
242
+ " \n",
243
+ " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",
244
+ " \n",
245
+ " flatten (Flatten) (None, 25088) 0 \n",
246
+ " \n",
247
+ " dense (Dense) (None, 2) 50178 \n",
248
+ " \n",
249
+ "=================================================================\n",
250
+ "Total params: 14,764,866\n",
251
+ "Trainable params: 50,178\n",
252
+ "Non-trainable params: 14,714,688\n",
253
+ "_________________________________________________________________\n"
254
+ ]
255
+ }
256
+ ],
257
+ "source": [
258
+ "try:\n",
259
+ " config = ConfigurationManager()\n",
260
+ " prepare_base_model_config = config.get_prepare_base_model_config()\n",
261
+ " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n",
262
+ " prepare_base_model.get_base_model()\n",
263
+ " prepare_base_model.update_base_model()\n",
264
+ "except Exception as e:\n",
265
+ " raise e"
266
+ ]
267
+ }
268
+ ],
269
+ "metadata": {
270
+ "kernelspec": {
271
+ "display_name": "cnn_env",
272
+ "language": "python",
273
+ "name": "python3"
274
+ },
275
+ "language_info": {
276
+ "codemirror_mode": {
277
+ "name": "ipython",
278
+ "version": 3
279
+ },
280
+ "file_extension": ".py",
281
+ "mimetype": "text/x-python",
282
+ "name": "python",
283
+ "nbconvert_exporter": "python",
284
+ "pygments_lexer": "ipython3",
285
+ "version": "3.11.3"
286
+ }
287
+ },
288
+ "nbformat": 4,
289
+ "nbformat_minor": 5
290
+ }
research/03_model_trainer.ipynb ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
21
+ ]
22
+ },
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "%pwd"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 3,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "os.chdir(\"../\")"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
50
+ ]
51
+ },
52
+ "execution_count": 4,
53
+ "metadata": {},
54
+ "output_type": "execute_result"
55
+ }
56
+ ],
57
+ "source": [
58
+ "%pwd"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 7,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "from dataclasses import dataclass\n",
68
+ "from pathlib import Path\n",
69
+ "\n",
70
+ "\n",
71
+ "@dataclass(frozen=True)\n",
72
+ "class TrainingConfig:\n",
73
+ " root_dir: Path\n",
74
+ " trained_model_path: Path\n",
75
+ " updated_base_model_path: Path\n",
76
+ " training_data: Path\n",
77
+ " params_epochs: int\n",
78
+ " params_batch_size: int\n",
79
+ " params_is_augmentation: bool\n",
80
+ " params_image_size: list"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 6,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "from cnnClassifier.constants import *\n",
90
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
91
+ "import tensorflow as tf"
92
+ ]
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "execution_count": 8,
97
+ "metadata": {},
98
+ "outputs": [],
99
+ "source": [
100
+ "class ConfigurationManager:\n",
101
+ " def __init__(\n",
102
+ " self,\n",
103
+ " config_filepath = CONFIG_FILE_PATH,\n",
104
+ " params_filepath = PARAMS_FILE_PATH):\n",
105
+ "\n",
106
+ " self.config = read_yaml(config_filepath)\n",
107
+ " self.params = read_yaml(params_filepath)\n",
108
+ "\n",
109
+ " create_directories([self.config.artifacts_root])\n",
110
+ "\n",
111
+ " \n",
112
+ "\n",
113
+ " def get_training_config(self) -> TrainingConfig:\n",
114
+ " training = self.config.training\n",
115
+ " prepare_base_model = self.config.prepare_base_model\n",
116
+ " params = self.params\n",
117
+ " training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"Chest-CT-Scan-data\")\n",
118
+ " create_directories([\n",
119
+ " Path(training.root_dir)\n",
120
+ " ])\n",
121
+ "\n",
122
+ " training_config = TrainingConfig(\n",
123
+ " root_dir=Path(training.root_dir),\n",
124
+ " trained_model_path=Path(training.trained_model_path),\n",
125
+ " updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n",
126
+ " training_data=Path(training_data),\n",
127
+ " params_epochs=params.EPOCHS,\n",
128
+ " params_batch_size=params.BATCH_SIZE,\n",
129
+ " params_is_augmentation=params.AUGMENTATION,\n",
130
+ " params_image_size=params.IMAGE_SIZE\n",
131
+ " )\n",
132
+ "\n",
133
+ " return training_config"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 9,
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "import os\n",
143
+ "import urllib.request as request\n",
144
+ "from zipfile import ZipFile\n",
145
+ "import tensorflow as tf\n",
146
+ "import time"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 10,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "class Training:\n",
156
+ " def __init__(self, config: TrainingConfig):\n",
157
+ " self.config = config\n",
158
+ "\n",
159
+ " \n",
160
+ " def get_base_model(self):\n",
161
+ " self.model = tf.keras.models.load_model(\n",
162
+ " self.config.updated_base_model_path\n",
163
+ " )\n",
164
+ "\n",
165
+ " def train_valid_generator(self):\n",
166
+ "\n",
167
+ " datagenerator_kwargs = dict(\n",
168
+ " rescale = 1./255,\n",
169
+ " validation_split=0.20\n",
170
+ " )\n",
171
+ "\n",
172
+ " dataflow_kwargs = dict(\n",
173
+ " target_size=self.config.params_image_size[:-1],\n",
174
+ " batch_size=self.config.params_batch_size,\n",
175
+ " interpolation=\"bilinear\"\n",
176
+ " )\n",
177
+ "\n",
178
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
179
+ " **datagenerator_kwargs\n",
180
+ " )\n",
181
+ "\n",
182
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
183
+ " directory=self.config.training_data,\n",
184
+ " subset=\"validation\",\n",
185
+ " shuffle=False,\n",
186
+ " **dataflow_kwargs\n",
187
+ " )\n",
188
+ "\n",
189
+ " if self.config.params_is_augmentation:\n",
190
+ " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
191
+ " rotation_range=40,\n",
192
+ " horizontal_flip=True,\n",
193
+ " width_shift_range=0.2,\n",
194
+ " height_shift_range=0.2,\n",
195
+ " shear_range=0.2,\n",
196
+ " zoom_range=0.2,\n",
197
+ " **datagenerator_kwargs\n",
198
+ " )\n",
199
+ " else:\n",
200
+ " train_datagenerator = valid_datagenerator\n",
201
+ "\n",
202
+ " self.train_generator = train_datagenerator.flow_from_directory(\n",
203
+ " directory=self.config.training_data,\n",
204
+ " subset=\"training\",\n",
205
+ " shuffle=True,\n",
206
+ " **dataflow_kwargs\n",
207
+ " )\n",
208
+ "\n",
209
+ " \n",
210
+ " @staticmethod\n",
211
+ " def save_model(path: Path, model: tf.keras.Model):\n",
212
+ " model.save(path)\n",
213
+ "\n",
214
+ "\n",
215
+ "\n",
216
+ " \n",
217
+ " def train(self):\n",
218
+ " self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n",
219
+ " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n",
220
+ "\n",
221
+ " self.model.fit(\n",
222
+ " self.train_generator,\n",
223
+ " epochs=self.config.params_epochs,\n",
224
+ " steps_per_epoch=self.steps_per_epoch,\n",
225
+ " validation_steps=self.validation_steps,\n",
226
+ " validation_data=self.valid_generator\n",
227
+ " )\n",
228
+ "\n",
229
+ " self.save_model(\n",
230
+ " path=self.config.trained_model_path,\n",
231
+ " model=self.model\n",
232
+ " )\n",
233
+ "\n"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": 14,
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "name": "stdout",
243
+ "output_type": "stream",
244
+ "text": [
245
+ "[2025-08-20 02:03:39,280: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
246
+ "[2025-08-20 02:03:39,284: INFO: common: yaml file: params.yaml loaded successfully]\n",
247
+ "[2025-08-20 02:03:39,286: INFO: common: created directory at: artifacts]\n",
248
+ "[2025-08-20 02:03:39,288: INFO: common: created directory at: artifacts\\training]\n"
249
+ ]
250
+ },
251
+ {
252
+ "name": "stdout",
253
+ "output_type": "stream",
254
+ "text": [
255
+ "Found 68 images belonging to 2 classes.\n",
256
+ "Found 275 images belonging to 2 classes.\n",
257
+ "17/17 [==============================] - 63s 4s/step - loss: 14.4131 - accuracy: 0.5560 - val_loss: 0.2376 - val_accuracy: 0.8750\n"
258
+ ]
259
+ }
260
+ ],
261
+ "source": [
262
+ "try:\n",
263
+ " config = ConfigurationManager()\n",
264
+ " training_config = config.get_training_config()\n",
265
+ " training = Training(config=training_config)\n",
266
+ " training.get_base_model()\n",
267
+ " training.train_valid_generator()\n",
268
+ " training.train()\n",
269
+ " \n",
270
+ "except Exception as e:\n",
271
+ " raise e"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "metadata": {},
278
+ "outputs": [],
279
+ "source": []
280
+ }
281
+ ],
282
+ "metadata": {
283
+ "kernelspec": {
284
+ "display_name": "cnn_env",
285
+ "language": "python",
286
+ "name": "python3"
287
+ },
288
+ "language_info": {
289
+ "codemirror_mode": {
290
+ "name": "ipython",
291
+ "version": 3
292
+ },
293
+ "file_extension": ".py",
294
+ "mimetype": "text/x-python",
295
+ "name": "python",
296
+ "nbconvert_exporter": "python",
297
+ "pygments_lexer": "ipython3",
298
+ "version": "3.11.3"
299
+ }
300
+ },
301
+ "nbformat": 4,
302
+ "nbformat_minor": 2
303
+ }
research/04_model_evaluation_with_mlflow.ipynb ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 3,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\\\research'"
21
+ ]
22
+ },
23
+ "execution_count": 3,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "%pwd"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 4,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "os.chdir(\"../\")"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 5,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "'f:\\\\Projects\\\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC'"
50
+ ]
51
+ },
52
+ "execution_count": 5,
53
+ "metadata": {},
54
+ "output_type": "execute_result"
55
+ }
56
+ ],
57
+ "source": [
58
+ "%pwd"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 6,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "os.environ[\"MLFLOW_TRACKING_URI\"]=\"https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow\"\n",
68
+ "os.environ[\"MLFLOW_TRACKING_USERNAME\"]=\"AlyyanAhmed21\"\n",
69
+ "os.environ[\"MLFLOW_TRACKING_PASSWORD\"]=\"776454e991d86ea3a96179a4dc1ef72fbc134642\""
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 7,
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": [
78
+ "import tensorflow as tf"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 8,
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
+ "model = tf.keras.models.load_model(\"artifacts/training/model.h5\")"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 9,
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "from dataclasses import dataclass\n",
97
+ "from pathlib import Path\n",
98
+ "\n",
99
+ "@dataclass(frozen=True)\n",
100
+ "class EvaluationConfig:\n",
101
+ " path_of_model: Path\n",
102
+ " training_data: Path\n",
103
+ " all_params: dict\n",
104
+ " mlflow_uri: str\n",
105
+ " params_image_size: list\n",
106
+ " params_batch_size: int"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": 10,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "from cnnClassifier.constants import *\n",
116
+ "from cnnClassifier.utils.common import read_yaml, create_directories, save_json"
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": 11,
122
+ "metadata": {},
123
+ "outputs": [],
124
+ "source": [
125
+ "class ConfigurationManager:\n",
126
+ " def __init__(\n",
127
+ " self, \n",
128
+ " config_filepath = CONFIG_FILE_PATH,\n",
129
+ " params_filepath = PARAMS_FILE_PATH):\n",
130
+ " self.config = read_yaml(config_filepath)\n",
131
+ " self.params = read_yaml(params_filepath)\n",
132
+ " create_directories([self.config.artifacts_root])\n",
133
+ "\n",
134
+ " \n",
135
+ " def get_evaluation_config(self) -> EvaluationConfig:\n",
136
+ " eval_config = EvaluationConfig(\n",
137
+ " path_of_model=\"artifacts/training/model.h5\",\n",
138
+ " training_data=\"artifacts/data_ingestion/Chest-CT-Scan-data\",\n",
139
+ " mlflow_uri=\"https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow\",\n",
140
+ " all_params=self.params,\n",
141
+ " params_image_size=self.params.IMAGE_SIZE,\n",
142
+ " params_batch_size=self.params.BATCH_SIZE\n",
143
+ " )\n",
144
+ " return eval_config\n",
145
+ "\n",
146
+ "\n"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 12,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "import tensorflow as tf\n",
156
+ "from pathlib import Path\n",
157
+ "import mlflow\n",
158
+ "import mlflow.keras\n",
159
+ "from urllib.parse import urlparse"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 13,
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "class Evaluation:\n",
169
+ " def __init__(self, config: EvaluationConfig):\n",
170
+ " self.config = config\n",
171
+ "\n",
172
+ " \n",
173
+ " def _valid_generator(self):\n",
174
+ "\n",
175
+ " datagenerator_kwargs = dict(\n",
176
+ " rescale = 1./255,\n",
177
+ " validation_split=0.30\n",
178
+ " )\n",
179
+ "\n",
180
+ " dataflow_kwargs = dict(\n",
181
+ " target_size=self.config.params_image_size[:-1],\n",
182
+ " batch_size=self.config.params_batch_size,\n",
183
+ " interpolation=\"bilinear\"\n",
184
+ " )\n",
185
+ "\n",
186
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
187
+ " **datagenerator_kwargs\n",
188
+ " )\n",
189
+ "\n",
190
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
191
+ " directory=self.config.training_data,\n",
192
+ " subset=\"validation\",\n",
193
+ " shuffle=False,\n",
194
+ " **dataflow_kwargs\n",
195
+ " )\n",
196
+ "\n",
197
+ "\n",
198
+ " @staticmethod\n",
199
+ " def load_model(path: Path) -> tf.keras.Model:\n",
200
+ " return tf.keras.models.load_model(path)\n",
201
+ " \n",
202
+ "\n",
203
+ " def evaluation(self):\n",
204
+ " self.model = self.load_model(self.config.path_of_model)\n",
205
+ " self._valid_generator()\n",
206
+ " self.score = model.evaluate(self.valid_generator)\n",
207
+ " self.save_score()\n",
208
+ "\n",
209
+ " def save_score(self):\n",
210
+ " scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
211
+ " save_json(path=Path(\"scores.json\"), data=scores)\n",
212
+ "\n",
213
+ " \n",
214
+ " def log_into_mlflow(self):\n",
215
+ " mlflow.set_registry_uri(self.config.mlflow_uri)\n",
216
+ " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n",
217
+ " \n",
218
+ " with mlflow.start_run():\n",
219
+ " mlflow.log_params(self.config.all_params)\n",
220
+ " mlflow.log_metrics(\n",
221
+ " {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
222
+ " )\n",
223
+ " # Model registry does not work with file store\n",
224
+ " if tracking_url_type_store != \"file\":\n",
225
+ "\n",
226
+ " # Register the model\n",
227
+ " # There are other ways to use the Model Registry, which depends on the use case,\n",
228
+ " # please refer to the doc for more information:\n",
229
+ " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n",
230
+ " mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n",
231
+ " else:\n",
232
+ " mlflow.keras.log_model(self.model, \"model\")\n"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 14,
238
+ "metadata": {},
239
+ "outputs": [
240
+ {
241
+ "name": "stdout",
242
+ "output_type": "stream",
243
+ "text": [
244
+ "[2025-08-20 04:01:28,984: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
245
+ "[2025-08-20 04:01:28,988: INFO: common: yaml file: params.yaml loaded successfully]\n",
246
+ "[2025-08-20 04:01:28,991: INFO: common: created directory at: artifacts]\n"
247
+ ]
248
+ },
249
+ {
250
+ "name": "stdout",
251
+ "output_type": "stream",
252
+ "text": [
253
+ "Found 102 images belonging to 2 classes.\n",
254
+ "7/7 [==============================] - 16s 2s/step - loss: 57.2713 - accuracy: 0.4314\n",
255
+ "[2025-08-20 04:01:45,268: INFO: common: json file saved at: scores.json]\n"
256
+ ]
257
+ },
258
+ {
259
+ "name": "stderr",
260
+ "output_type": "stream",
261
+ "text": [
262
+ "2025/08/20 04:01:47 WARNING mlflow.tensorflow: You are saving a TensorFlow Core model or Keras model without a signature. Inference with mlflow.pyfunc.spark_udf() will not work unless the model's pyfunc representation accepts pandas DataFrames as inference inputs.\n"
263
+ ]
264
+ },
265
+ {
266
+ "name": "stdout",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "[2025-08-20 04:01:48,249: WARNING: save: Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 14). These functions will not be directly callable after loading.]\n",
270
+ "INFO:tensorflow:Assets written to: E:\\Temp\\tmp32wvm7sm\\model\\data\\model\\assets\n",
271
+ "[2025-08-20 04:01:49,538: INFO: builder_impl: Assets written to: E:\\Temp\\tmp32wvm7sm\\model\\data\\model\\assets]\n"
272
+ ]
273
+ },
274
+ {
275
+ "name": "stderr",
276
+ "output_type": "stream",
277
+ "text": [
278
+ "f:\\Projects\\End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC\\cnn_env\\Lib\\site-packages\\_distutils_hack\\__init__.py:33: UserWarning: Setuptools is replacing distutils.\n",
279
+ " warnings.warn(\"Setuptools is replacing distutils.\")\n",
280
+ "Registered model 'VGG16Model' already exists. Creating a new version of this model...\n",
281
+ "2025/08/20 04:02:45 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n",
282
+ "Created version '2' of model 'VGG16Model'.\n"
283
+ ]
284
+ }
285
+ ],
286
+ "source": [
287
+ "try:\n",
288
+ " config = ConfigurationManager()\n",
289
+ " eval_config = config.get_evaluation_config()\n",
290
+ " evaluation = Evaluation(eval_config)\n",
291
+ " evaluation.evaluation()\n",
292
+ " evaluation.log_into_mlflow()\n",
293
+ "\n",
294
+ "except Exception as e:\n",
295
+ " raise e"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": null,
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": []
304
+ }
305
+ ],
306
+ "metadata": {
307
+ "kernelspec": {
308
+ "display_name": "cnn_env",
309
+ "language": "python",
310
+ "name": "python3"
311
+ },
312
+ "language_info": {
313
+ "codemirror_mode": {
314
+ "name": "ipython",
315
+ "version": 3
316
+ },
317
+ "file_extension": ".py",
318
+ "mimetype": "text/x-python",
319
+ "name": "python",
320
+ "nbconvert_exporter": "python",
321
+ "pygments_lexer": "ipython3",
322
+ "version": "3.11.3"
323
+ },
324
+ "orig_nbformat": 4
325
+ },
326
+ "nbformat": 4,
327
+ "nbformat_minor": 2
328
+ }
research/trials.ipynb ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "981d0e26",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Hello, World!\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "print('Hello, World!')"
19
+ ]
20
+ }
21
+ ],
22
+ "metadata": {
23
+ "kernelspec": {
24
+ "display_name": "cnn_env",
25
+ "language": "python",
26
+ "name": "python3"
27
+ },
28
+ "language_info": {
29
+ "codemirror_mode": {
30
+ "name": "ipython",
31
+ "version": 3
32
+ },
33
+ "file_extension": ".py",
34
+ "mimetype": "text/x-python",
35
+ "name": "python",
36
+ "nbconvert_exporter": "python",
37
+ "pygments_lexer": "ipython3",
38
+ "version": "3.11.3"
39
+ }
40
+ },
41
+ "nbformat": 4,
42
+ "nbformat_minor": 5
43
+ }
scores.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "loss": 0.13162432610988617,
3
+ "accuracy": 1.0
4
+ }
setup.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# setup.py

import setuptools

# The PyPI long description comes straight from the README.
with open("README.md", "r", encoding="utf-8") as readme:
    long_description = readme.read()

__version__ = "0.0.0"

REPO_NAME = "End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC"
AUTHOR_USER_NAME = "AlyyanAhmed21"  # GitHub username
SRC_REPO = "cnnClassifier"  # Name of the main source folder under src/
AUTHOR_EMAIL = "alyyanawan19@gmail.com"  # Contact email

_github_url = f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}"

setuptools.setup(
    name=SRC_REPO,
    version=__version__,
    author=AUTHOR_USER_NAME,
    author_email=AUTHOR_EMAIL,
    description="A small python package for CNN app",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url=_github_url,
    project_urls={
        "Bug Tracker": f"{_github_url}/issues",
    },
    # src layout: packages live under src/, discovered automatically.
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
)
src/cnnClassifier/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
import logging

# One shared record format: timestamp, level, originating module, message.
logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"

log_dir = "logs"
log_filepath = os.path.join(log_dir, "running_logs.log")
os.makedirs(log_dir, exist_ok=True)

# Mirror every INFO+ record both to logs/running_logs.log and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[
        logging.FileHandler(log_filepath),
        logging.StreamHandler(sys.stdout),
    ],
)

# Package-wide logger, used as `from cnnClassifier import logger`.
logger = logging.getLogger("cnnClassifierLogger")
src/cnnClassifier/components/__init__.py ADDED
File without changes
src/cnnClassifier/components/data_ingestion.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+ import gdown
4
+ from cnnClassifier import logger
5
+ from cnnClassifier.utils.common import get_size
6
+ from cnnClassifier.entity.config_entity import DataIngestionConfig
7
+
8
+
9
class DataIngestion:
    """Downloads the dataset zip from Google Drive and unpacks it locally."""

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self) -> str:
        """Fetch the dataset zip from the configured Google Drive URL.

        Returns:
            str: local path the archive was written to.
        """
        try:
            dataset_url = self.config.source_URL
            zip_download_dir = self.config.local_data_file
            os.makedirs("artifacts/data_ingestion", exist_ok=True)
            logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

            # Drive share links look like .../d/<file_id>/view — grab the id
            # and build a direct-download URL for gdown.
            file_id = dataset_url.split("/")[-2]
            # BUGFIX: the prefix previously contained a stray '/' after '?'
            # ("uc?/export=download"), producing a malformed query string.
            prefix = 'https://drive.google.com/uc?export=download&id='
            gdown.download(prefix + file_id, zip_download_dir)

            logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")
            # BUGFIX: the signature promises `-> str` but nothing was returned.
            return zip_download_dir

        except Exception as e:
            raise e

    def extract_zip_file(self):
        """
        Extracts the downloaded zip file into the configured unzip directory.
        Function returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
src/cnnClassifier/components/model_evaluation_mlflow.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from pathlib import Path
3
+ import mlflow
4
+ import mlflow.keras
5
+ from urllib.parse import urlparse
6
+ from cnnClassifier.entity.config_entity import EvaluationConfig
7
+ from cnnClassifier.utils.common import save_json
8
+
9
+ # --- NEW IMPORTS for advanced evaluation ---
10
+ from sklearn.metrics import confusion_matrix, classification_report
11
+ import numpy as np
12
+ import seaborn as sns
13
+ import matplotlib.pyplot as plt
14
+ # -------------------------------------------
15
+
16
class Evaluation:
    """Evaluates the trained model on a validation split and logs
    metrics, a classification report, a confusion-matrix plot, and the
    model itself to MLflow."""

    def __init__(self, config: EvaluationConfig):
        self.config = config
        self.model = None            # set by evaluation()
        self.valid_generator = None  # set by _valid_generator()
        self.score = None            # [loss, accuracy] from model.evaluate
        self.y_true = None           # ground-truth class indices
        self.y_pred = None           # argmax of predicted probabilities

    def _valid_generator(self):
        """Build the 30%-split validation generator (rescaled, unshuffled)."""
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.30
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(**datagenerator_kwargs)

        # shuffle=False keeps generator order aligned with `.classes`,
        # which _get_predictions() relies on.
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load a saved Keras model from disk."""
        return tf.keras.models.load_model(path)

    def _get_predictions(self):
        """Gets ground truth labels and model's predicted labels."""
        self.y_true = self.valid_generator.classes
        y_pred_probs = self.model.predict(self.valid_generator)
        self.y_pred = np.argmax(y_pred_probs, axis=1)

    def evaluation(self):
        """Loads model, evaluates basic metrics, and gets detailed predictions."""
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = self.model.evaluate(self.valid_generator)
        self._get_predictions()
        self.save_score()

    def save_score(self):
        """Write {loss, accuracy} to scores.json; NaN-safe so the file
        always exists for downstream DVC stages."""
        # If self.score is None or contains NaN, create a default file
        if self.score is None or np.isnan(self.score).any():
            print("⚠️ Warning: Invalid scores detected (NaN). Saving default scores file.")
            scores = {"loss": float('nan'), "accuracy": float('nan')}
        else:
            # BUGFIX: cast to built-in float so numpy scalar types can
            # never break json serialization inside save_json.
            scores = {"loss": float(self.score[0]), "accuracy": float(self.score[1])}

        # This will now always create the file
        save_json(path=Path("scores.json"), data=scores)
        print(f"Scores saved to scores.json: {scores}")

    def log_confusion_matrix(self):
        """Generates, saves, and logs the confusion matrix plot to MLflow."""
        cm = confusion_matrix(self.y_true, self.y_pred)
        class_names = list(self.valid_generator.class_indices.keys())

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')

        matrix_path = Path("confusion_matrix.png")
        plt.savefig(matrix_path)
        # BUGFIX: close the figure so repeated evaluations do not leak
        # matplotlib figures/memory.
        plt.close()

        # str() for compatibility with mlflow versions expecting a plain path.
        mlflow.log_artifact(str(matrix_path), "plots")
        print("Confusion Matrix plot saved and logged to MLflow.")

    def log_into_mlflow(self):
        """Log params, metrics, per-class report, the confusion matrix and
        the model into the configured MLflow tracking server."""
        mlflow.set_tracking_uri(self.config.mlflow_uri)

        with mlflow.start_run():
            print("Logging basic parameters and metrics to MLflow...")
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics({"loss": self.score[0], "accuracy": self.score[1]})

            # --- Log detailed classification report metrics ---
            print("\n--- Classification Report ---")
            class_names = list(self.valid_generator.class_indices.keys())
            report = classification_report(self.y_true, self.y_pred,
                                           target_names=class_names,
                                           output_dict=True)
            print(classification_report(self.y_true, self.y_pred,
                                        target_names=class_names))

            # Flatten the nested report dict into scalar MLflow metrics
            # (e.g. "<class>_precision"). Non-dict entries ("accuracy")
            # are skipped — overall accuracy is logged above.
            for className, metrics in report.items():
                if isinstance(metrics, dict):
                    for metricName, value in metrics.items():
                        mlflow.log_metric(f"{className}_{metricName}", value)

            # --- Log the confusion matrix plot ---
            self.log_confusion_matrix()

            # --- Log the model as an artifact ---
            print("Logging model as an artifact...")
            mlflow.keras.log_model(self.model, "model")

            print("MLflow logging complete.")
src/cnnClassifier/components/model_trainer.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request as request
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ import time
6
+ from cnnClassifier.entity.config_entity import TrainingConfig
7
+ from pathlib import Path
8
+
9
+ # --- NEW IMPORTS ---
10
+ import pandas as pd
11
+ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
12
+ # --------------------
13
+
14
class Training:
    """Fine-tunes the prepared (frozen VGG16 + head) model on the
    ingested image dataset, with checkpointing and early stopping."""

    def __init__(self, config: TrainingConfig):
        self.config = config
        self.model = None            # set by get_base_model()
        self.train_generator = None  # set by train_valid_generator()
        self.valid_generator = None  # set by train_valid_generator()

    def get_base_model(self):
        """Load the updated base model produced by the prepare stage."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """Build the train/validation generators (80/20 split,
        optional mild augmentation on the training side only)."""
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            # Deliberately mild transforms (reduced for stability).
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=20,
                horizontal_flip=True,
                width_shift_range=0.1,
                height_shift_range=0.1,
                shear_range=0.1,
                zoom_range=0.1,
                **datagenerator_kwargs
            )
        else:
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

        # Print class indices to be 100% sure of the label mapping.
        print(f"Discovered class indices: {self.train_generator.class_indices}")

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist a Keras model to disk."""
        model.save(path)

    def train(self):
        """Run model.fit with best-checkpoint saving and early stopping,
        then dump the per-epoch history to training_history.csv."""
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        # Saves the BEST model (by validation accuracy) to the configured path.
        # BUGFIX: pass the filepath as str — some TF/Keras versions reject
        # a pathlib.Path here.
        best_model_checkpoint = ModelCheckpoint(
            filepath=str(self.config.trained_model_path),
            save_best_only=True,
            monitor='val_accuracy',
            mode='max',
            verbose=1
        )

        # Stop once val_accuracy plateaus and roll back to the best weights.
        early_stopping = EarlyStopping(
            monitor='val_accuracy',
            patience=5,  # epochs with no improvement to wait
            restore_best_weights=True,
            verbose=1
        )

        callbacks_list = [best_model_checkpoint, early_stopping]

        history = self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator,
            callbacks=callbacks_list
        )

        # Keep per-epoch metrics for offline analysis/plotting.
        history_df = pd.DataFrame(history.history)
        history_path = "training_history.csv"  # saved in the repo root
        history_df.to_csv(history_path, index=False)
        print(f"✅ Training history saved to {history_path}")

        # NOTE: no final save_model() call — ModelCheckpoint has already
        # persisted the best model to trained_model_path.
src/cnnClassifier/components/prepare_base_model.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request as request
3
+ from zipfile import ZipFile
4
+ import tensorflow as tf
5
+ from pathlib import Path
6
+ from cnnClassifier.entity.config_entity import PrepareBaseModelConfig
7
+
8
+
9
+
10
+
11
+
12
class PrepareBaseModel:
    """Instantiates VGG16, freezes the requested layers, attaches a
    softmax classifier head, and persists both models to disk."""

    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config

    def get_base_model(self):
        """Download/instantiate VGG16 per config and save it."""
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Freeze backbone layers, add a Flatten+Dense head, and compile.

        Args:
            model: backbone to extend.
            classes (int): number of output classes.
            freeze_all (bool): freeze every backbone layer.
            freeze_till (int | None): if set (>0), freeze all but the last
                `freeze_till` layers.
            learning_rate (float): SGD learning rate.
        """
        # BUGFIX: the original assigned `model.trainable` inside these loops
        # instead of `layer.trainable`, which froze the WHOLE model even in
        # the partial-freeze (freeze_till) branch.
        if freeze_all:
            for layer in model.layers:
                layer.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for layer in model.layers[:-freeze_till]:
                layer.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Build the fully-frozen classifier model and persist it."""
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Persist a Keras model to disk."""
        model.save(path)
74
+
src/cnnClassifier/config/__init__.py ADDED
File without changes
src/cnnClassifier/config/configuration.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from cnnClassifier.constants import *
3
+ from cnnClassifier.utils.common import read_yaml, create_directories , save_json
4
+ from cnnClassifier.entity.config_entity import (DataIngestionConfig, PrepareBaseModelConfig, TrainingConfig, EvaluationConfig)
5
+
6
class ConfigurationManager:
    """Reads config.yaml / params.yaml and builds the typed config
    objects each pipeline stage consumes."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        # read_yaml returns ConfigBox objects, so values below are reached
        # with attribute access (self.config.data_ingestion, ...).
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion config from the `data_ingestion` YAML block."""
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_URL=config.source_URL,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir
        )

        return data_ingestion_config

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Build the base-model config from YAML paths + training params."""
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            updated_base_model_path=Path(config.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES
        )

        return prepare_base_model_config

    def get_training_config(self) -> TrainingConfig:
        """Build the training config; dataset dir is derived from the
        ingestion unzip dir."""
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        # NOTE(review): "Chest-CT-Scan-data" is hard-coded — it must match
        # the folder name inside the downloaded archive; confirm.
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "Chest-CT-Scan-data")
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE
        )

        return training_config


    def get_evaluation_config(self) -> EvaluationConfig:
        """Build the evaluation config (paths and MLflow URI are
        hard-coded here rather than read from YAML)."""
        # NOTE(review): evaluation loads "model.h5" while the
        # PredictionPipeline loads "best_model.h5" — confirm these are
        # intentionally different artifacts.
        eval_config = EvaluationConfig(
            path_of_model="artifacts/training/model.h5",
            training_data="artifacts/data_ingestion/Chest-CT-Scan-data",
            mlflow_uri="https://dagshub.com/AlyyanAhmed21/End-to-End-Chest-Cancer-Classification-using-MLflow-and-DVC.mlflow",
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE
        )
        return eval_config
82
+
src/cnnClassifier/constants/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from pathlib import Path

# Single source of truth for the YAML files ConfigurationManager reads.
CONFIG_FILE_PATH = Path("config/config.yaml")
PARAMS_FILE_PATH = Path("params.yaml")
src/cnnClassifier/entity/__init__.py ADDED
File without changes
src/cnnClassifier/entity/config_entity.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+
5
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage."""
    root_dir: Path          # stage working directory
    source_URL: str         # share URL the dataset zip is downloaded from
    local_data_file: Path   # where the downloaded zip is written
    unzip_dir: Path         # where the archive is extracted
11
+
12
+
13
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for building/saving the base model."""
    root_dir: Path                 # stage working directory
    base_model_path: Path          # raw backbone save location
    updated_base_model_path: Path  # backbone + classifier head save location
    params_image_size: list        # model input shape, e.g. [H, W, C]
    params_learning_rate: float    # optimizer learning rate
    params_include_top: bool       # keep the backbone's own classifier head?
    params_weights: str            # pretrained weight set identifier
    params_classes: int            # number of output classes
23
+
24
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the model-training stage."""
    root_dir: Path                 # stage working directory
    trained_model_path: Path       # where the trained model is saved
    updated_base_model_path: Path  # model to fine-tune (from prepare stage)
    training_data: Path            # image dataset directory
    params_epochs: int             # number of training epochs
    params_batch_size: int         # generator batch size
    params_is_augmentation: bool   # enable training-time augmentation?
    params_image_size: list        # model input shape, e.g. [H, W, C]
34
+
35
+
36
@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable settings for the evaluation/MLflow-logging stage."""
    path_of_model: Path     # trained model to evaluate
    training_data: Path     # dataset the validation split is taken from
    all_params: dict        # full params.yaml contents, logged to MLflow
    mlflow_uri: str         # MLflow tracking server URI
    params_image_size: list # model input shape, e.g. [H, W, C]
    params_batch_size: int  # generator batch size
src/cnnClassifier/pipeline/__init__.py ADDED
File without changes
src/cnnClassifier/pipeline/prediction.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import tensorflow as tf
3
+ from tensorflow.keras.preprocessing import image
4
+ import os
5
+
6
class PredictionPipeline:
    """Runs a single-image inference with the best trained model."""

    def __init__(self, filename):
        self.filename = filename

    def predict(self):
        """Classify the image at self.filename; return the class index."""
        # Load the best checkpoint produced by the DVC training stage.
        model_path = os.path.join("artifacts", "training", "best_model.h5")
        model = tf.keras.models.load_model(model_path)

        # Preprocess exactly like training: resize to 224x224, then
        # rescale pixel values into [0, 1].
        img = image.load_img(self.filename, target_size=(224, 224))
        arr = image.img_to_array(img) / 255.0

        # Add the leading batch dimension expected by the model.
        batch = np.expand_dims(arr, axis=0)

        # argmax over class probabilities -> integer class index.
        result_index = np.argmax(model.predict(batch), axis=1)[0]
        print(f"Model predicted index: {result_index}")

        # The caller (app.py) translates this raw index into a
        # human-readable label such as "Cancer"/"Normal".
        return result_index
src/cnnClassifier/pipeline/stage_01_data_ingestion.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.data_ingestion import DataIngestion
3
+ from cnnClassifier import logger
4
+
5
+
6
+
7
STAGE_NAME = "Data Ingestion stage"


class DataIngestionTrainingPipeline:
    """Pipeline stage: download and extract the raw dataset."""

    def main(self):
        """Wire the ingestion config into the component and run both steps."""
        config_manager = ConfigurationManager()
        ingestion_config = config_manager.get_data_ingestion_config()
        ingestion = DataIngestion(config=ingestion_config)
        ingestion.download_file()
        ingestion.extract_zip_file()


if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        DataIngestionTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
src/cnnClassifier/pipeline/stage_02_prepare_base_model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.prepare_base_model import PrepareBaseModel
3
+ from cnnClassifier import logger
4
+
5
+
6
+
7
STAGE_NAME = "Prepare base model"


class PrepareBaseModelTrainingPipeline:
    """Pipeline stage: build, freeze, and save the base model."""

    def main(self):
        """Instantiate the backbone, then attach and save the full model."""
        config_manager = ConfigurationManager()
        base_model_config = config_manager.get_prepare_base_model_config()
        preparer = PrepareBaseModel(config=base_model_config)
        preparer.get_base_model()
        preparer.update_base_model()


if __name__ == '__main__':
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        PrepareBaseModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
src/cnnClassifier/pipeline/stage_03_model_trainer.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.model_trainer import Training
3
+ from cnnClassifier import logger
4
+
5
+
6
+
7
STAGE_NAME = "Training"


class ModelTrainingPipeline:
    """Pipeline stage: fine-tune the prepared base model."""

    def main(self):
        """Load the model, build data generators, and run training."""
        config_manager = ConfigurationManager()
        training_config = config_manager.get_training_config()
        trainer = Training(config=training_config)
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train()


if __name__ == '__main__':
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        ModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
35
+
src/cnnClassifier/pipeline/stage_04_model_evaluation.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.model_evaluation_mlflow import Evaluation
3
+ from cnnClassifier import logger
4
+ from dotenv import load_dotenv
5
+
6
# Load environment variables from .env file for MLflow credentials
load_dotenv()

STAGE_NAME = "Evaluation stage"


class EvaluationPipeline:
    """Pipeline stage: evaluate the trained model and log to MLflow."""

    def main(self):
        """Run evaluation (which also writes scores.json) and push
        metrics/artifacts to MLflow."""
        config_manager = ConfigurationManager()
        eval_config = config_manager.get_evaluation_config()
        evaluator = Evaluation(eval_config)
        evaluator.evaluation()  # save_score() is called inside evaluation()
        evaluator.log_into_mlflow()


if __name__ == '__main__':
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        EvaluationPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        logger.exception(e)
        raise e
src/cnnClassifier/utils/__init__.py ADDED
File without changes
src/cnnClassifier/utils/common.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from box.exceptions import BoxValueError
3
+ import yaml
4
+ from cnnClassifier import logger
5
+ import json
6
+ import joblib
7
+ from ensure import ensure_annotations
8
+ from box import ConfigBox
9
+ from pathlib import Path
10
+ from typing import Any
11
+ import base64
12
+
13
+
14
+
15
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Read a yaml file and return its contents with attribute access.

    Args:
        path_to_yaml (Path): path to the yaml file to load

    Raises:
        ValueError: if yaml file is empty
        e: any other failure while opening/parsing the file

    Returns:
        ConfigBox: ConfigBox type
    """
    try:
        with open(path_to_yaml) as yaml_file:
            content = yaml.safe_load(yaml_file)
            logger.info(f"yaml file: {path_to_yaml} loaded successfully")
            return ConfigBox(content)
    except BoxValueError:
        # ConfigBox(None) raises BoxValueError for an empty file.
        raise ValueError("yaml file is empty")
    except Exception as e:
        raise e
38
+
39
+
40
+
41
@ensure_annotations
def create_directories(path_to_directories: list, verbose=True):
    """Create every directory in the list (idempotent via exist_ok).

    Args:
        path_to_directories (list): list of directory paths to create
        verbose (bool, optional): log each created directory. Defaults to True.
    """
    for path in path_to_directories:
        os.makedirs(path, exist_ok=True)
        if verbose:
            logger.info(f"created directory at: {path}")
53
+
54
+
55
@ensure_annotations
def save_json(path: Path, data: dict):
    """Serialize *data* to *path* as pretty-printed (indent=4) JSON.

    Args:
        path (Path): destination json file
        data (dict): data to be written
    """
    with open(path, "w") as out_file:
        json.dump(data, out_file, indent=4)

    logger.info(f"json file saved at: {path}")
67
+
68
+
69
+
70
+
71
@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Load a json file's data with attribute-style access.

    Args:
        path (Path): path to json file

    Returns:
        ConfigBox: data as class attributes instead of dict
    """
    with open(path) as f:
        content = json.load(f)

    # BUGFIX: corrected "succesfully" typo in the log message.
    logger.info(f"json file loaded successfully from: {path}")
    return ConfigBox(content)
86
+
87
+
88
@ensure_annotations
def save_bin(data: Any, path: Path):
    """Save an arbitrary object as a joblib binary file.

    Args:
        data (Any): data to be saved as binary
        path (Path): path to binary file
    """
    joblib.dump(value=data, filename=path)
    logger.info(f"binary file saved at: {path}")
98
+
99
+
100
@ensure_annotations
def load_bin(path: Path) -> Any:
    """Load a joblib binary file.

    Args:
        path (Path): path to binary file

    Returns:
        Any: object stored in the file
    """
    data = joblib.load(path)
    logger.info(f"binary file loaded from: {path}")
    return data
113
+
114
@ensure_annotations
def get_size(path: Path) -> str:
    """Return the size of the file at *path*, rounded to the nearest KB.

    Args:
        path (Path): path of the file

    Returns:
        str: human-readable size, e.g. "~ 12 KB"
    """
    kb = round(os.path.getsize(path) / 1024)
    return f"~ {kb} KB"
126
+
127
+
128
def decodeImage(imgstring, fileName):
    """Decode a base64 image payload and write the raw bytes to fileName.

    Args:
        imgstring: base64-encoded image data (str or bytes).
        fileName: destination file path.
    """
    imgdata = base64.b64decode(imgstring)
    # BUGFIX: removed the redundant f.close() inside the with-block;
    # the context manager already closes the file.
    with open(fileName, 'wb') as f:
        f.write(imgdata)
133
+
134
+
135
def encodeImageIntoBase64(croppedImagePath):
    """Read the file at croppedImagePath and return its base64-encoded bytes."""
    with open(croppedImagePath, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw)
static/script.js ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Client-side controller for the scan-upload UI: previews the chosen image,
// POSTs it (base64) to the /predict endpoint, and renders the JSON result.
document.addEventListener('DOMContentLoaded', function () {
    // --- DOM Elements ---
    const fileInput = document.getElementById('fileInput');
    const uploadLabel = document.querySelector('.upload-label');
    const imagePreviewContainer = document.querySelector('.image-preview-container');
    const imagePreview = document.getElementById('imagePreview');
    const removeImageBtn = document.getElementById('removeImageBtn');
    const predictBtn = document.getElementById('predictBtn');
    const resultContainer = document.getElementById('result-container');
    const jsonResponse = document.getElementById('jsonResponse').querySelector('code');

    // Base64 payload of the currently selected image (null when none).
    let base64Image = null;

    // --- Event Listeners ---
    fileInput.addEventListener('change', handleFileSelect);
    removeImageBtn.addEventListener('click', resetUploader);
    predictBtn.addEventListener('click', handlePrediction);

    // --- Functions ---

    /**
     * Handles the file selection, reads the file as a Base64 string,
     * and updates the UI to show the preview.
     */
    function handleFileSelect(event) {
        const file = event.target.files[0];
        if (file) {
            const reader = new FileReader();
            reader.onload = function(e) {
                // Display the image preview
                imagePreview.src = e.target.result;
                uploadLabel.style.display = 'none';
                imagePreviewContainer.style.display = 'block';

                // Store the Base64 string (without the data URI prefix)
                base64Image = e.target.result.split(',')[1];

                // Enable the predict button
                predictBtn.disabled = false;
                resultContainer.innerHTML = '<p class="text-muted">Ready to predict.</p>';
                jsonResponse.textContent = 'Waiting for response...';
            };
            reader.readAsDataURL(file);
        }
    }

    /**
     * Resets the uploader to its initial state.
     */
    function resetUploader() {
        fileInput.value = ''; // Clear the file input
        base64Image = null;
        imagePreview.src = '#';
        uploadLabel.style.display = 'flex';
        imagePreviewContainer.style.display = 'none';
        predictBtn.disabled = true;
        resultContainer.innerHTML = '<p class="text-muted">Results will be displayed here after prediction.</p>';
        jsonResponse.textContent = 'Waiting for response...';
    }

    /**
     * Handles the prediction API call.
     */
    async function handlePrediction() {
        if (!base64Image) {
            alert('Please upload an image first.');
            return;
        }

        setLoadingState(true);

        // !! IMPORTANT: Change this URL to your actual API endpoint !!
        const apiUrl = '/predict'; // Example for a local Flask app

        try {
            const response = await fetch(apiUrl, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ image: base64Image }),
            });

            if (!response.ok) {
                throw new Error(`Server error: ${response.statusText}`);
            }

            const data = await response.json();
            displayResults(data);

        } catch (error) {
            console.error('Prediction Error:', error);
            displayError(error.message);
        } finally {
            // Always clear the spinner, success or failure.
            setLoadingState(false);
        }
    }

    /**
     * Displays the prediction results in a user-friendly format.
     */
    function displayResults(data) {
        // Assuming the response is like: [{"prediction": "Normal"}]
        const prediction = data[0]?.prediction; // Safely access the prediction

        let resultHtml = '';
        if (prediction) {
            if (prediction.toLowerCase() === 'normal') {
                resultHtml = `
                    <div class="result-normal">
                        <i class="fas fa-check-circle result-icon"></i>
                        <h3>Prediction: Normal</h3>
                        <p>The model predicts that the scan is not cancerous.</p>
                    </div>`;
            } else {
                resultHtml = `
                    <div class="result-cancer">
                        <i class="fas fa-exclamation-triangle result-icon"></i>
                        <h3>Prediction: Cancer Detected</h3>
                        <p>The model predicts a high probability of malignancy. Please consult a medical professional.</p>
                    </div>`;
            }
        } else {
            resultHtml = `<p>Could not determine prediction from the response.</p>`;
        }

        resultContainer.innerHTML = resultHtml;
        jsonResponse.textContent = JSON.stringify(data, null, 2);
    }

    /**
     * Displays an error message in the UI.
     */
    function displayError(errorMessage) {
        resultContainer.innerHTML = `
            <div class="text-danger">
                <i class="fas fa-times-circle result-icon"></i>
                <h3>Prediction Failed</h3>
                <p>${errorMessage}</p>
            </div>`;
        jsonResponse.textContent = `Error: ${errorMessage}`;
    }

    /**
     * Manages the loading state of the predict button.
     */
    function setLoadingState(isLoading) {
        const spinner = predictBtn.querySelector('.spinner-border');
        const btnText = predictBtn.querySelector('.btn-text');

        if (isLoading) {
            predictBtn.disabled = true;
            spinner.style.display = 'inline-block';
            btnText.style.display = 'none';
        } else {
            predictBtn.disabled = false;
            spinner.style.display = 'none';
            btnText.style.display = 'inline-block';
        }
    }
});
static/style.css ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Using CSS Variables for easy theme changes */
2
+ :root {
3
+ --primary-color: #007bff;
4
+ --secondary-color: #6c757d;
5
+ --background-color: #f8f9fa;
6
+ --card-bg-color: #ffffff;
7
+ --font-family: 'Poppins', sans-serif;
8
+ --success-color: #28a745;
9
+ --danger-color: #dc3545;
10
+ }
11
+
12
+ body {
13
+ font-family: var(--font-family);
14
+ background-color: var(--background-color);
15
+ }
16
+
17
+ header h1 {
18
+ color: var(--primary-color);
19
+ font-weight: 600;
20
+ }
21
+
22
+ .card {
23
+ border-radius: 15px;
24
+ transition: transform 0.2s ease-in-out;
25
+ }
26
+
27
+ .card:hover {
28
+ transform: translateY(-5px);
29
+ }
30
+
31
+ .card-header {
32
+ border-top-left-radius: 15px;
33
+ border-top-right-radius: 15px;
34
+ }
35
+
36
+ .image-upload-wrapper {
37
+ position: relative;
38
+ width: 100%;
39
+ height: 350px;
40
+ border: 2px dashed var(--primary-color);
41
+ border-radius: 10px;
42
+ display: flex;
43
+ align-items: center;
44
+ justify-content: center;
45
+ overflow: hidden;
46
+ background-color: #f0f6ff;
47
+ }
48
+
49
+ .upload-label {
50
+ cursor: pointer;
51
+ text-align: center;
52
+ color: var(--primary-color);
53
+ }
54
+
55
+ .upload-label:hover .upload-icon {
56
+ transform: scale(1.1);
57
+ color: #0056b3;
58
+ }
59
+
60
+ .upload-icon {
61
+ transition: transform 0.2s ease-in-out;
62
+ }
63
+
64
+ .image-preview-container {
65
+ position: absolute;
66
+ top: 0;
67
+ left: 0;
68
+ width: 100%;
69
+ height: 100%;
70
+ display: none; /* Hidden by default */
71
+ }
72
+
73
+ #imagePreview {
74
+ width: 100%;
75
+ height: 100%;
76
+ object-fit: contain; /* Use contain to see the whole image */
77
+ padding: 10px;
78
+ }
79
+
80
+ .remove-btn {
81
+ position: absolute;
82
+ top: 10px;
83
+ right: 10px;
84
+ border-radius: 50%;
85
+ width: 30px;
86
+ height: 30px;
87
+ display: flex;
88
+ align-items: center;
89
+ justify-content: center;
90
+ font-size: 1.2rem;
91
+ line-height: 1;
92
+ }
93
+
94
+ #result-container h3 {
95
+ font-weight: 600;
96
+ }
97
+
98
+ .result-normal {
99
+ color: var(--success-color);
100
+ }
101
+
102
+ .result-cancer {
103
+ color: var(--danger-color);
104
+ }
105
+
106
+ .result-icon {
107
+ font-size: 4rem;
108
+ margin-bottom: 1rem;
109
+ }
110
+
111
+ #jsonResponse {
112
+ max-height: 200px;
113
+ overflow-y: auto;
114
+ white-space: pre-wrap;
115
+ word-break: break-all;
116
+ }
template.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import logging
from pathlib import Path

# Set up basic logging to see the script's output
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')

# Define the project name
project_name = "cnnClassifier"

# List of files and directories to be created
list_of_files = [
    ".github/workflows/.gitkeep",
    f"src/{project_name}/__init__.py",
    f"src/{project_name}/components/__init__.py",
    f"src/{project_name}/utils/__init__.py",
    f"src/{project_name}/config/__init__.py",
    f"src/{project_name}/config/configuration.py",
    f"src/{project_name}/pipeline/__init__.py",
    f"src/{project_name}/entity/__init__.py",
    f"src/{project_name}/constants/__init__.py",
    "config/config.yaml",
    "dvc.yaml",
    "params.yaml",
    "requirements.txt",
    "setup.py",
    "research/trials.ipynb",
    "templates/index.html"
]


def create_project_structure(files):
    """Create every path in *files*, making parent directories as needed.

    A file is (re)created only when it is missing or empty, so re-running
    the scaffold never clobbers files that already have content.

    Args:
        files: iterable of path strings, relative to the current directory.
    """
    for filepath_str in files:
        filepath = Path(filepath_str)  # Path object for robust, portable handling

        # 1. Create the parent directory if the path has one (Path(".") means none).
        if filepath.parent != Path("."):
            filepath.parent.mkdir(parents=True, exist_ok=True)
            logging.info(f"Creating directory: {filepath.parent} for the file {filepath.name}")

        # 2. Create the file if it doesn't exist or is empty.
        if (not filepath.exists()) or filepath.stat().st_size == 0:
            filepath.touch()  # creates an empty file (or updates mtime of an empty one)
            logging.info(f"Creating empty file: {filepath}")
        else:
            logging.info(f"{filepath.name} already exists")


if __name__ == "__main__":
    # Guarded so importing this module has no side effects;
    # `python template.py` still scaffolds the project as before.
    create_project_structure(list_of_files)
templates/index.html ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Chest Cancer Detection AI</title>
7
+
8
+ <!-- Bootstrap 5 CSS -->
9
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
10
+
11
+ <!-- Font Awesome for Icons -->
12
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">
13
+
14
+ <!-- Google Fonts (Poppins) -->
15
+ <link rel="preconnect" href="https://fonts.googleapis.com">
16
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
17
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap" rel="stylesheet">
18
+
19
+ <!-- Your Custom CSS -->
20
+ <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
21
+ </head>
22
+ <body>
23
+
24
+ <header class="text-center py-4 shadow-sm">
25
+ <div class="container">
26
+ <h1><i class="fas fa-lungs-virus"></i> Chest Cancer Detection AI</h1>
27
+ <p class="lead text-muted">Upload a Chest CT Scan to classify it as Normal or Cancerous</p>
28
+ </div>
29
+ </header>
30
+
31
+ <main class="container my-5">
32
+ <div class="row g-4">
33
+ <!-- Left Column: Uploader -->
34
+ <div class="col-lg-6">
35
+ <div class="card h-100 shadow-lg border-0">
36
+ <div class="card-body text-center d-flex flex-column justify-content-center">
37
+ <div class="image-upload-wrapper">
38
+ <input type="file" id="fileInput" accept="image/png, image/jpeg" style="display: none;">
39
+ <label for="fileInput" class="upload-label">
40
+ <div class="upload-icon">
41
+ <i class="fas fa-cloud-upload-alt fa-3x"></i>
42
+ </div>
43
+ <p class="upload-text"><strong>Click to browse</strong> or drag and drop an image here.</p>
44
+ </label>
45
+ <div class="image-preview-container">
46
+ <img id="imagePreview" src="#" alt="Image Preview" class="img-fluid rounded"/>
47
+ <button id="removeImageBtn" class="btn btn-sm btn-danger remove-btn">&times;</button>
48
+ </div>
49
+ </div>
50
+ <button id="predictBtn" class="btn btn-primary btn-lg mt-4 w-100" disabled>
51
+ <span class="spinner-border spinner-border-sm" role="status" aria-hidden="true" style="display: none;"></span>
52
+ <span class="btn-text"><i class="fas fa-microscope"></i> Predict</span>
53
+ </button>
54
+ </div>
55
+ </div>
56
+ </div>
57
+
58
+ <!-- Right Column: Results -->
59
+ <div class="col-lg-6">
60
+ <div class="card h-100 shadow-lg border-0">
61
+ <div class="card-header bg-primary text-white">
62
+ <h5 class="mb-0"><i class="fas fa-poll"></i> Prediction Results</h5>
63
+ </div>
64
+ <div class="card-body">
65
+ <div id="result-container" class="text-center">
66
+ <p class="text-muted">Results will be displayed here after prediction.</p>
67
+ </div>
68
+ <hr>
69
+ <div class="accordion" id="jsonAccordion">
70
+ <div class="accordion-item">
71
+ <h2 class="accordion-header" id="headingOne">
72
+ <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOne" aria-expanded="false" aria-controls="collapseOne">
73
+ Raw JSON Response
74
+ </button>
75
+ </h2>
76
+ <div id="collapseOne" class="accordion-collapse collapse" aria-labelledby="headingOne" data-bs-parent="#jsonAccordion">
77
+ <div class="accordion-body">
78
+ <pre id="jsonResponse" class="bg-light p-3 rounded"><code>Waiting for response...</code></pre>
79
+ </div>
80
+ </div>
81
+ </div>
82
+ </div>
83
+ </div>
84
+ </div>
85
+ </div>
86
+ </div>
87
+ </main>
88
+
89
+ <footer class="text-center text-muted py-3 mt-4">
90
+ <p>&copy; 2024 Your Name. Powered by AI.</p>
91
+ </footer>
92
+
93
+ <!-- Bootstrap 5 JS -->
94
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
95
+ <!-- Your Custom JS -->
96
+ <script src="{{ url_for('static', filename='script.js') }}"></script>
97
+ </body>
98
+ </html>
training_history.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loss,accuracy,val_loss,val_accuracy
2
+ 1.0304151773452759,0.5675675868988037,0.6861137747764587,0.421875
3
+ 1.2392491102218628,0.5057914853096008,0.2789396345615387,0.9375
4
+ 0.521758496761322,0.7953668236732483,0.277998149394989,0.9375
5
+ 0.46904969215393066,0.760617733001709,0.2396804541349411,0.9375
6
+ 0.2891399562358856,0.8648648858070374,0.13092049956321716,0.96875
7
+ 0.2712053954601288,0.8823529481887817,0.10788409411907196,1.0
8
+ 0.23328891396522522,0.9305019378662109,0.09912744164466858,0.984375
9
+ 0.22442513704299927,0.92277991771698,0.14693066477775574,0.984375
10
+ 0.19375579059123993,0.9189189076423645,0.08046227693557739,0.984375
11
+ 0.20040491223335266,0.9189189076423645,0.2098347544670105,0.9375