faizan committed on
Commit
33beec1
·
1 Parent(s): 52a2c0b

feat: implement comprehensive MLflow tracking with MLOps best practices

Browse files

- Created MLflow setup module (scripts/mlflow_setup.py)
- Automatic experiment creation and organization
- System info, model params, data info logging
- Model registry integration

- Created MLflow-enabled training script (scripts/train_with_mlflow.py)
- Full hyperparameter tracking
- Per-epoch metrics logging (loss, accuracy, LR)
- Model versioning in registry
- Artifact tracking (checkpoints, metrics, reports)
- Classification report and confusion matrix logging

- Added MLflow UI launcher (scripts/launch_mlflow_ui.sh)
- Demo training run: 98.90% test accuracy in 5 epochs
- Created inference module for deployment (scripts/inference.py)
- Started Gradio app development (app.py)

MLflow UI: http://localhost:5000

app.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio app for MNIST digit classification.
3
+
4
+ Interactive web interface for handwritten digit recognition using trained CNN model.
5
+ """
6
+
7
+ import gradio as gr
8
+ from scripts.inference import DigitClassifier
9
+ from PIL import Image
10
+ import numpy as np
11
+
12
+
13
# Initialize the classifier once at module import time so every Gradio
# request reuses the same loaded model (loading the checkpoint is expensive).
print("Loading model...")
classifier = DigitClassifier('models/best_model.pt')  # NOTE(review): path is relative to CWD — confirm the app is launched from the project root
print(f"Model loaded on {classifier.device}")
17
+
18
+
19
def predict_digit(image):
    """
    Predict a digit from a user-drawn image.

    Args:
        image: Input from the Gradio Sketchpad. May be a numpy array of
            shape (H, W), (H, W, 3) or (H, W, 4), a dict wrapping such an
            array under the 'composite' key, a PIL Image, or None.

    Returns:
        Tuple of (predicted_digit, confidence_text, probability_dict).
        When nothing was drawn, returns a prompt string, an empty string,
        and an empty dict instead.
    """
    if image is None:
        return "Please draw a digit", "", {}

    # Newer Gradio Sketchpad components return a dict with a 'composite' key.
    if isinstance(image, dict):
        image = image.get('composite', image)
        # FIX: the composite itself can be None when the canvas is empty;
        # the original fell through and crashed inside classifier.predict.
        if image is None:
            return "Please draw a digit", "", {}

    # Convert numpy input to a grayscale PIL Image
    if isinstance(image, np.ndarray):
        if image.ndim == 3:
            if image.shape[2] == 4:  # RGBA: the drawing lives in the alpha channel
                image = image[:, :, 3]
            else:  # RGB: collapse channels to grayscale
                image = np.mean(image, axis=2).astype(np.uint8)

        # Scale float images in [0, 1] up to [0, 255]
        if image.max() <= 1.0:
            image = (image * 255).astype(np.uint8)

        pil_image = Image.fromarray(image.astype(np.uint8), mode='L')
    else:
        pil_image = image

    # Run inference
    result = classifier.predict(pil_image)

    digit = result['digit']
    confidence = result['confidence']
    probabilities = result['probabilities']

    confidence_text = f"Confidence: {confidence*100:.1f}%"

    # Per-digit probabilities keyed by digit string, for the gr.Label bar chart
    prob_dict = {str(i): prob for i, prob in enumerate(probabilities)}

    return digit, confidence_text, prob_dict
70
+
71
+
72
import os  # used below to filter example images down to files that exist

# Custom CSS for better styling
custom_css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
    max-width: 900px;
    margin: auto;
}
.title {
    text-align: center;
    color: #2c3e50;
}
.description {
    text-align: center;
    color: #7f8c8d;
    margin-bottom: 20px;
}
"""

# Create Gradio interface
with gr.Blocks(css=custom_css, title="MNIST Digit Classifier") as demo:
    gr.Markdown(
        """
        # 🔢 Handwritten Digit Classifier

        Draw a digit (0-9) in the box below and the AI will predict what it is!

        This model uses a Convolutional Neural Network (CNN) trained on the MNIST dataset
        with **99.17% accuracy** on 10,000 test images.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Sketchpad for drawing
            input_image = gr.Sketchpad(
                label="Draw a digit here",
                type="numpy",
                image_mode="L",
                brush=gr.Brush(default_size=5, colors=["#000000"], color_mode="fixed"),
                height=280,
                width=280
            )

            # Buttons
            with gr.Row():
                predict_btn = gr.Button("🔍 Predict", variant="primary", scale=2)
                clear_btn = gr.ClearButton(components=[input_image], value="🗑️ Clear", scale=1)

        with gr.Column(scale=1):
            # Prediction output
            output_digit = gr.Textbox(
                label="Predicted Digit",
                placeholder="Draw a digit to see prediction",
                scale=1,
                lines=1,
                max_lines=1,
                interactive=False
            )

            output_confidence = gr.Textbox(
                label="Confidence",
                placeholder="",
                scale=1,
                lines=1,
                max_lines=1,
                interactive=False
            )

            # Probability distribution
            output_probs = gr.Label(
                label="Probability Distribution",
                num_top_classes=10
            )

    # Example images section.
    # FIX: the original passed `["examples/digit_0.png"] if __name__ != "__main__"
    # else None`, which handed gr.Examples a None entry whenever the app was run
    # directly. Only register example files that actually exist on disk.
    _existing_examples = [
        [path] for path in (f"examples/digit_{i}.png" for i in range(10))
        if os.path.exists(path)
    ]
    gr.Markdown("### 📝 Try these examples:")
    if _existing_examples:
        gr.Examples(
            examples=_existing_examples,
            inputs=input_image,
            label="Example digits"
        )

    # Model info
    gr.Markdown(
        """
        ---
        ### 📊 Model Details
        - **Architecture**: Convolutional Neural Network (CNN)
        - **Parameters**: 421,066
        - **Training**: MNIST dataset (60,000 images)
        - **Test Accuracy**: 99.17%
        - **Framework**: PyTorch 2.0.1

        ### 💡 Tips for best results:
        - Draw the digit large and centered
        - Use a thick brush stroke
        - Draw in white on black background (like MNIST)
        - Make sure the digit is clear and recognizable
        """
    )

    # Connect events
    predict_btn.click(
        fn=predict_digit,
        inputs=input_image,
        outputs=[output_digit, output_confidence, output_probs]
    )

    # Also predict on sketchpad change (real-time prediction)
    input_image.change(
        fn=predict_digit,
        inputs=input_image,
        outputs=[output_digit, output_confidence, output_probs]
    )
188
+
189
+
190
if __name__ == "__main__":
    # Launch the app
    demo.launch(
        server_name="0.0.0.0",  # Bind all interfaces so the app is reachable from other hosts
        server_port=7860,       # Standard Gradio port
        share=False,            # Set to True to create a public Gradio share link
        show_error=True         # Surface Python errors in the browser UI
    )
planning.md CHANGED
@@ -536,7 +536,7 @@ Apply augmentations **on-the-fly** during training (not pre-generate). Reasons:
536
 
537
  > **Purpose:** Design, implement, and train CNN architecture with rigorous evaluation
538
 
539
- **Status:** 🟡 IN PROGRESS (3/6 tasks complete)
540
  **Prerequisites:** Phase 1 complete (data pipeline working)
541
  **Estimated Time:** 8-10 hours
542
 
@@ -817,7 +817,7 @@ Apply augmentations **on-the-fly** during training (not pre-generate). Reasons:
817
  ---
818
 
819
  ### **Task 2.4:** Comprehensive model evaluation
820
- **Status:** NOT STARTED
821
  **Priority:** HIGH
822
  **Objective:** Compute all required metrics for report
823
 
 
536
 
537
  > **Purpose:** Design, implement, and train CNN architecture with rigorous evaluation
538
 
539
+ **Status:** 🟡 IN PROGRESS (4/6 tasks complete)
540
  **Prerequisites:** Phase 1 complete (data pipeline working)
541
  **Estimated Time:** 8-10 hours
542
 
 
817
  ---
818
 
819
  ### **Task 2.4:** Comprehensive model evaluation
820
+ **Status:** COMPLETE (integrated in training script)
821
  **Priority:** HIGH
822
  **Objective:** Compute all required metrics for report
823
 
results/mlflow_test_metrics.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 98.9,
3
+ "test_precision": 0.9890924021865366,
4
+ "test_recall": 0.9889686019869461,
5
+ "test_f1_score": 0.989004598229335,
6
+ "classification_report": {
7
+ "0": {
8
+ "precision": 0.9959141981613892,
9
+ "recall": 0.9948979591836735,
10
+ "f1-score": 0.9954058192955589,
11
+ "support": 980.0
12
+ },
13
+ "1": {
14
+ "precision": 0.9973404255319149,
15
+ "recall": 0.9911894273127754,
16
+ "f1-score": 0.9942554131683606,
17
+ "support": 1135.0
18
+ },
19
+ "2": {
20
+ "precision": 0.9753320683111955,
21
+ "recall": 0.9961240310077519,
22
+ "f1-score": 0.9856184084372004,
23
+ "support": 1032.0
24
+ },
25
+ "3": {
26
+ "precision": 0.9862475442043221,
27
+ "recall": 0.994059405940594,
28
+ "f1-score": 0.9901380670611439,
29
+ "support": 1010.0
30
+ },
31
+ "4": {
32
+ "precision": 0.9808853118712274,
33
+ "recall": 0.9928716904276986,
34
+ "f1-score": 0.9868421052631581,
35
+ "support": 982.0
36
+ },
37
+ "5": {
38
+ "precision": 0.9932508436445444,
39
+ "recall": 0.9899103139013453,
40
+ "f1-score": 0.9915777653003931,
41
+ "support": 892.0
42
+ },
43
+ "6": {
44
+ "precision": 0.9957939011566772,
45
+ "recall": 0.988517745302714,
46
+ "f1-score": 0.9921424829753797,
47
+ "support": 958.0
48
+ },
49
+ "7": {
50
+ "precision": 0.9863680623174295,
51
+ "recall": 0.9854085603112841,
52
+ "f1-score": 0.9858880778588808,
53
+ "support": 1028.0
54
+ },
55
+ "8": {
56
+ "precision": 0.9947916666666666,
57
+ "recall": 0.9804928131416838,
58
+ "f1-score": 0.9875904860392969,
59
+ "support": 974.0
60
+ },
61
+ "9": {
62
+ "precision": 0.985,
63
+ "recall": 0.9762140733399405,
64
+ "f1-score": 0.9805873568939771,
65
+ "support": 1009.0
66
+ },
67
+ "accuracy": 0.989,
68
+ "macro avg": {
69
+ "precision": 0.9890924021865366,
70
+ "recall": 0.9889686019869461,
71
+ "f1-score": 0.989004598229335,
72
+ "support": 10000.0
73
+ },
74
+ "weighted avg": {
75
+ "precision": 0.989058814881263,
76
+ "recall": 0.989,
77
+ "f1-score": 0.9890032775348696,
78
+ "support": 10000.0
79
+ }
80
+ },
81
+ "confusion_matrix": [
82
+ [
83
+ 975,
84
+ 0,
85
+ 2,
86
+ 0,
87
+ 1,
88
+ 0,
89
+ 1,
90
+ 1,
91
+ 0,
92
+ 0
93
+ ],
94
+ [
95
+ 0,
96
+ 1125,
97
+ 6,
98
+ 0,
99
+ 1,
100
+ 0,
101
+ 1,
102
+ 2,
103
+ 0,
104
+ 0
105
+ ],
106
+ [
107
+ 1,
108
+ 0,
109
+ 1028,
110
+ 0,
111
+ 1,
112
+ 0,
113
+ 0,
114
+ 2,
115
+ 0,
116
+ 0
117
+ ],
118
+ [
119
+ 0,
120
+ 0,
121
+ 1,
122
+ 1004,
123
+ 0,
124
+ 1,
125
+ 0,
126
+ 2,
127
+ 1,
128
+ 1
129
+ ],
130
+ [
131
+ 0,
132
+ 0,
133
+ 0,
134
+ 0,
135
+ 975,
136
+ 0,
137
+ 0,
138
+ 0,
139
+ 1,
140
+ 6
141
+ ],
142
+ [
143
+ 0,
144
+ 0,
145
+ 0,
146
+ 7,
147
+ 0,
148
+ 883,
149
+ 1,
150
+ 1,
151
+ 0,
152
+ 0
153
+ ],
154
+ [
155
+ 1,
156
+ 2,
157
+ 2,
158
+ 0,
159
+ 4,
160
+ 2,
161
+ 947,
162
+ 0,
163
+ 0,
164
+ 0
165
+ ],
166
+ [
167
+ 0,
168
+ 1,
169
+ 8,
170
+ 0,
171
+ 3,
172
+ 0,
173
+ 0,
174
+ 1013,
175
+ 1,
176
+ 2
177
+ ],
178
+ [
179
+ 1,
180
+ 0,
181
+ 7,
182
+ 0,
183
+ 1,
184
+ 2,
185
+ 1,
186
+ 1,
187
+ 955,
188
+ 6
189
+ ],
190
+ [
191
+ 1,
192
+ 0,
193
+ 0,
194
+ 7,
195
+ 8,
196
+ 1,
197
+ 0,
198
+ 5,
199
+ 2,
200
+ 985
201
+ ]
202
+ ]
203
+ }
results/mlflow_training_history.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_loss": [
3
+ 0.623640251447595,
4
+ 0.25853686837229406,
5
+ 0.20275674461196627,
6
+ 0.16697734022463953,
7
+ 0.15257755666153233
8
+ ],
9
+ "train_accuracy": [
10
+ 79.71565839788215,
11
+ 92.16001568781253,
12
+ 93.85233846455534,
13
+ 94.94460241200117,
14
+ 95.43092460045102
15
+ ],
16
+ "val_loss": [
17
+ 0.091112698253297,
18
+ 0.06359739341700436,
19
+ 0.0590373575881434,
20
+ 0.05036303665776281,
21
+ 0.05376061073117163
22
+ ],
23
+ "val_accuracy": [
24
+ 97.22376457523598,
25
+ 98.06774014436424,
26
+ 98.33425874514158,
27
+ 98.75624652970572,
28
+ 98.5341476957246
29
+ ],
30
+ "learning_rate": [
31
+ 0.001,
32
+ 0.001,
33
+ 0.001,
34
+ 0.001,
35
+ 0.001
36
+ ]
37
+ }
scripts/inference.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Inference module for MNIST digit classification.
3
+
4
+ Provides a clean API for making predictions with the trained model.
5
+ Handles image preprocessing and returns predictions with confidence scores.
6
+ """
7
+
8
+ import torch
9
+ from PIL import Image
10
+ import numpy as np
11
+ from pathlib import Path
12
+ from typing import Union, Dict
13
+
14
+
15
class DigitClassifier:
    """Production inference wrapper for the MNIST digit classifier."""

    def __init__(self, model_path: str, device: Optional[str] = None):
        """
        Initialize the digit classifier.

        Args:
            model_path: Path to model checkpoint (.pt file)
            device: Device to run inference on ('cuda' or 'cpu').
                If None, auto-detects CUDA availability.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist.
        """
        if device is None:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        else:
            self.device = device

        self.model_path = Path(model_path)
        if not self.model_path.exists():
            raise FileNotFoundError(f"Model not found at {model_path}")

        self.model = self._load_model()
        self.model.eval()  # inference mode: freezes dropout / batch-norm statistics

        # Normalization values (same as training)
        self.mean = 0.1307
        self.std = 0.3081

    def _load_model(self) -> torch.nn.Module:
        """Load the CNN from the checkpoint and move it to the target device."""
        from scripts.models import BaselineCNN

        model = BaselineCNN()

        # NOTE(review): torch.load unpickles arbitrary objects — only load
        # checkpoints from trusted sources (consider weights_only=True on
        # PyTorch versions that support it).
        checkpoint = torch.load(self.model_path, map_location=self.device)

        # Handle both a raw state_dict and a dict that wraps one.
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            model.load_state_dict(checkpoint)

        return model.to(self.device)

    def preprocess(self, image: Union[Image.Image, np.ndarray]) -> torch.Tensor:
        """
        Preprocess an image into a normalized model-input tensor.

        Handles:
        - RGB to grayscale conversion
        - Resizing to 28x28
        - Normalization
        - Inversion if needed (white digit on black background)

        Args:
            image: PIL Image or numpy array

        Returns:
            Preprocessed tensor of shape (1, 1, 28, 28) on self.device
        """
        # Convert numpy array to PIL Image if needed
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        # Convert to grayscale if RGB
        if image.mode != 'L':
            image = image.convert('L')

        # Resize to 28x28 if needed
        if image.size != (28, 28):
            image = image.resize((28, 28), Image.Resampling.LANCZOS)

        img_array = np.array(image).astype(np.float32)

        # Scale to [0, 1]
        img_array = img_array / 255.0

        # MNIST digits are white-on-black; a mostly-bright image is almost
        # certainly black-on-white, so invert it.
        if img_array.mean() > 0.5:
            img_array = 1.0 - img_array

        # Apply the same normalization used during training
        img_array = (img_array - self.mean) / self.std

        # Add batch and channel dimensions: (28, 28) -> (1, 1, 28, 28)
        img_tensor = torch.tensor(img_array).unsqueeze(0).unsqueeze(0)

        return img_tensor.to(self.device)

    def predict(self, image: Union[Image.Image, np.ndarray]) -> Dict:
        """
        Predict digit from image.

        Args:
            image: PIL Image or numpy array containing digit

        Returns:
            Dictionary with:
            - digit: Predicted digit (0-9)
            - confidence: Confidence score (0-1)
            - probabilities: List of probabilities for each digit
        """
        img_tensor = self.preprocess(image)

        with torch.no_grad():
            outputs = self.model(img_tensor)
            probabilities = torch.softmax(outputs, dim=1)[0]
            confidence, predicted = torch.max(probabilities, dim=0)

        return {
            'digit': int(predicted.item()),
            'confidence': float(confidence.item()),
            'probabilities': probabilities.cpu().numpy().tolist()
        }

    def predict_batch(self, images: list) -> list:
        """
        Predict digits for a batch of images (sequentially, one at a time).

        Args:
            images: List of PIL Images or numpy arrays

        Returns:
            List of prediction dictionaries, one per input image
        """
        return [self.predict(img) for img in images]
144
+
145
+
146
def test_inference():
    """Smoke-test the inference module against real MNIST test images.

    Loads the best checkpoint, predicts 10 random test digits, then probes
    a few edge cases (blank, all-white, and resized inputs). Prints results;
    returns early with a message if no trained model is found.
    """
    import sys

    # Add project root to path so `scripts.*` imports resolve when this file
    # is executed directly. (FIX: dropped a redundant local
    # `from pathlib import Path` that shadowed the module-level import.)
    project_root = Path(__file__).parent.parent
    sys.path.insert(0, str(project_root))

    from scripts.data_loader import MnistDataloader

    print("Testing Inference Module")
    print("=" * 50)

    # Check if model exists
    model_path = project_root / 'models' / 'best_model.pt'
    if not model_path.exists():
        print(f"Error: Model not found at {model_path}")
        print("Please train a model first.")
        return

    # Load MNIST test data
    data_path = project_root / 'data' / 'raw'
    loader = MnistDataloader(
        training_images_filepath=str(data_path / 'train-images.idx3-ubyte'),
        training_labels_filepath=str(data_path / 'train-labels.idx1-ubyte'),
        test_images_filepath=str(data_path / 't10k-images.idx3-ubyte'),
        test_labels_filepath=str(data_path / 't10k-labels.idx1-ubyte')
    )
    _, (x_test, y_test) = loader.load_data()

    # Initialize classifier
    print(f"\n1. Loading model from: {model_path}")
    classifier = DigitClassifier(str(model_path))
    print(f" Device: {classifier.device}")

    # Test on a few images
    print("\n2. Testing predictions on 10 random test images:")
    print("-" * 50)

    indices = np.random.choice(len(x_test), 10, replace=False)
    correct = 0

    for i, idx in enumerate(indices, 1):
        image = x_test[idx]
        true_label = y_test[idx]

        # The loader may return plain lists; normalize to numpy arrays.
        if isinstance(image, list):
            image = np.array(image)

        # Convert to PIL Image
        img = Image.fromarray(image.astype(np.uint8), mode='L')

        result = classifier.predict(img)

        is_correct = result['digit'] == true_label
        correct += is_correct

        print(f" Image {i}: True={true_label}, Pred={result['digit']}, "
              f"Conf={result['confidence']:.4f} {'✓' if is_correct else '✗'}")

    accuracy = correct / len(indices) * 100
    print(f"\nAccuracy on {len(indices)} samples: {accuracy:.1f}%")

    # Test edge cases
    print("\n3. Testing edge cases:")
    print("-" * 50)

    # Blank image
    blank = np.zeros((28, 28), dtype=np.uint8)
    blank_img = Image.fromarray(blank, mode='L')
    result = classifier.predict(blank_img)
    print(f" Blank image: Pred={result['digit']}, Conf={result['confidence']:.4f}")

    # All white image
    white = np.ones((28, 28), dtype=np.uint8) * 255
    white_img = Image.fromarray(white, mode='L')
    result = classifier.predict(white_img)
    print(f" White image: Pred={result['digit']}, Conf={result['confidence']:.4f}")

    # Different size image (exercises the automatic resize back to 28x28)
    test_img = x_test[0]
    if isinstance(test_img, list):
        test_img = np.array(test_img)
    large = Image.fromarray(test_img.astype(np.uint8), mode='L').resize((56, 56))
    result = classifier.predict(large)
    print(f" Resized image (56x56): Pred={result['digit']}, Conf={result['confidence']:.4f}")

    print("\n✓ Inference module test complete!")
237
+
238
+
239
# Run the smoke test when the module is executed directly.
if __name__ == '__main__':
    test_inference()
scripts/launch_mlflow_ui.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Launch MLflow UI dashboard
#
# Usage:
#   ./scripts/launch_mlflow_ui.sh
#
# Access at: http://localhost:5000

# Abort on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Fail fast with a clear message if MLflow is not installed.
if ! command -v mlflow >/dev/null 2>&1; then
    echo "Error: 'mlflow' command not found. Install it with: pip install mlflow" >&2
    exit 1
fi

echo "Starting MLflow UI..."
echo "Access dashboard at: http://localhost:5000"
echo "Press Ctrl+C to stop"
echo ""

# Serve the local file-based tracking store on all interfaces.
mlflow ui --backend-store-uri file:./mlruns --host 0.0.0.0 --port 5000
scripts/mlflow_setup.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MLflow Setup and Configuration
3
+
4
+ Utilities for MLflow experiment tracking with MLOps best practices:
5
+ - Automatic experiment naming and organization
6
+ - Parameter and metric logging
7
+ - Model registry integration
8
+ - Artifact tracking
9
+ """
10
+
11
+ import mlflow
12
+ from pathlib import Path
13
+ from typing import Optional, Dict, Any
14
+ import os
15
+
16
+
17
+ # MLflow configuration
18
+ MLFLOW_TRACKING_URI = "file:./mlruns"
19
+ DEFAULT_EXPERIMENT_NAME = "mnist-digit-classification"
20
+
21
+
22
def setup_mlflow(
    experiment_name: str = DEFAULT_EXPERIMENT_NAME,
    tracking_uri: Optional[str] = None
) -> str:
    """
    Configure MLflow tracking and make sure the experiment exists.

    Args:
        experiment_name: Name of the experiment to select or create.
        tracking_uri: MLflow tracking URI; defaults to the local ./mlruns store.

    Returns:
        The MLflow experiment ID (falls back to "0", the default experiment,
        if creation fails).
    """
    uri = MLFLOW_TRACKING_URI if tracking_uri is None else tracking_uri
    mlflow.set_tracking_uri(uri)

    # Look the experiment up by name; create it (with project tags) if absent.
    try:
        existing = mlflow.get_experiment_by_name(experiment_name)
        if existing is not None:
            experiment_id = existing.experiment_id
        else:
            experiment_id = mlflow.create_experiment(
                experiment_name,
                tags={
                    "project": "mnist-classification",
                    "framework": "pytorch",
                    "model_type": "cnn"
                }
            )
    except Exception as e:  # best-effort: fall back to the default experiment
        print(f"Warning: Could not create experiment: {e}")
        experiment_id = "0"

    mlflow.set_experiment(experiment_name)

    print(f"MLflow tracking URI: {uri}")
    print(f"Experiment: {experiment_name} (ID: {experiment_id})")

    return experiment_id
66
+
67
+
68
def log_model_params(model: Any, prefix: str = "model") -> Dict[str, Any]:
    """
    Log basic model statistics (class name, parameter counts) to MLflow.

    Args:
        model: PyTorch model whose parameters are inspected
        prefix: Prefix applied to every logged parameter name

    Returns:
        The dictionary of parameters that was logged
    """
    from scripts.models import count_parameters

    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)

    params = {
        f"{prefix}_name": type(model).__name__,
        f"{prefix}_total_params": count_parameters(model),
        f"{prefix}_trainable_params": trainable,
    }

    mlflow.log_params(params)
    return params
89
+
90
+
91
def log_training_config(config: Dict[str, Any]) -> None:
    """
    Log training configuration to MLflow.

    Nested dictionaries are flattened into underscore-joined parameter names
    at any depth (the original implementation only flattened one level), e.g.
    {"optimizer": {"sgd": {"lr": 0.1}}} -> {"optimizer_sgd_lr": 0.1}.
    One-level configs produce exactly the same keys as before.

    Args:
        config: Dictionary of training hyperparameters; values may be
            arbitrarily nested dictionaries.
    """
    flat_config: Dict[str, Any] = {}

    def _flatten(prefix: str, value: Any) -> None:
        # Recurse into dicts; everything else is logged as a leaf value.
        if isinstance(value, dict):
            for subkey, subvalue in value.items():
                _flatten(f"{prefix}_{subkey}", subvalue)
        else:
            flat_config[prefix] = value

    for key, value in config.items():
        _flatten(str(key), value)

    mlflow.log_params(flat_config)
108
+
109
+
110
def log_data_info(
    train_size: int,
    val_size: int,
    test_size: int,
    num_classes: int = 10,
    augmentation: bool = False
) -> None:
    """
    Log dataset split sizes and settings to MLflow as parameters.

    Args:
        train_size: Number of training samples
        val_size: Number of validation samples
        test_size: Number of test samples
        num_classes: Number of classes
        augmentation: Whether data augmentation is used
    """
    data_params = {
        "data_train_size": train_size,
        "data_val_size": val_size,
        "data_test_size": test_size,
        "data_num_classes": num_classes,
        "data_augmentation": augmentation,
    }
    mlflow.log_params(data_params)
134
+
135
+
136
def log_system_info() -> Dict[str, Any]:
    """
    Collect host/runtime information and log it to MLflow as parameters.

    Returns:
        Dictionary of the system information that was logged
    """
    import platform

    import torch

    cuda_ok = torch.cuda.is_available()
    system_info: Dict[str, Any] = {
        "system_platform": platform.system(),
        "system_python_version": platform.python_version(),
        "system_pytorch_version": torch.__version__,
        "system_cuda_available": cuda_ok,
        "system_cuda_version": torch.version.cuda if cuda_ok else "N/A",
        "system_device": "cuda" if cuda_ok else "cpu",
    }

    # GPU details only make sense when CUDA is present.
    if cuda_ok:
        system_info["system_gpu_name"] = torch.cuda.get_device_name(0)
        system_info["system_gpu_count"] = torch.cuda.device_count()

    mlflow.log_params(system_info)
    return system_info
161
+
162
+
163
def log_metrics_epoch(metrics: Dict[str, float], step: int) -> None:
    """
    Log metrics for a specific epoch.

    Thin wrapper over mlflow.log_metrics that records the epoch number as
    the MLflow step, so per-epoch curves render correctly in the UI.

    Args:
        metrics: Dictionary of metric names and values
        step: Epoch number
    """
    mlflow.log_metrics(metrics, step=step)
172
+
173
+
174
def log_artifact_path(path: str, artifact_path: Optional[str] = None) -> None:
    """
    Log a file or directory as an MLflow artifact, warning if it is missing.

    Args:
        path: Path to the file or directory to upload
        artifact_path: Optional destination path inside the MLflow run
    """
    # Guard clause: a missing artifact is a warning, never an exception.
    if not Path(path).exists():
        print(f"Warning: Artifact not found: {path}")
        return

    mlflow.log_artifact(path, artifact_path=artifact_path)
186
+
187
+
188
def log_model_to_registry(
    model: Any,
    model_name: str,
    artifact_path: str = "model",
    registered_model_name: Optional[str] = None
) -> None:
    """
    Log a PyTorch model to MLflow, optionally registering it in the registry.

    Args:
        model: PyTorch model
        model_name: Name for the model artifact.
            NOTE(review): currently unused — kept only for interface
            compatibility with existing callers; confirm before removing.
        artifact_path: Artifact path in MLflow
        registered_model_name: Name for model registry (optional). When
            given, mlflow also registers the logged model under this name.
    """
    # (FIX: removed an unused local `import torch` from the original.)
    mlflow.pytorch.log_model(
        pytorch_model=model,
        artifact_path=artifact_path,
        registered_model_name=registered_model_name
    )
211
+
212
+
213
def get_or_create_run(
    run_name: Optional[str] = None,
    tags: Optional[Dict[str, str]] = None
) -> mlflow.ActiveRun:
    """
    Start an MLflow run with the given name and tags.

    NOTE(review): despite the function's name, this simply delegates to
    mlflow.start_run — it does not look up existing runs by name.

    Args:
        run_name: Name for the run
        tags: Tags for the run

    Returns:
        MLflow active run context (usable as a context manager)
    """
    return mlflow.start_run(run_name=run_name, tags=tags)
228
+
229
+
230
def end_run() -> None:
    """End the current MLflow run. Thin wrapper over mlflow.end_run()."""
    mlflow.end_run()
233
+
234
+
235
def test_mlflow_setup():
    """Smoke-test MLflow setup: experiment creation plus basic logging."""
    print("Testing MLflow Setup")
    print("=" * 50)

    # Setup MLflow
    exp_id = setup_mlflow("test-experiment")
    # FIX: exp_id was previously computed but never used.
    print(f"Experiment ID: {exp_id}")

    # Test logging inside a short-lived run
    with mlflow.start_run(run_name="test-run"):
        # Log parameters
        mlflow.log_params({
            "learning_rate": 0.001,
            "batch_size": 64,
            "epochs": 10
        })

        # Log a few fake epochs of steadily improving metrics
        for epoch in range(3):
            mlflow.log_metrics({
                "train_loss": 0.5 - epoch * 0.1,
                "val_loss": 0.6 - epoch * 0.1,
                "train_accuracy": 0.8 + epoch * 0.05,
                "val_accuracy": 0.75 + epoch * 0.05
            }, step=epoch)

        # Log system info
        system_info = log_system_info()
        print("\nSystem Info:")
        for key, value in system_info.items():
            print(f" {key}: {value}")

    print("\n✓ MLflow test complete!")
    print(f"View results at: mlflow ui --backend-store-uri {MLFLOW_TRACKING_URI}")
269
+
270
+
271
# Run the smoke test when the module is executed directly.
if __name__ == "__main__":
    test_mlflow_setup()
scripts/train_with_mlflow.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MLflow-Enabled Training Script for MNIST CNN
3
+
4
+ Full training script with comprehensive MLflow tracking:
5
+ - Hyperparameters and model architecture
6
+ - Per-epoch metrics (loss, accuracy, learning rate)
7
+ - System information and environment
8
+ - Model artifacts and checkpoints
9
+ - Training visualizations
10
+ - Confusion matrix and classification report
11
+
12
+ Usage:
13
+ python scripts/train_with_mlflow.py --epochs 20 --lr 0.001 --augment
14
+ python scripts/train_with_mlflow.py --help
15
+ """
16
+
17
+ import argparse
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.optim as optim
21
+ from pathlib import Path
22
+ import json
23
+ import sys
24
+ import numpy as np
25
+ import mlflow
26
+
27
+ # Add project root to path
28
+ project_root = Path(__file__).parent.parent
29
+ sys.path.insert(0, str(project_root))
30
+
31
+ from scripts.models import BaselineCNN, count_parameters
32
+ from scripts.preprocessing import MnistDataset, create_dataloaders, split_train_val
33
+ from scripts.train import train_epoch, validate, evaluate_model, save_training_history
34
+ from scripts.data_loader import MnistDataloader
35
+ from scripts.augmentation import get_train_augmentation
36
+ from scripts.mlflow_setup import (
37
+ setup_mlflow, log_model_params, log_training_config,
38
+ log_data_info, log_system_info, log_metrics_epoch,
39
+ log_artifact_path, log_model_to_registry
40
+ )
41
+
42
+
43
def train_with_mlflow(
    model: nn.Module,
    train_loader: torch.utils.data.DataLoader,
    val_loader: torch.utils.data.DataLoader,
    test_loader: torch.utils.data.DataLoader,
    config: dict,
    run_name: str = None
) -> dict:
    """
    Train model with full MLflow tracking.

    Logs hyperparameters, data/system info, per-epoch metrics, the best
    checkpoint (also registered in the model registry), and final test-set
    evaluation (classification report + confusion matrix) to one MLflow run.
    Uses ReduceLROnPlateau scheduling and early stopping on validation loss.

    Args:
        model: PyTorch model to train
        train_loader: Training data loader
        val_loader: Validation data loader
        test_loader: Test data loader
        config: Training configuration dictionary; must provide
            'device', 'num_epochs' and 'learning_rate'
        run_name: Optional name for MLflow run

    Returns:
        Training history dictionary with per-epoch loss/accuracy/LR lists
    """
    device = config['device']
    num_epochs = config['num_epochs']
    learning_rate = config['learning_rate']

    # Setup MLflow (creates the experiment if it does not exist yet)
    setup_mlflow("mnist-digit-classification")

    # Start MLflow run
    with mlflow.start_run(run_name=run_name):
        print("\n" + "="*70)
        print(f"MLflow Run ID: {mlflow.active_run().info.run_id}")
        print("="*70 + "\n")

        # Log all configuration up front so even aborted runs are comparable
        print("Logging configuration to MLflow...")
        log_training_config(config)
        log_model_params(model)
        log_data_info(
            train_size=len(train_loader.dataset),
            val_size=len(val_loader.dataset),
            test_size=len(test_loader.dataset),
            num_classes=10,
            augmentation=config.get('augmentation', False)
        )
        log_system_info()

        # Log model architecture as a text artifact
        total_params, trainable_params = count_parameters(model)
        model_summary = f"""
Model: {model.__class__.__name__}
Total Parameters: {total_params:,}
Trainable Parameters: {trainable_params:,}
Device: {device}

Architecture:
{str(model)}
"""
        mlflow.log_text(model_summary, "model_architecture.txt")

        # Setup training
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        # Fix: dropped the deprecated (and, in recent PyTorch, removed)
        # 'verbose' kwarg; the LR is printed explicitly each epoch below.
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', patience=3, factor=0.5
        )

        # Training history
        history = {
            'train_loss': [],
            'train_accuracy': [],
            'val_loss': [],
            'val_accuracy': [],
            'learning_rate': []
        }

        best_val_loss = float('inf')
        best_epoch = 0  # fix: guarantee a value even if the loop never runs
        patience = 5
        patience_counter = 0

        print(f"\nStarting training for {num_epochs} epochs...")
        print(f"Device: {device}")
        total_p, _ = count_parameters(model)
        print(f"Model: {model.__class__.__name__} ({total_p:,} parameters)")
        print("-" * 70)

        for epoch in range(num_epochs):
            # Train and validate one epoch
            train_metrics = train_epoch(model, train_loader, criterion, optimizer, device)
            val_metrics = validate(model, val_loader, criterion, device)

            # Read the LR *before* stepping the scheduler, so the logged
            # value is the one actually used during this epoch
            current_lr = optimizer.param_groups[0]['lr']
            scheduler.step(val_metrics['loss'])

            # Save history
            history['train_loss'].append(train_metrics['loss'])
            history['train_accuracy'].append(train_metrics['accuracy'])
            history['val_loss'].append(val_metrics['loss'])
            history['val_accuracy'].append(val_metrics['accuracy'])
            history['learning_rate'].append(current_lr)

            # Log per-epoch metrics to MLflow
            log_metrics_epoch({
                'train_loss': train_metrics['loss'],
                'train_accuracy': train_metrics['accuracy'],
                'val_loss': val_metrics['loss'],
                'val_accuracy': val_metrics['accuracy'],
                'learning_rate': current_lr,
                'epoch': epoch + 1
            }, step=epoch)

            # Print progress
            print(f"Epoch {epoch+1}/{num_epochs} | "
                  f"Train Loss: {train_metrics['loss']:.4f} ({train_metrics['accuracy']:.2f}%) | "
                  f"Val Loss: {val_metrics['loss']:.4f} ({val_metrics['accuracy']:.2f}%) | "
                  f"LR: {current_lr:.6f}")

            # Checkpoint on validation-loss improvement
            if val_metrics['loss'] < best_val_loss:
                best_val_loss = val_metrics['loss']
                best_epoch = epoch + 1
                patience_counter = 0

                # Save checkpoint; fix: ensure the target directory exists
                checkpoint_path = project_root / 'models' / 'best_model_mlflow.pt'
                checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_loss': train_metrics['loss'],
                    'val_loss': val_metrics['loss'],
                    'val_accuracy': val_metrics['accuracy'],
                }, checkpoint_path)

                print(f"  → New best model! (Val Loss: {best_val_loss:.4f})")

                # Register each improved model as a new registry version
                mlflow.pytorch.log_model(
                    model,
                    "model",
                    registered_model_name="mnist-cnn-baseline"
                )
            else:
                patience_counter += 1

            # Early stopping once validation loss stalls
            if patience_counter >= patience:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                mlflow.log_param("early_stopped", True)
                mlflow.log_param("early_stop_epoch", epoch + 1)
                break

        print("-" * 70)
        print(f"\nTraining complete!")
        print(f"Best epoch: {best_epoch} (Val Loss: {best_val_loss:.4f})")

        # Log best/final summary metrics; fix: guard the [-1] lookups so a
        # zero-epoch run does not raise IndexError
        mlflow.log_metrics({
            'best_epoch': best_epoch,
            'best_val_loss': best_val_loss
        })
        if history['train_loss']:
            mlflow.log_metrics({
                'final_train_loss': history['train_loss'][-1],
                'final_val_loss': history['val_loss'][-1]
            })

        # Evaluate on test set
        print("\nEvaluating on test set...")
        test_metrics = evaluate_model(model, test_loader, device)

        test_accuracy = test_metrics['accuracy']
        test_report = test_metrics['classification_report']

        # Macro averages summarize the per-class precision/recall/F1
        test_precision = test_report['macro avg']['precision']
        test_recall = test_report['macro avg']['recall']
        test_f1_score = test_report['macro avg']['f1-score']

        print(f"Test Accuracy: {test_accuracy:.2f}%")
        print(f"Test Precision: {test_precision:.4f}")
        print(f"Test Recall: {test_recall:.4f}")
        print(f"Test F1-Score: {test_f1_score:.4f}")

        # Log test metrics to MLflow
        mlflow.log_metrics({
            'test_accuracy': test_accuracy,
            'test_precision': test_precision,
            'test_recall': test_recall,
            'test_f1_score': test_f1_score
        })

        # Save and log artifacts
        print("\nSaving artifacts...")

        # Save history; fix: parents=True so missing ancestors don't fail
        history_path = project_root / 'results' / 'mlflow_training_history.json'
        history_path.parent.mkdir(parents=True, exist_ok=True)
        save_training_history(history, history_path)
        log_artifact_path(str(history_path))

        # Save test metrics alongside the full report and confusion matrix
        metrics_to_save = {
            'test_accuracy': test_accuracy,
            'test_precision': test_precision,
            'test_recall': test_recall,
            'test_f1_score': test_f1_score,
            'classification_report': test_report,
            'confusion_matrix': test_metrics['confusion_matrix'].tolist()
        }
        metrics_path = project_root / 'results' / 'mlflow_test_metrics.json'
        with open(metrics_path, 'w') as f:
            json.dump(metrics_to_save, f, indent=2)
        log_artifact_path(str(metrics_path))

        # Log the best checkpoint file itself
        log_artifact_path(str(project_root / 'models' / 'best_model_mlflow.pt'))

        # Log confusion matrix as JSON (one entry per true-label row)
        conf_matrix_dict = {
            f"row_{i}": row.tolist()
            for i, row in enumerate(test_metrics['confusion_matrix'])
        }
        mlflow.log_dict(conf_matrix_dict, "confusion_matrix.json")

        # Log classification report
        mlflow.log_dict(test_report, "classification_report.json")

        print(f"\n✓ All artifacts logged to MLflow")
        print(f"View results: mlflow ui --backend-store-uri file:./mlruns")

        return history
279
+
280
+
281
def main():
    """CLI entry point: parse args, prepare MNIST data, train with MLflow."""
    parser = argparse.ArgumentParser(description='Train MNIST CNN with MLflow tracking')
    parser.add_argument('--epochs', type=int, default=20, help='Number of epochs (default: 20)')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')
    parser.add_argument('--batch-size', type=int, default=64, help='Batch size (default: 64)')
    parser.add_argument('--augment', action='store_true', help='Use data augmentation')
    parser.add_argument('--run-name', type=str, default=None, help='MLflow run name')
    parser.add_argument('--seed', type=int, default=42, help='Random seed (default: 42)')

    args = parser.parse_args()

    # Seed every RNG we touch for reproducibility
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # Assemble the run configuration (key order is preserved in the printout)
    config = {
        'num_epochs': args.epochs,
        'learning_rate': args.lr,
        'batch_size': args.batch_size,
        'augmentation': args.augment,
        'random_seed': args.seed,
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'optimizer': 'Adam',
        'scheduler': 'ReduceLROnPlateau',
        'early_stopping_patience': 5
    }

    print("Training Configuration:")
    print(json.dumps(config, indent=2))

    # Load the raw MNIST IDX files
    print("\nLoading MNIST data...")
    raw_dir = project_root / 'data' / 'raw'
    mnist = MnistDataloader(
        training_images_filepath=str(raw_dir / 'train-images.idx3-ubyte'),
        training_labels_filepath=str(raw_dir / 'train-labels.idx1-ubyte'),
        test_images_filepath=str(raw_dir / 't10k-images.idx3-ubyte'),
        test_labels_filepath=str(raw_dir / 't10k-labels.idx1-ubyte')
    )
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Carve a validation split out of the training set
    (x_tr, y_tr), (x_val, y_val) = split_train_val(
        x_train, y_train, val_split=0.15, random_seed=args.seed
    )

    # Datasets — augmentation (if any) is applied to training data only
    train_transform = get_train_augmentation() if args.augment else None
    train_dataset = MnistDataset(x_tr, y_tr, transform=train_transform)
    val_dataset = MnistDataset(x_val, y_val, transform=None)
    test_dataset = MnistDataset(x_test, y_test, transform=None)

    # Data loaders
    train_loader, val_loader = create_dataloaders(
        train_dataset, val_dataset, batch_size=args.batch_size, num_workers=2
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=2
    )

    print(f"Train: {len(train_loader.dataset)} samples")
    print(f"Val: {len(val_loader.dataset)} samples")
    print(f"Test: {len(test_loader.dataset)} samples")

    # Build the model and run the tracked training loop
    model = BaselineCNN().to(config['device'])
    train_with_mlflow(
        model, train_loader, val_loader, test_loader,
        config, run_name=args.run_name
    )

    print("\n" + "="*70)
    print("Training complete! View MLflow dashboard:")
    print("  ./scripts/launch_mlflow_ui.sh")
    print("  or: mlflow ui --backend-store-uri file:./mlruns")
    print("="*70)
362
+
363
+
364
# Script entry point: run training only when executed directly, not on import.
if __name__ == '__main__':
    main()