feat: add init model

Files changed (10) hide show

.gitignore +7 -0
README.md +5 -3
mnist_classifier/configs/config.yaml +15 -0
mnist_classifier/data/datamodule.py +27 -0
mnist_classifier/models/mnist_model.py +124 -0
mnist_classifier/train.py +49 -0
mnist_classifier/utils/metrics.py +6 -0
poetry.lock +0 -0
pyproject.toml +1 -1
tests/test_model.py +11 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+__pycache__/
+*.pyc
+.pytest_cache/
+wandb/
+checkpoints/
+*.egg-info/
+dist/

README.md CHANGED Viewed

@@ -1,9 +1,11 @@
 ---
 license: mit
 datasets:
-- ylecun/mnist
 language:
-- en
 ---
-MNIST classifier model for learning transformer fundamentals.

 ---
 license: mit
 datasets:
+  - ylecun/mnist
 language:
+  - en
 ---
+# MNIST classifier
+MNIST classifier model for learning transformer fundamentals.

mnist_classifier/configs/config.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+training:
+  batch_size: 64
+  max_epochs: 10
+  learning_rate: 0.001
+  early_stopping_patience: 5
+model:
+  conv1_channels: 32
+  conv2_channels: 64
+  fc1_size: 128
+  dropout_rate: 0.25
+wandb:
+  project: "mnist-classifier"
+  entity: "bardenha"

mnist_classifier/data/datamodule.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from typing import Dict, Any
+import pytorch_lightning as pl
+from datasets import load_dataset
+from torch.utils.data import DataLoader
+class MNISTDataModule(pl.LightningDataModule):
+    """Lightning data module serving the Hugging Face ``mnist`` dataset.
+
+    Args:
+        config: Parsed configuration mapping. The batch size is read from
+            ``config['training']['batch_size']``.
+
+    Batches produced by the loaders are dicts with the keys
+    ``'pixel_values'`` and ``'label'``.
+    """
+    def __init__(self, config: Dict[str, Any]):
+        super().__init__()
+        self.config = config
+        # The config is a plain dict, so values must be read by key, not by attribute.
+        self.batch_size = config['training']['batch_size']
+        self.dataset = None
+    @staticmethod
+    def _to_model_inputs(batch):
+        """Convert a batch of raw rows into float tensors under ``'pixel_values'``.
+
+        Each image becomes a ``(1, H, W)`` float32 tensor scaled to ``[0, 1]``.
+        NOTE(review): assumes the dataset exposes the columns ``'image'`` and
+        ``'label'`` -- confirm against the dataset card.
+        """
+        # Imported locally so this block does not depend on new file-level imports.
+        import numpy as np
+        import torch
+        pixel_values = [
+            torch.from_numpy(np.asarray(image, dtype=np.float32) / 255.0).unsqueeze(0)
+            for image in batch['image']
+        ]
+        return {'pixel_values': pixel_values, 'label': batch['label']}
+    def setup(self, stage=None):
+        """Load the dataset and attach the on-the-fly tensor conversion."""
+        self.dataset = load_dataset('mnist')
+        self.dataset = self.dataset.with_transform(self._to_model_inputs)
+    def train_dataloader(self):
+        """Return the shuffled loader over the ``'train'`` split."""
+        return DataLoader(
+            self.dataset['train'],
+            batch_size=self.batch_size,
+            shuffle=True
+        )
+    def val_dataloader(self):
+        """Return the loader over the ``'test'`` split, which is used as validation data."""
+        return DataLoader(
+            self.dataset['test'],  # Using test set as validation
+            batch_size=self.batch_size
+        )

mnist_classifier/models/mnist_model.py ADDED Viewed

	@@ -0,0 +1,124 @@

+from typing import Dict, Any
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+import torchmetrics
+import wandb
+# Simple CNN architecture for MNIST
+class MNISTNet(nn.Module):
+    """Two-layer convolutional classifier producing 10 class logits.
+
+    Args:
+        config: Mapping whose ``'model'`` section provides ``conv1_channels``,
+            ``conv2_channels``, ``fc1_size`` and ``dropout_rate``.
+
+    The first linear layer is sized for 5x5 feature maps, which is what a
+    28x28 input yields after two (3x3 conv -> 2x2 max-pool) stages:
+    28 -> 26 -> 13 -> 11 -> 5.
+    """
+    def __init__(self, config: Dict[str, Any]):
+        super().__init__()
+        model_cfg = config['model']
+        self.conv1 = nn.Conv2d(1, model_cfg['conv1_channels'], kernel_size=3)
+        self.conv2 = nn.Conv2d(model_cfg['conv1_channels'],
+                               model_cfg['conv2_channels'], kernel_size=3)
+        self.pool = nn.MaxPool2d(2)
+        self.dropout = nn.Dropout(model_cfg['dropout_rate'])
+        self.fc1 = nn.Linear(model_cfg['conv2_channels'] * 5 * 5,
+                             model_cfg['fc1_size'])
+        self.fc2 = nn.Linear(model_cfg['fc1_size'], 10)
+    def forward(self, x):
+        """Map a ``(batch, 1, 28, 28)`` tensor to ``(batch, 10)`` logits."""
+        # Pool after BOTH convolutions; pooling only once leaves 12x12 maps,
+        # which do not match the 5x5 size that fc1 was built for.
+        x = self.pool(torch.relu(self.conv1(x)))
+        x = self.pool(torch.relu(self.conv2(x)))
+        x = self.dropout(x)
+        # Flatten everything except the batch dimension instead of hard-coding
+        # the channel count, so configured channel sizes are respected.
+        x = torch.flatten(x, 1)
+        x = torch.relu(self.fc1(x))
+        x = self.fc2(x)
+        return x
+class MNISTModule(pl.LightningModule):
+    """LightningModule wrapping ``MNISTNet`` with metric and W&B logging.
+
+    Args:
+        config: Parsed configuration mapping. ``config['model']`` is forwarded
+            to ``MNISTNet``; ``config['training']['learning_rate']`` sets the
+            optimizer learning rate (falls back to ``1e-3`` when absent).
+
+    Batches are expected to be dicts with ``'pixel_values'`` and ``'label'``.
+    NOTE(review): the image and confusion-matrix logging call
+    ``self.logger.experiment.log`` and therefore presumably require a W&B
+    logger -- confirm before attaching a different logger type.
+    """
+    def __init__(self, config: Dict[str, Any]):
+        super().__init__()
+        self.config = config
+        self.model = MNISTNet(config)
+        # Stateless loss, created once instead of on every step.
+        self.criterion = nn.CrossEntropyLoss()
+        # Honour the configured learning rate rather than a hard-coded value.
+        self.learning_rate = config.get('training', {}).get('learning_rate', 1e-3)
+        # Initialize metrics
+        self.train_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=10)
+        self.val_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=10)
+        self.train_f1 = torchmetrics.F1Score(task='multiclass', num_classes=10)
+        self.val_f1 = torchmetrics.F1Score(task='multiclass', num_classes=10)
+        self.confusion_matrix = torchmetrics.ConfusionMatrix(task='multiclass', num_classes=10)
+    def forward(self, x):
+        """Return the class logits for ``x``."""
+        return self.model(x)
+    def _run_batch(self, batch):
+        # Shared forward pass: returns (inputs, targets, loss, predicted classes).
+        x, y = batch['pixel_values'], batch['label']
+        logits = self(x)
+        loss = self.criterion(logits, y)
+        preds = torch.argmax(logits, dim=1)
+        return x, y, loss, preds
+    def training_step(self, batch, batch_idx):
+        """Compute the training loss for one batch and log loss, accuracy and F1."""
+        _, y, loss, preds = self._run_batch(batch)
+        self.train_accuracy(preds, y)
+        self.train_f1(preds, y)
+        # Log metrics
+        self.log('train_loss', loss, prog_bar=True)
+        self.log('train_accuracy', self.train_accuracy, prog_bar=True)
+        self.log('train_f1', self.train_f1, prog_bar=True)
+        return loss
+    def validation_step(self, batch, batch_idx):
+        """Evaluate one validation batch, update metrics and log sample images."""
+        x, y, loss, preds = self._run_batch(batch)
+        self.val_accuracy(preds, y)
+        self.val_f1(preds, y)
+        self.confusion_matrix(preds, y)
+        # Log metrics
+        self.log('val_loss', loss, prog_bar=True)
+        self.log('val_accuracy', self.val_accuracy, prog_bar=True)
+        self.log('val_f1', self.val_f1, prog_bar=True)
+        # Log sample predictions periodically
+        if batch_idx == 0:  # First batch of each epoch
+            self._log_sample_predictions(x, y, preds)
+    def _log_sample_predictions(self, images, labels, predictions):
+        # Log up to 16 images captioned with their true and predicted labels.
+        if self.logger:
+            n_samples = min(16, len(images))
+            self.logger.experiment.log({
+                "sample_predictions": [
+                    wandb.Image(
+                        images[i],
+                        caption=f"True: {labels[i].item()} Pred: {predictions[i].item()}"
+                    )
+                    for i in range(n_samples)
+                ]
+            })
+    def on_validation_epoch_end(self):
+        """Log the accumulated confusion matrix, then reset it for the next epoch."""
+        conf_mat = self.confusion_matrix.compute().cpu()
+        self.confusion_matrix.reset()
+        # Nothing to log without a logger or without any accumulated samples.
+        if not self.logger or conf_mat.sum() == 0:
+            return
+        # wandb.plot.confusion_matrix expects per-sample label lists, not a
+        # count matrix, so expand each (true, predicted) cell by its count.
+        # Rows index the true class and columns the predicted class.
+        cells = torch.nonzero(conf_mat)
+        counts = conf_mat[cells[:, 0], cells[:, 1]].long()
+        y_true = torch.repeat_interleave(cells[:, 0], counts).tolist()
+        preds = torch.repeat_interleave(cells[:, 1], counts).tolist()
+        self.logger.experiment.log({
+            "confusion_matrix": wandb.plot.confusion_matrix(
+                y_true=y_true,
+                preds=preds,
+                class_names=[str(digit) for digit in range(10)]
+            )
+        })
+    def configure_optimizers(self):
+        """Return Adam plus a ReduceLROnPlateau scheduler that monitors ``val_loss``."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
+        # ``verbose`` is deprecated on this scheduler; the learning rate is
+        # tracked by the trainer's LearningRateMonitor callback instead.
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer, mode='min', factor=0.1, patience=3
+        )
+        return {
+            "optimizer": optimizer,
+            "lr_scheduler": {
+                "scheduler": scheduler,
+                "monitor": "val_loss"
+            }
+        }

mnist_classifier/train.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import pytorch_lightning as pl
+from pytorch_lightning.loggers import WandbLogger
+from pathlib import Path
+from mnist_classifier.models.mnist_model import MNISTModule
+from mnist_classifier.data.datamodule import MNISTDataModule
+from mnist_classifier.utils.metrics import load_config
+def main():
+    """Train the MNIST classifier using the settings in ``configs/config.yaml``.
+
+    Builds a W&B logger, a trainer with checkpointing, early stopping and
+    learning-rate monitoring (all keyed on ``val_loss``), then fits the model.
+    """
+    # Resolve the config next to this file so the script works from any
+    # working directory.
+    config = load_config(Path(__file__).resolve().parent / "configs" / "config.yaml")
+    # Initialize wandb logger
+    wandb_logger = WandbLogger(
+        project=config['wandb']['project'],
+        entity=config['wandb']['entity']
+    )
+    # Initialize trainer
+    trainer = pl.Trainer(
+        max_epochs=config['training']['max_epochs'],
+        # 'auto' with a single device uses a GPU when one is available and
+        # falls back to CPU otherwise, instead of crashing without CUDA.
+        accelerator='auto',
+        devices=1,
+        logger=wandb_logger,
+        callbacks=[
+            pl.callbacks.ModelCheckpoint(
+                dirpath='checkpoints',
+                filename='mnist-{epoch:02d}-{val_loss:.2f}',
+                save_top_k=3,
+                monitor='val_loss',
+                mode='min'
+            ),
+            pl.callbacks.EarlyStopping(
+                monitor='val_loss',
+                patience=config['training']['early_stopping_patience'],
+                mode='min'
+            ),
+            pl.callbacks.LearningRateMonitor(logging_interval='epoch')
+        ]
+    )
+    # Initialize data module and model
+    data_module = MNISTDataModule(config)
+    model = MNISTModule(config)
+    # Train
+    trainer.fit(model, data_module)
+if __name__ == "__main__":
+    main()

mnist_classifier/utils/metrics.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import yaml
+from pathlib import Path
+def load_config(config_path: str | Path) -> dict:
+    """Parse a YAML configuration file into a dictionary.
+
+    Args:
+        config_path: Location of the YAML file, as a string or ``Path``.
+
+    Returns:
+        The parsed mapping; an empty dict when the file contains no document.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        yaml.YAMLError: If the content is not valid YAML.
+    """
+    # Explicit encoding keeps parsing independent of the platform default.
+    with open(config_path, 'r', encoding='utf-8') as f:
+        config = yaml.safe_load(f)
+    # safe_load returns None for an empty file; callers index the result.
+    return {} if config is None else config

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml CHANGED Viewed

@@ -8,7 +8,7 @@ readme = "README.md"
 [tool.poetry.dependencies]
 python = "^3.10"
-torch = "^2.0.0"
 torchvision = "^0.15.0"
 pytorch-lightning = "^2.0.0"
 wandb = "^0.15.0"

 [tool.poetry.dependencies]
 python = "^3.10"
+torch = "^2.4.0"
 torchvision = "^0.15.0"
 pytorch-lightning = "^2.0.0"
 wandb = "^0.15.0"

tests/test_model.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import pytest
+import torch
+from mnist_classifier.models.mnist_model import MNISTNet
+from mnist_classifier.utils.metrics import load_config
+def test_mnist_net_forward():
+    """MNISTNet maps a batch of 28x28 images to one 10-way logit row per image."""
+    # Imported locally; resolves the config relative to this file so the test
+    # does not depend on the directory pytest is launched from.
+    from pathlib import Path
+    config_path = (
+        Path(__file__).resolve().parents[1]
+        / 'mnist_classifier' / 'configs' / 'config.yaml'
+    )
+    config = load_config(config_path)
+    model = MNISTNet(config)
+    # Disable dropout so the forward pass is deterministic.
+    model.eval()
+    # A batch larger than one also verifies that the batch dimension survives
+    # the flatten step.
+    batch_size = 4
+    x = torch.randn(batch_size, 1, 28, 28)
+    with torch.no_grad():
+        output = model(x)
+    assert output.shape == (batch_size, 10)