feat: update training loop with learning rate scheduler and progress bar enhancements

Files changed (3) hide show

mnist_classifier/train.py +12 -3
models/mnist_model_lr0.001_bs32_ep10.pth +3 -0
models/mnist_model_lr0.001_bs64_ep10.pth +1 -1

mnist_classifier/train.py CHANGED Viewed

@@ -10,6 +10,7 @@ import os
 import random
 import numpy as np
 from tqdm import tqdm
 def set_seed(seed):
     torch.manual_seed(seed)
@@ -23,7 +24,7 @@ def set_seed(seed):
 def train():
     # Training loop
     learning_rate = 0.001
-    batch_size = 128
     epochs = 10
     # Set seed for reproducibility
@@ -34,7 +35,7 @@ def train():
     print(f"Using device: {device}")
     # Initialize tensorboard
-    log_dir = 'runs/mnist_experiment_' + datetime.now().strftime('%Y%m%d-%H%M%S')
     writer = SummaryWriter(log_dir)
     # Setup data
@@ -44,6 +45,7 @@ def train():
     # Initialize model, optimizer, and loss function
     model = MNISTModel().to(device)
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
     criterion = nn.CrossEntropyLoss()
@@ -53,6 +55,7 @@ def train():
         running_loss = 0.0
         correct = 0
         total = 0
         with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
             for batch_idx, batch in enumerate(train_loader):
@@ -76,11 +79,13 @@ def train():
                 total += labels.size(0)
                 correct += predicted.eq(labels).sum().item()
                 # Update tqdm progress bar
                 pbar.set_postfix({
                     'loss': running_loss / (batch_idx + 1),
                     'accuracy': 100. * correct / total,
-                    'step': batch_idx + 1
                 })
                 pbar.update(1)
@@ -93,6 +98,10 @@ def train():
                                       epoch * len(train_loader) + batch_idx)
                     running_loss = 0.0
         # Validation phase
         model.eval()
         test_loss = 0

 import random
 import numpy as np
 from tqdm import tqdm
+from torch.optim.lr_scheduler import StepLR
 def set_seed(seed):
     torch.manual_seed(seed)
 def train():
     # Training loop
     learning_rate = 0.001
+    batch_size = 64
     epochs = 10
     # Set seed for reproducibility
     print(f"Using device: {device}")
     # Initialize tensorboard
+    log_dir = 'runs/mnist_experiment_' + f"lr{learning_rate}_bs{batch_size}_ep{epochs}_" + datetime.now().strftime('%Y%m%d-%H%M%S')
     writer = SummaryWriter(log_dir)
     # Setup data
     # Initialize model, optimizer, and loss function
     model = MNISTModel().to(device)
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+    scheduler = StepLR(optimizer, step_size=2, gamma=0.5)  # Halve the LR (gamma=0.5) every 2 epochs
     criterion = nn.CrossEntropyLoss()
         running_loss = 0.0
         correct = 0
         total = 0
+        current_lr = optimizer.param_groups[0]['lr']  # Get current learning rate
         with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as pbar:
             for batch_idx, batch in enumerate(train_loader):
                 total += labels.size(0)
                 correct += predicted.eq(labels).sum().item()
                 # Update tqdm progress bar
                 pbar.set_postfix({
                     'loss': running_loss / (batch_idx + 1),
                     'accuracy': 100. * correct / total,
+                    'step': batch_idx + 1,
+                    'lr': current_lr,
                 })
                 pbar.update(1)
                                       epoch * len(train_loader) + batch_idx)
                     running_loss = 0.0
+            writer.add_scalar('learning rate', current_lr, epoch)
+        scheduler.step()  # Update the learning rate
         # Validation phase
         model.eval()
         test_loss = 0

models/mnist_model_lr0.001_bs32_ep10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d77abc86d2b18d3a3b4b7a560186ac1695d0c2e8e708028dd2b65211cefde6ae
+size 4803144

models/mnist_model_lr0.001_bs64_ep10.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1b474acf8a447dea4e3aaaf0371346ee7a7055d1c716fb371c059b9a1799bab
 size 4803144

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7ed0e38b2b8663379c3655a73c1e6f7e4165bf7d9f792491c7cc9fa99e1e97f
 size 4803144