Commit e89f383
Parent(s): 9592dff

Implement Vision Transformer model (custom), update the configuration for model selection, and change the data loader outputs (they were partially redundant).

Files changed:
- flaring/MEGS_AI_baseline/SDOAIA_dataloader.py                +1  -1
- flaring/MEGS_AI_baseline/callback.py                         +1  -1
- flaring/MEGS_AI_baseline/config.yaml                         +17 -3
- flaring/MEGS_AI_baseline/models/base_model.py                +1  -1
- flaring/MEGS_AI_baseline/models/vision_transformer_custom.py +170 -0
- flaring/MEGS_AI_baseline/train.py                            +12 -3
flaring/MEGS_AI_baseline/SDOAIA_dataloader.py (CHANGED)

@@ -94,7 +94,7 @@ class AIA_GOESDataset(torch.utils.data.Dataset):
         if self.sxr_transform:
             sxr_val = self.sxr_transform(sxr_val)
 
-        return
+        return aia_img, torch.tensor(sxr_val, dtype=torch.float32)
 
 class AIA_GOESDataModule(LightningDataModule):
     """PyTorch Lightning DataModule for AIA and SXR data."""
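With this change, __getitem__ returns exactly one (image, target) pair. Below is a minimal sanity check of that contract using a stand-in dataset; the 512x512 channels-last shape and the flux value are assumptions for illustration, not values from the commit.

# Stand-in dataset with the same return signature as the updated
# AIA_GOESDataset: (aia_img, float32 SXR scalar). Shapes are assumptions.
import torch
from torch.utils.data import DataLoader, Dataset

class FakeAIAGOES(Dataset):
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        aia_img = torch.zeros(512, 512, 6)  # assumed [H, W, C] layout, 6 AIA channels
        sxr_val = 1e-6                      # placeholder flux value
        return aia_img, torch.tensor(sxr_val, dtype=torch.float32)

imgs, sxr = next(iter(DataLoader(FakeAIAGOES(), batch_size=4)))
print(imgs.shape, sxr.shape)  # torch.Size([4, 512, 512, 6]) torch.Size([4])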
flaring/MEGS_AI_baseline/callback.py (CHANGED)

@@ -33,7 +33,7 @@ class ImagePredictionLogger_SXR(Callback):
         true_sxr = []
         pred_sxr = []
         # print(self.val_samples)
-        for
+        for aia, target in self.data_samples:
             #device = torch.device("cuda:0")
             aia = aia.to(pl_module.device).unsqueeze(0)
             # Get prediction
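The loop now unpacks the (aia, target) pairs that the updated dataset yields. A reduced sketch of the prediction-gathering pattern follows; the function and argument names are stand-ins, not the callback's actual API.

# Reduced sketch of the callback's loop: unpack (aia, target) pairs,
# add a batch dimension, and collect scalar predictions and targets.
import torch

def collect_predictions(model, data_samples, device):
    true_sxr, pred_sxr = [], []
    with torch.no_grad():
        for aia, target in data_samples:
            aia = aia.to(device).unsqueeze(0)  # [H, W, C] -> [1, H, W, C]
            pred = model(aia)
            true_sxr.append(float(target))
            pred_sxr.append(pred.squeeze().item())
    return true_sxr, pred_sxr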
flaring/MEGS_AI_baseline/config.yaml (CHANGED)

@@ -4,9 +4,11 @@ base_data_dir: "/mnt/data/ML-Ready/flares_event_dir" # Change this line for different datasets
 base_checkpoint_dir: "/mnt/data/ML-Ready/flares_event_dir" # Change this line for different datasets
 
 # Model configuration
+selected_model: "ViT" # Options: "cnn", "vit",
+
 model:
   architecture:
-    "
+    "cnn"
   seed:
     42
   lr:

@@ -20,6 +22,18 @@ model:
   batch_size:
     64
 
+vit:
+  embed_dim: 512
+  num_channels: 6 # AIA has 6 channels
+  num_classes: 1 # Regression task, predicting SXR flux
+  patch_size: 16
+  num_patches: 262144
+  hidden_dim: 512
+  num_heads: 8
+  num_layers: 6
+  dropout: 0.1
+  lr: .00001
+
 # Data paths (automatically constructed from base directories)
 data:
   aia_dir:

@@ -33,11 +47,11 @@ data:
 
 wandb:
   entity: jayantbiradar619-university-of-arizona # Use your exact W&B username
-  project: MEGS-AI
+  project: MEGS-AI ViT Testing Griffin
   job_type: training
   tags:
     - aia
     - sxr
     - regression
-  wb_name: flaring-
+  wb_name: flaring-vit-lr-scheduler
   notes: Regression from AIA images (6 channels) to GOES SXR flux
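A minimal sketch of how train.py consumes the new keys: `selected_model` picks the branch and the whole `vit` mapping is handed to the model as keyword arguments. Using yaml.safe_load is an assumption about the loader, and the path is illustrative.

# Illustrative config read; train.py accesses config_data['selected_model']
# and config_data['vit'] like this.
import yaml

with open("flaring/MEGS_AI_baseline/config.yaml") as f:
    config_data = yaml.safe_load(f)

print(config_data["selected_model"])    # "ViT"
print(config_data["vit"]["embed_dim"])  # 512
print(config_data["vit"]["lr"])         # 1e-05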
flaring/MEGS_AI_baseline/models/base_model.py (CHANGED)

@@ -36,7 +36,7 @@ class BaseModel(LightningModule):
         return loss
 
     def test_step(self, batch, batch_idx):
-
+        x, target = batch
         pred = self(x)
         loss = self.loss_func(torch.squeeze(pred), target)
         self.log('test_loss', loss)
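The fix restores the batch unpacking the rest of test_step depends on; without it, x and target were undefined. A self-contained illustration of the contract, with a stub prediction standing in for self(x):

# Minimal illustration: test_step receives (x, target) batches, so the
# first line must unpack them before calling the model and the loss.
import torch
import torch.nn.functional as F

def test_step(batch):
    x, target = batch                 # the line this commit adds
    pred = x.mean(dim=(1, 2, 3))      # stub standing in for self(x)
    return F.mse_loss(torch.squeeze(pred), target)

batch = (torch.randn(4, 512, 512, 6), torch.randn(4))
print(test_step(batch))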
flaring/MEGS_AI_baseline/models/vision_transformer_custom.py (ADDED)

@@ -0,0 +1,170 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torch.utils.data as data
+import torchvision
+from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
+from torchvision import transforms
+import pytorch_lightning as pl
+
+
+class ViT(pl.LightningModule):
+    def __init__(self, model_kwargs):
+        super().__init__()
+        self.lr = model_kwargs['lr']
+        self.save_hyperparameters()
+        filtered_kwargs = dict(model_kwargs)
+        filtered_kwargs.pop('lr', None)
+        self.model = VisionTransformer(**filtered_kwargs)
+
+    def forward(self, x):
+        return self.model(x)
+
+    def configure_optimizers(self):
+        optimizer = optim.AdamW(self.parameters(), lr=self.lr)
+        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)
+        return [optimizer], [lr_scheduler]
+
+    def _calculate_loss(self, batch, mode="train"):
+        imgs, sxr = batch
+        preds = self.model(imgs)
+
+        # Change loss function for regression
+        loss = F.huber_loss(torch.squeeze(preds), sxr)  # or F.mse_loss() or F.l1_loss()
+
+        # Change accuracy to a regression metric
+        mae = F.l1_loss(torch.squeeze(preds), sxr)  # Mean Absolute Error
+        # OR use RMSE:
+        # rmse = torch.sqrt(F.mse_loss(preds, labels))
+
+        self.log(f"{mode}_loss", loss)
+        self.log(f"{mode}_mae", mae)  # or f"{mode}_rmse" if using RMSE
+        return loss
+
+    def training_step(self, batch, batch_idx):
+        loss = self._calculate_loss(batch, mode="train")
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        self._calculate_loss(batch, mode="val")
+
+    def test_step(self, batch, batch_idx):
+        self._calculate_loss(batch, mode="test")
+
+
+class VisionTransformer(nn.Module):
+    def __init__(
+        self,
+        embed_dim,
+        hidden_dim,
+        num_channels,
+        num_heads,
+        num_layers,
+        num_classes,
+        patch_size,
+        num_patches,
+        dropout=0.0,
+    ):
+        """Vision Transformer.
+
+        Args:
+            embed_dim: Dimensionality of the input feature vectors to the Transformer
+            hidden_dim: Dimensionality of the hidden layer in the feed-forward networks
+                within the Transformer
+            num_channels: Number of channels of the input (3 for RGB)
+            num_heads: Number of heads to use in the Multi-Head Attention block
+            num_layers: Number of layers to use in the Transformer
+            num_classes: Number of classes to predict
+            patch_size: Number of pixels that the patches have per dimension
+            num_patches: Maximum number of patches an image can have
+            dropout: Amount of dropout to apply in the feed-forward network and
+                on the input encoding
+
+        """
+        super().__init__()
+
+        self.patch_size = patch_size
+
+        # Layers/Networks
+        self.input_layer = nn.Linear(num_channels * (patch_size**2), embed_dim)
+        self.transformer = nn.Sequential(
+            *(AttentionBlock(embed_dim, hidden_dim, num_heads, dropout=dropout) for _ in range(num_layers))
+        )
+        self.mlp_head = nn.Sequential(nn.LayerNorm(embed_dim), nn.Linear(embed_dim, num_classes))
+        self.dropout = nn.Dropout(dropout)
+
+        # Parameters/Embeddings
+        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
+        self.pos_embedding = nn.Parameter(torch.randn(1, 1 + num_patches, embed_dim))
+
+    def forward(self, x):
+        # Preprocess input
+        #x = x[0]
+        x = img_to_patch(x, self.patch_size)
+        B, T, _ = x.shape
+        x = self.input_layer(x)
+
+        # Add CLS token and positional encoding
+        cls_token = self.cls_token.repeat(B, 1, 1)
+        x = torch.cat([cls_token, x], dim=1)
+        x = x + self.pos_embedding[:, : T + 1]
+
+        # Apply Transformer
+        x = self.dropout(x)
+        x = x.transpose(0, 1)
+        x = self.transformer(x)
+
+        # Perform classification prediction
+        cls = x[0]
+        out = self.mlp_head(cls)
+        return out
+
+class AttentionBlock(nn.Module):
+    def __init__(self, embed_dim, hidden_dim, num_heads, dropout=0.0):
+        """Attention Block.
+
+        Args:
+            embed_dim: Dimensionality of input and attention feature vectors
+            hidden_dim: Dimensionality of hidden layer in feed-forward network
+                (usually 2-4x larger than embed_dim)
+            num_heads: Number of heads to use in the Multi-Head Attention block
+            dropout: Amount of dropout to apply in the feed-forward network
+
+        """
+        super().__init__()
+
+        self.layer_norm_1 = nn.LayerNorm(embed_dim)
+        self.attn = nn.MultiheadAttention(embed_dim, num_heads)
+        self.layer_norm_2 = nn.LayerNorm(embed_dim)
+        self.linear = nn.Sequential(
+            nn.Linear(embed_dim, hidden_dim),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_dim, embed_dim),
+            nn.Dropout(dropout),
+        )
+
+    def forward(self, x):
+        inp_x = self.layer_norm_1(x)
+        x = x + self.attn(inp_x, inp_x, inp_x)[0]
+        x = x + self.linear(self.layer_norm_2(x))
+        return x
+
+def img_to_patch(x, patch_size, flatten_channels=True):
+    """
+    Args:
+        x: Tensor representing the image of shape [B, H, W, C] (permuted internally to [B, C, H, W])
+        patch_size: Number of pixels per dimension of the patches (integer)
+        flatten_channels: If True, the patches will be returned in a flattened format
+            as a feature vector instead of an image grid.
+    """
+    x = x.permute(0, 3, 1, 2)
+    B, C, H, W = x.shape
+    x = x.reshape(B, C, H // patch_size, patch_size, W // patch_size, patch_size)
+    x = x.permute(0, 2, 4, 1, 3, 5)  # [B, H', W', C, p_H, p_W]
+    x = x.flatten(1, 2)  # [B, H'*W', C, p_H, p_W]
+    if flatten_channels:
+        x = x.flatten(2, 4)  # [B, H'*W', C*p_H*p_W]
+    return x
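A quick smoke test of the new module, under stated assumptions: channels-last input (img_to_patch permutes [B, H, W, C] to [B, C, H, W]), a small 64x64 image so it runs cheaply, and num_patches reduced from the config's 262144. Note that forward() only reads the first T+1 positional embeddings, so num_patches just has to be at least (H // patch_size) * (W // patch_size).

# Smoke test (assumed sizes): a 64x64, 6-channel input with patch_size 16
# gives T = (64 // 16) ** 2 = 16 patches of dimension 6 * 16 * 16 = 1536.
import torch
from models.vision_transformer_custom import ViT  # run from MEGS_AI_baseline/

model_kwargs = dict(
    embed_dim=512, hidden_dim=512, num_channels=6, num_heads=8,
    num_layers=6, num_classes=1, patch_size=16,
    num_patches=16,   # config uses 262144; only the first T+1 positions are read
    dropout=0.1, lr=1e-5,
)
model = ViT(model_kwargs)
x = torch.randn(2, 64, 64, 6)  # [B, H, W, C], channels-last
out = model(x)
print(out.shape)               # torch.Size([2, 1]): one SXR value per image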
flaring/MEGS_AI_baseline/train.py (CHANGED)

@@ -14,6 +14,7 @@ from pytorch_lightning.loggers import WandbLogger
 from pytorch_lightning.callbacks import ModelCheckpoint
 from torch.nn import MSELoss
 from SDOAIA_dataloader import AIA_GOESDataModule
+from models.vision_transformer_custom import ViT
 from models.linear_and_hybrid import LinearIrradianceModel, HybridIrradianceModel
 from callback import ImagePredictionLogger_SXR
 from pytorch_lightning.callbacks import Callback

@@ -166,14 +167,14 @@ pth_callback = PTHCheckpointCallback(
 )
 
 # Model
-if config_data['model']['architecture'] == 'linear':
+if config_data['selected_model'] == 'linear':
     model = LinearIrradianceModel(
         d_input=6,
         d_output=1,
         lr= config_data['model']['lr'],
         loss_func=MSELoss()
     )
-elif config_data['model']['architecture'] == 'hybrid':
+elif config_data['selected_model'] == 'hybrid':
     model = HybridIrradianceModel(
         d_input=6,
         d_output=1,

@@ -182,8 +183,16 @@ elif config_data['model']['architecture'] == 'hybrid':
         cnn_dp=config_data['model']['cnn_dp'],
         lr=config_data['model']['lr'],
     )
+elif config_data['selected_model'] == 'ViT':
+    print("Using ViT")
+    # model = ViT(embed_dim=config_data['vit']['embed_dim'], hidden_dim=config_data['vit']['hidden_dim'],
+    #             num_channels=config_data['vit']['num_channels'], num_heads=config_data['vit']['num_heads'],
+    #             num_layers=config_data['vit']['num_layers'], num_classes=config_data['vit']['num_classes'],
+    #             patch_size=config_data['vit']['patch_size'], num_patches=config_data['vit']['num_patches'],
+    #             dropout=config_data['vit']['dropout'], lr=config_data['vit']['lr'])
+    model = ViT(model_kwargs=config_data['vit'])
 else:
-    raise NotImplementedError(f"Architecture {config_data['model']['architecture']} not supported.")
+    raise NotImplementedError(f"Architecture {config_data['selected_model']} not supported.")
 
 # Trainer
 trainer = Trainer(
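One detail worth noting: the comparison is case-sensitive. config.yaml sets selected_model: "ViT" while its inline comment lists lowercase "vit", and only the exact string 'ViT' reaches the new branch; anything else falls through to NotImplementedError. A hypothetical guard, not in train.py, that normalizes the selector first:

# Hypothetical normalization before dispatch; the stub config keeps it runnable.
config_data = {'selected_model': 'ViT'}

selected = config_data['selected_model'].strip().lower()  # "ViT" -> "vit"
if selected == 'vit':
    print("Using ViT")  # would build ViT(model_kwargs=config_data['vit'])
else:
    raise NotImplementedError(f"Architecture {config_data['selected_model']} not supported.")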