griffingoodwin04 committed on
Commit
3720287
·
1 Parent(s): 5493436

added MEGS_AI baseline

Browse files
flaring/MEGS_AI_baseline/SDOAIA_dataloader.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DataLoader, Subset
3
+ import numpy as np
4
+ from pathlib import Path
5
+ from scipy.ndimage import zoom
6
+ import torchvision.transforms as T
7
+ from pytorch_lightning import LightningDataModule
8
+ import glob
9
+ import os
10
+
11
class AIA_GOESDataset(torch.utils.data.Dataset):
    """Paired AIA-image / GOES-SXR dataset for irradiance regression.

    Pairs are matched by filename stem (timestamp): for each
    ``<timestamp>.npy`` cube in ``aia_dir`` there must be a matching
    ``<timestamp>.npy`` scalar in ``sxr_dir``.  Each item is returned as
    ``((image_HWC, sxr), sxr)`` where the SXR value doubles as auxiliary
    input and regression target.
    """

    def __init__(self, aia_dir, sxr_dir, transform=None, sxr_transform=None, target_size=(512, 512)):
        self.aia_dir = Path(aia_dir).resolve()
        self.sxr_dir = Path(sxr_dir).resolve()
        self.transform = transform
        self.sxr_transform = sxr_transform
        self.target_size = target_size
        self.samples = []

        # Fail fast on bad paths rather than on the first __getitem__.
        if not self.aia_dir.is_dir():
            raise FileNotFoundError(f"AIA directory not found: {self.aia_dir}")
        if not self.sxr_dir.is_dir():
            raise FileNotFoundError(f"SXR directory not found: {self.sxr_dir}")

        # Keep only timestamps for which both modalities exist.
        for name in sorted(glob.glob(str(self.aia_dir / "*.npy"))):
            stamp = Path(name).stem
            if (self.sxr_dir / f"{stamp}.npy").exists():
                self.samples.append(stamp)

        if len(self.samples) == 0:
            raise ValueError("No valid sample pairs found")

    def __len__(self):
        """Number of matched AIA/SXR pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load one pair; returns ``((image, sxr_tensor), target_tensor)``."""
        stamp = self.samples[idx]

        # AIA cube is expected channel-first: (6, H, W).
        cube = np.load(self.aia_dir / f"{stamp}.npy")
        if cube.shape[0] != 6:
            raise ValueError(f"AIA image has {cube.shape[0]} channels, expected 6")

        # Spline-resample spatial dims to target_size when they differ.
        height, width = cube.shape[1], cube.shape[2]
        if (height, width) != self.target_size:
            cube = zoom(cube, (1,
                               self.target_size[0] / height,
                               self.target_size[1] / width))

        image = torch.tensor(cube, dtype=torch.float32)  # (6, H, W)

        # Transforms operate on the channel-first tensor.
        if self.transform:
            image = self.transform(image)

        # Model consumes channel-last input: (H, W, 6).
        image = image.permute(1, 2, 0)

        # SXR file holds a single scalar value.
        raw = np.load(self.sxr_dir / f"{stamp}.npy")
        if raw.size != 1:
            raise ValueError(f"SXR value has size {raw.size}, expected scalar")
        value = float(np.atleast_1d(raw).flatten()[0])
        if self.sxr_transform:
            value = self.sxr_transform(value)

        sxr_tensor = torch.tensor(value, dtype=torch.float32)
        target = torch.tensor(value, dtype=torch.float32)
        return (image, sxr_tensor), target
78
+
79
class AIA_GOESDataModule(LightningDataModule):
    """PyTorch Lightning DataModule for AIA and SXR data.

    Splits the matched AIA/SXR pairs into train/val/test subsets and serves
    them with the appropriate transforms.  The SXR target is normalized as
    ``(log10(x + 1e-8) - sxr_norm[0]) / sxr_norm[1]``.

    Args:
        aia_dir / sxr_dir: directories of paired ``.npy`` files.
        sxr_norm: (mean, std) of the log10 SXR distribution.
        batch_size, num_workers: DataLoader settings.
        train_transforms / val_transforms: image transforms per split.
        val_split / test_split: fractions of the dataset held out.
        seed: RNG seed for the split permutation (see note in ``setup``).
    """

    def __init__(self, aia_dir, sxr_dir, sxr_norm, batch_size=16, num_workers=4,
                 train_transforms=None, val_transforms=None, val_split=0.2, test_split=0.1,
                 seed=42):
        super().__init__()
        self.aia_dir = aia_dir
        self.sxr_dir = sxr_dir
        self.sxr_norm = sxr_norm
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.train_transforms = train_transforms
        self.val_transforms = val_transforms
        self.val_split = val_split
        self.test_split = test_split
        self.seed = seed

    def _sxr_transform(self):
        # log10 + z-score normalization of the scalar SXR value.
        return T.Lambda(lambda x: (np.log10(x + 1e-8) - self.sxr_norm[0]) / self.sxr_norm[1])

    def _make_dataset(self, transform):
        # Single construction point instead of four near-identical copies.
        return AIA_GOESDataset(
            aia_dir=self.aia_dir,
            sxr_dir=self.sxr_dir,
            transform=transform,
            sxr_transform=self._sxr_transform(),
            target_size=(512, 512)
        )

    def setup(self, stage=None):
        # Base instance used only to count available sample pairs.
        total_size = len(self._make_dataset(None))
        test_size = int(self.test_split * total_size)
        val_size = int(self.val_split * total_size)
        train_size = total_size - val_size - test_size

        # BUGFIX: the permutation must be seeded.  Lightning may call setup()
        # once per stage (fit/test); an unseeded shuffle produced different
        # splits on each call, leaking training samples into validation/test.
        indices = np.random.default_rng(self.seed).permutation(total_size)
        train_idx = indices[:train_size]
        val_idx = indices[train_size:train_size + val_size]
        test_idx = indices[train_size + val_size:]

        # Separate dataset instances so each split gets its own transform.
        self.train_ds = Subset(self._make_dataset(self.train_transforms), train_idx)
        self.valid_ds = Subset(self._make_dataset(self.val_transforms), val_idx)
        self.test_ds = Subset(self._make_dataset(self.val_transforms), test_idx)

    def train_dataloader(self):
        return DataLoader(self.train_ds, batch_size=self.batch_size,
                          shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.valid_ds, batch_size=self.batch_size,
                          shuffle=False, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.test_ds, batch_size=self.batch_size,
                          shuffle=False, num_workers=self.num_workers)
flaring/MEGS_AI_baseline/__init__.py ADDED
File without changes
flaring/MEGS_AI_baseline/base_model.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ from pytorch_lightning import LightningModule
5
+
6
class BaseModel(LightningModule):
    """Lightning wrapper shared by all irradiance models.

    Wraps an arbitrary ``model``, optimizes with Adam, and computes the
    loss in physical units: ``eve_norm`` is a (mean, std) pair used to undo
    the normalization applied by the dataloader before the loss is taken.
    """

    def __init__(self, model, eve_norm, loss_func, lr):
        super().__init__()
        self.model = model
        self.eve_norm = eve_norm  # Used for SXR normalization (mean, std)
        self.loss_func = loss_func
        self.lr = lr

    def forward(self, x, sxr=None):
        # sxr is accepted for API symmetry; the base model ignores it.
        return self.model(x)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def _denormalize(self, t):
        # Invert the (value - mean) / std normalization.
        return t * self.eve_norm[1] + self.eve_norm[0]

    def _shared_step(self, batch, log_name):
        # Single implementation for train/valid/test instead of three
        # verbatim copies that could drift apart.
        (x, sxr), target = batch
        pred = self._denormalize(self(x, sxr))
        target = self._denormalize(target)
        loss = self.loss_func(pred, target)
        self.log(log_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train_loss')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'valid_loss')

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, 'test_loss')
flaring/MEGS_AI_baseline/chopped_alexnet.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from torch import nn
3
+ from torch.nn import HuberLoss
4
+ from irradiance.models.base_model import BaseModel
5
+
6
+
7
class ChoppedAlexnet(BaseModel):
    """Truncated AlexNet-style CNN regressor.

    Keeps the first ``numLayers`` convolutional stages, global-average-pools
    the feature maps, and maps them to ``d_output`` values with a
    dropout + linear head.
    """

    def __init__(self, d_input, d_output, eve_norm, loss_func=HuberLoss(), numLayers=3, dropout=0, lr=1e-4):
        # BUGFIX: the original assigned nn.Module attributes (self.loss_func,
        # self.features, self.pool) BEFORE super().__init__(), which raises
        # "cannot assign module before Module.__init__() call" in PyTorch.
        # Build everything locally first, init the base class, then attach.
        # It also hard-coded HuberLoss(), silently discarding the loss_func
        # argument; the passed loss_func is now honored.
        layers, channelSize = self.getLayers(numLayers, d_input)
        head = nn.Sequential(nn.Dropout(p=dropout),
                             nn.Linear(channelSize, d_output))
        super().__init__(model=head, eve_norm=eve_norm, loss_func=loss_func, lr=lr)

        self.numLayers = numLayers
        self.n_channels = d_input
        self.outSize = d_output
        self.features = nn.Sequential(*layers)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # Kaiming init for convs, identity-style init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def getLayers(self, numLayers, n_channels):
        """Return (layers, out_channels) for the first numLayers conv stages."""
        layers = [nn.Conv2d(n_channels, 64, kernel_size=11, stride=4, padding=2), nn.BatchNorm2d(64), nn.ReLU(inplace=True), ]
        if numLayers == 1:
            return (layers, 64)
        layers += [nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(64, 192, kernel_size=5, padding=2), nn.BatchNorm2d(192), nn.ReLU(inplace=True), ]
        if numLayers == 2:
            return (layers, 192)
        layers += [nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(192, 384, kernel_size=3, padding=1), nn.BatchNorm2d(384), nn.ReLU(inplace=True)]
        if numLayers == 3:
            return (layers, 384)
        # Any other value falls through to the deepest (4-stage) variant.
        layers += [nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.BatchNorm2d(256)]
        return (layers, 256)

    def forward(self, x):
        """x: (B, C, H, W) -> (B, d_output)."""
        x = self.features(x)
        x = self.pool(x).view(x.size(0), -1)  # global average pool -> (B, C)
        x = self.model(x)
        return x
flaring/MEGS_AI_baseline/efficientnet.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torchvision
2
+ from torch import nn
3
+ from torch.nn import HuberLoss
4
+ from irradiance.models.base_model import BaseModel
5
+
6
+
7
class EfficientnetIrradiance(BaseModel):
    """EfficientNet backbone adapted for irradiance regression.

    Loads a pretrained torchvision EfficientNet, swaps the stem conv to
    accept ``d_input`` channels, and replaces the classifier with a
    dropout + linear regression head of size ``d_output``.
    """

    def __init__(self, d_input, d_output, eve_norm, loss_func=HuberLoss(), model='efficientnet_b0', dp=0.75, lr=1e-4):
        # BUGFIX: the original elif chain silently fell through on an unknown
        # model name, leaving `model` as a str and crashing later with a
        # confusing AttributeError.  Validate and dispatch via getattr instead.
        supported = {f'efficientnet_b{i}' for i in range(8)}
        if model not in supported:
            raise ValueError(f"Unsupported model name: {model!r}; expected one of {sorted(supported)}")
        net = getattr(torchvision.models, model)(pretrained=True)

        # Replace the stem conv so the network accepts d_input channels.
        conv1_out = net.features[0][0].out_channels
        net.features[0][0] = nn.Conv2d(d_input, conv1_out, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

        # Swap the classifier for a dropout + linear regression head.
        lin_in = net.classifier[1].in_features
        # consider adding average pool of full image(s)
        net.classifier = nn.Sequential(nn.Dropout(p=dp, inplace=True),
                                       nn.Linear(in_features=lin_in, out_features=d_output, bias=True))

        # Apply the same dropout probability to every dropout in the backbone.
        # TODO: other dropout values?
        for m in net.modules():
            if m.__class__.__name__.startswith('Dropout'):
                m.p = dp

        super().__init__(model=net, eve_norm=eve_norm, loss_func=loss_func, lr=lr)

    def forward(self, x):
        """x: (B, d_input, H, W) -> (B, d_output)."""
        return self.model(x)
flaring/MEGS_AI_baseline/kan_success.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Li, Ziyao
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import torch
17
+ import torch.nn as nn
18
+ import torch.nn.functional as F
19
+ from typing import *
20
+ from torch.nn import HuberLoss
21
+ from irradiance.models.base_model import BaseModel
22
+
23
+
24
class SplineLinear(nn.Linear):
    """Bias-free linear layer with near-zero truncated-normal init.

    Mixes RBF spline basis activations; ``init_scale`` is the std of the
    weight initialization.
    """

    def __init__(self, in_features: int, out_features: int, init_scale: float = 0.1, **kw) -> None:
        # init_scale must be stashed first: nn.Linear.__init__ invokes
        # reset_parameters(), which reads it.
        self.init_scale = init_scale
        super().__init__(in_features, out_features, bias=False, **kw)

    def reset_parameters(self) -> None:
        nn.init.trunc_normal_(self.weight, mean=0, std=self.init_scale)
31
+
32
+
33
class RadialBasisFunction(nn.Module):
    """Gaussian radial-basis expansion over a fixed 1-D grid.

    Each scalar input x is mapped to ``num_grids`` activations
    ``exp(-((x - g_i) / h) ** 2)``, where g_i are evenly spaced centres in
    [grid_min, grid_max] and h is the shared bandwidth (``denominator``).
    """

    def __init__(
        self,
        grid_min: float = -2.,
        grid_max: float = 2.,
        num_grids: int = 8,
        denominator: float = None,  # larger denominators lead to smoother basis
    ):
        super().__init__()
        self.grid_min = grid_min
        self.grid_max = grid_max
        self.num_grids = num_grids
        centres = torch.linspace(grid_min, grid_max, num_grids)
        # Frozen Parameter: follows .to(device)/.state_dict() but is never trained.
        self.grid = torch.nn.Parameter(centres, requires_grad=False)
        # Default bandwidth equals the spacing between adjacent centres.
        self.denominator = denominator or (grid_max - grid_min) / (num_grids - 1)

    def forward(self, x):
        # Broadcasts: output shape is x.shape + (num_grids,).
        scaled = (x[..., None] - self.grid) / self.denominator
        return torch.exp(-scaled ** 2)
51
+
52
class FastKANLayer(nn.Module):
    """One FastKAN layer: RBF spline features mixed linearly, plus an
    optional SiLU "base" residual branch and optional input layer-norm."""

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        grid_min: float = -2.,
        grid_max: float = 2.,
        num_grids: int = 8,
        use_base_update: bool = True,
        use_layernorm: bool = True,
        base_activation = F.silu,
        spline_weight_init_scale: float = 0.1,
    ) -> None:
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        if use_layernorm:
            assert input_dim > 1, "Do not use layernorms on 1D inputs. Set `use_layernorm=False`."
            self.layernorm = nn.LayerNorm(input_dim)
        else:
            self.layernorm = None
        self.rbf = RadialBasisFunction(grid_min, grid_max, num_grids)
        self.spline_linear = SplineLinear(input_dim * num_grids, output_dim, spline_weight_init_scale)
        self.use_base_update = use_base_update
        if use_base_update:
            self.base_activation = base_activation
            self.base_linear = nn.Linear(input_dim, output_dim)

    def forward(self, x, use_layernorm=True):
        # Normalize the input into the RBF grid range when enabled.
        normed = self.layernorm(x) if (self.layernorm is not None and use_layernorm) else x
        basis = self.rbf(normed)
        # Flatten the trailing (input_dim, num_grids) axes for the linear mix.
        out = self.spline_linear(basis.view(*basis.shape[:-2], -1))
        if self.use_base_update:
            # Residual branch operates on the raw (un-normalized) input.
            out = out + self.base_linear(self.base_activation(x))
        return out

    def plot_curve(
        self,
        input_index: int,
        output_index: int,
        num_pts: int = 1000,
        num_extrapolate_bins: int = 2
    ):
        '''Return (x, y) samples of one learned curve in this FastKANLayer.
        input_index: the selected index of the input, in [0, input_dim) .
        output_index: the selected index of the output, in [0, output_dim) .
        num_pts: num of points sampled for the curve.
        num_extrapolate_bins (N_e): num of bins extrapolating from the given grids. The curve
        will be calculate in the range of [grid_min - h * N_e, grid_max + h * N_e].
        '''
        ng = self.rbf.num_grids
        h = self.rbf.denominator
        assert input_index < self.input_dim
        assert output_index < self.output_dim
        # Slice out the num_grids spline weights tied to this (input, output) pair.
        w = self.spline_linear.weight[output_index, input_index * ng:(input_index + 1) * ng]
        xs = torch.linspace(
            self.rbf.grid_min - num_extrapolate_bins * h,
            self.rbf.grid_max + num_extrapolate_bins * h,
            num_pts
        )
        with torch.no_grad():
            ys = (w * self.rbf(xs.to(w.dtype))).sum(-1)
        return xs, ys
+ return x, y
119
+
120
+
121
class FastKANIrradiance(BaseModel):
    """FastKAN regressor operating on per-channel image statistics.

    Reduces each input channel to its spatial mean (and optionally std),
    then passes the resulting feature vector through a stack of
    FastKANLayers defined by ``layers_hidden``.
    """

    def __init__(
        self,
        eve_norm,
        layers_hidden: List[int],
        grid_min: float = -2.,
        grid_max: float = 2.,
        num_grids: int = 8,
        use_base_update: bool = True,
        base_activation = F.silu,
        spline_weight_init_scale: float = 0.1,
        loss_func = HuberLoss(),
        lr=1e-4,
        use_std=False
    ) -> None:
        super().__init__(model=None, eve_norm=eve_norm, loss_func=loss_func, lr=lr)
        self.use_std = use_std
        # BUGFIX: copy before widening.  The original mutated the caller's
        # layers_hidden list in place (layers_hidden[0] *= 2), silently
        # corrupting shared configs and double-widening on re-instantiation.
        dims = list(layers_hidden)
        if use_std:
            dims[0] = dims[0] * 2
        self.layers = nn.ModuleList([
            FastKANLayer(
                in_dim, out_dim,
                grid_min=grid_min,
                grid_max=grid_max,
                num_grids=num_grids,
                use_base_update=use_base_update,
                base_activation=base_activation,
                spline_weight_init_scale=spline_weight_init_scale,
            ) for in_dim, out_dim in zip(dims[:-1], dims[1:])
        ])

    def forward(self, x):
        """x: (batch, channels, H, W) image stack -> (batch, dims[-1])."""
        # Per-channel spatial statistics feed the 1-D KAN stack.
        # (Also fixes the odd `torch.torch.mean` spelling and skips the std
        # computation entirely when use_std is False.)
        features = torch.mean(x, dim=(2, 3))
        if self.use_std:
            features = torch.cat((features, torch.std(x, dim=(2, 3))), dim=1)
        for layer in self.layers:
            features = layer(features)
        return features
163
+
164
+
165
class AttentionWithFastKANTransform(nn.Module):
    """Multi-head attention whose q/k/v/output (and gate) projections are
    FastKANLayers instead of plain linear maps.

    Shape legend in the comments below: ``*`` = leading batch dims,
    ``q``/``k`` = query/key lengths, ``h`` = num_heads, ``c`` = head_dim.
    """

    def __init__(
        self,
        q_dim: int,
        k_dim: int,
        v_dim: int,
        head_dim: int,
        num_heads: int,
        gating: bool = True,
    ):
        super(AttentionWithFastKANTransform, self).__init__()

        self.num_heads = num_heads
        total_dim = head_dim * self.num_heads
        self.gating = gating
        # KAN-based projections replace the usual nn.Linear q/k/v/o maps.
        self.linear_q = FastKANLayer(q_dim, total_dim)
        self.linear_k = FastKANLayer(k_dim, total_dim)
        self.linear_v = FastKANLayer(v_dim, total_dim)
        self.linear_o = FastKANLayer(total_dim, q_dim)
        self.linear_g = None
        if self.gating:
            self.linear_g = FastKANLayer(q_dim, total_dim)
        # precompute the 1/sqrt(head_dim)
        self.norm = head_dim**-0.5

    def forward(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        bias: torch.Tensor = None,      # additive attention bias
    ) -> torch.Tensor:
        """Scaled dot-product attention over KAN projections.

        Returns a tensor with the query's leading shape and q_dim features.
        """
        # Queries scaled by 1/sqrt(head_dim); unsqueeze so q and k broadcast.
        wq = self.linear_q(q).view(*q.shape[:-1], 1, self.num_heads, -1) * self.norm     # *q1hc
        wk = self.linear_k(k).view(*k.shape[:-2], 1, k.shape[-2], self.num_heads, -1)    # *1khc
        # Dot product via broadcast-multiply + sum; softmax over the key axis.
        att = (wq * wk).sum(-1).softmax(-2)     # *qkh
        del wq, wk
        # NOTE(review): the bias is added AFTER the softmax, so `att` is no
        # longer a normalized distribution; additive attention biases are
        # conventionally applied to the logits before softmax — confirm intent.
        if bias is not None:
            att = att + bias[..., None]

        wv = self.linear_v(v).view(*v.shape[:-2],1, v.shape[-2], self.num_heads, -1)     # *1khc
        # Weighted sum of values over the key axis.
        o = (att[..., None] * wv).sum(-3)       # *qhc
        del att, wv

        o = o.view(*o.shape[:-2], -1)       # *q(hc)

        if self.linear_g is not None:
            # gating, use raw query input
            g = self.linear_g(q)
            o = torch.sigmoid(g) * o

        # merge heads
        o = self.linear_o(o)
        return o
flaring/MEGS_AI_baseline/linear_and_hybrid.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import HuberLoss
4
+ from models.base_model import BaseModel
5
+
6
class LinearIrradianceModel(BaseModel):
    """Linear regression from per-channel image statistics.

    Each of the ``d_input`` channels contributes its spatial mean and std,
    giving ``2 * d_input`` features that a single nn.Linear maps to
    ``d_output`` values.
    """

    def __init__(self, d_input, d_output, eve_norm, loss_func=HuberLoss(), lr=1e-2):
        self.n_channels = d_input
        self.outSize = d_output
        model = nn.Linear(2 * self.n_channels, self.outSize)
        super().__init__(model=model, eve_norm=eve_norm, loss_func=loss_func, lr=lr)

    def forward(self, x, sxr=None, **kwargs):
        """x: (batch, H, W, C) channel-last images -> (batch, d_output).

        Raises ValueError if the input is not 4-D or the channel count is wrong.
        """
        # BUGFIX: removed the three debug print() calls that fired on every
        # forward pass (console spam and a per-batch slowdown in training).

        # Dataloaders may deliver (aia_img, sxr_val); keep only the image.
        if isinstance(x, (list, tuple)):
            x = x[0]

        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input tensor (batch_size, H, W, C), got shape {x.shape}")
        if x.shape[-1] != self.n_channels:
            raise ValueError(f"AIA image has {x.shape[-1]} channels, expected {self.n_channels}")

        # (B, H, W, C) -> (B, C, H, W) so stats reduce over the spatial axes.
        x = x.permute(0, 3, 1, 2)
        mean_irradiance = torch.mean(x, dim=(2, 3))  # (B, C)
        std_irradiance = torch.std(x, dim=(2, 3))    # (B, C)

        input_features = torch.cat((mean_irradiance, std_irradiance), dim=1)  # (B, 2C)
        if input_features.shape[1] != 2 * self.n_channels:
            raise ValueError(f"Expected {2 * self.n_channels} features, got {input_features.shape[1]}")

        return self.model(input_features)
45
+
46
class HybridIrradianceModel(BaseModel):
    """Linear + CNN ensemble: output = linear(x) + cnn_lambda * cnn(x).

    Either branch may be disabled; ``set_train_mode`` switches which branch
    is trained and how strongly the CNN contributes.
    """

    def __init__(self, d_input, d_output, eve_norm, cnn_model='resnet', ln_model=True, ln_params=None, lr=1e-4, cnn_dp=0.75, loss_func=HuberLoss()):
        super().__init__(model=None, eve_norm=eve_norm, loss_func=loss_func, lr=lr)
        self.n_channels = d_input
        self.outSize = d_output
        self.ln_params = ln_params
        self.ln_model = None
        if ln_model:
            self.ln_model = LinearIrradianceModel(d_input, d_output, eve_norm, loss_func=loss_func, lr=lr)
        # Optionally warm-start the linear head from pre-fit parameters.
        if self.ln_params is not None and self.ln_model is not None:
            self.ln_model.model.weight = nn.Parameter(self.ln_params['weight'])
            self.ln_model.model.bias = nn.Parameter(self.ln_params['bias'])
        self.cnn_model = None
        self.cnn_lambda = 1.
        if cnn_model == 'resnet':
            self.cnn_model = nn.Sequential(
                nn.Conv2d(d_input, 64, kernel_size=7, stride=2, padding=3),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d((1, 1)),
                nn.Flatten(),
                nn.Linear(64, d_output),
                nn.Dropout(cnn_dp)
            )
        # BUGFIX: guard the isinstance before .startswith — passing
        # cnn_model=None (a supported way to run linear-only) crashed here
        # with AttributeError: 'NoneType' object has no attribute 'startswith'.
        elif isinstance(cnn_model, str) and cnn_model.startswith('efficientnet'):
            raise NotImplementedError("EfficientNet requires timm; replace with custom CNN or install timm")
        if self.ln_model is None and self.cnn_model is None:
            raise ValueError('Please pass at least one model.')

    def forward(self, x, sxr=None, **kwargs):
        """x: (batch, H, W, C) channel-last images -> (batch, d_output)."""
        # BUGFIX: removed the per-batch debug print() call.

        # Dataloaders may deliver (aia_img, sxr_val); keep only the image.
        if isinstance(x, (list, tuple)):
            x = x[0]

        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input tensor (batch_size, H, W, C), got shape {x.shape}")
        if x.shape[-1] != self.n_channels:
            raise ValueError(f"AIA image has {x.shape[-1]} channels, expected {self.n_channels}")

        # CNN wants channel-first; the linear branch keeps the original layout.
        x_cnn = x.permute(0, 3, 1, 2)

        if self.ln_model is not None and self.cnn_model is not None:
            return self.ln_model(x) + self.cnn_lambda * self.cnn_model(x_cnn)
        elif self.ln_model is not None:
            return self.ln_model(x)
        elif self.cnn_model is not None:
            return self.cnn_model(x_cnn)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def set_train_mode(self, mode):
        """Select which branch trains: 'linear', 'cnn', or 'both'.

        Also sets cnn_lambda (0 to mute the CNN, 0.01 as a small correction).
        """
        if mode == 'linear':
            self.cnn_lambda = 0
            if self.cnn_model: self.cnn_model.eval()
            if self.ln_model: self.ln_model.train()
        elif mode == 'cnn':
            self.cnn_lambda = 0.01
            if self.cnn_model: self.cnn_model.train()
            if self.ln_model: self.ln_model.eval()
        elif mode == 'both':
            self.cnn_lambda = 0.01
            if self.cnn_model: self.cnn_model.train()
            if self.ln_model: self.ln_model.train()
        else:
            raise NotImplementedError(f'Mode not supported: {mode}')
flaring/MEGS_AI_baseline/models/base_model.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ from pytorch_lightning import LightningModule
5
+
6
class BaseModel(LightningModule):
    """Lightning wrapper shared by all irradiance models.

    Wraps an arbitrary ``model``, optimizes with Adam, and computes the
    loss in physical units: ``eve_norm`` is a (mean, std) pair used to undo
    the normalization applied by the dataloader before the loss is taken.
    """

    def __init__(self, model, eve_norm, loss_func, lr):
        super().__init__()
        self.model = model
        self.eve_norm = eve_norm  # Used for SXR normalization (mean, std)
        self.loss_func = loss_func
        self.lr = lr

    def forward(self, x, sxr=None):
        # sxr is accepted for API symmetry; the base model ignores it.
        return self.model(x)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def _denormalize(self, t):
        # Invert the (value - mean) / std normalization.
        return t * self.eve_norm[1] + self.eve_norm[0]

    def _shared_step(self, batch, log_name):
        # Single implementation for train/valid/test instead of three
        # verbatim copies that could drift apart.
        (x, sxr), target = batch
        pred = self._denormalize(self(x, sxr))
        target = self._denormalize(target)
        loss = self.loss_func(pred, target)
        self.log(log_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train_loss')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'valid_loss')

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, 'test_loss')
flaring/MEGS_AI_baseline/models/chopped_alexnet.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from torch import nn
3
+ from torch.nn import HuberLoss
4
+ from models.base_model import BaseModel
5
+
6
+
7
class ChoppedAlexnet(BaseModel):
    """Truncated AlexNet-style CNN regressor.

    Keeps the first ``numLayers`` convolutional stages, global-average-pools
    the feature maps, and maps them to ``d_output`` values with a
    dropout + linear head.
    """

    def __init__(self, d_input, d_output, eve_norm, loss_func=HuberLoss(), numLayers=3, dropout=0, lr=1e-4):
        # BUGFIX: the original assigned nn.Module attributes (self.loss_func,
        # self.features, self.pool) BEFORE super().__init__(), which raises
        # "cannot assign module before Module.__init__() call" in PyTorch.
        # Build everything locally first, init the base class, then attach.
        # It also hard-coded HuberLoss(), silently discarding the loss_func
        # argument; the passed loss_func is now honored.
        layers, channelSize = self.getLayers(numLayers, d_input)
        head = nn.Sequential(nn.Dropout(p=dropout),
                             nn.Linear(channelSize, d_output))
        super().__init__(model=head, eve_norm=eve_norm, loss_func=loss_func, lr=lr)

        self.numLayers = numLayers
        self.n_channels = d_input
        self.outSize = d_output
        self.features = nn.Sequential(*layers)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # Kaiming init for convs, identity-style init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def getLayers(self, numLayers, n_channels):
        """Return (layers, out_channels) for the first numLayers conv stages."""
        layers = [nn.Conv2d(n_channels, 64, kernel_size=11, stride=4, padding=2), nn.BatchNorm2d(64), nn.ReLU(inplace=True), ]
        if numLayers == 1:
            return (layers, 64)
        layers += [nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(64, 192, kernel_size=5, padding=2), nn.BatchNorm2d(192), nn.ReLU(inplace=True), ]
        if numLayers == 2:
            return (layers, 192)
        layers += [nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(192, 384, kernel_size=3, padding=1), nn.BatchNorm2d(384), nn.ReLU(inplace=True)]
        if numLayers == 3:
            return (layers, 384)
        # Any other value falls through to the deepest (4-stage) variant.
        layers += [nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.BatchNorm2d(256)]
        return (layers, 256)

    def forward(self, x):
        """x: (B, C, H, W) -> (B, d_output)."""
        x = self.features(x)
        x = self.pool(x).view(x.size(0), -1)  # global average pool -> (B, C)
        x = self.model(x)
        return x
flaring/MEGS_AI_baseline/models/efficientnet.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torchvision
2
+ from torch import nn
3
+ from torch.nn import HuberLoss
4
+ from models.base_model import BaseModel
5
+
6
+
7
class EfficientnetIrradiance(BaseModel):
    """EfficientNet backbone adapted for irradiance regression.

    Loads a pretrained torchvision EfficientNet, swaps the stem conv to
    accept ``d_input`` channels, and replaces the classifier with a
    dropout + linear regression head of size ``d_output``.
    """

    def __init__(self, d_input, d_output, eve_norm, loss_func=HuberLoss(), model='efficientnet_b0', dp=0.75, lr=1e-4):
        # BUGFIX: the original elif chain silently fell through on an unknown
        # model name, leaving `model` as a str and crashing later with a
        # confusing AttributeError.  Validate and dispatch via getattr instead.
        supported = {f'efficientnet_b{i}' for i in range(8)}
        if model not in supported:
            raise ValueError(f"Unsupported model name: {model!r}; expected one of {sorted(supported)}")
        net = getattr(torchvision.models, model)(pretrained=True)

        # Replace the stem conv so the network accepts d_input channels.
        conv1_out = net.features[0][0].out_channels
        net.features[0][0] = nn.Conv2d(d_input, conv1_out, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

        # Swap the classifier for a dropout + linear regression head.
        lin_in = net.classifier[1].in_features
        # consider adding average pool of full image(s)
        net.classifier = nn.Sequential(nn.Dropout(p=dp, inplace=True),
                                       nn.Linear(in_features=lin_in, out_features=d_output, bias=True))

        # Apply the same dropout probability to every dropout in the backbone.
        # TODO: other dropout values?
        for m in net.modules():
            if m.__class__.__name__.startswith('Dropout'):
                m.p = dp

        super().__init__(model=net, eve_norm=eve_norm, loss_func=loss_func, lr=lr)

    def forward(self, x):
        """x: (B, d_input, H, W) -> (B, d_output)."""
        return self.model(x)
+ return x
flaring/MEGS_AI_baseline/models/kan_success.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Li, Ziyao
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import torch
17
+ import torch.nn as nn
18
+ import torch.nn.functional as F
19
+ from typing import *
20
+ from torch.nn import HuberLoss
21
+ from irradiance.models.base_model import BaseModel
22
+
23
+
24
class SplineLinear(nn.Linear):
    """Bias-free linear layer with truncated-normal weight initialisation.

    ``init_scale`` is the std of the truncated normal used by
    ``reset_parameters`` (which ``nn.Linear.__init__`` invokes).
    """

    def __init__(self, in_features: int, out_features: int, init_scale: float = 0.1, **kw) -> None:
        # Must be assigned before super().__init__, which calls reset_parameters().
        self.init_scale = init_scale
        super().__init__(in_features, out_features, bias=False, **kw)

    def reset_parameters(self) -> None:
        """Re-initialise weights from a truncated normal with std ``init_scale``."""
        std = self.init_scale
        nn.init.trunc_normal_(self.weight, mean=0, std=std)
31
+
32
+
33
class RadialBasisFunction(nn.Module):
    """Gaussian radial-basis features over a fixed 1-D grid.

    Maps input ``x`` of shape ``(...)`` to ``(..., num_grids)`` where feature
    ``i`` is ``exp(-((x - g_i) / denominator) ** 2)`` for grid point ``g_i``.

    Args:
        grid_min: left end of the grid.
        grid_max: right end of the grid.
        num_grids: number of grid points (basis functions).
        denominator: bandwidth; larger denominators lead to smoother basis.
            Defaults to the grid spacing. (Annotation fixed: the original
            declared ``float = None``.)
    """

    def __init__(
        self,
        grid_min: float = -2.,
        grid_max: float = 2.,
        num_grids: int = 8,
        denominator: Optional[float] = None,
    ):
        super().__init__()
        self.grid_min = grid_min
        self.grid_max = grid_max
        self.num_grids = num_grids
        grid = torch.linspace(grid_min, grid_max, num_grids)
        # Fixed (non-trainable) grid; kept as a Parameter so it follows
        # .to()/.cuda() moves and lands in the state dict as before.
        self.grid = torch.nn.Parameter(grid, requires_grad=False)
        # Default bandwidth: one grid spacing.
        self.denominator = denominator or (grid_max - grid_min) / (num_grids - 1)

    def forward(self, x):
        """Return Gaussian RBF activations, shape ``x.shape + (num_grids,)``."""
        return torch.exp(-((x[..., None] - self.grid) / self.denominator) ** 2)
51
+
52
class FastKANLayer(nn.Module):
    """One FastKAN layer: RBF spline features plus an optional "base" path.

    The input is (optionally layer-normalised and) expanded into Gaussian
    radial-basis features, which a bias-free linear map (``SplineLinear``)
    projects to ``output_dim``. When ``use_base_update`` is set, a standard
    ``Linear(base_activation(x))`` term is added to the result.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        grid_min: float = -2.,
        grid_max: float = 2.,
        num_grids: int = 8,
        use_base_update: bool = True,
        use_layernorm: bool = True,
        base_activation = F.silu,
        spline_weight_init_scale: float = 0.1,
    ) -> None:
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.layernorm = None
        if use_layernorm:
            # LayerNorm keeps inputs roughly inside the RBF grid range.
            assert input_dim > 1, "Do not use layernorms on 1D inputs. Set `use_layernorm=False`."
            self.layernorm = nn.LayerNorm(input_dim)
        self.rbf = RadialBasisFunction(grid_min, grid_max, num_grids)
        # Each input dimension contributes num_grids RBF features.
        self.spline_linear = SplineLinear(input_dim * num_grids, output_dim, spline_weight_init_scale)
        self.use_base_update = use_base_update
        if use_base_update:
            self.base_activation = base_activation
            self.base_linear = nn.Linear(input_dim, output_dim)

    def forward(self, x, use_layernorm=True):
        """Map ``(..., input_dim)`` to ``(..., output_dim)``.

        ``use_layernorm=False`` bypasses the layernorm even if one was built.
        """
        if self.layernorm is not None and use_layernorm:
            spline_basis = self.rbf(self.layernorm(x))
        else:
            spline_basis = self.rbf(x)
        # Flatten the trailing (input_dim, num_grids) axes for the linear map.
        ret = self.spline_linear(spline_basis.view(*spline_basis.shape[:-2], -1))
        if self.use_base_update:
            base = self.base_linear(self.base_activation(x))
            ret = ret + base
        return ret

    def plot_curve(
        self,
        input_index: int,
        output_index: int,
        num_pts: int = 1000,
        num_extrapolate_bins: int = 2
    ):
        '''Return the learned 1-D curve for one (input, output) pair.

        input_index: the selected index of the input, in [0, input_dim).
        output_index: the selected index of the output, in [0, output_dim).
        num_pts: number of points sampled for the curve.
        num_extrapolate_bins (N_e): number of bins extrapolating from the given
            grids. The curve is evaluated over
            [grid_min - h * N_e, grid_max + h * N_e].
        '''
        ng = self.rbf.num_grids
        h = self.rbf.denominator
        assert input_index < self.input_dim
        assert output_index < self.output_dim
        # Slice out the spline weights belonging to this (input, output) pair.
        w = self.spline_linear.weight[
            output_index, input_index * ng : (input_index + 1) * ng
        ]   # shape: (num_grids,)
        x = torch.linspace(
            self.rbf.grid_min - num_extrapolate_bins * h,
            self.rbf.grid_max + num_extrapolate_bins * h,
            num_pts
        )   # shape: (num_pts,)
        with torch.no_grad():
            # rbf(x) is (num_pts, num_grids); weighted sum gives the curve.
            y = (w * self.rbf(x.to(w.dtype))).sum(-1)
        return x, y
119
+
120
+
121
class FastKANIrradiance(BaseModel):
    """FastKAN regressor over per-channel image statistics.

    The forward pass reduces each image to per-channel spatial means (and,
    when ``use_std`` is set, stds), then passes the resulting 1-D feature
    vector through a stack of ``FastKANLayer``s.

    Args:
        eve_norm: normalization passed through to ``BaseModel``.
        layers_hidden: layer widths, e.g. ``[n_channels, 64, n_outputs]``;
            when ``use_std`` is True the effective first width is doubled.
        grid_min, grid_max, num_grids: RBF grid configuration per layer.
        use_base_update: add the Linear(activation(x)) base term per layer.
        base_activation: activation of the base term.
        spline_weight_init_scale: init std of the spline weights.
        loss_func: training loss; defaults to Huber.
        lr: learning rate.
        use_std: also feed per-channel spatial std as input features.
    """

    def __init__(
        self,
        eve_norm,
        layers_hidden: List[int],
        grid_min: float = -2.,
        grid_max: float = 2.,
        num_grids: int = 8,
        use_base_update: bool = True,
        base_activation = F.silu,
        spline_weight_init_scale: float = 0.1,
        loss_func = HuberLoss(),
        lr=1e-4,
        use_std=False
    ) -> None:
        super().__init__(model=None, eve_norm=eve_norm, loss_func=loss_func, lr=lr)
        self.use_std = use_std
        # Copy before widening: the original mutated the caller's list in place.
        layers_hidden = list(layers_hidden)
        if use_std:
            layers_hidden[0] = layers_hidden[0] * 2
        self.layers = nn.ModuleList([
            FastKANLayer(
                in_dim, out_dim,
                grid_min=grid_min,
                grid_max=grid_max,
                num_grids=num_grids,
                use_base_update=use_base_update,
                base_activation=base_activation,
                spline_weight_init_scale=spline_weight_init_scale,
            ) for in_dim, out_dim in zip(layers_hidden[:-1], layers_hidden[1:])
        ])

    def forward(self, x):
        """Reduce image batches to channel statistics and run the KAN stack.

        Assumes x is (batch, channels, H, W) — reductions are over dims 2, 3.
        """
        # `torch.torch.mean` in the original only worked by accident
        # (torch.torch is torch); call torch.mean/torch.std directly.
        mean_irradiance = torch.mean(x, dim=(2, 3))
        std_irradiance = torch.std(x, dim=(2, 3))
        if self.use_std:
            x = torch.cat((mean_irradiance, std_irradiance), dim=1)
        else:
            x = mean_irradiance
        for layer in self.layers:
            x = layer(x)
        return x
163
+
164
+
165
class AttentionWithFastKANTransform(nn.Module):
    """Multi-head attention whose q/k/v/output projections are FastKAN layers.

    Optionally applies a sigmoid gate (computed from the raw query) to the
    attention output before the final projection.
    """

    def __init__(
        self,
        q_dim: int,
        k_dim: int,
        v_dim: int,
        head_dim: int,
        num_heads: int,
        gating: bool = True,
    ):
        super(AttentionWithFastKANTransform, self).__init__()

        self.num_heads = num_heads
        total_dim = head_dim * self.num_heads
        self.gating = gating
        self.linear_q = FastKANLayer(q_dim, total_dim)
        self.linear_k = FastKANLayer(k_dim, total_dim)
        self.linear_v = FastKANLayer(v_dim, total_dim)
        self.linear_o = FastKANLayer(total_dim, q_dim)
        self.linear_g = None
        if self.gating:
            self.linear_g = FastKANLayer(q_dim, total_dim)
        # precompute the 1/sqrt(head_dim) scaling factor
        self.norm = head_dim**-0.5

    def forward(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        bias: torch.Tensor = None,      # additive attention bias
    ) -> torch.Tensor:
        """Scaled dot-product attention; shape comments use q/k/h/c axes."""

        wq = self.linear_q(q).view(*q.shape[:-1], 1, self.num_heads, -1) * self.norm     # *q1hc
        wk = self.linear_k(k).view(*k.shape[:-2], 1, k.shape[-2], self.num_heads, -1)    # *1khc
        att = (wq * wk).sum(-1).softmax(-2)     # *qkh
        del wq, wk
        # NOTE(review): the bias is added AFTER the softmax, so `att` is no
        # longer a normalized distribution when bias is given; additive
        # attention biases are conventionally applied to the logits before
        # softmax — confirm this is intended.
        if bias is not None:
            att = att + bias[..., None]

        wv = self.linear_v(v).view(*v.shape[:-2],1, v.shape[-2], self.num_heads, -1)     # *1khc
        o = (att[..., None] * wv).sum(-3)       # *qhc
        del att, wv

        o = o.view(*o.shape[:-2], -1)       # *q(hc)

        if self.linear_g is not None:
            # gating, use raw query input
            g = self.linear_g(q)
            o = torch.sigmoid(g) * o

        # merge heads
        o = self.linear_o(o)
        return o
flaring/MEGS_AI_baseline/models/linear_and_hybrid.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import HuberLoss
4
+ from models.base_model import BaseModel
5
+
6
class LinearIrradianceModel(BaseModel):
    """Linear regression of SXR irradiance from per-channel image statistics.

    For each of the ``d_input`` channels the spatial mean and std are
    computed, giving ``2 * d_input`` features fed to one linear layer.

    Args:
        d_input: number of AIA channels (last axis of the input images).
        d_output: number of regression targets.
        eve_norm: normalization passed through to ``BaseModel``.
        loss_func: training loss; defaults to Huber.
        lr: learning rate.
    """

    def __init__(self, d_input, d_output, eve_norm, loss_func=HuberLoss(), lr=1e-2):
        self.n_channels = d_input
        self.outSize = d_output
        model = nn.Linear(2 * self.n_channels, self.outSize)
        super().__init__(model=model, eve_norm=eve_norm, loss_func=loss_func, lr=lr)

    def forward(self, x, sxr=None, **kwargs):
        """Predict irradiance from a (batch, H, W, C) image tensor.

        ``x`` may also be a ``(aia_img, sxr_val)`` tuple, in which case only
        the image tensor is used.

        Raises:
            ValueError: if the tensor is not 4-D or has the wrong channel count.
        """
        # If x is a tuple (aia_img, sxr_val), extract the AIA image tensor.
        if isinstance(x, (list, tuple)):
            x = x[0]

        # Debug prints removed: they ran on every batch and flooded stdout.
        # Expect x shape: (batch_size, H, W, C).
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input tensor (batch_size, H, W, C), got shape {x.shape}")
        if x.shape[-1] != self.n_channels:
            raise ValueError(f"AIA image has {x.shape[-1]} channels, expected {self.n_channels}")

        # Channels-last -> channels-first, then reduce over the spatial dims.
        x = x.permute(0, 3, 1, 2)
        mean_irradiance = torch.mean(x, dim=(2, 3))  # (batch_size, n_channels)
        std_irradiance = torch.std(x, dim=(2, 3))    # (batch_size, n_channels)

        input_features = torch.cat((mean_irradiance, std_irradiance), dim=1)  # (batch_size, 2 * n_channels)
        if input_features.shape[1] != 2 * self.n_channels:
            raise ValueError(f"Expected {2 * self.n_channels} features, got {input_features.shape[1]}")

        return self.model(input_features)
45
+
46
class HybridIrradianceModel(BaseModel):
    """Hybrid linear + CNN irradiance model.

    Combines the statistics-based ``LinearIrradianceModel`` with a small
    CNN branch; the CNN contribution is scaled by ``cnn_lambda`` (see
    ``set_train_mode``).

    Args:
        d_input: number of AIA channels.
        d_output: number of regression targets.
        eve_norm: normalization passed through to ``BaseModel``.
        cnn_model: ``'resnet'`` for the built-in CNN; ``'efficientnet*'``
            is not supported (requires timm).
        ln_model: whether to include the linear branch.
        ln_params: optional dict with pre-trained 'weight'/'bias' for the
            linear branch.
        lr: learning rate.
        cnn_dp: dropout probability of the CNN head.
        loss_func: training loss; defaults to Huber.

    Raises:
        NotImplementedError: for efficientnet backbones.
        ValueError: if neither branch is enabled.
    """

    def __init__(self, d_input, d_output, eve_norm, cnn_model='resnet', ln_model=True, ln_params=None, lr=1e-4, cnn_dp=0.75, loss_func=HuberLoss()):
        super().__init__(model=None, eve_norm=eve_norm, loss_func=loss_func, lr=lr)
        self.n_channels = d_input
        self.outSize = d_output
        self.ln_params = ln_params
        self.ln_model = None
        if ln_model:
            self.ln_model = LinearIrradianceModel(d_input, d_output, eve_norm, loss_func=loss_func, lr=lr)
        # Optionally warm-start the linear branch from supplied parameters.
        if self.ln_params is not None and self.ln_model is not None:
            self.ln_model.model.weight = nn.Parameter(self.ln_params['weight'])
            self.ln_model.model.bias = nn.Parameter(self.ln_params['bias'])
        self.cnn_model = None
        self.cnn_lambda = 1.
        if cnn_model == 'resnet':
            self.cnn_model = nn.Sequential(
                nn.Conv2d(d_input, 64, kernel_size=7, stride=2, padding=3),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d((1, 1)),
                nn.Flatten(),
                nn.Linear(64, d_output),
                nn.Dropout(cnn_dp)
            )
        # isinstance guard: the original called .startswith on any non-'resnet'
        # value and crashed with AttributeError when cnn_model was None.
        elif isinstance(cnn_model, str) and cnn_model.startswith('efficientnet'):
            raise NotImplementedError("EfficientNet requires timm; replace with custom CNN or install timm")
        if self.ln_model is None and self.cnn_model is None:
            raise ValueError('Please pass at least one model.')

    def forward(self, x, sxr=None, **kwargs):
        """Predict irradiance from a (batch, H, W, C) tensor (or tuple).

        Raises:
            ValueError: if the tensor is not 4-D or has the wrong channel count.
        """
        # If x is a tuple (aia_img, sxr_val), extract the AIA image tensor.
        if isinstance(x, (list, tuple)):
            x = x[0]

        # Debug prints removed: they ran on every batch and flooded stdout.
        # Expect x shape: (batch_size, H, W, C).
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input tensor (batch_size, H, W, C), got shape {x.shape}")
        if x.shape[-1] != self.n_channels:
            raise ValueError(f"AIA image has {x.shape[-1]} channels, expected {self.n_channels}")

        # Channels-first view for the CNN branch.
        x_cnn = x.permute(0, 3, 1, 2)

        if self.ln_model is not None and self.cnn_model is not None:
            # Linear branch consumes the original channels-last layout.
            return self.ln_model(x) + self.cnn_lambda * self.cnn_model(x_cnn)
        elif self.ln_model is not None:
            return self.ln_model(x)
        elif self.cnn_model is not None:
            return self.cnn_model(x_cnn)

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def set_train_mode(self, mode):
        """Select which branches train: 'linear', 'cnn', or 'both'.

        Also sets ``cnn_lambda``: 0 for linear-only, 0.01 otherwise.

        Raises:
            NotImplementedError: for any other mode string.
        """
        if mode == 'linear':
            self.cnn_lambda = 0
            if self.cnn_model: self.cnn_model.eval()
            if self.ln_model: self.ln_model.train()
        elif mode == 'cnn':
            self.cnn_lambda = 0.01
            if self.cnn_model: self.cnn_model.train()
            if self.ln_model: self.ln_model.eval()
        elif mode == 'both':
            self.cnn_lambda = 0.01
            if self.cnn_model: self.cnn_model.train()
            if self.ln_model: self.ln_model.train()
        else:
            raise NotImplementedError(f'Mode not supported: {mode}')
flaring/MEGS_AI_baseline/sxr_normalization.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ from pathlib import Path
4
+ import glob
5
+ import os
6
+
7
def compute_sxr_norm(sxr_dir):
    """
    Compute mean and standard deviation of log10-transformed SXR values.

    Args:
        sxr_dir (str): Path to directory containing SXR .npy files.

    Returns:
        tuple: (mean, std) of log10(SXR + 1e-8) values.

    Raises:
        FileNotFoundError: if ``sxr_dir`` is not a directory.
        ValueError: if no .npy files or no valid values are found.
    """
    sxr_dir = Path(sxr_dir).resolve()
    print(f"Checking SXR directory: {sxr_dir}")
    if not sxr_dir.is_dir():
        raise FileNotFoundError(f"SXR directory does not exist or is not a directory: {sxr_dir}")

    # Use glob for case-insensitive matching
    sxr_files = sorted(glob.glob(os.path.join(sxr_dir, "*.npy")))
    print(f"Found {len(sxr_files)} SXR files in {sxr_dir}")
    if len(sxr_files) == 0:
        # Message fixed: the original said "*_sxr.npy", which did not match
        # the glob pattern actually used above.
        print("No files matching '*.npy' found. Listing directory contents:")
        print(os.listdir(sxr_dir)[:10])  # Show first 10 entries
        raise ValueError(f"No SXR files found in {sxr_dir}")

    sxr_values = []
    for f in sxr_files:
        try:
            sxr = np.load(f)
            # Files may hold scalars or arrays; take the first element.
            sxr = np.atleast_1d(sxr).flatten()[0]
            if not np.isfinite(sxr) or sxr < 0:
                print(f"Skipping invalid SXR value in {f}: {sxr}")
                continue
            sxr_values.append(np.log10(sxr + 1e-8))
        except Exception as e:
            # Best-effort: a corrupt file should not abort the whole pass.
            print(f"Failed to load SXR file {f}: {e}")
            continue

    sxr_values = np.array(sxr_values)
    if len(sxr_values) == 0:
        raise ValueError(f"No valid SXR values found in {sxr_dir}. All files failed to load or contained invalid data.")

    mean = np.mean(sxr_values)
    std = np.std(sxr_values)
    print(f"Computed SXR normalization: mean={mean}, std={std}")
    return mean, std
51
+
52
if __name__ == "__main__":
    # Update this path to your real data SXR directory.
    sxr_dir = "/mnt/data/ML-Ready-Data-No-Intensity-Cut/GOES-18-SXR-B/"
    sxr_norm = compute_sxr_norm(sxr_dir)
    # Use one variable for both the save and the log line: the original
    # saved to sxr_norm2.npy but printed sxr_norm.npy.
    out_path = "/home/jayantbiradar619/sxr_norm2.npy"
    np.save(out_path, sxr_norm)
    print(f"Saved SXR normalization to {out_path}")
flaring/MEGS_AI_baseline/train.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import argparse
3
+ import os
4
+ import yaml
5
+ import itertools
6
+ import wandb
7
+ import torch
8
+ import numpy as np
9
+ from pathlib import Path
10
+ import torchvision.transforms as transforms
11
+ from pytorch_lightning import Trainer
12
+ from pytorch_lightning.loggers import WandbLogger
13
+ from pytorch_lightning.callbacks import ModelCheckpoint, Callback
14
+ from torch.nn import HuberLoss
15
+ from SDOAIA_dataloader import AIA_GOESDataModule
16
+ from linear_and_hybrid import LinearIrradianceModel, HybridIrradianceModel
17
+
18
# SXR Prediction Logger
class SXRPredictionLogger(Callback):
    """Lightning callback that logs predictions vs. targets to wandb for a
    fixed set of validation samples at the end of every validation epoch.
    """

    def __init__(self, val_samples):
        # val_samples: list of ((aia, sxr), target) tuples from the
        # validation dataset.
        super().__init__()
        self.val_samples = val_samples

    def on_validation_epoch_end(self, trainer, pl_module):
        # val_samples is a list of ((aia, sxr), target)
        for (aia, sxr), target in self.val_samples:
            aia, sxr, target = aia.to(pl_module.device), sxr.to(pl_module.device), target.to(pl_module.device)
            # NOTE(review): no explicit torch.no_grad() here; this relies on
            # Lightning disabling grad around validation hooks — confirm,
            # otherwise pred.cpu().numpy() would fail on a grad-tracking tensor.
            pred = pl_module(aia.unsqueeze(0)) # Add batch dimension
            trainer.logger.experiment.log({
                "val_pred_sxr": pred.cpu().numpy(),
                "val_target_sxr": target.cpu().numpy()
            })
33
+
34
# Compute SXR normalization
def compute_sxr_norm(sxr_dir):
    """Return (mean, std) of log10(SXR + 1e-8) over all .npy files in ``sxr_dir``.

    Non-finite and negative values are skipped, matching the behaviour of
    ``sxr_normalization.compute_sxr_norm`` (the original version let a single
    NaN file poison both statistics).

    Raises:
        ValueError: if no valid SXR values are found.
    """
    sxr_values = []
    for f in Path(sxr_dir).glob("*.npy"):
        sxr = np.load(f)
        # Files may hold scalars or arrays; take the first element.
        sxr = np.atleast_1d(sxr).flatten()[0]
        if not np.isfinite(sxr) or sxr < 0:
            continue
        sxr_values.append(np.log10(sxr + 1e-8))
    sxr_values = np.array(sxr_values)
    if len(sxr_values) == 0:
        raise ValueError(f"No SXR files found in {sxr_dir}")
    return np.mean(sxr_values), np.std(sxr_values)
45
+
46
# Parser
parser = argparse.ArgumentParser()
parser.add_argument('-checkpoint_dir', type=str, required=True, help='Directory to save checkpoints.')
parser.add_argument('-model', type=str, default='config.yaml', help='Path to model config YAML.')
parser.add_argument('-aia_dir', type=str, required=True, help='Path to AIA .npy files.')
parser.add_argument('-sxr_dir', type=str, required=True, help='Path to SXR .npy files.')
parser.add_argument('-sxr_norm', type=str, help='Path to SXR normalization (mean, std).')
parser.add_argument('-instrument', type=str, default='AIA_6', help='Instrument (e.g., AIA_6 for 6 wavelengths).')
args = parser.parse_args()

# Load config
with open(args.model, 'r') as stream:
    config_data = yaml.load(stream, Loader=yaml.SafeLoader)

# Cartesian product over all hyper-parameter lists under config_data['model']:
# one training run per combination.
dic_values = [i for i in config_data['model'].values()]
combined_parameters = list(itertools.product(*dic_values))

# Paths and normalization
checkpoint_dir = args.checkpoint_dir
aia_dir = args.aia_dir
sxr_dir = args.sxr_dir
if args.sxr_norm:
    # Pre-computed (mean, std) of log10 SXR values.
    sxr_norm = np.load(args.sxr_norm)
else:
    sxr_norm = compute_sxr_norm(sxr_dir)
instrument = args.instrument

# Transforms
# Per-sample min-max normalisation, with flips/rotations on the train set only.
train_transforms = transforms.Compose([
    transforms.Lambda(lambda x: (x - x.min()) / (x.max() - x.min() + 1e-8)), # Remove clone/detach
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
])
val_transforms = transforms.Compose([
    transforms.Lambda(lambda x: (x - x.min()) / (x.max() - x.min() + 1e-8)), # Remove clone/detach
])
82
+
83
# Training loop
# One full train/test run per hyper-parameter combination from the config grid.
n = 0
for parameter_set in combined_parameters:
    # Rebuild a flat {param_name: value} dict for this combination.
    run_config = {key: item for key, item in zip(config_data['model'].keys(), parameter_set)}
    torch.manual_seed(run_config['seed'])
    np.random.seed(run_config['seed'])

    # DataModule
    data_loader = AIA_GOESDataModule(
        aia_dir=aia_dir,
        sxr_dir=sxr_dir,
        sxr_norm=sxr_norm,
        batch_size=16,
        num_workers=os.cpu_count() // 2,
        train_transforms=train_transforms,
        val_transforms=val_transforms,
        val_split=0.2,
        test_split=0.1
    )
    data_loader.setup()

    # Logger
    # NOTE(review): with a single parameter set every run reuses the name
    # "aia_sxr_model" — confirm this is intended.
    wb_name = f"{instrument}_{n}" if len(combined_parameters) > 1 else "aia_sxr_model"
    wandb_logger = WandbLogger(
        entity=config_data['wandb']['entity'],
        project=config_data['wandb']['project'],
        job_type=config_data['wandb']['job_type'],
        tags=config_data['wandb']['tags'],
        name=wb_name,
        notes=config_data['wandb']['notes'],
        config=run_config
    )

    # Logging callback: log predictions for a few evenly spaced validation samples.
    total_n_valid = len(data_loader.valid_ds)
    plot_data = [data_loader.valid_ds[i] for i in range(0, total_n_valid, max(1, total_n_valid // 4))]
    plot_samples = plot_data # Keep as list of ((aia, sxr), target)
    sxr_callback = SXRPredictionLogger(plot_samples)

    # Checkpoint callback: keep only the best model by validation loss.
    checkpoint_callback = ModelCheckpoint(
        dirpath=checkpoint_dir,
        monitor='valid_loss',
        mode='min',
        save_top_k=1,
        filename=f"{wb_name}-{{epoch:02d}}-{{valid_loss:.4f}}"
    )

    # Model: d_input=6 matches the AIA_6 default (6 wavelength channels).
    if run_config['architecture'] == 'linear':
        model = LinearIrradianceModel(
            d_input=6,
            d_output=1,
            eve_norm=sxr_norm,
            lr=run_config.get('lr', 1e-2),
            loss_func=HuberLoss()
        )
    elif run_config['architecture'] == 'hybrid':
        model = HybridIrradianceModel(
            d_input=6,
            d_output=1,
            eve_norm=sxr_norm,
            cnn_model=run_config['cnn_model'],
            ln_model=True,
            cnn_dp=run_config.get('cnn_dp', 0.75),
            lr=run_config.get('lr', 1e-4)
        )
    else:
        raise NotImplementedError(f"Architecture {run_config['architecture']} not supported.")

    # Trainer
    trainer = Trainer(
        default_root_dir=checkpoint_dir,
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        max_epochs=run_config.get('epochs', 10),
        callbacks=[sxr_callback, checkpoint_callback],
        logger=wandb_logger,
        log_every_n_steps=10
    )

    # Train
    trainer.fit(model, data_loader)

    # Save checkpoint
    # NOTE(review): save_dictionary aliases run_config (no copy) and pickles
    # the whole model object via torch.save — loading later requires the same
    # class definitions on the import path.
    save_dictionary = run_config
    save_dictionary['model'] = model
    save_dictionary['instrument'] = instrument
    full_checkpoint_path = os.path.join(checkpoint_dir, f"{wb_name}_{n}.ckpt")
    torch.save(save_dictionary, full_checkpoint_path)

    # Test
    trainer.test(model, dataloaders=data_loader.test_dataloader())

    # Finalize
    wandb.finish()
    n += 1
flaring/__init__.py ADDED
File without changes
flaring/cut_off_aia.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+
4
+
5
+ aia = os.listdir("/mnt/data/ML-Ready-Data-No-Intensity-Cut/AIA-Data")
6
+
7
+
8
+ target_dates = ["2023-07-11","2023-07-15","2023-07-16", "2023-07-18" "2023-07-20", "2023-07-26", "2023-07-30", "2023-08-01", "2023-08-02", "2023-08-07", ]
9
+
10
+ aia_dict = {}
11
+ aia_dict[0] = []
12
+ aia_dict[1] = []
13
+ aia_dict[2] = []
14
+ aia_dict[3] = []
15
+ aia_dict[4] = []
16
+ aia_dict[5] = []
17
+
18
+ count = 0
19
+ for i, file in enumerate(aia):
20
+ if file.split("T")[0] in target_dates:
21
+ aia_data = np.load("/mnt/data/ML-Ready-Data-No-Intensity-Cut/AIA-Data/"+file)
22
+ aia_dict[0].append(aia_data[0].flatten())
23
+ aia_dict[1].append(aia_data[1].flatten())
24
+ aia_dict[2].append(aia_data[2].flatten())
25
+ aia_dict[3].append(aia_data[3].flatten())
26
+ aia_dict[4].append(aia_data[4].flatten())
27
+ aia_dict[5].append(aia_data[5].flatten())
28
+ count = count + 1
29
+ print("Flares: " + str(count) + "\n")
30
+ print(f"\nProcessed {i+1}/{len(aia)} files", end='\r')
31
+
32
+ def percentile(data, perc):
33
+ return np.percentile(data, perc)
34
+
35
+ percentile_dict = {0:[percentile(aia_dict[0], 95), percentile(aia_dict[0], 99.5)],1: [percentile(aia_dict[1], 95), percentile(aia_dict[1], 99.5)], 2: [percentile(aia_dict[2], 95), percentile(aia_dict[2], 99.5)], 3: [percentile(aia_dict[3], 95), percentile(aia_dict[3], 99.5)], 4: [percentile(aia_dict[4], 95), percentile(aia_dict[4], 99.5)], 5: [percentile(aia_dict[5], 95), percentile(aia_dict[5], 99.5)]}
36
+
37
+ print(percentile_dict)
38
+ #{0: [np.float32(5.0747647), np.float32(16.560747)], 1: [np.float32(24.491392), np.float32(75.84181)], 2: [np.float32(607.3201), np.float32(1536.1443)], 3: [np.float32(1021.83466), np.float32(2288.1)], 4: [np.float32(480.13672), np.float32(1163.9178)], 5: [np.float32(144.44502), np.float32(401.82352)]}