Upload 3 files
- mae.py +357 -0
- manas1.pt +3 -0
- modelclass.py +81 -0
mae.py
ADDED
@@ -0,0 +1,357 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class PatchEmbed(nn.Module):
    def __init__(self, fs: int = 200, patch_seconds: float = 1.0, overlap_seconds: float = 0.1, embed_dim: int = 512):
        super().__init__()

        self.patch_size = int(round(patch_seconds * fs))
        self.overlap_size = int(round(overlap_seconds * fs))

        self.step = self.patch_size - self.overlap_size

        self.linear = nn.Linear(self.patch_size, embed_dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Slide a patch_size window along the time axis with stride `step`
        patches = x.unfold(dimension=-1, size=self.patch_size, step=self.step)
        return self.linear(patches)

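# Shape check for PatchEmbed (illustrative, not in the original file): with fs=200,
# patch_seconds=1.0 and overlap_seconds=0.1, patch_size=200 and step=180, so a 10 s
# window (2000 samples) yields floor((2000 - 200) / 180) + 1 = 11 patches of 200 samples.
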
class PosEnc(nn.Module):
    def __init__(self, n_freqs: int = 4, embed_dim: int = 512):
        super().__init__()

        freqs = torch.linspace(1.0, 10.0, n_freqs)
        # (n_freqs^4, 4) grid of frequency combinations, stored transposed as (4, n_freqs^4)
        self.register_buffer("freq_matrix", torch.cartesian_prod(freqs, freqs, freqs, freqs).transpose(1, 0))

        fourier_features_dim = 2 * (n_freqs**4)

        self.fourier_linear = nn.Linear(fourier_features_dim, embed_dim, bias=False)
        self.learned_linear = nn.Sequential(nn.Linear(4, embed_dim, bias=False), nn.GELU(), nn.LayerNorm(embed_dim))

        self.final_norm = nn.LayerNorm(embed_dim)

    def forward(self, coords: torch.Tensor):
        # coords: (B, N, 4) -> phases: (B, N, n_freqs^4)
        phases = torch.matmul(coords, self.freq_matrix)

        fourier_features = torch.cat([torch.sin(phases), torch.cos(phases)], dim=-1)
        fourier_emb = self.fourier_linear(fourier_features)

        learned_emb = self.learned_linear(coords)

        return self.final_norm(fourier_emb + learned_emb)

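# Dimension check for PosEnc (illustrative): with n_freqs=4 the cartesian product has
# 4^4 = 256 frequency 4-vectors, so sin+cos give 2 * 256 = 512 Fourier features per token.
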
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim: int, heads: int, dropout: float = 0.0):
        super().__init__()

        assert embed_dim % heads == 0, "embed_dim must be divisible by heads"

        self.pre_attn_norm = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=heads, dropout=dropout, batch_first=True)

        self.pre_ffn_norm = nn.LayerNorm(embed_dim)
        self.ffn = nn.Sequential(nn.Linear(embed_dim, 4 * embed_dim), nn.GELU(), nn.Linear(4 * embed_dim, embed_dim))

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        # Pre-LN self-attention with a residual connection
        attn_in = self.pre_attn_norm(x)
        attn_out, _ = self.attn(attn_in, attn_in, attn_in)
        x = x + attn_out

        # Pre-LN feed-forward with a residual connection
        ffn_in = self.pre_ffn_norm(x)
        ffn_out = self.ffn(ffn_in)
        x = x + ffn_out

        # ffn_out (pre-residual) is returned so callers can collect per-layer features
        return x, ffn_out

class TransformerEncoderDecoder(nn.Module):
    def __init__(self, embed_dim: int = 512, depth: int = 16, heads: int = 8):
        super().__init__()

        self.layers = nn.ModuleList([TransformerBlock(embed_dim, heads) for _ in range(depth)])
        self.final_norm = nn.LayerNorm(embed_dim)

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, list[torch.Tensor]]:
        intermediate = []

        for layer in self.layers:
            x, ffn_out = layer(x)
            intermediate.append(ffn_out)

        return self.final_norm(x), intermediate

class MAEDecoder(nn.Module):
    def __init__(self, embed_dim: int = 512, decoder_depth: int = 4, decoder_heads: int = 8, patch_size: int = 200):
        super().__init__()

        # 1. The Mask Token (the "gray tile"):
        # a learnable vector that stands in for every missing patch
        self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        nn.init.normal_(self.mask_token, std=0.02)

        # 2. The Decoder Transformer: reuses the encoder block,
        # but is lighter (fewer layers) than the main encoder
        self.decoder = TransformerEncoderDecoder(embed_dim=embed_dim, depth=decoder_depth, heads=decoder_heads)

        # 3. The Prediction Head:
        # projects an embedding (512) back to a raw signal patch (200)
        self.predict = nn.Linear(embed_dim, patch_size, bias=True)

    def forward(self, x_visible: torch.Tensor, pos_enc: nn.Module, coords: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        B, N_Total, D = coords.shape[0], coords.shape[1], x_visible.shape[-1]

        # --- Step A: Fill the canvas with mask tokens ---
        # Create a (Batch, Total, Dim) tensor filled with the mask token
        x_full = self.mask_token.expand(B, N_Total, D).clone()

        # --- Step B: Paste the visible tokens ---
        # Overwrite the mask tokens with the encoder output at the visible (True) slots
        for i in range(B):
            x_full[i, mask[i]] = x_visible[i]

        # --- Step C: Add positional encoding ---
        # The shared PosEnc maps coords (B, N_Total, 4) to (B, N_Total, Dim)
        pos_emb = pos_enc(coords)
        x_full = x_full + pos_emb

        # --- Step D: Decode ---
        # The intermediate outputs (second return value) are not needed here
        x_decoded, _ = self.decoder(x_full)

        # --- Step E: Predict ---
        # (Batch, N_Total, 512) -> (Batch, N_Total, 200)
        prediction = self.predict(x_decoded)

        return prediction

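# Note on Step B above (illustrative): since every sample has the same number of
# visible tokens, the per-sample loop is equivalent to one vectorized scatter:
#     x_full[mask] = x_visible.reshape(-1, x_visible.shape[-1])
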
def generate_mask(coords: torch.Tensor, mask_ratio: float = 0.55, spatial_radius: float = 3.0, temporal_radius: float = 3.0) -> torch.Tensor:
    B, N, _ = coords.shape
    device = coords.device

    # Exact number of tokens to hide per sample
    num_masked_target = int(mask_ratio * N)

    # Start with everything visible (True = visible, False = masked)
    mask = torch.ones(B, N, dtype=torch.bool, device=device)

    for b in range(B):
        spatial_coords = coords[b, :, :3]
        temporal_coords = coords[b, :, 3]

        # --- Phase 1: Block Masking Strategy ---
        # Keep masking blocks until we meet or exceed the target
        while (~mask[b]).sum() < num_masked_target:
            # Pick a random seed token
            seed_idx = torch.randint(0, N, (1,)).item()

            # Distances from the seed in space and in time
            seed_spatial = spatial_coords[seed_idx]
            dists_spatial = torch.norm(spatial_coords - seed_spatial, dim=1)

            seed_temporal = temporal_coords[seed_idx]
            dists_temporal = torch.abs(temporal_coords - seed_temporal)

            # Tokens within both radii form the block
            in_block = (dists_spatial <= spatial_radius) & (dists_temporal <= temporal_radius)

            # Mask this block (set to False)
            mask[b, in_block] = False

        # --- Phase 2: Exact Count Enforcement ---
        # The last block usually overshoots the target, so the excess must be unmasked.

        # Indices of all tokens that are currently masked
        masked_indices = torch.where(~mask[b])[0]
        num_current_masked = len(masked_indices)

        if num_current_masked > num_masked_target:
            # Randomly choose which ones stay masked: shuffle the masked indices,
            # the first num_masked_target stay masked, the rest become visible again
            shuffled_indices = masked_indices[torch.randperm(num_current_masked)]

            excess_indices = shuffled_indices[num_masked_target:]
            mask[b, excess_indices] = True

    return mask

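# Quick sanity check for generate_mask (illustrative):
#     coords = torch.randn(2, 100, 4)
#     m = generate_mask(coords, mask_ratio=0.55)
#     assert (~m).sum(dim=1).eq(int(0.55 * 100)).all()  # exactly 55 masked per row
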
class MAE(nn.Module):
    def __init__(
        self,
        # Data params
        fs: int = 200,
        patch_seconds: float = 1.0,
        overlap_seconds: float = 0.1,
        # Model params
        embed_dim: int = 512,
        encoder_depth: int = 12,
        encoder_heads: int = 8,
        decoder_depth: int = 4,
        decoder_heads: int = 8,
        # Training params
        mask_ratio: float = 0.55,
        aux_loss_weight: float = 0.1,
    ):
        super().__init__()

        self.embed_dim = embed_dim
        self.mask_ratio = mask_ratio
        self.aux_loss_weight = aux_loss_weight

        # 1. Input processing
        self.patch_embed = PatchEmbed(fs, patch_seconds, overlap_seconds, embed_dim)

        # patch_size and step are derived from the component we just initialized
        self.patch_size = self.patch_embed.patch_size
        self.step = self.patch_embed.step

        # 2. Positional encoding (shared between encoder and decoder)
        self.pos_enc = PosEnc(n_freqs=4, embed_dim=embed_dim)

        # 3. Encoder
        self.encoder = TransformerEncoderDecoder(embed_dim=embed_dim, depth=encoder_depth, heads=encoder_heads)

        # 4. Decoder (main reconstruction path)
        self.decoder = MAEDecoder(embed_dim=embed_dim, decoder_depth=decoder_depth, decoder_heads=decoder_heads, patch_size=self.patch_size)

        # 5. Auxiliary head (global token)
        # Concatenates the outputs of ALL encoder layers
        self.aux_dim = encoder_depth * embed_dim

        # A learned query vector that attends over the encoder outputs
        self.aux_query = nn.Parameter(torch.randn(1, 1, self.aux_dim))
        nn.init.normal_(self.aux_query, std=0.02)

        # Projection: (depth * embed_dim) -> embed_dim
        self.aux_linear = nn.Linear(self.aux_dim, embed_dim, bias=False)

        # Reconstruction head for the auxiliary task
        self.aux_predict = nn.Sequential(nn.Linear(embed_dim, embed_dim), nn.GELU(), nn.Linear(embed_dim, self.patch_size))

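    # Sizing note (illustrative): with the defaults encoder_depth=12 and embed_dim=512,
    # aux_dim = 12 * 512 = 6144, so aux_query is a (1, 1, 6144) learned parameter.
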
    def prepare_coords(self, xyz: torch.Tensor, num_patches: int):
        B, C, _ = xyz.shape
        device = xyz.device

        # 1. Generate time indices (0, 1, 2, ..., P-1)
        time_idx = torch.arange(num_patches, device=device, dtype=torch.float32)

        # 2. Expand spatial coords
        # (B, C, 3) -> (B, C, 1, 3) -> (B, C, P, 3)
        spat = xyz.unsqueeze(2).expand(-1, -1, num_patches, -1)

        # 3. Expand time coords
        # (P,) -> (1, 1, P, 1) -> (B, C, P, 1)
        time = time_idx.view(1, 1, num_patches, 1).expand(B, C, -1, -1)

        # 4. Concatenate -> (B, C, P, 4)
        coords = torch.cat([spat, time], dim=-1)

        # 5. Flatten to (B, N_Total, 4)
        return coords.flatten(1, 2)

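    # Illustrative example: 20 channels x 11 time patches -> N_Total = 220 tokens,
    # each tagged with a 4D coordinate (x, y, z, t).
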
    def forward(self, x: torch.Tensor, xyz: torch.Tensor):
        B, _, _ = x.shape

        # --- 1. Patchify & Embed ---
        # patches: (B, C, P, PatchSize)
        patches = x.unfold(-1, self.patch_size, self.step)
        num_patches = patches.shape[2]

        # tokens: (B, C, P, Dim)
        tokens = self.patch_embed.linear(patches)

        # Flatten to sequences: (B, N_Total, ...)
        tokens_flat = tokens.flatten(1, 2)
        patches_flat = patches.flatten(1, 2)  # reconstruction target for the loss

        # --- 2. Prepare 4D Coordinates ---
        coords = self.prepare_coords(xyz, num_patches)

        # --- 3. Generate Mask ---
        # generate_mask guarantees an identical masked count for every sample in the batch
        mask = generate_mask(coords, mask_ratio=self.mask_ratio)

        # --- 4. Prepare Encoder Input ---
        # Extract only the visible tokens and stack them. Because the visible count is
        # identical for every row b, boolean indexing followed by .view() is valid:
        # tokens_flat: (B, N_Total, D), mask: (B, N_Total) -> result: (B, N_Vis, D)
        n_vis = mask[0].sum().item()

        x_vis = tokens_flat[mask].view(B, n_vis, -1)
        coords_vis = coords[mask].view(B, n_vis, -1)

        # Add positional encoding
        pe_vis = self.pos_enc(coords_vis)
        x_vis = x_vis + pe_vis

        # --- 5. Encoder Forward ---
        x_encoded, intermediates = self.encoder(x_vis)

        # --- 6. Main Decoder Path ---
        predictions_main = self.decoder(x_visible=x_encoded, pos_enc=self.pos_enc, coords=coords, mask=mask)

        # --- 7. Auxiliary Path (Global Token) ---
        # Concatenate all intermediate layer outputs: (B, N_Vis, Depth*Dim)
        aux_input = torch.cat(intermediates, dim=-1)

        # Attention pooling: score = input @ query^T
        # (B, N_Vis, AuxDim) @ (1, AuxDim, 1) -> (B, N_Vis, 1)
        attn_scores = torch.matmul(aux_input, self.aux_query.transpose(1, 2))
        attn_weights = F.softmax(attn_scores, dim=1)

        # Pool: sum(weights * input) -> (B, 1, AuxDim)
        global_token = torch.sum(attn_weights * aux_input, dim=1, keepdim=True)

        # Project to embedding dim: (B, 1, Dim)
        global_emb = self.aux_linear(global_token)

        # Predict the masked patches from the global token:
        # 1. Get coords of the masked tokens (fixed count, so the reshape is clean)
        n_masked = (~mask[0]).sum().item()
        coords_masked = coords[~mask].view(B, n_masked, -1)

        pe_masked = self.pos_enc(coords_masked)

        # 2. Expand the global token to one copy per masked position
        global_expanded = global_emb.expand(-1, n_masked, -1)

        # 3. Combine & predict
        aux_pred_in = global_expanded + pe_masked
        predictions_aux = self.aux_predict(aux_pred_in)

        # --- 8. Loss Calculation ---
        # Target: only the masked patches
        target_masked = patches_flat[~mask].view(B, n_masked, -1)

        # Main loss (L1 on masked patches)
        pred_main_masked = predictions_main[~mask].view(B, n_masked, -1)
        loss_main = F.l1_loss(pred_main_masked, target_masked)

        # Auxiliary loss (L1 on masked patches)
        loss_aux = F.l1_loss(predictions_aux, target_masked)

        total_loss = loss_main + self.aux_loss_weight * loss_aux

        return total_loss, predictions_main, mask
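A minimal smoke test for the pretraining objective (illustrative sketch, not part of the upload; assumes 20 EEG channels and 10 s of signal at 200 Hz):

import torch
from mae import MAE

model = MAE()
x = torch.randn(2, 20, 2000)   # (B, C, T) raw signal
xyz = torch.randn(2, 20, 3)    # (B, C, 3) electrode coordinates
loss, pred, mask = model(x, xyz)
print(loss.item(), pred.shape, mask.shape)  # scalar, (2, 220, 200), (2, 220)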
manas1.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cb221351143c45e71ed478a5622a1ccf8f140b983a613f6f5875c862ae48ba76
size 653413200
modelclass.py
ADDED
@@ -0,0 +1,81 @@
import torch
import torch.nn as nn

from mae import MAE

# Use cm positions from mne.get_montage(); these are included only for reference.
POSITIONS = {
    "Fp1": (-3.09, 11.46, 2.79),
    "Fp2": (2.84, 11.53, 2.77),
    "F3": (-5.18, 8.67, 7.87),
    "F4": (5.03, 8.74, 7.73),
    "F7": (-7.19, 7.31, 2.58),
    "F8": (7.14, 7.45, 2.51),
    "T3": (-8.60, 1.49, 3.12),
    "T4": (8.33, 1.53, 3.10),
    "C3": (-6.71, 2.34, 10.45),
    "C4": (6.53, 2.36, 10.37),
    "T5": (-8.77, 1.29, -0.77),
    "T6": (8.37, 1.17, -0.77),
    "P3": (-5.50, -4.42, 9.99),
    "P4": (5.36, -4.43, 10.05),
    "O1": (-3.16, -8.06, 5.48),
    "O2": (2.77, -8.05, 5.47),
    "Fz": (-0.12, 9.33, 10.26),
    "Cz": (-0.14, 2.76, 14.02),
    "Pz": (-0.17, -4.52, 12.67),
    "A2": (8.39, 0.20, -2.69),
}

class MANAS1(nn.Module):
    def __init__(self, checkpoint_path, num_classes=2, flat_dim=512):
        super().__init__()

        print(f"Loading checkpoint from {checkpoint_path}...")
        ckpt = torch.load(checkpoint_path, map_location="cpu")

        # Rebuild the pretraining model and load the pretrained weights
        self.mae = MAE(fs=200, embed_dim=512, encoder_depth=12, encoder_heads=8, decoder_depth=4, decoder_heads=8, mask_ratio=0.55)
        self.mae.load_state_dict(ckpt["model_state_dict"])

        # Expose the encoder-side components for feature extraction
        self.patch_embed = self.mae.patch_embed
        self.pos_enc = self.mae.pos_enc
        self.encoder = self.mae.encoder
        self.patch_size = self.mae.patch_size
        self.step = self.mae.step

        self.flat_dim = flat_dim

        # # The classification head (currently disabled; see forward())
        # self.final_layer = nn.Sequential(
        #     nn.Flatten(),
        #     nn.RMSNorm(self.flat_dim),  # tutorial uses RMSNorm
        #     nn.Dropout(0.1),
        #     nn.Linear(self.flat_dim, num_classes),
        # )

    def prepare_coords(self, xyz, num_patches):
        # Same 4D (x, y, z, t) coordinate construction as MAE.prepare_coords
        B, C, _ = xyz.shape
        device = xyz.device
        time_idx = torch.arange(num_patches, device=device).float()
        spat = xyz.unsqueeze(2).expand(-1, -1, num_patches, -1)
        time = time_idx.view(1, 1, num_patches, 1).expand(B, C, -1, -1)
        return torch.cat([spat, time], dim=-1).flatten(1, 2)

    def forward(self, x, pos):
        # Patchify and embed every channel; no masking at inference time
        patches = x.unfold(-1, self.patch_size, self.step)
        num_patches = patches.shape[2]

        tokens = self.patch_embed.linear(patches).flatten(1, 2)

        coords = self.prepare_coords(pos, num_patches)
        pe = self.pos_enc(coords)

        x_enc = tokens + pe
        latents, _ = self.encoder(x_enc)

        # TODO: add a final layer for classification
        return latents
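A minimal usage sketch for the fine-tuning wrapper (illustrative; assumes the manas1.pt checkpoint from this upload and input channels ordered as in POSITIONS):

import torch
from modelclass import MANAS1, POSITIONS

model = MANAS1("manas1.pt").eval()
x = torch.randn(1, 20, 2000)  # 1 sample, 20 channels, 10 s at 200 Hz
pos = torch.tensor(list(POSITIONS.values())).unsqueeze(0)  # (1, 20, 3)
with torch.no_grad():
    latents = model(x, pos)   # (1, 220, 512) per-token features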