added fortitran estimator

Browse files

Files changed (5) hide show

config/model_config.yaml +7 -1
src/config/schemas.py +59 -5
src/models/blocks/__init__.py +5 -0
src/models/blocks/patch_processors.py +2 -2
src/models/fortitran.py +179 -24

config/model_config.yaml CHANGED Viewed

@@ -1,3 +1,9 @@
-patch_size: [10, 4]
 num_layers: 6
 device: "cpu"

+patch_size: [3, 2]
 num_layers: 6
 device: "cpu"
+model_dim: 128
+num_head: 4
+activation: 'gelu'
+dropout: 0.1
+max_seq_len: 512
+pos_encoding_type: 'learnable'

src/config/schemas.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from pydantic import BaseModel, Field, model_validator
 from typing import Self, Tuple
 class OFDMParams(BaseModel):
@@ -19,10 +20,63 @@ class ModelParams(BaseModel):
     @model_validator(mode='after')
     def validate_device(self) -> Self:
-        pass
 class SystemConfig(BaseModel):
@@ -84,4 +138,4 @@ class ModelConfig(BaseModel):
         return self
-    model_config = {"extra": "forbid"}

 from pydantic import BaseModel, Field, model_validator
 from typing import Self, Tuple
+import torch
 class OFDMParams(BaseModel):
     @model_validator(mode='after')
     def validate_device(self) -> Self:
+        """Validate that the specified device is available."""
+        device_str = self.device.lower()
+        # Handle 'auto' case - automatically select best available device
+        if device_str == 'auto':
+            if torch.cuda.is_available():
+                self.device = 'cuda'
+            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+                self.device = 'mps'  # Apple Silicon
+            else:
+                self.device = 'cpu'
+            return self
+        # Validate CPU
+        if device_str == 'cpu':
+            return self
+        # Validate CUDA devices
+        if device_str.startswith('cuda'):
+            if not torch.cuda.is_available():
+                raise ValueError("CUDA is not available on this system")
+            # Handle specific CUDA device (e.g., 'cuda:0', 'cuda:1')
+            if ':' in device_str:
+                try:
+                    device_id = int(device_str.split(':')[1])
+                    if device_id >= torch.cuda.device_count():
+                        available_devices = list(range(torch.cuda.device_count()))
+                        raise ValueError(
+                            f"CUDA device {device_id} not available. "
+                            f"Available CUDA devices: {available_devices}"
+                        )
+                except (ValueError, IndexError) as e:
+                    if "invalid literal" in str(e):
+                        raise ValueError(f"Invalid CUDA device format: {device_str}")
+                    raise
+            return self
+        # Validate MPS (Apple Silicon)
+        if device_str == 'mps':
+            if not (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()):
+                raise ValueError("MPS is not available on this system")
+            return self
+        # If we get here, the device is not recognized
+        available_devices = ['cpu']
+        if torch.cuda.is_available():
+            cuda_devices = [f'cuda:{i}' for i in range(torch.cuda.device_count())]
+            available_devices.extend(['cuda'] + cuda_devices)
+        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            available_devices.append('mps')
+        raise ValueError(
+            f"Unsupported device: '{self.device}'. "
+            f"Available devices: {available_devices}"
+        )
 class SystemConfig(BaseModel):
         return self
+    model_config = {"extra": "forbid"}

src/models/blocks/__init__.py CHANGED Viewed

	@@ -0,0 +1,5 @@

+from channel_adaptivity import ChannelAdapter
+from encoders import TransformerEncoderForChannels
+from enhancers import ConvEnhancer
+from patch_processors import PatchEmbedding, InversePatchEmbedding
+from positional_encodings import SinusoidalPositionalEncoding, LearnablePositionalEncoding

src/models/blocks/patch_processors.py CHANGED Viewed

@@ -40,8 +40,8 @@ class InversePatchEmbedding(nn.Module):
     def __init__(
             self,
-            output_size: Tuple[int, int] = (120, 28),
-            patch_size: Tuple[int, int] = (10, 4)
     ):
         """Initialize the InversePatchEmbedding layer.

     def __init__(
             self,
+            output_size: Tuple[int, int] = (120, 14),
+            patch_size: Tuple[int, int] = (3, 2)
     ):
         """Initialize the InversePatchEmbedding layer.

src/models/fortitran.py CHANGED Viewed

@@ -1,40 +1,195 @@
-from torch import nn
 import torch
 import logging
 from src.config.schemas import SystemConfig, ModelConfig
 class FortiTranEstimator(nn.Module):
-    """A DL-based Channel Estimator based on a hybrid convolutional + transformers model"""
     def __init__(self, system_config: SystemConfig, model_config: ModelConfig) -> None:
-        """Initialize the FortiTranEstimator.
         Args:
-            system_config: SystemConfig object containing OFDM system parameters
-            system_config: ModelConfig object containing model parameters
         """
         super().__init__()
         self.system_config = system_config
-        self.device = torch.device(config.device)
-        self.logger = logging.getLogger(__name__)
-        # Extract dimensions from validated config
-        self.ofdm_size = (config.ofdm.num_scs, config.ofdm.num_symbols)
-        self.pilot_size = (config.pilot.num_scs, config.pilot.num_symbols)
-        # Calculate feature dimensions
-        in_feature_dim = config.pilot.num_scs * config.pilot.num_symbols
-        out_feature_dim = config.ofdm.num_scs * config.ofdm.num_symbols
-        self.logger.info(f"Initializing LinearEstimator:")
-        self.logger.info(f"  OFDM size: {self.ofdm_size}")
-        self.logger.info(f"  Pilot size: {self.pilot_size}")
-        self.logger.info(f"  Input features: {in_feature_dim}")
-        self.logger.info(f"  Output features: {out_feature_dim}")
         self.logger.info(f"  Device: {self.device}")
-        # Create linear layer
-        self.linear = nn.Linear(in_feature_dim, out_feature_dim)
-        self.to(self.device)

 import torch
+from torch import nn
 import logging
 from src.config.schemas import SystemConfig, ModelConfig
+from src.models.blocks import ConvEnhancer, PatchEmbedding, InversePatchEmbedding, TransformerEncoderForChannels
 class FortiTranEstimator(nn.Module):
+    """
+    Hybrid CNN-Transformer Channel Estimator for OFDM Systems.
+    This model performs channel estimation by:
+    1. Upsampling pilot symbols to full OFDM grid size
+    2. Applying convolutional enhancement for spatial features
+    3. Converting to patch embeddings for transformer processing
+    4. Using transformer encoder to capture long-range dependencies
+    5. Reconstructing spatial representation and applying residual connections
+    6. Final convolutional refinement for high-quality channel estimates
+    """
     def __init__(self, system_config: SystemConfig, model_config: ModelConfig) -> None:
+        """
+        Initialize the FortiTranEstimator.
         Args:
+            system_config: OFDM system configuration (subcarriers, symbols, pilot arrangement)
+            model_config: Model architecture configuration (patch size, layers, etc.)
         """
         super().__init__()
         self.system_config = system_config
+        self.model_config = model_config
+        self.device = torch.device(model_config.device)
+        self.logger = logging.getLogger(self.__class__.__name__)
+        # Cache key dimensions for efficiency
+        self._setup_dimensions()
+        # Initialize model components
+        self._build_architecture()
+        # Move model to specified device
+        self.to(self.device)
+        self._log_initialization_info()
+    def _setup_dimensions(self) -> None:
+        """Calculate and cache key dimensions from configuration."""
+        # OFDM grid dimensions
+        self.ofdm_size = (
+            self.system_config.ofdm.num_scs,
+            self.system_config.ofdm.num_symbols
+        )
+        # Pilot arrangement dimensions
+        self.pilot_size = (
+            self.system_config.pilot.num_scs,
+            self.system_config.pilot.num_symbols
+        )
+        # Feature dimensions for linear layers
+        self.pilot_features = self.pilot_size[0] * self.pilot_size[1]
+        self.ofdm_features = self.ofdm_size[0] * self.ofdm_size[1]
+        # Patch processing dimensions
+        self.patch_length = (
+                self.model_config.patch_size[0] * self.model_config.patch_size[1]
+        )
+    def _build_architecture(self) -> None:
+        """Construct the model architecture components."""
+        # 1. Pilot-to-OFDM upsampling
+        self.pilot_upsampler = nn.Linear(self.pilot_features, self.ofdm_features)
+        # 2. Initial convolutional enhancement
+        self.initial_enhancer = ConvEnhancer()
+        # 3. Patch embedding for transformer processing
+        self.patch_embedder = PatchEmbedding(self.model_config.patch_size)
+        # 4. Transformer encoder for sequence modeling
+        self.transformer_encoder = TransformerEncoderForChannels(
+            input_dim=self.patch_length,
+            output_dim=self.patch_length,
+            model_dim=self.model_config.model_dim,
+            num_head=self.model_config.num_head,
+            activation=self.model_config.activation,
+            dropout=self.model_config.dropout,
+            num_layers=self.model_config.num_layers,
+            max_len=self.model_config.max_seq_len,
+            pos_encoding_type=self.model_config.pos_encoding_type,
+        )
+        # 5. Patch reconstruction
+        self.patch_reconstructor = InversePatchEmbedding(
+            self.ofdm_size,
+            self.model_config.patch_size
+        )
+        # 6. Final convolutional refinement
+        self.final_refiner = ConvEnhancer()
+    def _log_initialization_info(self) -> None:
+        """Log model initialization details."""
+        self.logger.info("FortiTranEstimator initialized successfully:")
+        self.logger.info(f"  OFDM grid: {self.ofdm_size[0]}×{self.ofdm_size[1]} = {self.ofdm_features} elements")
+        self.logger.info(f"  Pilot grid: {self.pilot_size[0]}×{self.pilot_size[1]} = {self.pilot_features} elements")
+        self.logger.info(f"  Patch size: {self.model_config.patch_size}")
+        self.logger.info(f"  Model dimension: {self.model_config.model_dim}")
+        self.logger.info(f"  Transformer layers: {self.model_config.num_layers}")
         self.logger.info(f"  Device: {self.device}")
+        total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+        self.logger.info(f"  Total parameters: {total_params:,}")
+        self.logger.info(f"  Trainable parameters: {trainable_params:,}")
+    def forward(self, pilot_symbols: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass for channel estimation.
+        Args:
+            pilot_symbols: Complex pilot symbols of shape [batch, pilot_scs, pilot_symbols]
+        Returns:
+            Estimated channel matrix of shape [batch, ofdm_scs, ofdm_symbols]
+        """
+        # Ensure input is on correct device
+        pilot_symbols = pilot_symbols.to(self.device)
+        # Process real and imaginary parts separately
+        real_estimate = self._forward_real_valued(pilot_symbols.real)
+        imag_estimate = self._forward_real_valued(pilot_symbols.imag)
+        # Combine into complex tensor
+        channel_estimate = torch.complex(real_estimate, imag_estimate)
+        return channel_estimate
+    def _forward_real_valued(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Process real-valued input through the estimation pipeline.
+        Args:
+            x: Real-valued input tensor [batch, pilot_features] or [batch, pilot_scs, pilot_symbols]
+        Returns:
+            Real-valued channel estimate [batch, ofdm_scs, ofdm_symbols]
+        """
+        batch_size = x.shape[0]
+        # Flatten spatial dimensions for linear upsampling
+        if x.dim() > 2:
+            x = x.view(batch_size, -1)
+        # Stage 1: Upsample from pilot grid to OFDM grid
+        upsampled = self.pilot_upsampler(x)
+        # Reshape for convolutional processing
+        upsampled_2d = upsampled.view(batch_size, 1, *self.ofdm_size)
+        # Stage 2: Initial convolutional enhancement
+        conv_enhanced = torch.squeeze(self.initial_enhancer(upsampled_2d), dim=1)
+        # Stage 3: Convert to patch embeddings
+        patch_embeddings = self.patch_embedder(conv_enhanced)
+        # Stage 4: Transformer processing for long-range dependencies
+        transformer_output = self.transformer_encoder(patch_embeddings)
+        # Stage 5: Reconstruct spatial representation
+        reconstructed = self.patch_reconstructor(transformer_output)
+        # Stage 6: Apply residual connection
+        residual_combined = conv_enhanced + reconstructed
+        # Stage 7: Final convolutional refinement
+        refined_output = torch.squeeze(self.final_refiner(torch.unsqueeze(residual_combined, dim=1)), dim=1)
+        return refined_output
+    def get_model_info(self) -> dict:
+        """Return model configuration and statistics."""
+        return {
+            'model_name': self.__class__.__name__,
+            'ofdm_size': self.ofdm_size,
+            'pilot_size': self.pilot_size,
+            'patch_size': self.model_config.patch_size,
+            'patch_length': self.patch_length,
+            'model_dim': self.model_config.model_dim,
+            'num_layers': self.model_config.num_layers,
+            'device': str(self.device),
+            'total_parameters': sum(p.numel() for p in self.parameters()),
+            'trainable_parameters': sum(p.numel() for p in self.parameters() if p.requires_grad)
+        }