Commit 4e938bd
1 Parent(s): 54d5c08

refactored trainer class

Files changed:
- .gitignore +1 -1
- requirements.txt +2 -1
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/config/__pycache__/__init__.cpython-312.pyc +0 -0
- src/config/__pycache__/schemas.cpython-312.pyc +0 -0
- src/config/config_loader.py +10 -13
- src/config/schemas.py +81 -30
- src/main.py +78 -0
- src/main/parser.py +47 -4
- src/main/trainer.py +144 -137
- src/models/adafortitran.py +1 -1
- src/models/fortitran.py +1 -1
- src/models/linear.py +3 -2
.gitignore CHANGED

@@ -1,2 +1,2 @@
 .idea/
-
+**/__pycache__/
requirements.txt CHANGED

@@ -1,4 +1,5 @@
 torch
 pydantic
 yaml
-scipy
+scipy
+tqdm
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (156 Bytes).

src/config/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (246 Bytes).

src/config/__pycache__/schemas.cpython-312.pyc ADDED
Binary file (9.4 kB).
src/config/config_loader.py CHANGED

@@ -21,10 +21,8 @@ class ConfigLoader:
            system_config_path: Path to YAML configuration file for OFDM-related parameters
            model_config_path: Path to YAML configuration file for model-related parameters
 
-
        Returns:
-            ModelConfig: Validated
-            SystemConfig: Validated system configuration object
+            Tuple of (SystemConfig, ModelConfig): Validated configuration objects
 
        Raises:
            FileNotFoundError: If one of the config files doesn't exist
@@ -34,10 +32,10 @@ class ConfigLoader:
        model_config_path = Path(model_config_path)
 
        if not system_config_path.exists():
-            raise FileNotFoundError(f"
+            raise FileNotFoundError(f"System configuration file not found: {system_config_path}")
 
        if not model_config_path.exists():
-            raise FileNotFoundError(f"
+            raise FileNotFoundError(f"Model configuration file not found: {model_config_path}")
 
        try:
            with open(system_config_path, 'r') as f:
@@ -55,16 +53,15 @@ class ConfigLoader:
            system_config = SystemConfig(**system_raw_config)
            self.logger.info(f"Successfully loaded system config from {system_config_path}")
        except ValidationError as e:
-            raise ValueError(f"
-        if system_config:
-            try:
-                model_config = ModelConfig(system_config, **model_raw_config)
-                self.logger.info(f"Successfully loaded model config from {model_config_path}")
-            except ValidationError as e:
-                raise ValueError(f"Configuration validation for {model_config_path} failed:\n{e}")
+            raise ValueError(f"System configuration validation for {system_config_path} failed:\n{e}")
 
-
+        try:
+            model_config = ModelConfig(**model_raw_config)
+            self.logger.info(f"Successfully loaded model config from {model_config_path}")
+        except ValidationError as e:
+            raise ValueError(f"Model configuration validation for {model_config_path} failed:\n{e}")
 
+        return system_config, model_config
 
 
 def load_config(system_config_path: Union[str, Path], model_config_path: Union[str, Path]) -> Tuple[SystemConfig, ModelConfig]:
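The loader now validates both files and returns the pair, so call sites can unpack it in one step. A minimal usage sketch (the YAML paths are hypothetical, not part of this commit):

    from src.config.config_loader import load_config

    # Any two YAML files matching the SystemConfig/ModelConfig schemas will do.
    system_config, model_config = load_config(
        "configs/system.yaml",  # hypothetical path
        "configs/model.yaml",   # hypothetical path
    )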
src/config/schemas.py CHANGED

@@ -1,5 +1,5 @@
 from pydantic import BaseModel, Field, model_validator
-from typing import Self, Tuple
+from typing import Self, Tuple, List, Optional
 import torch
 
 
@@ -14,8 +14,19 @@ class PilotParams(BaseModel):
 
 
 class ModelParams(BaseModel):
-    patch_size: Tuple[int, int] = Field(
-    num_layers: int = Field(
+    patch_size: Tuple[int, int] = Field(..., description="Patch size as (height, width)")
+    num_layers: int = Field(..., gt=0, description="Number of transformer layers")
+    model_dim: int = Field(..., gt=0, description="Model dimension")
+    num_head: int = Field(..., gt=0, description="Number of attention heads")
+    activation: str = Field(default="gelu", description="Activation function")
+    dropout: float = Field(default=0.1, ge=0.0, le=1.0, description="Dropout rate")
+    max_seq_len: int = Field(default=512, gt=0, description="Maximum sequence length")
+    pos_encoding_type: str = Field(default="learnable", description="Position encoding type")
+    adaptive_token_length: int = Field(default=6, gt=0, description="Adaptive token length")
+    channel_adaptivity_hidden_sizes: Optional[List[int]] = Field(
+        default=None,
+        description="Hidden sizes for channel adaptation layers"
+    )
     device: str = Field(default="cpu", description="Device to use")
 
     @model_validator(mode='after')
@@ -103,39 +114,79 @@ class SystemConfig(BaseModel):
 
 
 class ModelConfig(BaseModel):
-
-
+    patch_size: Tuple[int, int] = Field(..., description="Patch size as (height, width)")
+    num_layers: int = Field(..., gt=0, description="Number of transformer layers")
+    model_dim: int = Field(..., gt=0, description="Model dimension")
+    num_head: int = Field(..., gt=0, description="Number of attention heads")
+    activation: str = Field(default="gelu", description="Activation function")
+    dropout: float = Field(default=0.1, ge=0.0, le=1.0, description="Dropout rate")
+    max_seq_len: int = Field(default=512, gt=0, description="Maximum sequence length")
+    pos_encoding_type: str = Field(default="learnable", description="Position encoding type")
+    adaptive_token_length: int = Field(default=6, gt=0, description="Adaptive token length")
+    channel_adaptivity_hidden_sizes: Optional[List[int]] = Field(
+        default=None,
+        description="Hidden sizes for channel adaptation layers"
+    )
+    device: str = Field(default="cpu", description="Device to use")
 
     @model_validator(mode='after')
-    def
-        """
-
-        )
-            f"OFDM sub-carriers ({self.system.ofdm.num_scs})"
-        )
-        #
-        if
-            f"by patch height ({patch_height}) for clean patching"
-        )
-
+    def validate_device(self) -> Self:
+        """Validate that the specified device is available."""
+        device_str = self.device.lower()
+
+        # Handle 'auto' case - automatically select best available device
+        if device_str == 'auto':
+            if torch.cuda.is_available():
+                self.device = 'cuda'
+            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+                self.device = 'mps'  # Apple Silicon
+            else:
+                self.device = 'cpu'
+            return self
+
+        # Validate CPU
+        if device_str == 'cpu':
+            return self
+
+        # Validate CUDA devices
+        if device_str.startswith('cuda'):
+            if not torch.cuda.is_available():
+                raise ValueError("CUDA is not available on this system")
+
+            # Handle specific CUDA device (e.g., 'cuda:0', 'cuda:1')
+            if ':' in device_str:
+                try:
+                    device_id = int(device_str.split(':')[1])
+                    if device_id >= torch.cuda.device_count():
+                        available_devices = list(range(torch.cuda.device_count()))
+                        raise ValueError(
+                            f"CUDA device {device_id} not available. "
+                            f"Available CUDA devices: {available_devices}"
+                        )
+                except (ValueError, IndexError) as e:
+                    if "invalid literal" in str(e):
+                        raise ValueError(f"Invalid CUDA device format: {device_str}")
+                    raise
+
+            return self
+
+        # Validate MPS (Apple Silicon)
+        if device_str == 'mps':
+            if not (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()):
+                raise ValueError("MPS is not available on this system")
+            return self
+
+        # If we get here, the device is not recognized
+        available_devices = ['cpu']
+        if torch.cuda.is_available():
+            cuda_devices = [f'cuda:{i}' for i in range(torch.cuda.device_count())]
+            available_devices.extend(['cuda'] + cuda_devices)
+        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+            available_devices.append('mps')
+
+        raise ValueError(
+            f"Unsupported device: '{self.device}'. "
+            f"Available devices: {available_devices}"
+        )
 
     model_config = {"extra": "forbid"}
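The new validate_device validator also resolves 'auto' to the best available backend (CUDA, then MPS, then CPU) before the config is used anywhere else. A minimal sketch, with hypothetical values chosen only to satisfy the required fields:

    from src.config.schemas import ModelConfig

    cfg = ModelConfig(
        patch_size=(4, 4),  # hypothetical
        num_layers=2,       # hypothetical
        model_dim=64,       # hypothetical
        num_head=4,         # hypothetical
        device="auto",
    )
    print(cfg.device)  # resolves to 'cuda', 'mps', or 'cpu' depending on the host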
src/main.py ADDED

@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Main entry point for OFDM channel estimation model training.
+
+This script provides the command-line interface for training OFDM channel estimation
+models. It loads configuration files, parses command-line arguments, and initiates
+the training process.
+"""
+
+import logging
+import sys
+from pathlib import Path
+
+from src.main.parser import parse_arguments
+from src.main.trainer import train
+from src.config.config_loader import load_config
+
+
+def setup_logging(log_level: str) -> None:
+    """Set up logging configuration.
+
+    Args:
+        log_level: Logging level string (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+    """
+    logging.basicConfig(
+        level=getattr(logging, log_level.upper()),
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        handlers=[
+            logging.StreamHandler(sys.stdout),
+            logging.FileHandler('training.log')
+        ]
+    )
+
+
+def main() -> None:
+    """Main entry point for the training script."""
+    try:
+        # Parse command-line arguments
+        args = parse_arguments()
+
+        # Set up logging
+        setup_logging(args.python_log_level)
+        logger = logging.getLogger(__name__)
+
+        logger.info("Starting OFDM channel estimation model training")
+        logger.info(f"Model: {args.model_name}")
+        logger.info(f"System config: {args.system_config_path}")
+        logger.info(f"Model config: {args.model_config_path}")
+        logger.info(f"Experiment ID: {args.exp_id}")
+
+        # Load and validate configurations
+        logger.info("Loading configuration files...")
+        system_config, model_config = load_config(
+            args.system_config_path,
+            args.model_config_path
+        )
+
+        logger.info("Configuration loaded successfully")
+        logger.info(f"OFDM dimensions: {system_config.ofdm.num_scs} subcarriers x {system_config.ofdm.num_symbols} symbols")
+        logger.info(f"Pilot dimensions: {system_config.pilot.num_scs} subcarriers x {system_config.pilot.num_symbols} symbols")
+        logger.info(f"Model architecture: {model_config.num_layers} layers, {model_config.model_dim} dimensions")
+
+        # Start training
+        logger.info("Initializing training...")
+        train(system_config, model_config, args)
+
+        logger.info("Training completed successfully")
+
+    except KeyboardInterrupt:
+        logger.info("Training interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Training failed with error: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
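One plausible invocation of this entry point from the repository root is sketched below; it assumes the repo root is on PYTHONPATH, and the dataset flag names (--val_set, --test_set) are inferred from the TrainingArguments fields rather than shown in this commit, so treat them as assumptions:

    PYTHONPATH=. python src/main.py \
        --model_name fortitran \
        --system_config_path configs/system.yaml \
        --model_config_path configs/model.yaml \
        --train_set data/train --val_set data/val --test_set data/test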
src/main/parser.py CHANGED

@@ -10,6 +10,14 @@ of training runs.
 from dataclasses import dataclass
 from pathlib import Path
 import argparse
+from enum import Enum
+
+
+class LossType(Enum):
+    """Enumeration of supported loss functions."""
+    MSE = "mse"
+    MAE = "mae"
+    HUBER = "huber"
 
 
 @dataclass
@@ -23,6 +31,7 @@ class TrainingArguments:
        # Model Configuration
        model_name: Supports Linear, AdaFortiTran, or FortiTran training
        system_config_path: Path to OFDM system configuration file
+        model_config_path: Path to model configuration file
 
        # Dataset Paths
        train_set: Path to training dataset directory
@@ -39,6 +48,8 @@ class TrainingArguments:
        lr: Learning rate for optimizer
        max_epoch: Maximum number of training epochs
        patience: Early stopping patience in epochs
+        loss_type: Type of loss function to use
+        return_type: Type of data to return from dataset
 
        # Hardware & Evaluation
        cuda: CUDA device index
@@ -48,6 +59,7 @@ class TrainingArguments:
    # Model Configuration
    model_name: str
    system_config_path: Path
+    model_config_path: Path
 
    # Dataset Paths
    train_set: Path
@@ -64,6 +76,8 @@ class TrainingArguments:
    lr: float = 1e-3
    max_epoch: int = 10
    patience: int = 3
+    loss_type: LossType = LossType.MSE
+    return_type: str = "complex"
 
    # Hardware & Evaluation
    cuda: int = 0
@@ -84,16 +98,22 @@ class TrainingArguments:
    def _validate_paths(self) -> None:
        """Validate path-related arguments.
 
-        Checks that the config
+        Checks that the config files exist and have the correct extension.
 
        Raises:
-            ValueError: If the config
+            ValueError: If the config files don't exist or aren't YAML files
        """
        if not self.system_config_path.exists():
-            raise ValueError(f"
+            raise ValueError(f"System config file not found: {self.system_config_path}")
 
        if not self.system_config_path.suffix == '.yaml':
-            raise ValueError(f"
+            raise ValueError(f"System config file must be a .yaml file: {self.system_config_path}")
+
+        if not self.model_config_path.exists():
+            raise ValueError(f"Model config file not found: {self.model_config_path}")
+
+        if not self.model_config_path.suffix == '.yaml':
+            raise ValueError(f"Model config file must be a .yaml file: {self.model_config_path}")
 
    def _validate_numeric_args(self) -> None:
        """Validate numeric arguments.
@@ -159,6 +179,12 @@ def parse_arguments() -> TrainingArguments:
        required=True,
        help='Path to YAML file containing OFDM system parameters'
    )
+    required.add_argument(
+        '--model_config_path',
+        type=Path,
+        required=True,
+        help='Path to YAML file containing model architecture parameters'
+    )
    required.add_argument(
        '--train_set',
        type=Path,
@@ -234,8 +260,25 @@ def parse_arguments() -> TrainingArguments:
        default=1e-3,
        help='Initial learning rate'
    )
+    optional.add_argument(
+        '--loss_type',
+        type=str,
+        default="mse",
+        choices=['mse', 'mae', 'huber'],
+        help='Loss function type'
+    )
+    optional.add_argument(
+        '--return_type',
+        type=str,
+        default="complex",
+        choices=['complex', 'real'],
+        help='Type of data to return from dataset'
+    )
 
    args = parser.parse_args()
 
+    # Convert loss_type string to enum
+    args.loss_type = LossType(args.loss_type)
+
    # Create and validate TrainingArguments
    return TrainingArguments(**vars(args))
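Because argparse validates the raw string through choices before LossType(args.loss_type) re-keys it into the enum, downstream code can compare against LossType members instead of bare strings. A small self-contained sketch of that lookup-by-value pattern:

    from enum import Enum

    class LossType(Enum):
        MSE = "mse"
        MAE = "mae"
        HUBER = "huber"

    # Enum lookup by value, as done right after parser.parse_args()
    assert LossType("huber") is LossType.HUBER
    assert LossType("mse").value == "mse"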
src/main/trainer.py CHANGED

@@ -10,8 +10,10 @@ training loop management, evaluation, and result logging.
 import torch
 from torch import nn, optim
 from torch.utils.data import DataLoader
-from torch.utils.tensorboard import SummaryWriter
+from torch.utils.tensorboard.writer import SummaryWriter
 from typing import Dict, Tuple, Type, Union
+import logging
+from tqdm import tqdm
 
 from .parser import TrainingArguments
 from src.data.dataset import MatDataset, get_test_dataloaders
@@ -21,14 +23,11 @@ from src.utils import (
     get_ls_mse_per_folder,
     get_model_details,
     get_test_stats_plot,
-    get_error_images
-
-
-    get_all_test_stats,
-    train_epoch,
-    eval_model,
-    predict_channels
+    get_error_images,
+    concat_complex_channel,
+    to_db
 )
+from src.config.schemas import SystemConfig, ModelConfig
 
 # A union type representing supported model classes
 ModelType = Union[LinearEstimator, AdaFortiTranEstimator, FortiTranEstimator]
@@ -48,16 +47,18 @@
     Attributes:
         MODEL_REGISTRY: Dictionary mapping model names to model classes
         system_config: OFDM system configuration
-
+        model_config: OFDM model configuration
+        args: Training arguments parsed from command line
         device: PyTorch device for computation
         writer: TensorBoard SummaryWriter for logging
-        model: Initialized model instance
+        model: Initialized Torch model instance
         optimizer: Torch optimizer for training
-        scheduler: Learning rate scheduler
+        scheduler: Learning rate scheduler for training
         early_stopper: Helper for early stopping
-        train_loader: DataLoader for training set
-        val_loader: DataLoader for validation set
-        test_loaders: Dictionary of test set DataLoaders
+        train_loader: DataLoader for training set (used for training)
+        val_loader: DataLoader for validation set (used for validation)
+        test_loaders: Dictionary of test set DataLoaders (used for testing)
+        logger: Logger instance for logging messages
     """
 
     MODEL_REGISTRY: Dict[str, Type[ModelType]] = {
@@ -66,47 +67,33 @@
         "fortitran": FortiTranEstimator,
     }
 
-
+    EXP_LR_GAMMA = 0.995
+
+    def __init__(self, system_config: SystemConfig, model_config: ModelConfig | None, args: TrainingArguments):
         """
         Initialize the ModelTrainer.
 
         Args:
-
-
+            system_config: OFDM system configuration dictionary from YAML file
+            model_config: OFDM model configuration dictionary from YAML file
+            args: Validated training arguments parsed from command line
         """
         self.system_config = system_config
+        self.model_config = model_config
         self.args = args
         self.device = torch.device(f"cuda:{args.cuda}")
         self.writer = self._setup_tensorboard()
+        self.logger = logging.getLogger(__name__)
 
         self.model = self._initialize_model()
         self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)
-        self.scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=
+        self.scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=self.EXP_LR_GAMMA)
         self.early_stopper = EarlyStopping(patience=args.patience)
 
-        self.training_loss =
-        self.comparison_loss = nn.MSELoss()  # used for test set evaluation
+        self.training_loss = nn.MSELoss()
 
         self.train_loader, self.val_loader, self.test_loaders = self._get_dataloaders()
 
-    def _get_loss_function(self) -> nn.Module:
-        """Get the appropriate loss function based on arguments.
-
-        Returns:
-            The selected PyTorch loss function based on args.loss_type
-
-        Raises:
-            ValueError: If an unsupported loss type is specified
-        """
-        if self.args.loss_type == LossType.MSE:
-            return nn.MSELoss()
-        elif self.args.loss_type == LossType.MAE:
-            return nn.L1Loss()
-        elif self.args.loss_type == LossType.HUBER:
-            return nn.HuberLoss()
-        else:
-            raise ValueError(f"Unsupported loss type: {self.args.loss_type}")
-
     def _setup_tensorboard(self) -> SummaryWriter:
         """Set up TensorBoard logging.
 
@@ -134,38 +121,30 @@
             Initialized model instance of the specified type
         """
         model_class = self.MODEL_REGISTRY[self.args.model_name]
-
-
+        if model_class is LinearEstimator:
+            model = model_class(self.system_config, device=str(self.device))
+        else:
+            if self.model_config is None:
+                raise ValueError("model_config must be provided for non-linear models.")
+            model = model_class(self.system_config, self.model_config)
         num_params, model_summary = get_model_details(model)
-
-
+        self.logger.info("\n" + model_summary)
+        self.logger.info(f"Model name: {self.args.model_name} | Number of parameters: {num_params}")
+        self.writer.add_text("Model Summary", model_summary)
         self.writer.add_text("Number of Parameters", str(num_params))
-
         return model
 
     def _get_dataloaders(self) -> Tuple[DataLoader, DataLoader, dict[str, list[tuple[str, DataLoader]]]]:
-        """
-
-        Creates DataLoader instances for:
-        - Training dataset
-        - Validation dataset
-        - Test datasets grouped by test condition (DS, MDS, SNR)
-
-        Returns:
-            Tuple containing (train_loader, val_loader, test_loaders_dict)
-        """
+        pilot_dims = [self.system_config.pilot.num_scs, self.system_config.pilot.num_symbols]
         # Training and validation dataloaders
         train_dataset = MatDataset(
             self.args.train_set,
-
-            return_type=self.config["return_type"]
+            pilot_dims
         )
         val_dataset = MatDataset(
             self.args.val_set,
-
-            return_type=self.config["return_type"]
+            pilot_dims
         )
-
         train_loader = DataLoader(
             train_dataset,
             batch_size=self.args.batch_size,
@@ -176,43 +155,34 @@
             batch_size=self.args.batch_size,
             shuffle=True
         )
-
-        # Test dataloaders
         test_loaders = {
             "DS": get_test_dataloaders(
                 self.args.test_set / "DS_test_set",
-
-                self.config["return_type"]
+                {"pilot_dims": pilot_dims, "batch_size": self.args.batch_size}
             ),
             "MDS": get_test_dataloaders(
                 self.args.test_set / "MDS_test_set",
-
-                self.config["return_type"]
+                {"pilot_dims": pilot_dims, "batch_size": self.args.batch_size}
             ),
             "SNR": get_test_dataloaders(
                 self.args.test_set / "SNR_test_set",
-
-                self.config["return_type"]
+                {"pilot_dims": pilot_dims, "batch_size": self.args.batch_size}
             ),
         }
-
         return train_loader, val_loader, test_loaders
 
     def _log_test_results(
         self,
         epoch: int,
-        test_stats: Dict[str, Dict]
-        ls_stats: Dict[str, Dict]
+        test_stats: Dict[str, Dict]
     ) -> None:
         """Log test results to TensorBoard.
 
-        Creates and logs visualizations
-        baseline LS estimator across different test conditions.
+        Creates and logs visualizations for model performance across different test conditions.
 
         Args:
             epoch: Current training epoch
             test_stats: Dictionary of test statistics for the model
-            ls_stats: Dictionary of test statistics for the LS baseline
         """
         for key in ("DS", "MDS", "SNR"):
             # Plot test statistics
@@ -220,16 +190,13 @@
                 tag=f"MSE vs. {key} (Epoch:{epoch + 1})",
                 figure=get_test_stats_plot(
                     x_name=key,
-                    stats=[test_stats[key]
-                    methods=[self.
+                    stats=[test_stats[key]],
+                    methods=[self.args.model_name]
                 )
             )
 
             # Plot error images
-            predicted_channels =
-                self.model,
-                self.test_loaders[key]
-            )
+            predicted_channels = self._predict_channels(self.test_loaders[key])
             self.writer.add_figure(
                 tag=f"{key} Error Images (Epoch:{epoch + 1})",
                 figure=get_error_images(
@@ -242,23 +209,12 @@
     def _run_tests(self, epoch: int) -> None:
         """Run tests and log results.
 
-        Evaluates the model on all test datasets
-        and logs performance metrics and visualizations.
+        Evaluates the model on all test datasets and logs performance metrics and visualizations.
 
         Args:
             epoch: Current training epoch
         """
-        ds_stats, mds_stats, snr_stats =
-            self.model,
-            self.test_loaders,
-            self.comparison_loss
-        )
-
-        ls_stats = {
-            "DS": get_ls_mse_per_folder(self.args.test_set / "DS_test_set"),
-            "MDS": get_ls_mse_per_folder(self.args.test_set / "MDS_test_set"),
-            "SNR": get_ls_mse_per_folder(self.args.test_set / "SNR_test_set")
-        }
+        ds_stats, mds_stats, snr_stats = self._get_all_test_stats()
 
         test_stats = {
             "DS": ds_stats,
@@ -266,7 +222,7 @@
             "SNR": snr_stats
         }
 
-        self._log_test_results(epoch, test_stats
+        self._log_test_results(epoch, test_stats)
 
     def _log_final_metrics(self, final_epoch: int) -> None:
         """Log final training metrics and hyperparameters.
@@ -286,11 +242,7 @@
 
         try:
             for key in ("DS", "MDS", "SNR"):
-                ds_stats, mds_stats, snr_stats =
-                    self.model,
-                    self.test_loaders,
-                    self.comparison_loss
-                )
+                ds_stats, mds_stats, snr_stats = self._get_all_test_stats()
                 ls_stats = {
                     "DS": get_ls_mse_per_folder(self.args.test_set / "DS_test_set"),
                     "MDS": get_ls_mse_per_folder(self.args.test_set / "MDS_test_set"),
@@ -309,13 +261,95 @@
                     key,
                     {
                         "LS": ls_stats[key][val],
-                        self.
+                        self.args.model_name: stats[val]
                     },
                     val
                 )
         except Exception as e:
             self.writer.add_text("Error", f"Failed to log final test results: {str(e)}")
 
+    def _compute_loss(self, estimated_channel, ideal_channel, loss_fn):
+        return loss_fn(
+            concat_complex_channel(estimated_channel),
+            concat_complex_channel(ideal_channel)
+        )
+
+    def _forward_pass(self, batch, model):
+        estimated_channel, ideal_channel, meta_data = batch
+        if hasattr(model, 'name') and model.name in ["fortitran", "MMSE"]:
+            h_est_re = model(torch.real(estimated_channel))
+            h_est_im = model(torch.imag(estimated_channel))
+            estimated_channel = torch.complex(h_est_re, h_est_im)
+        elif hasattr(model, 'name') and model.name == "adafortitran":
+            h_est_re = model(torch.real(estimated_channel), meta_data)
+            h_est_im = model(torch.imag(estimated_channel), meta_data)
+            estimated_channel = torch.complex(h_est_re, h_est_im)
+        else:
+            raise ValueError(f"Unknown model type: {getattr(model, 'name', type(model))}")
+        return estimated_channel, ideal_channel.to(model.device)
+
+    def _train_epoch(self):
+        train_loss = 0.0
+        self.model.train()
+        for batch in self.train_loader:
+            self.optimizer.zero_grad()
+            estimated_channel, ideal_channel = self._forward_pass(batch, self.model)
+            output = self._compute_loss(estimated_channel, ideal_channel, self.training_loss)
+            output.backward()
+            self.optimizer.step()
+            train_loss += (2 * output.item() * batch[0].size(0))
+        self.scheduler.step()
+        train_loss /= len(self.train_loader.dataset)
+        return train_loss
+
+    def _eval_model(self, eval_dataloader):
+        val_loss = 0.0
+        self.model.eval()
+        with torch.no_grad():
+            for batch in eval_dataloader:
+                estimated_channel, ideal_channel = self._forward_pass(batch, self.model)
+                output = self._compute_loss(estimated_channel, ideal_channel, self.training_loss)
+                val_loss += (2 * output.item() * batch[0].size(0))
+        val_loss /= len(eval_dataloader.dataset)
+        return val_loss
+
+    def _predict_channels(self, test_dataloaders):
+        channels = {}
+        sorted_loaders = sorted(
+            test_dataloaders,
+            key=lambda x: int(x[0].split("_")[1])
+        )
+        for name, test_dataloader in sorted_loaders:
+            with torch.no_grad():
+                batch = next(iter(test_dataloader))
+                estimated_channels, ideal_channels = self._forward_pass(batch, self.model)
+                var, val = name.split("_")
+                channels[int(val)] = {
+                    "estimated_channel": estimated_channels[0],
+                    "ideal_channel": ideal_channels[0]
+                }
+        return channels
+
+    def _get_test_stats(self, test_dataloaders):
+        stats = {}
+        sorted_loaders = sorted(
+            test_dataloaders,
+            key=lambda x: int(x[0].split("_")[1])
+        )
+        for name, test_dataloader in sorted_loaders:
+            var, val = name.split("_")
+            test_loss = self._eval_model(test_dataloader)
+            db_error = to_db(test_loss)
+            self.logger.info(f"{var}:{val} Test MSE: {db_error:.4f} dB")
+            stats[int(val)] = db_error
+        return stats
+
+    def _get_all_test_stats(self):
+        ds_stats = self._get_test_stats(self.test_loaders["DS"])
+        mds_stats = self._get_test_stats(self.test_loaders["MDS"])
+        snr_stats = self._get_test_stats(self.test_loaders["SNR"])
+        return ds_stats, mds_stats, snr_stats
+
     def train(self) -> None:
         """Execute the training loop.
 
@@ -325,62 +359,35 @@
         - Early stopping when validation loss plateaus
         - Logging final metrics and results
         """
-        try:
-            from tqdm import tqdm
-            use_tqdm = True
-        except ImportError:
-            use_tqdm = False
-            print("tqdm not found, progress bar will not be displayed")
-
         epoch = None
-
-        # Create progress bar if tqdm is available
-        if use_tqdm:
-            pbar = tqdm(range(self.args.max_epoch), desc="Training")
-        else:
-            pbar = range(self.args.max_epoch)
-
+        pbar = tqdm(range(self.args.max_epoch), desc="Training")
         for epoch in pbar:
             # Training step
-            train_loss =
-                self.model,
-                self.optimizer,
-                self.training_loss,
-                self.scheduler,
-                self.train_loader
-            )
+            train_loss = self._train_epoch()
             self.writer.add_scalar('Loss/Train', train_loss, epoch + 1)
 
             # Validation step
-            val_loss =
+            val_loss = self._eval_model(self.val_loader)
             self.writer.add_scalar('Loss/Val', val_loss, epoch + 1)
 
             # Update progress bar with loss info
-
-
-                f"Epoch {epoch + 1}/{self.args.max_epoch} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
+            pbar.set_description(
+                f"Epoch {epoch + 1}/{self.args.max_epoch} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
 
             if self.early_stopper.early_stop(val_loss):
-
-                pbar.write(f"Early stopping triggered at epoch {epoch + 1}")
-                else:
-                    print(f"Early stopping triggered at epoch {epoch + 1}")
+                pbar.write(f"Early stopping triggered at epoch {epoch + 1}")
                 break
 
             # Periodic testing
             if (epoch + 1) % self.args.test_every_n == 0:
                 message = f"Test results after epoch {epoch + 1}:\n" + 50 * "-"
-
-                pbar.write(message)
-                else:
-                    print(message)
+                pbar.write(message)
                 self._run_tests(epoch)
-
         self._log_final_metrics(epoch)
         self.writer.close()
 
 
-def train(
+def train(system_config: SystemConfig, model_config: ModelConfig | None, args: TrainingArguments) -> None:
     """
     Train an OFDM channel estimation model.
 
@@ -388,11 +395,11 @@ def train(config: Dict, args: TrainingArguments) -> None:
     with the specified configuration and runs the training process.
 
     Args:
-
-
+        system_config: OFDM system configuration dictionary from YAML file
+        model_config: OFDM model configuration dictionary from YAML file
        args: Validated training arguments containing all necessary parameters
            for model training, including dataset paths, hyperparameters,
            and logging configuration
    """
-    trainer = ModelTrainer(
+    trainer = ModelTrainer(system_config, model_config, args)
    trainer.train()
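End to end, the refactor routes everything through the module-level train() wrapper; a minimal sketch mirroring what src/main.py does:

    from src.config.config_loader import load_config
    from src.main.parser import parse_arguments
    from src.main.trainer import train

    args = parse_arguments()
    system_config, model_config = load_config(args.system_config_path, args.model_config_path)
    train(system_config, model_config, args)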
src/models/adafortitran.py CHANGED

@@ -1,5 +1,5 @@
 from .fortitran import BaseFortiTranEstimator
-from src.config import SystemConfig, ModelConfig
+from src.config.schemas import SystemConfig, ModelConfig
 
 
 class AdaFortiTranEstimator(BaseFortiTranEstimator):
src/models/fortitran.py CHANGED

@@ -3,7 +3,7 @@ from torch import nn
 import logging
 from typing import Tuple, List, Optional
 
-from src.config import SystemConfig, ModelConfig
+from src.config.schemas import SystemConfig, ModelConfig
 from src.models.blocks import ConvEnhancer, PatchEmbedding, InversePatchEmbedding, TransformerEncoderForChannels, \
     ChannelAdapter
 
src/models/linear.py CHANGED

@@ -27,16 +27,17 @@ class LinearEstimator(nn.Module):
         width (int): number of pilots across OFDM symbols
     """
 
-    def __init__(self, config: SystemConfig) -> None:
+    def __init__(self, config: SystemConfig, device: str = "cpu") -> None:
         """Initialize the MMSE estimator.
 
         Args:
             config: Validated SystemConfig object containing OFDM system parameters
+            device: Device to use for computation (cpu, cuda, etc.)
         """
         super().__init__()
 
         self.config = config
-        self.device = torch.device(
+        self.device = torch.device(device)
         self.logger = logging.getLogger(__name__)
 
         # Extract dimensions from validated config
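With the added device parameter, the baseline estimator is placed explicitly rather than on a hardcoded device; this matches how the trainer now constructs it via model_class(self.system_config, device=str(self.device)). A sketch assuming a validated SystemConfig instance named system_config is already in scope:

    from src.models.linear import LinearEstimator

    # system_config: a validated SystemConfig (e.g. from load_config)
    model = LinearEstimator(system_config, device="cpu")
    print(model.device)  # torch.device('cpu')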