Commit
·
54d5c08
1
Parent(s):
0c90d5f
added parser and trainer blueprints
Browse files- config/{model_config.yaml → adafortitran.yaml} +0 -1
- config/fortitran.yaml +9 -0
- src/main/__init__.py +0 -0
- src/main/parser.py +241 -0
- src/main/train_helpers.py +268 -0
- src/main/trainer.py +398 -0
- src/models/__init__.py +3 -0
- src/utils.py +283 -1
config/{model_config.yaml → adafortitran.yaml}
RENAMED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
patch_size: [3, 2]
|
| 2 |
num_layers: 6
|
| 3 |
-
device: "cpu"
|
| 4 |
model_dim: 128
|
| 5 |
num_head: 4
|
| 6 |
activation: 'gelu'
|
|
|
|
| 1 |
patch_size: [3, 2]
|
| 2 |
num_layers: 6
|
|
|
|
| 3 |
model_dim: 128
|
| 4 |
num_head: 4
|
| 5 |
activation: 'gelu'
|
config/fortitran.yaml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
patch_size: [3, 2]
|
| 2 |
+
num_layers: 6
|
| 3 |
+
model_dim: 128
|
| 4 |
+
num_head: 4
|
| 5 |
+
activation: 'gelu'
|
| 6 |
+
dropout: 0.1
|
| 7 |
+
max_seq_len: 512
|
| 8 |
+
pos_encoding_type: 'learnable'
|
| 9 |
+
adaptive_token_length: 6
|
src/main/__init__.py
ADDED
|
File without changes
|
src/main/parser.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Command line argument parser for OFDM channel estimation model training.
|
| 3 |
+
|
| 4 |
+
This module provides functionality for parsing and validating command-line arguments
|
| 5 |
+
used in training OFDM channel estimation models. It defines the available parameters,
|
| 6 |
+
their types, default values, and validation rules to ensure proper configuration
|
| 7 |
+
of training runs.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import argparse
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class TrainingArguments:
    """Container for OFDM model training arguments.

    Stores, validates, and provides access to all parameters needed for
    training an OFDM channel estimation model.

    Attributes:
        # Model Configuration
        model_name: Supports Linear, AdaFortiTran, or FortiTran training
        system_config_path: Path to OFDM system configuration file

        # Dataset Paths
        train_set: Path to training dataset directory
        val_set: Path to validation dataset directory
        test_set: Path to test dataset directory

        # Experiment Settings
        exp_id: Experiment identifier string
        python_log_level: Logging verbosity level
        tensorboard_log_dir: Directory for tensorboard logs

        # Training Hyperparameters
        batch_size: Number of samples per batch
        lr: Learning rate for optimizer
        max_epoch: Maximum number of training epochs
        patience: Early stopping patience in epochs

        # Hardware & Evaluation
        cuda: CUDA device index
        test_every_n: Number of epochs between test evaluations
    """

    # Model Configuration
    model_name: str
    system_config_path: Path

    # Dataset Paths
    train_set: Path
    val_set: Path
    test_set: Path

    # Experiment Settings
    exp_id: str
    python_log_level: str = "INFO"
    tensorboard_log_dir: Path = Path("runs")

    # Training Hyperparameters
    batch_size: int = 64
    lr: float = 1e-3
    max_epoch: int = 10
    patience: int = 3

    # Hardware & Evaluation
    cuda: int = 0
    test_every_n: int = 10

    # Both conventional YAML extensions are accepted (generalized from the
    # original '.yaml'-only check, which rejected valid '.yml' configs).
    _YAML_SUFFIXES = ('.yaml', '.yml')

    def __post_init__(self) -> None:
        """Validate arguments after initialization.

        Runs multiple validation checks on the provided arguments to ensure
        they are consistent and valid for training.

        Raises:
            ValueError: If any validation check fails
        """
        self._validate_paths()
        self._validate_numeric_args()

    def _validate_paths(self) -> None:
        """Validate path-related arguments.

        Checks that the config file exists and has the correct extension.

        Raises:
            ValueError: If the config file doesn't exist or isn't a YAML file
        """
        if not self.system_config_path.exists():
            raise ValueError(f"Config file not found: {self.system_config_path}")

        if self.system_config_path.suffix not in self._YAML_SUFFIXES:
            raise ValueError(f"Config file must be a YAML (.yaml/.yml) file: {self.system_config_path}")

    def _validate_numeric_args(self) -> None:
        """Validate numeric arguments.

        Ensures that all numeric parameters have appropriate values:
        - test_every_n, max_epoch, patience, batch_size, lr must be positive
        - cuda must be non-negative

        Raises:
            ValueError: If any numeric argument has an invalid value
        """
        if self.test_every_n <= 0:
            raise ValueError(f"test_every_n must be positive, got: {self.test_every_n}")

        if self.max_epoch <= 0:
            raise ValueError(f"max_epoch must be positive, got: {self.max_epoch}")

        if self.patience <= 0:
            raise ValueError(f"patience must be positive, got: {self.patience}")

        if self.batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got: {self.batch_size}")

        if self.cuda < 0:
            raise ValueError(f"cuda must be non-negative, got: {self.cuda}")

        if self.lr <= 0:
            raise ValueError(f"lr must be positive, got: {self.lr}")
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def parse_arguments() -> TrainingArguments:
    """Parse command-line arguments for training an OFDM channel estimation model.

    Sets up an argument parser with all required and optional arguments,
    processes the command line input, and returns a validated TrainingArguments
    object with all parameters needed for model training.

    Returns:
        TrainingArguments: Validated arguments for model training

    Raises:
        ValueError: If validation fails for any arguments
        SystemExit: If argument parsing fails (raised by argparse)
    """
    parser = argparse.ArgumentParser(
        description='Train an OFDM channel estimation model',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Declarative (flag, options) specs keep the group wiring in one place.
    required_specs = [
        ('--model_name', dict(
            type=str,
            required=True,
            choices=['Linear', 'AdaFortiTran', 'FortiTran'],
            help='Model type to train (Linear, AdaFortiTran, or FortiTran)',
        )),
        ('--system_config_path', dict(
            type=Path,
            required=True,
            help='Path to YAML file containing OFDM system parameters',
        )),
        ('--train_set', dict(
            type=Path,
            required=True,
            help='Training dataset folder path',
        )),
        ('--val_set', dict(
            type=Path,
            required=True,
            help='Validation dataset folder path',
        )),
        ('--test_set', dict(
            type=Path,
            required=True,
            help='Test dataset folder path',
        )),
        ('--exp_id', dict(
            type=str,
            required=True,
            help='Experiment identifier for log folder naming',
        )),
    ]
    optional_specs = [
        ('--python_log_level', dict(
            type=str,
            default="INFO",
            help='Logger level for python logging module',
        )),
        ('--tensorboard_log_dir', dict(
            type=Path,
            default="runs",
            help='Directory for tensorboard logs',
        )),
        ('--test_every_n', dict(
            type=int,
            default=10,
            help='Test model every N epochs',
        )),
        ('--max_epoch', dict(
            type=int,
            default=10,
            help='Maximum number of training epochs',
        )),
        ('--patience', dict(
            type=int,
            default=3,
            help='Early stopping patience (epochs)',
        )),
        ('--batch_size', dict(
            type=int,
            default=64,
            help='Training batch size',
        )),
        ('--cuda', dict(
            type=int,
            default=0,
            help='CUDA device index (0 for single GPU)',
        )),
        ('--lr', dict(
            type=float,
            default=1e-3,
            help='Initial learning rate',
        )),
    ]

    required = parser.add_argument_group('required arguments')
    for flag, options in required_specs:
        required.add_argument(flag, **options)

    optional = parser.add_argument_group('optional arguments')
    for flag, options in optional_specs:
        optional.add_argument(flag, **options)

    namespace = parser.parse_args()

    # TrainingArguments.__post_init__ performs validation on construction.
    return TrainingArguments(**vars(namespace))
|
src/main/train_helpers.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Training helper functions for OFDM channel estimation models.
|
| 3 |
+
|
| 4 |
+
This module provides utility functions for training, evaluating, and testing
|
| 5 |
+
deep learning models for OFDM channel estimation tasks. It includes functions
|
| 6 |
+
for performing training epochs, model evaluation, prediction generation,
|
| 7 |
+
and performance statistics calculation across different test conditions.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from typing import Dict, List, Tuple, Union, Callable
|
| 11 |
+
import torch
|
| 12 |
+
from torch import nn
|
| 13 |
+
from torch.utils.data import DataLoader
|
| 14 |
+
from torch.optim import Optimizer
|
| 15 |
+
from torch.optim.lr_scheduler import ExponentialLR
|
| 16 |
+
from src.utils import to_db, concat_complex_channel
|
| 17 |
+
|
| 18 |
+
# Type aliases
|
| 19 |
+
ComplexTensor = torch.Tensor # Complex tensor
|
| 20 |
+
BatchType = Tuple[ComplexTensor, ComplexTensor, Union[Dict, None]]
|
| 21 |
+
TestDataLoadersType = List[Tuple[str, DataLoader]]
|
| 22 |
+
StatsType = Dict[int, float]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def get_all_test_stats(
    model: nn.Module,
    test_dataloaders: Dict[str, TestDataLoadersType],
    loss_fn: Callable
) -> Tuple[StatsType, StatsType, StatsType]:
    """
    Evaluate model on all test datasets.

    Calculates performance statistics (MSE in dB) for a model across different
    test conditions: Delay Spread (DS), Max Doppler Shift (MDS), and
    Signal-to-Noise Ratio (SNR).

    Args:
        model: Model to evaluate
        test_dataloaders: Dictionary containing DataLoader objects for test sets:
            - "DS": Delay Spread test set
            - "MDS": Max Doppler Shift test set
            - "SNR": Signal-to-Noise Ratio test set
        loss_fn: Loss function for evaluation

    Returns:
        Tuple containing statistics (MSE in dB) for DS, MDS, and SNR test sets,
        where each set of statistics is a dictionary mapping parameter values to MSE
    """
    # Evaluate each condition in a fixed order so the returned tuple is
    # always (DS, MDS, SNR).
    ds, mds, snr = (
        get_test_stats(model, test_dataloaders[condition], loss_fn)
        for condition in ("DS", "MDS", "SNR")
    )
    return ds, mds, snr
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def get_test_stats(
    model: nn.Module,
    test_dataloaders: TestDataLoadersType,
    loss_fn: Callable
) -> StatsType:
    """
    Evaluate model on provided test dataloaders.

    Calculates performance statistics (MSE in dB) for a model on a
    specific set of test conditions.

    Args:
        model: Model to evaluate
        test_dataloaders: List of (name, DataLoader) tuples for test sets,
            where names are in format "parameter_value"
        loss_fn: Loss function for evaluation

    Returns:
        Dictionary mapping test parameter values (as integers) to MSE values in dB
    """
    def _param_value(entry):
        # Names are "parameter_value"; order loaders by the numeric value.
        return int(entry[0].split("_")[1])

    results: StatsType = {}
    for name, loader in sorted(test_dataloaders, key=_param_value):
        param, value = name.split("_")
        mse_db = to_db(eval_model(model, loader, loss_fn))
        print(f"{param}:{value} Test MSE: {mse_db:.4f} dB")
        results[int(value)] = mse_db

    return results
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def eval_model(
    model: nn.Module,
    eval_dataloader: DataLoader,
    loss_fn: Callable
) -> float:
    """
    Evaluate model on given dataloader.

    Calculates the average loss for a model on a dataset without
    performing parameter updates.

    Args:
        model: Model to evaluate
        eval_dataloader: DataLoader containing evaluation data
        loss_fn: Loss function for computing error

    Returns:
        Average validation loss (adjusted for complex values)

    Notes:
        Loss is multiplied by 2 to account for complex-valued matrices being
        represented as real-valued matrices of double size.
    """
    model.eval()
    accumulated = 0.0

    with torch.no_grad():
        for batch in eval_dataloader:
            estimated, ideal = _forward_pass(batch, model)
            loss = _compute_loss(estimated, ideal, loss_fn)
            # Weight the batch-mean loss by batch size; the factor of 2
            # compensates for the real/imag concatenation.
            accumulated += 2 * loss.item() * batch[0].size(0)

    return accumulated / len(eval_dataloader.dataset)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def predict_channels(
    model: nn.Module,
    test_dataloaders: TestDataLoadersType
) -> Dict[int, Dict[str, ComplexTensor]]:
    """
    Generate channel predictions for test datasets.

    Creates predictions for a sample from each test dataset to enable
    visualization and error analysis.

    Args:
        model: Model to use for predictions
        test_dataloaders: List of (name, DataLoader) tuples for test sets,
            where names are in format "parameter_value"

    Returns:
        Dictionary mapping test parameter values (as integers) to dictionaries containing
        estimated and ideal channels for a single sample
    """
    predictions: Dict[int, Dict[str, ComplexTensor]] = {}
    ordered = sorted(test_dataloaders, key=lambda entry: int(entry[0].split("_")[1]))

    for name, loader in ordered:
        with torch.no_grad():
            # One batch per condition is enough for visualization.
            sample_batch = next(iter(loader))
            estimated, ideal = _forward_pass(sample_batch, model)

            _, value = name.split("_")
            predictions[int(value)] = {
                "estimated_channel": estimated[0],
                "ideal_channel": ideal[0]
            }

    return predictions
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def train_epoch(
    model: nn.Module,
    optimizer: Optimizer,
    loss_fn: Callable,
    scheduler: ExponentialLR,
    train_dataloader: DataLoader
) -> float:
    """
    Train model for one epoch.

    Performs a complete training iteration over the dataset, including:
    - Forward pass through the model
    - Loss calculation
    - Backpropagation
    - Parameter updates
    - Learning rate scheduling

    Args:
        model: Model to train
        optimizer: Optimizer for updating model parameters
        loss_fn: Loss function for computing error
        scheduler: Learning rate scheduler
        train_dataloader: DataLoader containing training data

    Returns:
        Average training loss for the epoch (adjusted for complex values)

    Notes:
        Loss is multiplied by 2 to account for complex-valued matrices being
        represented as real-valued matrices of double size.
    """
    model.train()
    accumulated = 0.0

    for batch in train_dataloader:
        optimizer.zero_grad()
        estimated, ideal = _forward_pass(batch, model)
        loss = _compute_loss(estimated, ideal, loss_fn)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size; the factor of 2
        # compensates for the real/imag concatenation.
        accumulated += 2 * loss.item() * batch[0].size(0)

    # Learning rate decays once per epoch, not per batch.
    scheduler.step()
    return accumulated / len(train_dataloader.dataset)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def _forward_pass(batch: BatchType, model: nn.Module) -> Tuple[ComplexTensor, ComplexTensor]:
    """
    Perform forward pass through model.

    Splits the complex input into real and imaginary parts, runs each part
    through the model separately, and recombines the outputs into a complex
    tensor. Adaptive models additionally receive the batch metadata.

    Args:
        batch: Tuple containing (estimated_channel, ideal_channel, metadata)
        model: Model to use for processing; must expose ``name`` and ``device``

    Returns:
        Tuple of (processed_estimated_channel, ideal_channel)

    Raises:
        ValueError: If model name is not recognized

    Notes:
        NOTE(review): the trainer registry includes a "linear" model, but this
        dispatch only handles "fortitran"/"MMSE"/"adafortitran" — a Linear
        model would hit the ValueError branch. Confirm intended handling.
        NOTE(review): the input is not explicitly moved to ``model.device``
        here — presumably the model or dataloader handles that; verify.
    """
    estimated_channel, ideal_channel, meta_data = batch

    if model.name in ["fortitran", "MMSE"]:
        # Real and imaginary planes are processed independently by the same
        # real-valued model, then recombined.
        h_est_re = model(torch.real(estimated_channel))
        h_est_im = model(torch.imag(estimated_channel))
        estimated_channel = torch.complex(h_est_re, h_est_im)
    elif model.name == "adafortitran":
        # Adaptive variant also consumes per-batch metadata.
        h_est_re = model(torch.real(estimated_channel), meta_data)
        h_est_im = model(torch.imag(estimated_channel), meta_data)
        estimated_channel = torch.complex(h_est_re, h_est_im)
    else:
        raise ValueError(f"Unknown model type: {model.name}")

    # Ground truth is moved to the model's device so losses compare
    # same-device tensors.
    return estimated_channel, ideal_channel.to(model.device)
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def _compute_loss(
    estimated_channel: ComplexTensor,
    ideal_channel: ComplexTensor,
    loss_fn: Callable
) -> torch.Tensor:
    """
    Calculate loss between estimated and ideal channels.

    Computes the loss between model output and ground truth using the specified
    loss function, with appropriate handling of complex values.

    Args:
        estimated_channel: Estimated channel from model
        ideal_channel: Ground truth ideal channel
        loss_fn: Loss function to compute error

    Returns:
        Computed loss value as a scalar tensor
    """
    # Both operands are converted to stacked real representations before the
    # real-valued loss is applied.
    prediction = concat_complex_channel(estimated_channel)
    target = concat_complex_channel(ideal_channel)
    return loss_fn(prediction, target)
|
src/main/trainer.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
OFDM channel estimation model training module.
|
| 3 |
+
|
| 4 |
+
This module provides functionality for training and evaluating deep learning models
|
| 5 |
+
for OFDM channel estimation tasks. It includes a ModelTrainer class that handles
|
| 6 |
+
the complete training workflow, including model initialization, data loading,
|
| 7 |
+
training loop management, evaluation, and result logging.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
from torch import nn, optim
|
| 12 |
+
from torch.utils.data import DataLoader
|
| 13 |
+
from torch.utils.tensorboard import SummaryWriter
|
| 14 |
+
from typing import Dict, Tuple, Type, Union
|
| 15 |
+
|
| 16 |
+
from .parser import TrainingArguments
|
| 17 |
+
from src.data.dataset import MatDataset, get_test_dataloaders
|
| 18 |
+
from src.models import LinearEstimator, AdaFortiTranEstimator, FortiTranEstimator
|
| 19 |
+
from src.utils import (
|
| 20 |
+
EarlyStopping,
|
| 21 |
+
get_ls_mse_per_folder,
|
| 22 |
+
get_model_details,
|
| 23 |
+
get_test_stats_plot,
|
| 24 |
+
get_error_images
|
| 25 |
+
)
|
| 26 |
+
from src.main.train_helpers import (
|
| 27 |
+
get_all_test_stats,
|
| 28 |
+
train_epoch,
|
| 29 |
+
eval_model,
|
| 30 |
+
predict_channels
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# A union type representing supported model classes
|
| 34 |
+
ModelType = Union[LinearEstimator, AdaFortiTranEstimator, FortiTranEstimator]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class ModelTrainer:
    """Handles the training and evaluation of deep learning models.

    This class manages the complete lifecycle of model training, including:
    - Model initialization and configuration
    - Optimizer and loss function setup
    - Data loading and preprocessing
    - Training loop execution
    - Performance evaluation
    - Result logging and visualization via TensorBoard

    Attributes:
        MODEL_REGISTRY: Dictionary mapping model names to model classes
        system_config: OFDM system configuration
        args: Training arguments
        device: PyTorch device for computation
        writer: TensorBoard SummaryWriter for logging
        model: Initialized model instance
        optimizer: Torch optimizer for training
        scheduler: Learning rate scheduler
        early_stopper: Helper for early stopping
        train_loader: DataLoader for training set
        val_loader: DataLoader for validation set
        test_loaders: Dictionary of test set DataLoaders
    """

    # Maps a lowercase model identifier to its estimator class.
    # NOTE(review): the CLI parser's --model_name choices are capitalized
    # ('Linear', 'AdaFortiTran', 'FortiTran') while these keys are lowercase;
    # confirm the name is normalized before the registry lookup, otherwise
    # the lookup raises KeyError.
    MODEL_REGISTRY: Dict[str, Type[ModelType]] = {
        "linear": LinearEstimator,
        "adafortitran": AdaFortiTranEstimator,
        "fortitran": FortiTranEstimator,
    }
|
| 68 |
+
|
| 69 |
+
    def __init__(self, system_config: Dict, args: TrainingArguments):
        """
        Initialize the ModelTrainer.

        Args:
            system_config: OFDM system configuration dictionary from YAML file
            args: Validated training arguments
        """
        self.system_config = system_config
        self.args = args
        # NOTE(review): always targets CUDA; no CPU fallback if no GPU is
        # available — confirm whether a torch.cuda.is_available() guard is
        # intended.
        self.device = torch.device(f"cuda:{args.cuda}")
        self.writer = self._setup_tensorboard()

        # Model must be created after self.device/self.writer are set, since
        # _initialize_model reads both.
        self.model = self._initialize_model()
        self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)
        self.scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.995)
        self.early_stopper = EarlyStopping(patience=args.patience)

        self.training_loss = self._get_loss_function()
        self.comparison_loss = nn.MSELoss()  # used for test set evaluation

        self.train_loader, self.val_loader, self.test_loaders = self._get_dataloaders()
|
| 91 |
+
|
| 92 |
+
def _get_loss_function(self) -> nn.Module:
|
| 93 |
+
"""Get the appropriate loss function based on arguments.
|
| 94 |
+
|
| 95 |
+
Returns:
|
| 96 |
+
The selected PyTorch loss function based on args.loss_type
|
| 97 |
+
|
| 98 |
+
Raises:
|
| 99 |
+
ValueError: If an unsupported loss type is specified
|
| 100 |
+
"""
|
| 101 |
+
if self.args.loss_type == LossType.MSE:
|
| 102 |
+
return nn.MSELoss()
|
| 103 |
+
elif self.args.loss_type == LossType.MAE:
|
| 104 |
+
return nn.L1Loss()
|
| 105 |
+
elif self.args.loss_type == LossType.HUBER:
|
| 106 |
+
return nn.HuberLoss()
|
| 107 |
+
else:
|
| 108 |
+
raise ValueError(f"Unsupported loss type: {self.args.loss_type}")
|
| 109 |
+
|
| 110 |
+
def _setup_tensorboard(self) -> SummaryWriter:
|
| 111 |
+
"""Set up TensorBoard logging.
|
| 112 |
+
|
| 113 |
+
Creates a unique log directory based on model name and experiment ID.
|
| 114 |
+
|
| 115 |
+
Returns:
|
| 116 |
+
Initialized SummaryWriter for TensorBoard logging
|
| 117 |
+
|
| 118 |
+
Raises:
|
| 119 |
+
RuntimeError: If experiment directory already exists
|
| 120 |
+
"""
|
| 121 |
+
log_path = self.args.tensorboard_log_dir / f"{self.args.model_name}_{self.args.exp_id}"
|
| 122 |
+
if log_path.exists():
|
| 123 |
+
raise RuntimeError(f"Experiment {log_path} already exists")
|
| 124 |
+
|
| 125 |
+
return SummaryWriter(str(log_path))
|
| 126 |
+
|
| 127 |
+
def _initialize_model(self) -> ModelType:
|
| 128 |
+
"""Initialize the model based on configuration.
|
| 129 |
+
|
| 130 |
+
Creates an instance of the appropriate model class from the registry,
|
| 131 |
+
logs model summary information, and returns the initialized model.
|
| 132 |
+
|
| 133 |
+
Returns:
|
| 134 |
+
Initialized model instance of the specified type
|
| 135 |
+
"""
|
| 136 |
+
model_class = self.MODEL_REGISTRY[self.args.model_name]
|
| 137 |
+
model = model_class(self.device, self.config, vars(self.args))
|
| 138 |
+
|
| 139 |
+
num_params, model_summary = get_model_details(model)
|
| 140 |
+
print(model_summary)
|
| 141 |
+
print(f"Model name: {self.config['model_name']}\nNumber of parameters: {num_params}")
|
| 142 |
+
self.writer.add_text("Number of Parameters", str(num_params))
|
| 143 |
+
|
| 144 |
+
return model
|
| 145 |
+
|
| 146 |
+
def _get_dataloaders(self) -> Tuple[DataLoader, DataLoader, dict[str, list[tuple[str, DataLoader]]]]:
    """Build the training, validation, and test dataloaders.

    Creates DataLoader instances for:
    - Training dataset (shuffled)
    - Validation dataset (deterministic order)
    - Test datasets grouped by test condition (DS, MDS, SNR)

    Returns:
        Tuple containing (train_loader, val_loader, test_loaders_dict),
        where test_loaders_dict maps each condition name to the loaders
        returned by get_test_dataloaders.
    """
    train_dataset = MatDataset(
        self.args.train_set,
        self.args.pilot_dims,
        return_type=self.config["return_type"]
    )
    val_dataset = MatDataset(
        self.args.val_set,
        self.args.pilot_dims,
        return_type=self.config["return_type"]
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=self.args.batch_size,
        shuffle=True
    )
    # Fix: the validation loader was previously shuffled too. Evaluation
    # gains nothing from shuffling, and a deterministic order makes
    # validation runs reproducible.
    val_loader = DataLoader(
        val_dataset,
        batch_size=self.args.batch_size,
        shuffle=False
    )

    # One loader group per test condition; test-set folders follow the
    # "<condition>_test_set" naming convention.
    test_loaders = {
        condition: get_test_dataloaders(
            self.args.test_set / f"{condition}_test_set",
            vars(self.args),
            self.config["return_type"],
        )
        for condition in ("DS", "MDS", "SNR")
    }

    return train_loader, val_loader, test_loaders
|
| 200 |
+
|
| 201 |
+
def _log_test_results(
    self,
    epoch: int,
    test_stats: Dict[str, Dict],
    ls_stats: Dict[str, Dict]
) -> None:
    """Push per-condition test visualizations to TensorBoard.

    For each test condition (DS, MDS, SNR) logs a line plot comparing the
    model against the LS baseline, plus a heatmap of channel-estimation
    errors on that condition's test loaders.

    Args:
        epoch: Current training epoch (0-based).
        test_stats: Per-condition test statistics for the model.
        ls_stats: Per-condition test statistics for the LS baseline.
    """
    for condition in ("DS", "MDS", "SNR"):
        # Line plot: model vs. LS baseline.
        comparison_figure = get_test_stats_plot(
            x_name=condition,
            stats=[test_stats[condition], ls_stats[condition]],
            methods=[self.config["model_name"], "LS"]
        )
        self.writer.add_figure(
            tag=f"MSE vs. {condition} (Epoch:{epoch + 1})",
            figure=comparison_figure
        )

        # Error heatmaps for this condition's predicted channels.
        channels = predict_channels(self.model, self.test_loaders[condition])
        self.writer.add_figure(
            tag=f"{condition} Error Images (Epoch:{epoch + 1})",
            figure=get_error_images(condition, channels, show=False)
        )
|
| 241 |
+
|
| 242 |
+
def _run_tests(self, epoch: int) -> None:
    """Evaluate the model on every test set and log the results.

    Gathers per-condition statistics for the model and for the LS
    baseline, then hands both to _log_test_results for visualization.

    Args:
        epoch: Current training epoch (0-based).
    """
    ds_stats, mds_stats, snr_stats = get_all_test_stats(
        self.model,
        self.test_loaders,
        self.comparison_loss
    )
    test_stats = {"DS": ds_stats, "MDS": mds_stats, "SNR": snr_stats}

    # LS baseline statistics, one entry per test condition.
    ls_stats = {
        condition: get_ls_mse_per_folder(self.args.test_set / f"{condition}_test_set")
        for condition in ("DS", "MDS", "SNR")
    }

    self._log_test_results(epoch, test_stats, ls_stats)
|
| 270 |
+
|
| 271 |
+
def _log_final_metrics(self, final_epoch: int) -> None:
    """Log final training metrics and hyperparameters.

    Records the hyperparameters used in training plus final performance
    metrics across all test conditions for experiment tracking.

    Args:
        final_epoch: The index of the final training epoch (0-based).
    """
    str_params = {k: str(v) for k, v in vars(self.args).items()}
    self.writer.add_hparams(
        hparam_dict=str_params,
        metric_dict={"last_epoch": final_epoch + 1},
        run_name="."
    )

    try:
        # Compute model and LS-baseline statistics ONCE. The previous
        # version recomputed get_all_test_stats and all three
        # get_ls_mse_per_folder scans inside the per-key loop, tripling
        # the work for identical results.
        ds_stats, mds_stats, snr_stats = get_all_test_stats(
            self.model,
            self.test_loaders,
            self.comparison_loss
        )
        test_stats = {"DS": ds_stats, "MDS": mds_stats, "SNR": snr_stats}
        ls_stats = {
            key: get_ls_mse_per_folder(self.args.test_set / f"{key}_test_set")
            for key in test_stats
        }

        for key, stats in test_stats.items():
            for val in stats:
                self.writer.add_scalars(
                    key,
                    {
                        "LS": ls_stats[key][val],
                        self.config["model_name"]: stats[val]
                    },
                    val
                )
    except Exception as e:
        # Best-effort logging boundary: a failed final report should not
        # crash the whole training run.
        self.writer.add_text("Error", f"Failed to log final test results: {str(e)}")
|
| 318 |
+
|
| 319 |
+
def train(self) -> None:
    """Execute the training loop.

    Per epoch: one training pass, one validation pass, an early-stopping
    check, and periodic testing every ``test_every_n`` epochs. Final
    metrics are logged and the TensorBoard writer is closed at the end.
    """
    try:
        from tqdm import tqdm
        use_tqdm = True
    except ImportError:
        use_tqdm = False
        print("tqdm not found, progress bar will not be displayed")

    def emit(message: str) -> None:
        # Route messages through tqdm when available so they do not
        # corrupt the progress bar rendering.
        if use_tqdm:
            pbar.write(message)
        else:
            print(message)

    epoch = None

    if use_tqdm:
        pbar = tqdm(range(self.args.max_epoch), desc="Training")
    else:
        pbar = range(self.args.max_epoch)

    for epoch in pbar:
        # Training step
        train_loss = train_epoch(
            self.model,
            self.optimizer,
            self.training_loss,
            self.scheduler,
            self.train_loader
        )
        self.writer.add_scalar('Loss/Train', train_loss, epoch + 1)

        # Validation step
        val_loss = eval_model(self.model, self.val_loader, self.training_loss)
        self.writer.add_scalar('Loss/Val', val_loss, epoch + 1)

        if use_tqdm:
            pbar.set_description(
                f"Epoch {epoch + 1}/{self.args.max_epoch} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        if self.early_stopper.early_stop(val_loss):
            emit(f"Early stopping triggered at epoch {epoch + 1}")
            break

        # Periodic testing
        if (epoch + 1) % self.args.test_every_n == 0:
            emit(f"Test results after epoch {epoch + 1}:\n" + 50 * "-")
            self._run_tests(epoch)

    # Fix: with max_epoch == 0 the loop never runs, `epoch` stays None,
    # and the original unconditional call crashed on `final_epoch + 1`.
    if epoch is not None:
        self._log_final_metrics(epoch)
    self.writer.close()
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def train(config: Dict, args: TrainingArguments) -> None:
    """
    Train an OFDM channel estimation model.

    Main entry point for model training: builds a ModelTrainer from the
    supplied configuration and arguments, then runs its training loop.

    Args:
        config: Model configuration dictionary loaded from a YAML file,
            containing model architecture and training parameters
        args: Validated training arguments with dataset paths,
            hyperparameters, and logging configuration
    """
    ModelTrainer(config, args).train()
|
src/models/__init__.py
CHANGED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.models.fortitran import FortiTranEstimator
|
| 2 |
+
from src.models.adafortitran import AdaFortiTranEstimator
|
| 3 |
+
from src.models.linear import LinearEstimator
|
src/utils.py
CHANGED
|
@@ -1,7 +1,70 @@
|
|
| 1 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import torch
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
def extract_values(file_name):
|
| 6 |
"""
|
| 7 |
Extract channel information from a file name.
|
|
@@ -37,6 +100,58 @@ def extract_values(file_name):
|
|
| 37 |
else:
|
| 38 |
raise ValueError("Cannot extract file information.")
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def concat_complex_channel(channel_matrix):
|
| 41 |
"""
|
| 42 |
Convert a complex channel matrix into a real matrix by concatenating real and imaginary parts.
|
|
@@ -54,3 +169,170 @@ def concat_complex_channel(channel_matrix):
|
|
| 54 |
imag_channel_m = torch.imag(channel_matrix)
|
| 55 |
cat_channel_m = torch.cat((real_channel_m, imag_channel_m), dim=1)
|
| 56 |
return cat_channel_m
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for OFDM channel estimation.
|
| 3 |
+
|
| 4 |
+
This module provides various utility functions for processing, visualizing,
|
| 5 |
+
and analyzing OFDM channel estimation data, including complex channel matrices,
|
| 6 |
+
error calculations, model statistics, and visualization tools for
|
| 7 |
+
performance evaluation.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Optional, Union
|
| 12 |
import re
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import scipy.io as sio
|
| 17 |
+
import matplotlib.pyplot as plt
|
| 18 |
+
from prettytable import PrettyTable
|
| 19 |
import torch
|
| 20 |
|
| 21 |
+
|
| 22 |
+
class EarlyStopping:
    """Stop training when validation loss stops improving.

    Tracks the best (lowest) validation loss seen so far and counts how
    many consecutive epochs pass without a new best. Once that count
    reaches ``patience``, training should stop.

    Attributes:
        patience: Number of non-improving epochs tolerated before stopping
        remaining_patience: Epochs left before stopping triggers
        min_loss: Best validation loss observed so far (None before first call)
    """

    def __init__(self, patience: int = 3):
        """
        Initialize early stopping.

        Args:
            patience: Number of epochs to wait before stopping
        """
        self.patience = patience
        self.remaining_patience = patience
        self.min_loss: Optional[float] = None

    def early_stop(self, loss: float) -> bool:
        """
        Check if training should stop.

        Args:
            loss: Current validation loss

        Returns:
            Whether to stop training
        """
        improved = self.min_loss is None or loss < self.min_loss
        if improved:
            # New best loss: remember it and restore full patience.
            self.min_loss = loss
            self.remaining_patience = self.patience
            return False

        # No improvement (ties count as no improvement): burn one epoch
        # of patience and stop once it runs out.
        self.remaining_patience -= 1
        return self.remaining_patience == 0
|
| 66 |
+
|
| 67 |
+
|
| 68 |
def extract_values(file_name):
|
| 69 |
"""
|
| 70 |
Extract channel information from a file name.
|
|
|
|
| 100 |
else:
|
| 101 |
raise ValueError("Cannot extract file information.")
|
| 102 |
|
| 103 |
+
|
| 104 |
+
def get_error_images(variable, channel_data, show=False):
    """
    Create visualizations of channel estimation errors.

    Generates one heatmap per entry of ``channel_data`` showing the
    absolute difference between estimated and ideal channels, with a
    shared color scale and a single shared colorbar.

    Args:
        variable: Name of the variable being visualized (e.g., 'SNR', 'DS')
        channel_data: Dictionary mapping parameter values to dictionaries
            containing 'estimated_channel' and 'ideal_channel' (torch tensors)
        show: Whether to display the figure immediately (default: False)

    Returns:
        matplotlib.figure.Figure: The generated figure with error heatmaps
    """
    # squeeze=False guarantees a 2-D axes array even with a single
    # condition: plt.subplots(1, 1) otherwise returns a bare Axes object
    # and the original axes[i] indexing crashed.
    fig, axes = plt.subplots(1, len(channel_data), figsize=(20, 6), squeeze=False)

    cax = None
    for i, (key, channels) in enumerate(channel_data.items()):
        # Absolute error between estimated and ideal channels.
        error_matrix = torch.abs(channels['estimated_channel'] - channels['ideal_channel'])
        error_numpy = error_matrix.detach().cpu().numpy()

        # Shared vmin/vmax keep the color scale consistent across subplots.
        ax = axes[0, i]
        cax = ax.imshow(error_numpy, cmap='viridis', aspect=14 / 120, vmin=0, vmax=1)
        ax.set_title(f"{variable} = {key}")
        ax.set_xlabel('Columns (14)')
        ax.set_ylabel('Rows (120)')

    # Dedicated axis for the shared colorbar: [left, bottom, width, height].
    cbar_ax = fig.add_axes((0.92, 0.15, 0.02, 0.7))
    if cax is not None:
        # Guard: with empty channel_data there is no image to anchor the
        # colorbar to (the original raised NameError on `cax`).
        fig.colorbar(cax, cax=cbar_ax, label='Error Magnitude')

    # Leave space on the right for the colorbar.
    fig.tight_layout(rect=(0, 0, 0.9, 1))

    if show:
        plt.show()

    return fig
|
| 153 |
+
|
| 154 |
+
|
| 155 |
def concat_complex_channel(channel_matrix):
|
| 156 |
"""
|
| 157 |
Convert a complex channel matrix into a real matrix by concatenating real and imaginary parts.
|
|
|
|
| 169 |
imag_channel_m = torch.imag(channel_matrix)
|
| 170 |
cat_channel_m = torch.cat((real_channel_m, imag_channel_m), dim=1)
|
| 171 |
return cat_channel_m
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def inverse_concat_complex_channel(channel_matrix: torch.Tensor) -> torch.Tensor:
    """
    Reconstruct complex channel matrix from concatenated real matrix.

    Reverses the operation performed by concat_complex_channel by
    splitting the tensor along its last dimension and combining the two
    halves into a complex tensor.

    Args:
        channel_matrix: Real-valued matrix of shape (..., 2*T),
            e.g. (B, F, 2*T) for a batch of channel estimates

    Returns:
        Complex matrix of shape (..., T)
    """
    # Fix: the split point is computed from shape[-1], so the slice must
    # also be taken on the LAST dimension. The previous `[:, :split_idx]`
    # sliced dimension 1, which is only correct for 2-D inputs and
    # produced mismatched shapes (a crash) for batched 3-D inputs.
    split_idx = channel_matrix.shape[-1] // 2
    return torch.complex(
        channel_matrix[..., :split_idx],
        channel_matrix[..., split_idx:]
    )
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def get_test_stats_plot(x_name, stats, methods, show=False):
    """
    Plot test statistics for multiple methods as line graphs.

    Draws one dashed line per method, each with a distinct marker,
    showing the metric (MSE in dB) against sorted parameter values.

    Args:
        x_name: Label for the x-axis (e.g., 'SNR', 'DS', 'Epoch')
        stats: List of dictionaries, one per method, mapping x-values to
            performance metrics
        methods: List of method names corresponding to each entry in stats
        show: Whether to display the plot immediately (default: False)

    Returns:
        matplotlib.figure.Figure: The generated figure object

    Raises:
        AssertionError: If stats and methods lists have different lengths
    """
    assert len(stats) == len(methods), "Provided stats and methods do not have the same length."
    fig = plt.figure()
    marker_iter = iter(["*", "x", "+", "D", "v", "^"])
    for stat in stats:
        try:
            marker = next(marker_iter)
        except StopIteration:
            # Ran out of markers: restart with an extended marker set.
            marker_iter = iter(["o", "*", "x", "+", "D", "v", "^"])
            marker = next(marker_iter)

        # Sort by x-value so the line is drawn left to right.
        ordered = sorted(stat.items(), key=lambda kv: kv[0])
        x_vals = [k for k, _ in ordered]
        y_vals = [v for _, v in ordered]
        plt.plot(x_vals, y_vals, f"{marker}--")

    plt.xlabel(x_name)
    plt.ylabel("MSE Error (dB)")
    plt.grid()
    plt.legend(methods)
    if show:
        plt.show()
    return fig
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def to_db(val):
    """
    Convert values to decibels (dB).

    Applies 10 * log10(val) to express a linear-scale value in dB.

    Args:
        val: Input value or array to convert to dB (must be positive)

    Returns:
        The input value(s) converted to decibels
    """
    return np.log10(val) * 10
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def mse(x, y):
    """
    Calculate mean squared error (MSE) in dB between two complex arrays.

    Averages the squared magnitudes of the element-wise differences and
    converts the result to the decibel scale.

    Args:
        x: First complex numpy array
        y: Second complex numpy array (same shape as x)

    Returns:
        MSE in decibels (dB) between the two arrays
    """
    diff_magnitude = np.abs(x - y)
    # 10 * log10(mean(|x - y|^2)) — the dB conversion inlined.
    return 10 * np.log10(np.mean(diff_magnitude ** 2))
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def get_ls_mse_per_folder(folders_dir: Union[Path, str]):
    """
    Calculate average MSE for LS estimates in each subfolder.

    For every subfolder (named '<prefix>_<val>' with integer val) of the
    given directory, averages the MSE between the least-squares channel
    estimate and the ideal channel over all .mat files it contains.

    Args:
        folders_dir: Path to directory containing subfolders with .mat files

    Returns:
        Dictionary mapping the integer value from each subfolder name to
        its average MSE in dB

    Notes:
        - Each .mat file must contain a 3D matrix 'H' where H[:,:,0] is
          the ideal channel and H[:,:,2] is the LS estimate
        - Subfolders are processed in ascending order of their value
        - Per-file MSEs are in dB, so this averages dB values
    """
    results = {}
    subfolders = sorted(os.listdir(folders_dir), key=lambda name: int(name.split("_")[1]))
    for subfolder in subfolders:
        _, value_str = subfolder.split("_")
        folder_path = os.path.join(folders_dir, subfolder)
        file_names = os.listdir(folder_path)

        total = 0
        for file_name in file_names:
            mat_data = sio.loadmat(os.path.join(folder_path, file_name))['H']
            total += mse(mat_data[:, :, 2], mat_data[:, :, 0])

        results[int(value_str)] = total / len(file_names)
    return results
|
| 312 |
+
|
| 313 |
+
def get_model_details(model):
    """
    Get parameter counts and structure details for a PyTorch model.

    Counts the trainable parameters of the model and builds a formatted
    table listing the parameter count of each named, trainable parameter.

    Args:
        model: PyTorch model to analyze

    Returns:
        tuple containing:
            - total_params: Total number of trainable parameters
            - table: PrettyTable showing parameter counts by module
    """
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    # Only trainable (requires_grad) parameters are counted and listed.
    trainable = (
        (name, param.numel())
        for name, param in model.named_parameters()
        if param.requires_grad
    )
    for name, count in trainable:
        table.add_row([name, count])
        total_params += count
    return total_params, table
|