# Time_RCD/models/time_rcd/time_rcd_config.py
# Author: Oliver Le
# Initial commit (d03866e)
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
@dataclass
class TimeSeriesConfig:
    """Configuration for time series encoder.

    Attributes:
        d_model: Dimension of model hidden states.
        d_proj: Dimension of projection layer.
        patch_size: Size of time series patches.
        num_query_tokens: Number of query tokens (presumably pooling/readout
            queries for the encoder — confirm against the model code).
        num_layers: Number of transformer layers.
        num_heads: Number of attention heads.
        d_ff_dropout: Dropout rate for feed-forward networks.
        use_rope: Whether to use Rotary Position Embedding.
        activation: Activation function name.
        num_features: Number of input features.
        test_batch_limit: Maximum number of batches to use at test time
            (NOTE(review): duplicated on TimeRCDConfig — verify which one
            the training code actually reads).
    """

    d_model: int = 512
    d_proj: int = 256
    patch_size: int = 4
    num_query_tokens: int = 1
    num_layers: int = 8
    num_heads: int = 8
    d_ff_dropout: float = 0.1
    use_rope: bool = True
    activation: str = "gelu"
    num_features: int = 1
    test_batch_limit: int = 20
@dataclass
class TimeRCDConfig:
    """Configuration class for Time_RCD model.

    This class contains all hyperparameters and settings for the Time_RCD model.
    It is implemented as a dataclass for easy instantiation and modification.

    Attributes:
        ts_config: Configuration for time series encoder.
        batch_size: Training batch size.
        learning_rate: Learning rate for optimization.
        num_epochs: Number of training epochs.
        max_seq_len: Maximum sequence length.
        dropout: Dropout rate.
        accumulation_steps: Gradient accumulation steps.
        weight_decay: Weight decay for optimization.
        enable_ts_train: Whether to train the time series encoder.
        seed: Random seed for reproducibility.
        log_freq: Logging frequency in steps.
        save_freq: Checkpoint save frequency (presumably in epochs — confirm).
        save_step_freq: Checkpoint save frequency in steps.
        model_prefix: Prefix used when naming saved model files.
        test_batch_limit: Maximum number of batches to use at test time.
        early_stopping_patience: Patience (in evaluations without improvement)
            before early stopping — TODO confirm unit against training loop.
        cuda_devices: Comma-separated CUDA device IDs for training.
        dist_port: Port for distributed training communication.
        device: Device string used by the training code (e.g. "cuda").
    """

    # Model configurations
    ts_config: TimeSeriesConfig = field(default_factory=TimeSeriesConfig)

    # Training parameters
    batch_size: int = 3
    learning_rate: float = 1e-4
    num_epochs: int = 1000
    max_seq_len: int = 512
    dropout: float = 0.1
    accumulation_steps: int = 1
    weight_decay: float = 1e-5
    enable_ts_train: bool = False
    # Declared once only — the original declared `seed` twice (a redundant,
    # identical re-annotation that dataclass silently collapsed).
    seed: int = 72

    # Logging / checkpointing
    log_freq: int = 100
    save_freq: int = 10
    save_step_freq: int = 100
    model_prefix: str = "time_rcd_qa_by_pretrain"
    test_batch_limit: int = 20
    early_stopping_patience: int = 7

    # Hardware / distributed settings
    cuda_devices: str = "0, 1, 2, 3"
    dist_port: str = "12355"  # Port for distributed training communication
    device: str = "cuda"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the full configuration to a plain dictionary.

        Returns:
            A dict with one entry per configuration field; ``ts_config`` is
            flattened to its ``__dict__`` so the result is JSON-serializable.

        Note:
            The original implementation omitted several fields (dropout,
            accumulation_steps, weight_decay, enable_ts_train,
            early_stopping_patience, cuda_devices, dist_port); they are now
            included so the serialized config is complete. Existing keys are
            unchanged, so prior consumers keep working.
        """
        return {
            "ts_config": self.ts_config.__dict__,
            "batch_size": self.batch_size,
            "learning_rate": self.learning_rate,
            "num_epochs": self.num_epochs,
            "max_seq_len": self.max_seq_len,
            "dropout": self.dropout,
            "accumulation_steps": self.accumulation_steps,
            "weight_decay": self.weight_decay,
            "enable_ts_train": self.enable_ts_train,
            "seed": self.seed,
            "test_batch_limit": self.test_batch_limit,
            "log_freq": self.log_freq,
            "save_freq": self.save_freq,
            "save_step_freq": self.save_step_freq,
            "model_prefix": self.model_prefix,
            "early_stopping_patience": self.early_stopping_patience,
            "cuda_devices": self.cuda_devices,
            "dist_port": self.dist_port,
            "device": self.device,
        }
# Module-level default instance, importable as a ready-to-use configuration.
default_config = TimeRCDConfig()