Spaces:
Running
Running
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
@dataclass
class TimeSeriesConfig:
    """Configuration for time series encoder.

    Declared as a dataclass so that every attribute below becomes an
    optional keyword argument of the constructor and is stored on the
    instance (and therefore shows up in ``instance.__dict__``).

    Attributes:
        d_model: Dimension of model hidden states.
        d_proj: Dimension of projection layer.
        patch_size: Size of time series patches.
        num_query_tokens: Number of query tokens (presumably learned query
            tokens used by the encoder -- confirm against the model code).
        num_layers: Number of transformer layers.
        num_heads: Number of attention heads.
        d_ff_dropout: Dropout rate for feed-forward networks.
        use_rope: Whether to use Rotary Position Embedding.
        activation: Activation function name.
        num_features: Number of input features.
        test_batch_limit: Batch limit applied during testing (presumably the
            maximum number of batches evaluated -- confirm against the
            caller).
    """

    d_model: int = 512
    d_proj: int = 256
    patch_size: int = 4
    num_query_tokens: int = 1
    num_layers: int = 8
    num_heads: int = 8
    d_ff_dropout: float = 0.1
    use_rope: bool = True
    activation: str = "gelu"
    num_features: int = 1
    test_batch_limit: int = 20
@dataclass
class TimeRCDConfig:
    """Configuration class for Time_RCD model.

    This class contains all hyperparameters and settings for the Time_RCD model.
    It is implemented as a dataclass for easy instantiation and modification.

    Attributes:
        ts_config: Configuration for time series encoder.
        batch_size: Training batch size.
        learning_rate: Learning rate for optimization.
        num_epochs: Number of training epochs.
        max_seq_len: Maximum sequence length.
        dropout: Dropout rate.
        accumulation_steps: Gradient accumulation steps.
        weight_decay: Weight decay for optimization.
        enable_ts_train: Whether to train the time series encoder.
        seed: Random seed for reproducibility.
        log_freq: Logging frequency (unit -- steps or batches -- is not
            visible here; confirm in the training loop).
        save_freq: Checkpoint save frequency (presumably in epochs --
            confirm in the training loop).
        save_step_freq: Checkpoint save frequency (presumably in steps --
            confirm in the training loop).
        model_prefix: Prefix string for the model (presumably used to name
            saved checkpoints -- confirm against the caller).
        test_batch_limit: Batch limit applied during testing (presumably the
            maximum number of batches evaluated -- confirm against the
            caller).
        early_stopping_patience: Early-stopping patience (presumably the
            number of evaluations without improvement before stopping --
            confirm in the training loop).
        cuda_devices: String listing CUDA device indices, comma separated.
        dist_port: Port for distributed training communication.
        device: Device name string.
    """

    # Model configurations
    # default_factory gives every instance its own TimeSeriesConfig instead
    # of sharing a single mutable object across instances.
    ts_config: TimeSeriesConfig = field(default_factory=TimeSeriesConfig)

    # Training parameters
    batch_size: int = 3
    learning_rate: float = 1e-4
    num_epochs: int = 1000
    max_seq_len: int = 512
    dropout: float = 0.1
    accumulation_steps: int = 1
    weight_decay: float = 1e-5
    enable_ts_train: bool = False
    seed: int = 72
    log_freq: int = 100
    save_freq: int = 10
    save_step_freq: int = 100
    model_prefix: str = "time_rcd_qa_by_pretrain"
    test_batch_limit: int = 20
    early_stopping_patience: int = 7
    cuda_devices: str = "0, 1, 2, 3"
    dist_port: str = "12355"  # Port for distributed training communication
    device: str = "cuda"

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict snapshot of a subset of the settings.

        Only the keys built below are exported; ``dropout``,
        ``accumulation_steps``, ``weight_decay``, ``enable_ts_train``,
        ``early_stopping_patience``, ``cuda_devices`` and ``dist_port`` are
        not included.

        Returns:
            A new dictionary. The ``"ts_config"`` entry is a shallow copy of
            the encoder config's instance attributes, so mutating the
            returned dictionary does not modify this configuration.
        """
        return {
            # Copy rather than expose the live __dict__ of the nested config.
            "ts_config": dict(vars(self.ts_config)),
            "batch_size": self.batch_size,
            "learning_rate": self.learning_rate,
            "num_epochs": self.num_epochs,
            "max_seq_len": self.max_seq_len,
            "seed": self.seed,
            "test_batch_limit": self.test_batch_limit,
            "log_freq": self.log_freq,
            "save_freq": self.save_freq,
            "save_step_freq": self.save_step_freq,
            "model_prefix": self.model_prefix,
            "device": self.device,
        }
# Module-level TimeRCDConfig instance constructed with no arguments.
| default_config = TimeRCDConfig() |