"""
PatchTST Configuration Reference — All Parameters
=================================================
Annotated configuration for PatchTSTForPrediction with all available parameters.
Sources: ibm-granite/granite-timeseries-patchtst config.json + HF Transformers docs.

Reference papers:
- PatchTST: arxiv:2211.14730 (ICLR 2023)
- Wavelet recipe: arxiv:2408.12408 (2024)
"""
|
|
| from transformers import PatchTSTConfig, PatchTSTForPrediction |
| import torch |
|
|
|
|
def get_sp500_config():
    """Build the PatchTST config for S&P 500 OHLCV next-day forecasting.

    Based on Recipe 1 (PatchTST + DWT wavelet denoising). All keyword
    arguments are collected in one mapping, grouped by theme, and then
    expanded into a single ``PatchTSTConfig`` call.

    Returns:
        A newly constructed ``PatchTSTConfig``; every call builds a fresh
        object (including a fresh ``num_forecast_mask_patches`` list).
    """
    settings = {
        # --- Data shape: 5 channels (OHLCV), 512-step window, 1-step horizon.
        "num_input_channels": 5,
        "context_length": 512,
        "prediction_length": 1,
        # --- Patching: stride is half the patch length, so patches overlap.
        "patch_length": 16,
        "patch_stride": 8,
        # --- Transformer encoder size and dropout rates.
        "d_model": 128,
        "num_attention_heads": 16,
        "num_hidden_layers": 3,
        "ffn_dim": 512,  # 4 x d_model
        "dropout": 0.2,
        "ff_dropout": 0.0,
        "attention_dropout": 0.0,
        "head_dropout": 0.2,
        "path_dropout": 0.0,
        # --- Normalization layers.
        "norm_type": "batchnorm",
        "norm_eps": 1e-5,
        "pre_norm": True,
        # --- Loss and input scaling.
        # NOTE(review): with loss="mse" the distribution_output setting is
        # presumably unused (the HF docs describe it as the head for
        # loss="nll") — confirm before relying on it.
        "loss": "mse",
        "distribution_output": "student_t",
        "scaling": "std",
        # --- Positional encoding.
        "positional_encoding_type": "sincos",
        "positional_dropout": 0.0,
        # --- Channel handling.
        "share_embedding": True,
        "share_projection": True,
        "channel_attention": False,
        # --- Masking.
        # NOTE(review): do_mask_input is False, so the remaining mask
        # options are presumably inert for forecasting — confirm.
        "do_mask_input": False,
        "mask_type": "random",
        "random_mask_ratio": 0.5,
        "num_forecast_mask_patches": [2],
        # --- Miscellaneous.
        "activation_function": "gelu",
        "init_std": 0.02,
        "use_cls_token": True,
        "num_parallel_samples": 100,
    }
    return PatchTSTConfig(**settings)
|
|
|
|
def get_ibm_granite_config():
    """Build a config matching ibm-granite/granite-timeseries-patchtst.

    That checkpoint is pre-trained on ETTh1. Note that it uses 7 channels
    (ETT), so it is NOT directly compatible with 5-channel OHLCV data.

    Only the values listed here are set explicitly; every other option is
    left to ``PatchTSTConfig``'s own defaults.

    Returns:
        A newly constructed ``PatchTSTConfig``.
    """
    overrides = {
        # Data shape: 512-step window, 96-step horizon, 7 channels.
        "context_length": 512,
        "prediction_length": 96,
        "num_input_channels": 7,
        # Patching: stride equals the patch length, so patches do not overlap.
        "patch_length": 12,
        "patch_stride": 12,
        # Transformer encoder size and dropout.
        "d_model": 128,
        "num_attention_heads": 16,
        "num_hidden_layers": 3,
        "ffn_dim": 512,
        "dropout": 0.2,
        "head_dropout": 0.2,
        # Normalization, scaling and positional encoding.
        "norm_type": "batchnorm",
        "scaling": "std",
        "positional_encoding_type": "sincos",
        # Token and channel handling.
        "use_cls_token": True,
        "share_embedding": True,
        "channel_attention": False,
    }
    return PatchTSTConfig(**overrides)
|
|
|
|
| |
|
|
def _run_smoke_test():
    """Instantiate the S&P 500 model and run one forward pass on random data.

    Prints the parameter counts and key dimensions of the configuration
    returned by ``get_sp500_config()``, then feeds random tensors through
    ``PatchTSTForPrediction`` and prints the resulting loss and the shape of
    the prediction output next to the expected shape.
    """
    config = get_sp500_config()
    model = PatchTSTForPrediction(config)

    # Materialize the parameters once so both counts share a single pass
    # over the model.
    parameters = list(model.parameters())
    total_params = sum(p.numel() for p in parameters)
    trainable_params = sum(p.numel() for p in parameters if p.requires_grad)

    print("S&P 500 PatchTST Config:")
    print(f" Total parameters: {total_params:,}")
    print(f" Trainable parameters: {trainable_params:,}")
    print(f" Context length: {config.context_length}")
    print(f" Prediction length: {config.prediction_length}")
    print(f" Channels: {config.num_input_channels}")
    # Approximate count only (hence the "~" in the output).
    # NOTE(review): the "+ 2" presumably covers the final patch plus the CLS
    # token — confirm against the model's patching logic.
    print(f" Patches: ~{(config.context_length - config.patch_length) // config.patch_stride + 2}")

    # Random inputs shaped (batch, time steps, channels).
    batch_size = 4
    past_values = torch.randn(batch_size, config.context_length, config.num_input_channels)
    future_values = torch.randn(batch_size, config.prediction_length, config.num_input_channels)

    # Nothing is back-propagated here, so skip recording the autograd graph.
    with torch.no_grad():
        outputs = model(past_values=past_values, future_values=future_values)

    print(f"\n Loss: {outputs.loss.item():.6f}")
    print(f" Prediction shape: {outputs.prediction_outputs.shape}")
    print(f" Expected shape: ({batch_size}, {config.prediction_length}, {config.num_input_channels})")


if __name__ == "__main__":
    _run_smoke_test()
|
|