---
model:
  model_name: "model2"
|
|
  vocab_size: 979
  embed_dim: 640
  num_heads: 10
  dropout: 0.1

  tick_num_layers: 8
  temporal_num_layers: 8
|
|
  pad_token_id: 978
|
|
  num_cond: 0
  cond_vocab_size: 0
  n_logits: 1
|
|
  pretrained_path: 'checkpoints_pretraining_v2/final.pth'
|
|
pretrain:
  model_name: "TickTransformerModelROPE"
|
|
  vocab_size: 979
  embed_dim: 640
  seq_len: 512
  dropout: 0.1

  embedder_heads: 10
  embedder_layers: 6

  processor_heads: 10
  processor_layers: 8

  decoder_heads: 10
  decoder_layers: 6
|
|
data:
  tick_seq_len: 512
  temporal_seq_len: 32
  num_workers: 4
|
|
training:
  batch_size: 32
  grad_accum_steps: 1
  lr: 0.00012
  weight_decay: 0.05
  num_epochs: 22
  warmup_steps: 4500
  max_grad_norm: 1.0

  checkpoint_dir: 'model2_win_ckpts'
|
|
logging:
  project_name: 'model2_win'
  test: 1024