# Model configuration
model:
  model_name: "model2" # Name of the model architecture to use
  vocab_size: 979 # Vocabulary size for token embeddings
  embed_dim: 640 # Embedding dimension
  num_heads: 10 # Number of attention heads
  dropout: 0.1 # Dropout rate
  tick_num_layers: 8 # Transformer layers in the per-tick model
  temporal_num_layers: 8 # Transformer layers in the temporal (across-tick) model
  pad_token_id: 978 # Token ID used for padding
  num_cond: 1 # Number of conditioning inputs
  cond_vocab_size: 10 # Vocabulary size for conditioning tokens
  n_logits: 1 # Number of logits to predict (e.g., 1 for win rate prediction)
  pretrained_path: 'checkpoints_pretraining_v2/final.pth' # Checkpoint to initialize from
pretrain:
  model_name: "TickTransformerModelROPE"
  vocab_size: 979 # Vocabulary size for token embeddings
  embed_dim: 640 # Embedding dimension
  seq_len: 512 # Sequence length per tick
  dropout: 0.1 # Dropout rate
  # Embedder (non-causal transformer encoder)
  embedder_heads: 10
  embedder_layers: 6
  # Processor (GPT-style causal transformer for next-token prediction)
  processor_heads: 10
  processor_layers: 8
  # Decoder (non-causal transformer to decode embeddings to sequences)
  decoder_heads: 10
  decoder_layers: 6
data:
  tick_seq_len: 512 # Sequence length per tick
  temporal_seq_len: 32 # Number of ticks per temporal sequence
  num_workers: 4 # DataLoader worker processes
training:
  batch_size: 32
  grad_accum_steps: 1 # Gradient accumulation steps
  lr: 0.00012 # Learning rate
  weight_decay: 0.05
  num_epochs: 22
  warmup_steps: 4500 # LR warmup steps
  max_grad_norm: 1.0 # Gradient clipping threshold
  checkpoint_dir: 'model2_alive_ckpts'
logging:
  project_name: 'model2_alive'
  test: 1024
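
# A minimal sketch of how a config like this might be consumed, assuming it is
# saved as config.yaml and read with PyYAML. The file name, the variable names,
# and the sanity checks below are illustrative assumptions, not part of the
# repository's actual training code.
#
#   import yaml
#
#   # Load the YAML config (file name assumed for illustration).
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   model_cfg = cfg["model"]
#
#   # Consistency checks implied by the values above:
#   # embed_dim must split evenly across attention heads (640 / 10 = 64 per head),
#   # and the pad token must fall inside the vocabulary (978 < 979).
#   assert model_cfg["embed_dim"] % model_cfg["num_heads"] == 0
#   assert model_cfg["pad_token_id"] < model_cfg["vocab_size"]
#
#   print(model_cfg["model_name"], cfg["training"]["lr"])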