gary2oos committed on
Commit
6e052ab
·
verified ·
1 Parent(s): a10cd7b

Upload model2_win.yaml

Browse files
Files changed (1) hide show
  1. model2_win.yaml +33 -28
model2_win.yaml CHANGED
@@ -1,32 +1,48 @@
 
1
  model:
2
- model_name: model2
3
- vocab_size: 979
4
- embed_dim: 640
5
- num_heads: 10
6
- dropout: 0.1
 
 
7
  tick_num_layers: 8
8
  temporal_num_layers: 8
 
9
  pad_token_id: 978
 
10
  num_cond: 0
11
  cond_vocab_size: 0
12
- n_logits: 1
13
- pretrained_path: checkpoints_pretraining_v2/final.pth
 
 
14
  pretrain:
15
- model_name: TickTransformerModelROPE
16
- vocab_size: 979
17
- embed_dim: 640
18
- seq_len: 512
19
- dropout: 0.1
 
 
 
20
  embedder_heads: 10
21
  embedder_layers: 6
 
 
22
  processor_heads: 10
23
  processor_layers: 8
 
 
24
  decoder_heads: 10
25
  decoder_layers: 6
 
26
  data:
27
  tick_seq_len: 512
28
  temporal_seq_len: 32
29
  num_workers: 4
 
30
  training:
31
  batch_size: 32
32
  grad_accum_steps: 1
@@ -35,20 +51,9 @@ training:
35
  num_epochs: 22
36
  warmup_steps: 4500
37
  max_grad_norm: 1.0
38
- checkpoint_dir: model2_win_ckpts
 
 
39
  logging:
40
- project_name: model2_win
41
- test: 1024
42
- calibration:
43
- temperature_scaling:
44
- task: win
45
- checkpoint: /share/guwanjun-local/cs2-demo-analytics/model2_win_ckpts/latest_model.pt
46
- num_samples: 2000
47
- ece_bins: 15
48
- loss_before: 0.4216673672199249
49
- loss_after: 0.4213024973869324
50
- ece_before: 0.014014186337590218
51
- ece_after: 0.015347685664892197
52
- temperature: 0.9367762207984924
53
- bias: -0.0017134372610598803
54
- updated_at: '2026-04-16T20:09:45'
 
1
+ # Model configuration
2
  model:
3
+ model_name: "model2" # Name of the model architecture to use
4
+
5
+ vocab_size: 979 # Vocabulary size for token embeddings
6
+ embed_dim: 640 # Embedding dimension
7
+ num_heads: 10 # Number of attention heads
8
+ dropout: 0.1 # Dropout rate
9
+
10
  tick_num_layers: 8
11
  temporal_num_layers: 8
12
+
13
  pad_token_id: 978
14
+
15
  num_cond: 0
16
  cond_vocab_size: 0
17
+ n_logits: 1 # Number of logits to predict (e.g., 1 for win rate prediction)
18
+
19
+ pretrained_path: 'checkpoints_pretraining_v2/final.pth'
20
+
21
  pretrain:
22
+ model_name: "TickTransformerModelROPE"
23
+
24
+ vocab_size: 979 # Vocabulary size for token embeddings
25
+ embed_dim: 640 # Embedding dimension
26
+ seq_len: 512 # Sequence length per tick
27
+ dropout: 0.1 # Dropout rate
28
+
29
+ # Embedder (non-causal transformer encoder)
30
  embedder_heads: 10
31
  embedder_layers: 6
32
+
33
+ # Processor (GPT-style causal transformer for next token prediction)
34
  processor_heads: 10
35
  processor_layers: 8
36
+
37
+ # Decoder (non-causal transformer to decode embeddings to sequences)
38
  decoder_heads: 10
39
  decoder_layers: 6
40
+
41
  data:
42
  tick_seq_len: 512
43
  temporal_seq_len: 32
44
  num_workers: 4
45
+
46
  training:
47
  batch_size: 32
48
  grad_accum_steps: 1
 
51
  num_epochs: 22
52
  warmup_steps: 4500
53
  max_grad_norm: 1.0
54
+
55
+ checkpoint_dir: 'model2_win_ckpts'
56
+
57
  logging:
58
+ project_name: 'model2_win'
59
+ test: 1024