gary2oos committed on
Commit
96476bc
·
verified ·
1 Parent(s): 8921b9d

Upload 10 files

Browse files
model2_alive.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model configuration
2
+ model:
3
+ model_name: "model2" # Name of the model architecture to use
4
+
5
+ vocab_size: 979 # Vocabulary size for token embeddings
6
+ embed_dim: 640 # Embedding dimension
7
+ num_heads: 10 # Number of attention heads
8
+ dropout: 0.1 # Dropout rate
9
+
10
+ tick_num_layers: 8
11
+ temporal_num_layers: 8
12
+
13
+ pad_token_id: 978
14
+
15
+ num_cond: 1
16
+ cond_vocab_size: 10
17
+ n_logits: 1 # Number of logits to predict (e.g., 1 for win rate prediction)
18
+
19
+ pretrained_path: 'checkpoints_pretraining_v2/final.pth'
20
+
21
+ pretrain:
22
+ model_name: "TickTransformerModelROPE"
23
+
24
+ vocab_size: 979 # Vocabulary size for token embeddings
25
+ embed_dim: 640 # Embedding dimension
26
+ seq_len: 512 # Sequence length per tick
27
+ dropout: 0.1 # Dropout rate
28
+
29
+ # Embedder (non-causal transformer encoder)
30
+ embedder_heads: 10
31
+ embedder_layers: 6
32
+
33
+ # Processor (GPT-style causal transformer for next token prediction)
34
+ processor_heads: 10
35
+ processor_layers: 8
36
+
37
+ # Decoder (non-causal transformer to decode embeddings to sequences)
38
+ decoder_heads: 10
39
+ decoder_layers: 6
40
+
41
+ data:
42
+ tick_seq_len: 512
43
+ temporal_seq_len: 32
44
+ num_workers: 4
45
+
46
+ training:
47
+ batch_size: 32
48
+ grad_accum_steps: 1
49
+ lr: 0.00012
50
+ weight_decay: 0.05
51
+ num_epochs: 22
52
+ warmup_steps: 4500
53
+ max_grad_norm: 1.0
54
+
55
+ checkpoint_dir: 'model2_alive_ckpts'
56
+
57
+ logging:
58
+ project_name: 'model2_alive'
59
+ test: 1024
model2_alive_latest_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4f1b6c0ab9426a2dbba9d89110aee167f9399bcd997cc399c3147b02d5f764
3
+ size 620567264
model2_death.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model configuration
2
+ model:
3
+ model_name: "model2" # Name of the model architecture to use
4
+
5
+ vocab_size: 979 # Vocabulary size for token embeddings
6
+ embed_dim: 640 # Embedding dimension
7
+ num_heads: 10 # Number of attention heads
8
+ dropout: 0.1 # Dropout rate
9
+
10
+ tick_num_layers: 8
11
+ temporal_num_layers: 8
12
+
13
+ pad_token_id: 978
14
+
15
+ num_cond: 0
16
+ cond_vocab_size: 0
17
+ n_logits: 11 # Number of logits to predict (e.g., 1 for win rate prediction)
18
+
19
+ pretrained_path: 'checkpoints_pretraining_v2/final.pth'
20
+
21
+ pretrain:
22
+ model_name: "TickTransformerModelROPE"
23
+
24
+ vocab_size: 979 # Vocabulary size for token embeddings
25
+ embed_dim: 640 # Embedding dimension
26
+ seq_len: 512 # Sequence length per tick
27
+ dropout: 0.1 # Dropout rate
28
+
29
+ # Embedder (non-causal transformer encoder)
30
+ embedder_heads: 10
31
+ embedder_layers: 6
32
+
33
+ # Processor (GPT-style causal transformer for next token prediction)
34
+ processor_heads: 10
35
+ processor_layers: 8
36
+
37
+ # Decoder (non-causal transformer to decode embeddings to sequences)
38
+ decoder_heads: 10
39
+ decoder_layers: 6
40
+
41
+ data:
42
+ tick_seq_len: 512
43
+ temporal_seq_len: 32
44
+ num_workers: 4
45
+
46
+ training:
47
+ batch_size: 32
48
+ grad_accum_steps: 1
49
+ lr: 0.00012
50
+ weight_decay: 0.05
51
+ num_epochs: 22
52
+ warmup_steps: 4500
53
+ max_grad_norm: 1.0
54
+
55
+ checkpoint_dir: 'model2_death_ckpts'
56
+
57
+ logging:
58
+ project_name: 'model2_death'
59
+ test: 1024
model2_death_latest_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc588c5e87ab41a738f5a5426347bbdd1c1224be2f758684bdaba48fe80ceaa
3
+ size 612686356
model2_duel.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model configuration
2
+ model:
3
+ model_name: "model2" # Name of the model architecture to use
4
+
5
+ vocab_size: 979 # Vocabulary size for token embeddings
6
+ embed_dim: 640 # Embedding dimension
7
+ num_heads: 10 # Number of attention heads
8
+ dropout: 0.1 # Dropout rate
9
+
10
+ tick_num_layers: 8
11
+ temporal_num_layers: 8
12
+
13
+ pad_token_id: 978
14
+
15
+ num_cond: 2
16
+ cond_vocab_size: 10
17
+ n_logits: 1 # Number of logits to predict (e.g., 1 for win rate prediction)
18
+
19
+ pretrained_path: 'checkpoints_pretraining_v2/final.pth'
20
+
21
+ pretrain:
22
+ model_name: "TickTransformerModelROPE"
23
+
24
+ vocab_size: 979 # Vocabulary size for token embeddings
25
+ embed_dim: 640 # Embedding dimension
26
+ seq_len: 512 # Sequence length per tick
27
+ dropout: 0.1 # Dropout rate
28
+
29
+ # Embedder (non-causal transformer encoder)
30
+ embedder_heads: 10
31
+ embedder_layers: 6
32
+
33
+ # Processor (GPT-style causal transformer for next token prediction)
34
+ processor_heads: 10
35
+ processor_layers: 8
36
+
37
+ # Decoder (non-causal transformer to decode embeddings to sequences)
38
+ decoder_heads: 10
39
+ decoder_layers: 6
40
+
41
+ data:
42
+ tick_seq_len: 512
43
+ temporal_seq_len: 32
44
+ num_workers: 4
45
+
46
+ training:
47
+ batch_size: 32
48
+ grad_accum_steps: 1
49
+ lr: 0.00012
50
+ weight_decay: 0.05
51
+ num_epochs: 22
52
+ warmup_steps: 4500
53
+ max_grad_norm: 1.0
54
+
55
+ checkpoint_dir: 'model2_duel_ckpts'
56
+
57
+ logging:
58
+ project_name: 'model2_duel'
59
+ test: 1024
model2_duel_latest_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7d5bbf82e5e4aa33a8fdf5494c1eb4821526c59342c6d262f3a5e53d713e7a6
3
+ size 628508384
model2_kill.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model configuration
2
+ model:
3
+ model_name: "model2" # Name of the model architecture to use
4
+
5
+ vocab_size: 979 # Vocabulary size for token embeddings
6
+ embed_dim: 640 # Embedding dimension
7
+ num_heads: 10 # Number of attention heads
8
+ dropout: 0.1 # Dropout rate
9
+
10
+ tick_num_layers: 8
11
+ temporal_num_layers: 8
12
+
13
+ pad_token_id: 978
14
+
15
+ num_cond: 0
16
+ cond_vocab_size: 0
17
+ n_logits: 11 # Number of logits to predict (e.g., 1 for win rate prediction)
18
+
19
+ pretrained_path: 'checkpoints_pretraining_v2/final.pth'
20
+
21
+ pretrain:
22
+ model_name: "TickTransformerModelROPE"
23
+
24
+ vocab_size: 979 # Vocabulary size for token embeddings
25
+ embed_dim: 640 # Embedding dimension
26
+ seq_len: 512 # Sequence length per tick
27
+ dropout: 0.1 # Dropout rate
28
+
29
+ # Embedder (non-causal transformer encoder)
30
+ embedder_heads: 10
31
+ embedder_layers: 6
32
+
33
+ # Processor (GPT-style causal transformer for next token prediction)
34
+ processor_heads: 10
35
+ processor_layers: 8
36
+
37
+ # Decoder (non-causal transformer to decode embeddings to sequences)
38
+ decoder_heads: 10
39
+ decoder_layers: 6
40
+
41
+ data:
42
+ tick_seq_len: 512
43
+ temporal_seq_len: 32
44
+ num_workers: 4
45
+
46
+ training:
47
+ batch_size: 32
48
+ grad_accum_steps: 1
49
+ lr: 0.00012
50
+ weight_decay: 0.05
51
+ num_epochs: 22
52
+ warmup_steps: 4500
53
+ max_grad_norm: 1.0
54
+
55
+ checkpoint_dir: 'model2_kill_ckpts'
56
+
57
+ logging:
58
+ project_name: 'model2_kill'
59
+ test: 1024
model2_kill_latest_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d2c328571337e3e2583a100bd58106e821bc3016b3724e850db672c27f6b3b
3
+ size 612686356
model2_win.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model configuration
2
+ model:
3
+ model_name: "model2" # Name of the model architecture to use
4
+
5
+ vocab_size: 979 # Vocabulary size for token embeddings
6
+ embed_dim: 640 # Embedding dimension
7
+ num_heads: 10 # Number of attention heads
8
+ dropout: 0.1 # Dropout rate
9
+
10
+ tick_num_layers: 8
11
+ temporal_num_layers: 8
12
+
13
+ pad_token_id: 978
14
+
15
+ num_cond: 0
16
+ cond_vocab_size: 0
17
+ n_logits: 1 # Number of logits to predict (e.g., 1 for win rate prediction)
18
+
19
+ pretrained_path: 'checkpoints_pretraining_v2/final.pth'
20
+
21
+ pretrain:
22
+ model_name: "TickTransformerModelROPE"
23
+
24
+ vocab_size: 979 # Vocabulary size for token embeddings
25
+ embed_dim: 640 # Embedding dimension
26
+ seq_len: 512 # Sequence length per tick
27
+ dropout: 0.1 # Dropout rate
28
+
29
+ # Embedder (non-causal transformer encoder)
30
+ embedder_heads: 10
31
+ embedder_layers: 6
32
+
33
+ # Processor (GPT-style causal transformer for next token prediction)
34
+ processor_heads: 10
35
+ processor_layers: 8
36
+
37
+ # Decoder (non-causal transformer to decode embeddings to sequences)
38
+ decoder_heads: 10
39
+ decoder_layers: 6
40
+
41
+ data:
42
+ tick_seq_len: 512
43
+ temporal_seq_len: 32
44
+ num_workers: 4
45
+
46
+ training:
47
+ batch_size: 32
48
+ grad_accum_steps: 1
49
+ lr: 0.00012
50
+ weight_decay: 0.05
51
+ num_epochs: 22
52
+ warmup_steps: 4500
53
+ max_grad_norm: 1.0
54
+
55
+ checkpoint_dir: 'model2_win_ckpts'
56
+
57
+ logging:
58
+ project_name: 'model2_win'
59
+ test: 1024
model2_win_latest_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e92ca30ee2bc031db0858abbf1995485642111178acdcb6f9128a3433e83e75
3
+ size 612624916