Upload 10 files

Browse files

Files changed (10) hide show

model2_alive.yaml +59 -0
model2_alive_latest_model.pt +3 -0
model2_death.yaml +59 -0
model2_death_latest_model.pt +3 -0
model2_duel.yaml +59 -0
model2_duel_latest_model.pt +3 -0
model2_kill.yaml +59 -0
model2_kill_latest_model.pt +3 -0
model2_win.yaml +59 -0
model2_win_latest_model.pt +3 -0

model2_alive.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+# Model configuration
+model:
+  model_name: "model2"  # Name of the model architecture to use
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  num_heads: 10     # Number of attention heads
+  dropout: 0.1       # Dropout rate
+  tick_num_layers: 8
+  temporal_num_layers: 8
+  pad_token_id: 978
+  num_cond: 1
+  cond_vocab_size: 10
+  n_logits: 1  # Number of logits to predict (e.g., 1 for win rate prediction)
+  pretrained_path: 'checkpoints_pretraining_v2/final.pth'
+pretrain:
+  model_name: "TickTransformerModelROPE"
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  seq_len: 512       # Sequence length per tick
+  dropout: 0.1       # Dropout rate
+  # Embedder (non-causal transformer encoder)
+  embedder_heads: 10
+  embedder_layers: 6
+  # Processor (GPT-style causal transformer for next token prediction)
+  processor_heads: 10
+  processor_layers: 8
+  # Decoder (non-causal transformer to decode embeddings to sequences)
+  decoder_heads: 10
+  decoder_layers: 6
+data:
+  tick_seq_len: 512
+  temporal_seq_len: 32
+  num_workers: 4
+training:
+  batch_size: 32
+  grad_accum_steps: 1
+  lr: 0.00012
+  weight_decay: 0.05
+  num_epochs: 22
+  warmup_steps: 4500
+  max_grad_norm: 1.0
+  checkpoint_dir: 'model2_alive_ckpts'
+logging:
+  project_name: 'model2_alive'
+  test: 1024

model2_alive_latest_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec4f1b6c0ab9426a2dbba9d89110aee167f9399bcd997cc399c3147b02d5f764
+size 620567264

model2_death.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+# Model configuration
+model:
+  model_name: "model2"  # Name of the model architecture to use
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  num_heads: 10     # Number of attention heads
+  dropout: 0.1       # Dropout rate
+  tick_num_layers: 8
+  temporal_num_layers: 8
+  pad_token_id: 978
+  num_cond: 0
+  cond_vocab_size: 0
+  n_logits: 11  # Number of logits to predict (e.g., 1 for win rate prediction)
+  pretrained_path: 'checkpoints_pretraining_v2/final.pth'
+pretrain:
+  model_name: "TickTransformerModelROPE"
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  seq_len: 512       # Sequence length per tick
+  dropout: 0.1       # Dropout rate
+  # Embedder (non-causal transformer encoder)
+  embedder_heads: 10
+  embedder_layers: 6
+  # Processor (GPT-style causal transformer for next token prediction)
+  processor_heads: 10
+  processor_layers: 8
+  # Decoder (non-causal transformer to decode embeddings to sequences)
+  decoder_heads: 10
+  decoder_layers: 6
+data:
+  tick_seq_len: 512
+  temporal_seq_len: 32
+  num_workers: 4
+training:
+  batch_size: 32
+  grad_accum_steps: 1
+  lr: 0.00012
+  weight_decay: 0.05
+  num_epochs: 22
+  warmup_steps: 4500
+  max_grad_norm: 1.0
+  checkpoint_dir: 'model2_death_ckpts'
+logging:
+  project_name: 'model2_death'
+  test: 1024

model2_death_latest_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebc588c5e87ab41a738f5a5426347bbdd1c1224be2f758684bdaba48fe80ceaa
+size 612686356

model2_duel.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+# Model configuration
+model:
+  model_name: "model2"  # Name of the model architecture to use
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  num_heads: 10     # Number of attention heads
+  dropout: 0.1       # Dropout rate
+  tick_num_layers: 8
+  temporal_num_layers: 8
+  pad_token_id: 978
+  num_cond: 2
+  cond_vocab_size: 10
+  n_logits: 1  # Number of logits to predict (e.g., 1 for win rate prediction)
+  pretrained_path: 'checkpoints_pretraining_v2/final.pth'
+pretrain:
+  model_name: "TickTransformerModelROPE"
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  seq_len: 512       # Sequence length per tick
+  dropout: 0.1       # Dropout rate
+  # Embedder (non-causal transformer encoder)
+  embedder_heads: 10
+  embedder_layers: 6
+  # Processor (GPT-style causal transformer for next token prediction)
+  processor_heads: 10
+  processor_layers: 8
+  # Decoder (non-causal transformer to decode embeddings to sequences)
+  decoder_heads: 10
+  decoder_layers: 6
+data:
+  tick_seq_len: 512
+  temporal_seq_len: 32
+  num_workers: 4
+training:
+  batch_size: 32
+  grad_accum_steps: 1
+  lr: 0.00012
+  weight_decay: 0.05
+  num_epochs: 22
+  warmup_steps: 4500
+  max_grad_norm: 1.0
+  checkpoint_dir: 'model2_duel_ckpts'
+logging:
+  project_name: 'model2_duel'
+  test: 1024

model2_duel_latest_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7d5bbf82e5e4aa33a8fdf5494c1eb4821526c59342c6d262f3a5e53d713e7a6
+size 628508384

model2_kill.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+# Model configuration
+model:
+  model_name: "model2"  # Name of the model architecture to use
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  num_heads: 10     # Number of attention heads
+  dropout: 0.1       # Dropout rate
+  tick_num_layers: 8
+  temporal_num_layers: 8
+  pad_token_id: 978
+  num_cond: 0
+  cond_vocab_size: 0
+  n_logits: 11  # Number of logits to predict (e.g., 1 for win rate prediction)
+  pretrained_path: 'checkpoints_pretraining_v2/final.pth'
+pretrain:
+  model_name: "TickTransformerModelROPE"
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  seq_len: 512       # Sequence length per tick
+  dropout: 0.1       # Dropout rate
+  # Embedder (non-causal transformer encoder)
+  embedder_heads: 10
+  embedder_layers: 6
+  # Processor (GPT-style causal transformer for next token prediction)
+  processor_heads: 10
+  processor_layers: 8
+  # Decoder (non-causal transformer to decode embeddings to sequences)
+  decoder_heads: 10
+  decoder_layers: 6
+data:
+  tick_seq_len: 512
+  temporal_seq_len: 32
+  num_workers: 4
+training:
+  batch_size: 32
+  grad_accum_steps: 1
+  lr: 0.00012
+  weight_decay: 0.05
+  num_epochs: 22
+  warmup_steps: 4500
+  max_grad_norm: 1.0
+  checkpoint_dir: 'model2_kill_ckpts'
+logging:
+  project_name: 'model2_kill'
+  test: 1024

model2_kill_latest_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39d2c328571337e3e2583a100bd58106e821bc3016b3724e850db672c27f6b3b
+size 612686356

model2_win.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+# Model configuration
+model:
+  model_name: "model2"  # Name of the model architecture to use
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  num_heads: 10     # Number of attention heads
+  dropout: 0.1       # Dropout rate
+  tick_num_layers: 8
+  temporal_num_layers: 8
+  pad_token_id: 978
+  num_cond: 0
+  cond_vocab_size: 0
+  n_logits: 1  # Number of logits to predict (e.g., 1 for win rate prediction)
+  pretrained_path: 'checkpoints_pretraining_v2/final.pth'
+pretrain:
+  model_name: "TickTransformerModelROPE"
+  vocab_size: 979  # Vocabulary size for token embeddings
+  embed_dim: 640     # Embedding dimension
+  seq_len: 512       # Sequence length per tick
+  dropout: 0.1       # Dropout rate
+  # Embedder (non-causal transformer encoder)
+  embedder_heads: 10
+  embedder_layers: 6
+  # Processor (GPT-style causal transformer for next token prediction)
+  processor_heads: 10
+  processor_layers: 8
+  # Decoder (non-causal transformer to decode embeddings to sequences)
+  decoder_heads: 10
+  decoder_layers: 6
+data:
+  tick_seq_len: 512
+  temporal_seq_len: 32
+  num_workers: 4
+training:
+  batch_size: 32
+  grad_accum_steps: 1
+  lr: 0.00012
+  weight_decay: 0.05
+  num_epochs: 22
+  warmup_steps: 4500
+  max_grad_norm: 1.0
+  checkpoint_dir: 'model2_win_ckpts'
+logging:
+  project_name: 'model2_win'
+  test: 1024

model2_win_latest_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e92ca30ee2bc031db0858abbf1995485642111178acdcb6f9128a3433e83e75
+size 612624916