Upload checkpoints/r42/run42_config.toml with huggingface_hub
Browse files
checkpoints/r42/run42_config.toml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# R42: SimBa + Aux Phase Value + Zero PBRS (potential-based reward shaping)
|
| 2 |
+
# Builds on R41 base (categorical reward, 5 new obs, P0/P1 alternation)
|
| 3 |
+
# Additions: SimBaAux architecture, phase-decomposed value, auxiliary opponent DW head, zero PBRS
|
| 4 |
+
|
| 5 |
+
[ppo]
|
| 6 |
+
lr = 2.5e-4
|
| 7 |
+
num_envs = 4096
|
| 8 |
+
num_steps = 128
|
| 9 |
+
total_timesteps = 1_500_000_000
|
| 10 |
+
num_minibatches = 4
|
| 11 |
+
update_epochs = 4
|
| 12 |
+
gamma = 1.0
|
| 13 |
+
gae_lambda = 0.98
|
| 14 |
+
clip_eps = 0.2
|
| 15 |
+
ent_coef = 0.025
|
| 16 |
+
vf_coef = 0.75
|
| 17 |
+
max_grad_norm = 0.5
|
| 18 |
+
anneal_lr = true
|
| 19 |
+
|
| 20 |
+
[reward]
|
| 21 |
+
step_dw_shaping = 0.0
|
| 22 |
+
|
| 23 |
+
[training]
|
| 24 |
+
opponent = "mixed"
|
| 25 |
+
simba = true
|
| 26 |
+
simba_aux = true
|
| 27 |
+
suit_augmentation = false
|
| 28 |
+
aux_coef = 0.1
|
| 29 |
+
|
| 30 |
+
[opponents]
|
| 31 |
+
heuristic_prob = 0.30
|
| 32 |
+
aggressive_knock_prob = 0.15
|
| 33 |
+
meld_builder_prob = 0.10
|
| 34 |
+
early_knock_prob = 0.10
|
| 35 |
+
defensive_prob = 0.10
|
| 36 |
+
superhuman_lv4_prob = 0.05
|
| 37 |
+
superhuman_lv5_prob = 0.10
|
| 38 |
+
superhuman_lv7_prob = 0.05
|
| 39 |
+
frozen_checkpoint_prob = 0.05
|
| 40 |
+
|
| 41 |
+
[wandb]
|
| 42 |
+
run_name = "R42-simba-aux-phase-value-zero-pbrs"
|
| 43 |
+
entity = "good-start-labs"
|
| 44 |
+
project = "gsl-gin-rummy-mdp"
|