Alex-GSL committed on
Commit
c9dc559
·
verified ·
1 Parent(s): 21fb04e

Upload checkpoints/r42/run42_config.toml with huggingface_hub

Browse files
Files changed (1) hide show
  1. checkpoints/r42/run42_config.toml +44 -0
checkpoints/r42/run42_config.toml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # R42: SimBa + Aux Phase Value + Zero PBRS
2
+ # Builds on R41 base (categorical reward, 5 new obs, P0/P1 alternation)
3
+ # Additions: SimBaAux architecture, phase-decomposed value, auxiliary opponent deadwood (DW) head, zero PBRS (potential-based reward shaping)
4
+
5
+ [ppo]
6
+ lr = 2.5e-4
7
+ num_envs = 4096
8
+ num_steps = 128
9
+ total_timesteps = 1_500_000_000
10
+ num_minibatches = 4
11
+ update_epochs = 4
12
+ gamma = 1.0
13
+ gae_lambda = 0.98
14
+ clip_eps = 0.2
15
+ ent_coef = 0.025
16
+ vf_coef = 0.75
17
+ max_grad_norm = 0.5
18
+ anneal_lr = true
19
+
20
+ [reward]
21
+ step_dw_shaping = 0.0
22
+
23
+ [training]
24
+ opponent = "mixed"
25
+ simba = true
26
+ simba_aux = true
27
+ suit_augmentation = false
28
+ aux_coef = 0.1
29
+
30
+ [opponents]
31
+ heuristic_prob = 0.30
32
+ aggressive_knock_prob = 0.15
33
+ meld_builder_prob = 0.10
34
+ early_knock_prob = 0.10
35
+ defensive_prob = 0.10
36
+ superhuman_lv4_prob = 0.05
37
+ superhuman_lv5_prob = 0.10
38
+ superhuman_lv7_prob = 0.05
39
+ frozen_checkpoint_prob = 0.05
40
+
41
+ [wandb]
42
+ run_name = "R42-simba-aux-phase-value-zero-pbrs"
43
+ entity = "good-start-labs"
44
+ project = "gsl-gin-rummy-mdp"