{
  "seed": 0,
  "steps": 1000,
  "batch_size": 64,
  "hidden_dim": 256,
  "num_simulations": 64,
  "learning_rate": 0.0003,
  "value_coef": 1.0,
  "entropy_coef": 0.0,
  "checkpoint_dir": "/content/drive/MyDrive/yahtzeeRL/checkpoints/win_loss_margin_32simsrun4",
  "checkpoint_every": 100,
  "log_every": 10,
  "reward_mode": "win_loss_margin",
  "margin_weight": 0.25,
  "margin_scale": 50.0,
  "buffer_size": 50000,
  "minibatches_per_update": 1,
  "minibatch_size": 1024,
  "value_target_outcome_weight": 0.5,
  "teacher_every": 3,
  "teacher_num_simulations": 128,
  "teacher_batch_size": 32,
  "teacher_minibatches_per_update": 2,
  "teacher_minibatch_size": null
}