{ "seed": 0, "steps": 1000, "batch_size": 64, "hidden_dim": 256, "num_simulations": 64, "learning_rate": 0.0003, "value_coef": 1.0, "entropy_coef": 0.0, "checkpoint_dir": "/content/drive/MyDrive/yahtzeeRL/checkpoints/win_loss_margin_32simsrun4", "checkpoint_every": 100, "log_every": 10, "reward_mode": "win_loss_margin", "margin_weight": 0.25, "margin_scale": 50.0, "buffer_size": 50000, "minibatches_per_update": 1, "minibatch_size": 1024, "value_target_outcome_weight": 0.5, "teacher_every": 3, "teacher_num_simulations": 128, "teacher_batch_size": 32, "teacher_minibatches_per_update": 2, "teacher_minibatch_size": null }