| [ | |
| { | |
| "train_loss": -0.15511070310068362, | |
| "train_policy_loss": 0.0, | |
| "train_kl_loss": -3.102214025259018, | |
| "train_reward": 0.30753333568573, | |
| "baseline": 0.3100000023841858, | |
| "epoch": 1 | |
| }, | |
| { | |
| "train_loss": -0.24350257394835353, | |
| "train_policy_loss": 0.0, | |
| "train_kl_loss": -5.263461359739304, | |
| "train_reward": 0.300500001758337, | |
| "baseline": 0.3100000023841858, | |
| "epoch": 2 | |
| } | |
| ] |