| [ |
| { |
| "loss": 0.0, |
| "grad_norm": 33.0, |
| "learning_rate": 5e-07, |
| "completion_length": 87.5125, |
| "rewards/real_reward_fn": -0.08881250247359276, |
| "reward": -0.08881250247359276, |
| "reward_std": 0.1590106257237494, |
| "kl": 0.0008317360421642661, |
| "epoch": 0.16, |
| "step": 5 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 55.25, |
| "learning_rate": 5e-07, |
| "completion_length": 85.86875, |
| "rewards/real_reward_fn": -0.08131250264123083, |
| "reward": -0.08131250264123083, |
| "reward_std": 0.1880020027048886, |
| "kl": 0.001102981199801434, |
| "epoch": 0.32, |
| "step": 10 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 39.0, |
| "learning_rate": 5e-07, |
| "completion_length": 83.225, |
| "rewards/real_reward_fn": 0.02462499784305692, |
| "reward": 0.02462499784305692, |
| "reward_std": 0.13523416444659234, |
| "kl": 0.001013497701205779, |
| "epoch": 0.48, |
| "step": 15 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 27.125, |
| "learning_rate": 5e-07, |
| "completion_length": 80.8125, |
| "rewards/real_reward_fn": -0.08068750295788049, |
| "reward": -0.08068750295788049, |
| "reward_std": 0.18747167382389307, |
| "kl": 0.0011422165334806778, |
| "epoch": 0.64, |
| "step": 20 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 30.5, |
| "learning_rate": 5e-07, |
| "completion_length": 80.29375, |
| "rewards/real_reward_fn": -0.03431250210851431, |
| "reward": -0.03431250210851431, |
| "reward_std": 0.18764845319092274, |
| "kl": 0.0010913557125604711, |
| "epoch": 0.8, |
| "step": 25 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 58.0, |
| "learning_rate": 5e-07, |
| "completion_length": 80.575, |
| "rewards/real_reward_fn": 0.024874999094754456, |
| "reward": 0.024874999094754456, |
| "reward_std": 0.1568009190261364, |
| "kl": 0.0010487768857274204, |
| "epoch": 0.96, |
| "step": 30 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 35.0, |
| "learning_rate": 5e-07, |
| "completion_length": 76.90441176470588, |
| "rewards/real_reward_fn": -0.05720588389564963, |
| "reward": -0.05720588389564963, |
| "reward_std": 0.14079742714324417, |
| "kl": 0.0010246854611015057, |
| "epoch": 1.096, |
| "step": 35 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 28.125, |
| "learning_rate": 5e-07, |
| "completion_length": 83.84375, |
| "rewards/real_reward_fn": -0.02056250227615237, |
| "reward": -0.02056250227615237, |
| "reward_std": 0.14504527123644947, |
| "kl": 0.001108163884782698, |
| "epoch": 1.256, |
| "step": 40 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 32.0, |
| "learning_rate": 5e-07, |
| "completion_length": 79.25, |
| "rewards/real_reward_fn": 0.021437497437000276, |
| "reward": 0.021437497437000276, |
| "reward_std": 0.17562763625755906, |
| "kl": 0.0010782188706798478, |
| "epoch": 1.416, |
| "step": 45 |
| }, |
| { |
| "loss": 0.0, |
| "grad_norm": 35.75, |
| "learning_rate": 5e-07, |
| "completion_length": 86.08125, |
| "rewards/real_reward_fn": -0.032250001840293405, |
| "reward": -0.032250001840293405, |
| "reward_std": 0.16334165595471858, |
| "kl": 0.0010450664689415135, |
| "epoch": 1.576, |
| "step": 50 |
| }, |
| { |
| "train_runtime": 1158.1834, |
| "train_samples_per_second": 0.691, |
| "train_steps_per_second": 0.043, |
| "total_flos": 0.0, |
| "train_loss": 4.1337845323141666e-05, |
| "epoch": 1.576, |
| "step": 50 |
| } |
| ] |