File size: 3,281 Bytes
c151ed0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | [
{
"loss": 0.0,
"grad_norm": 33.0,
"learning_rate": 5e-07,
"completion_length": 87.5125,
"rewards/real_reward_fn": -0.08881250247359276,
"reward": -0.08881250247359276,
"reward_std": 0.1590106257237494,
"kl": 0.0008317360421642661,
"epoch": 0.16,
"step": 5
},
{
"loss": 0.0,
"grad_norm": 55.25,
"learning_rate": 5e-07,
"completion_length": 85.86875,
"rewards/real_reward_fn": -0.08131250264123083,
"reward": -0.08131250264123083,
"reward_std": 0.1880020027048886,
"kl": 0.001102981199801434,
"epoch": 0.32,
"step": 10
},
{
"loss": 0.0,
"grad_norm": 39.0,
"learning_rate": 5e-07,
"completion_length": 83.225,
"rewards/real_reward_fn": 0.02462499784305692,
"reward": 0.02462499784305692,
"reward_std": 0.13523416444659234,
"kl": 0.001013497701205779,
"epoch": 0.48,
"step": 15
},
{
"loss": 0.0,
"grad_norm": 27.125,
"learning_rate": 5e-07,
"completion_length": 80.8125,
"rewards/real_reward_fn": -0.08068750295788049,
"reward": -0.08068750295788049,
"reward_std": 0.18747167382389307,
"kl": 0.0011422165334806778,
"epoch": 0.64,
"step": 20
},
{
"loss": 0.0,
"grad_norm": 30.5,
"learning_rate": 5e-07,
"completion_length": 80.29375,
"rewards/real_reward_fn": -0.03431250210851431,
"reward": -0.03431250210851431,
"reward_std": 0.18764845319092274,
"kl": 0.0010913557125604711,
"epoch": 0.8,
"step": 25
},
{
"loss": 0.0,
"grad_norm": 58.0,
"learning_rate": 5e-07,
"completion_length": 80.575,
"rewards/real_reward_fn": 0.024874999094754456,
"reward": 0.024874999094754456,
"reward_std": 0.1568009190261364,
"kl": 0.0010487768857274204,
"epoch": 0.96,
"step": 30
},
{
"loss": 0.0,
"grad_norm": 35.0,
"learning_rate": 5e-07,
"completion_length": 76.90441176470588,
"rewards/real_reward_fn": -0.05720588389564963,
"reward": -0.05720588389564963,
"reward_std": 0.14079742714324417,
"kl": 0.0010246854611015057,
"epoch": 1.096,
"step": 35
},
{
"loss": 0.0,
"grad_norm": 28.125,
"learning_rate": 5e-07,
"completion_length": 83.84375,
"rewards/real_reward_fn": -0.02056250227615237,
"reward": -0.02056250227615237,
"reward_std": 0.14504527123644947,
"kl": 0.001108163884782698,
"epoch": 1.256,
"step": 40
},
{
"loss": 0.0,
"grad_norm": 32.0,
"learning_rate": 5e-07,
"completion_length": 79.25,
"rewards/real_reward_fn": 0.021437497437000276,
"reward": 0.021437497437000276,
"reward_std": 0.17562763625755906,
"kl": 0.0010782188706798478,
"epoch": 1.416,
"step": 45
},
{
"loss": 0.0,
"grad_norm": 35.75,
"learning_rate": 5e-07,
"completion_length": 86.08125,
"rewards/real_reward_fn": -0.032250001840293405,
"reward": -0.032250001840293405,
"reward_std": 0.16334165595471858,
"kl": 0.0010450664689415135,
"epoch": 1.576,
"step": 50
},
{
"train_runtime": 1158.1834,
"train_samples_per_second": 0.691,
"train_steps_per_second": 0.043,
"total_flos": 0.0,
"train_loss": 4.1337845323141666e-05,
"epoch": 1.576,
"step": 50
}
] |