{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.06666666666666667,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.3988752365112305,
      "epoch": 0.0013333333333333333,
      "grad_norm": 0.01450820083840917,
      "importance_ratio": 0.9983458518981934,
      "learning_rate": 0.0,
      "loss": -0.0028,
      "mismatch_kl": 0.004329901188611984,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 1,
      "timing/generation_ms": 12196.653502061963,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 12196.653502061963,
      "tokens/completion": 562.04296875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 163.39810061454773
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.0297880172729492,
      "epoch": 0.0026666666666666666,
      "grad_norm": 0.006125098422428371,
      "importance_ratio": 0.9977808594703674,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.0118,
      "mismatch_kl": 0.0036596579011529684,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 2,
      "timing/generation_ms": 10855.522208847106,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 10855.522208847106,
      "tokens/completion": 652.203125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 98.15957498550415
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.2343848943710327,
      "epoch": 0.004,
      "grad_norm": 0.0093110934895908,
      "importance_ratio": 0.9983258843421936,
      "learning_rate": 2.0000000000000003e-06,
      "loss": -0.0068,
      "mismatch_kl": 0.00391958886757493,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 3,
      "timing/generation_ms": 14581.869984045625,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 14581.869984045625,
      "tokens/completion": 722.37109375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 171.60404181480408
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.581649661064148,
      "epoch": 0.005333333333333333,
      "grad_norm": 0.007696628408420481,
      "importance_ratio": 0.9986447095870972,
      "learning_rate": 3e-06,
      "loss": -0.0043,
      "mismatch_kl": 0.0024762798566371202,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 4,
      "timing/generation_ms": 11191.347393207252,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 11191.347393207252,
      "tokens/completion": 595.73046875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 186.33580946922302
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8588891625404358,
      "epoch": 0.006666666666666667,
      "grad_norm": 0.0055080213738955075,
      "importance_ratio": 0.9988943934440613,
      "learning_rate": 4.000000000000001e-06,
      "loss": -0.0033,
      "mismatch_kl": 0.0031517043244093657,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 5,
      "timing/generation_ms": 10668.582463636994,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 10668.582463636994,
      "tokens/completion": 636.53125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 82.27488708496094
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.071407675743103,
      "epoch": 0.008,
      "grad_norm": 0.02271832942623967,
      "importance_ratio": 0.998067319393158,
      "learning_rate": 5e-06,
      "loss": 0.0019,
      "mismatch_kl": 0.003643231000751257,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 6,
      "timing/generation_ms": 3378.591795451939,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 3378.591795451939,
      "tokens/completion": 178.73828125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 105.60203862190247
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8531922698020935,
      "epoch": 0.009333333333333334,
      "grad_norm": 0.018354067998903482,
      "importance_ratio": 0.9980432391166687,
      "learning_rate": 5e-06,
      "loss": -0.0002,
      "mismatch_kl": 0.003655636915937066,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 7,
      "timing/generation_ms": 12279.695899225771,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 12279.695899225771,
      "tokens/completion": 631.35546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 104.63563537597656
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7072162628173828,
      "epoch": 0.010666666666666666,
      "grad_norm": 0.005552532749027135,
      "importance_ratio": 0.9982293844223022,
      "learning_rate": 5e-06,
      "loss": -0.0014,
      "mismatch_kl": 0.0029395928140729666,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 8,
      "timing/generation_ms": 6614.743183366954,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 6614.743183366954,
      "tokens/completion": 339.1640625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 132.164165019989
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8156145215034485,
      "epoch": 0.012,
      "grad_norm": 0.008176505226750404,
      "importance_ratio": 0.9981797933578491,
      "learning_rate": 5e-06,
      "loss": 0.0027,
      "mismatch_kl": 0.0031279518734663725,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 9,
      "timing/generation_ms": 8826.908372342587,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8826.908372342587,
      "tokens/completion": 444.53515625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 144.61542773246765
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8708666563034058,
      "epoch": 0.013333333333333334,
      "grad_norm": 0.009382372847258274,
      "importance_ratio": 0.9981642961502075,
      "learning_rate": 5e-06,
      "loss": 0.0126,
      "mismatch_kl": 0.0030885515734553337,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 10,
      "timing/generation_ms": 7367.4805322662,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 7367.4805322662,
      "tokens/completion": 400.74609375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 176.57727003097534
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6906348466873169,
      "epoch": 0.014666666666666666,
      "grad_norm": 0.007616251351947248,
      "importance_ratio": 1.0045424699783325,
      "learning_rate": 5e-06,
      "loss": 0.0542,
      "mismatch_kl": 0.03194786608219147,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 11,
      "timing/generation_ms": 26879.562875255942,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 26879.562875255942,
      "tokens/completion": 1682.0546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 190.3386266231537
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6506091356277466,
      "epoch": 0.016,
      "grad_norm": 0.004382353798954015,
      "importance_ratio": 0.9982648491859436,
      "learning_rate": 5e-06,
      "loss": 0.043,
      "mismatch_kl": 0.02482638508081436,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 12,
      "timing/generation_ms": 22301.60311050713,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 22301.60311050713,
      "tokens/completion": 1387.734375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 177.6784646511078
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.143129825592041,
      "epoch": 0.017333333333333333,
      "grad_norm": 0.009321138085104996,
      "importance_ratio": 1.001217007637024,
      "learning_rate": 5e-06,
      "loss": -0.0139,
      "mismatch_kl": 0.0036374337505549192,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 13,
      "timing/generation_ms": 6277.724616229534,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 6277.724616229534,
      "tokens/completion": 432.66796875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 129.69259929656982
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.650863766670227,
      "epoch": 0.018666666666666668,
      "grad_norm": 0.0076614251264825245,
      "importance_ratio": 0.9983827471733093,
      "learning_rate": 5e-06,
      "loss": 0.0049,
      "mismatch_kl": 0.0027237425092607737,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 14,
      "timing/generation_ms": 7103.812717832625,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 7103.812717832625,
      "tokens/completion": 404.96484375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 57.65379452705383
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7439635992050171,
      "epoch": 0.02,
      "grad_norm": 0.009401568464987338,
      "importance_ratio": 0.9981654286384583,
      "learning_rate": 5e-06,
      "loss": 0.0109,
      "mismatch_kl": 0.002909082220867276,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 15,
      "timing/generation_ms": 8292.532542720437,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8292.532542720437,
      "tokens/completion": 459.85546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 92.21157336235046
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7638830542564392,
      "epoch": 0.021333333333333333,
      "grad_norm": 0.010374572910211358,
      "importance_ratio": 0.9969711899757385,
      "learning_rate": 5e-06,
      "loss": -0.005,
      "mismatch_kl": 0.0034673516638576984,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 16,
      "timing/generation_ms": 5712.000676430762,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 5712.000676430762,
      "tokens/completion": 308.4296875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 52.6866238117218
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.18189160525798798,
      "epoch": 0.02266666666666667,
      "grad_norm": 0.00257455457059234,
      "importance_ratio": 0.9984971880912781,
      "learning_rate": 5e-06,
      "loss": 0.0681,
      "mismatch_kl": 0.018514186143875122,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 17,
      "timing/generation_ms": 14281.423358246684,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 14281.423358246684,
      "tokens/completion": 1101.546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 129.24327325820923
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.5917271375656128,
      "epoch": 0.024,
      "grad_norm": 0.005668903472483887,
      "importance_ratio": 0.999828577041626,
      "learning_rate": 5e-06,
      "loss": 0.027,
      "mismatch_kl": 0.002100760815665126,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 18,
      "timing/generation_ms": 24175.399120897055,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 24175.399120897055,
      "tokens/completion": 1504.0390625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 158.57504653930664
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.3282008171081543,
      "epoch": 0.025333333333333333,
      "grad_norm": 0.006636047431888786,
      "importance_ratio": 1.0022344589233398,
      "learning_rate": 5e-06,
      "loss": -0.0015,
      "mismatch_kl": 0.004634195473045111,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 19,
      "timing/generation_ms": 15713.139976374805,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 15713.139976374805,
      "tokens/completion": 764.3203125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 78.56244468688965
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.02470862865448,
      "epoch": 0.02666666666666667,
      "grad_norm": 0.00833481021786943,
      "importance_ratio": 1.0026451349258423,
      "learning_rate": 5e-06,
      "loss": -0.0052,
      "mismatch_kl": 0.004158638883382082,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 20,
      "timing/generation_ms": 6632.851202040911,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 6632.851202040911,
      "tokens/completion": 382.6171875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 199.43552422523499
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.07275530695915222,
      "epoch": 0.028,
      "grad_norm": 0.005944388738403685,
      "importance_ratio": 0.9988561868667603,
      "learning_rate": 5e-06,
      "loss": 0.0452,
      "mismatch_kl": 0.00023643655003979802,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 21,
      "timing/generation_ms": 119174.6030151844,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 119174.6030151844,
      "tokens/completion": 4008.38671875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 685.4423098564148
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.83598792552948,
      "epoch": 0.029333333333333333,
      "grad_norm": 0.00977617477475085,
      "importance_ratio": 0.995696485042572,
      "learning_rate": 5e-06,
      "loss": 0.0066,
      "mismatch_kl": 0.003957619424909353,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 22,
      "timing/generation_ms": 12322.44247943163,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 12322.44247943163,
      "tokens/completion": 442.85546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 94.37256598472595
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8219886422157288,
      "epoch": 0.030666666666666665,
      "grad_norm": 0.0057449599218849946,
      "importance_ratio": 0.9990558624267578,
      "learning_rate": 5e-06,
      "loss": 0.0388,
      "mismatch_kl": 0.031180420890450478,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 23,
      "timing/generation_ms": 29090.628595091403,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 29090.628595091403,
      "tokens/completion": 1716.3515625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 169.45334482192993
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.0089167356491089,
      "epoch": 0.032,
      "grad_norm": 0.009762837519367,
      "importance_ratio": 0.9979202151298523,
      "learning_rate": 5e-06,
      "loss": 0.0012,
      "mismatch_kl": 0.00405939482152462,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 24,
      "timing/generation_ms": 17154.327374882996,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 17154.327374882996,
      "tokens/completion": 883.390625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 130.7891206741333
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.5053093433380127,
      "epoch": 0.03333333333333333,
      "grad_norm": 0.007416974683241316,
      "importance_ratio": 0.9982149600982666,
      "learning_rate": 5e-06,
      "loss": -0.0068,
      "mismatch_kl": 0.0024536694400012493,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 25,
      "timing/generation_ms": 28463.361867703497,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 28463.361867703497,
      "tokens/completion": 1409.54296875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 178.60342526435852
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.4973055422306061,
      "epoch": 0.034666666666666665,
      "grad_norm": 0.004048717808220336,
      "importance_ratio": 1.0012173652648926,
      "learning_rate": 5e-06,
      "loss": 0.0547,
      "mismatch_kl": 0.03473234549164772,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 26,
      "timing/generation_ms": 18848.746892996132,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 18848.746892996132,
      "tokens/completion": 1286.6875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 181.75563287734985
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.5914682149887085,
      "epoch": 0.036,
      "grad_norm": 0.010568088931367656,
      "importance_ratio": 0.9986244440078735,
      "learning_rate": 5e-06,
      "loss": -0.0214,
      "mismatch_kl": 0.002536088228225708,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 27,
      "timing/generation_ms": 11602.461927570403,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 11602.461927570403,
      "tokens/completion": 734.80078125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 188.88015818595886
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.4526905119419098,
      "epoch": 0.037333333333333336,
      "grad_norm": 0.0035728175606856527,
      "importance_ratio": 0.9999799728393555,
      "learning_rate": 5e-06,
      "loss": 0.0026,
      "mismatch_kl": 0.0024842985440045595,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 28,
      "timing/generation_ms": 30549.59301650524,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 30549.59301650524,
      "tokens/completion": 1536.96875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 328.0478210449219
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.32794511318206787,
      "epoch": 0.03866666666666667,
      "grad_norm": 0.003333518820406266,
      "importance_ratio": 0.9995192885398865,
      "learning_rate": 5e-06,
      "loss": 0.056,
      "mismatch_kl": 0.028769802302122116,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 29,
      "timing/generation_ms": 18838.333567604423,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 18838.333567604423,
      "tokens/completion": 1263.640625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 290.5948350429535
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.9063822031021118,
      "epoch": 0.04,
      "grad_norm": 0.007342388496075293,
      "importance_ratio": 0.9953157901763916,
      "learning_rate": 5e-06,
      "loss": 0.0025,
      "mismatch_kl": 0.004266439005732536,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 30,
      "timing/generation_ms": 9477.213966660202,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 9477.213966660202,
      "tokens/completion": 473.26953125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 62.30127143859863
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7977282404899597,
      "epoch": 0.04133333333333333,
      "grad_norm": 0.00884043332375607,
      "importance_ratio": 0.9971498847007751,
      "learning_rate": 5e-06,
      "loss": -0.0029,
      "mismatch_kl": 0.004033135715872049,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 31,
      "timing/generation_ms": 18995.201839134097,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 18995.201839134097,
      "tokens/completion": 958.625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 88.6347918510437
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8451470732688904,
      "epoch": 0.042666666666666665,
      "grad_norm": 0.018842389370386323,
      "importance_ratio": 0.9982671141624451,
      "learning_rate": 5e-06,
      "loss": 0.0369,
      "mismatch_kl": 0.003600390162318945,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 32,
      "timing/generation_ms": 5587.277088314295,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 5587.277088314295,
      "tokens/completion": 407.12109375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 140.4788475036621
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.1521912813186646,
      "epoch": 0.044,
      "grad_norm": 0.006379742039913797,
      "importance_ratio": 0.997858464717865,
      "learning_rate": 5e-06,
      "loss": -0.0065,
      "mismatch_kl": 0.005035887472331524,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 33,
      "timing/generation_ms": 18916.152058169246,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 18916.152058169246,
      "tokens/completion": 966.55859375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 90.41954302787781
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.1553761959075928,
      "epoch": 0.04533333333333334,
      "grad_norm": 0.010733713274389883,
      "importance_ratio": 1.0111567974090576,
      "learning_rate": 5e-06,
      "loss": 0.0014,
      "mismatch_kl": 0.006704343948513269,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 34,
      "timing/generation_ms": 17302.85968258977,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 17302.85968258977,
      "tokens/completion": 864.44140625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 143.8659963607788
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.3105199635028839,
      "epoch": 0.04666666666666667,
      "grad_norm": 0.003940130100379767,
      "importance_ratio": 1.0006911754608154,
      "learning_rate": 5e-06,
      "loss": 0.0315,
      "mismatch_kl": 0.022524980828166008,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 35,
      "timing/generation_ms": 29806.298807263374,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 29806.298807263374,
      "tokens/completion": 1672.48828125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 164.04821372032166
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7972971200942993,
      "epoch": 0.048,
      "grad_norm": 0.008409173142054645,
      "importance_ratio": 0.9948906898498535,
      "learning_rate": 5e-06,
      "loss": 0.004,
      "mismatch_kl": 0.004282351583242416,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 36,
      "timing/generation_ms": 14936.399303376675,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 14936.399303376675,
      "tokens/completion": 787.78125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 196.79586815834045
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.5769950747489929,
      "epoch": 0.04933333333333333,
      "grad_norm": 0.009636377939254703,
      "importance_ratio": 0.9972301721572876,
      "learning_rate": 5e-06,
      "loss": -0.0011,
      "mismatch_kl": 0.003603809280321002,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 37,
      "timing/generation_ms": 13729.571803472936,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 13729.571803472936,
      "tokens/completion": 697.64453125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 76.77378511428833
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.715777575969696,
      "epoch": 0.050666666666666665,
      "grad_norm": 0.005305945077729364,
      "importance_ratio": 0.9969701766967773,
      "learning_rate": 5e-06,
      "loss": 0.0093,
      "mismatch_kl": 0.004232620354741812,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 38,
      "timing/generation_ms": 26689.202761277556,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 26689.202761277556,
      "tokens/completion": 1302.75390625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 119.53459739685059
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.108477234840393,
      "epoch": 0.052,
      "grad_norm": 0.01158876392732835,
      "importance_ratio": 0.9918505549430847,
      "learning_rate": 5e-06,
      "loss": 0.0069,
      "mismatch_kl": 0.0055715711787343025,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 39,
      "timing/generation_ms": 9316.26115180552,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 9316.26115180552,
      "tokens/completion": 510.88671875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 134.0537760257721
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.0468562841415405,
      "epoch": 0.05333333333333334,
      "grad_norm": 0.006250915142780056,
      "importance_ratio": 0.993874192237854,
      "learning_rate": 5e-06,
      "loss": -0.004,
      "mismatch_kl": 0.00569565873593092,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 40,
      "timing/generation_ms": 22657.30178449303,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 22657.30178449303,
      "tokens/completion": 1117.0390625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 104.31990480422974
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.0242066383361816,
      "epoch": 0.05466666666666667,
      "grad_norm": 0.009730238448609988,
      "importance_ratio": 1.0014866590499878,
      "learning_rate": 5e-06,
      "loss": 0.0029,
      "mismatch_kl": 0.006813807878643274,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 41,
      "timing/generation_ms": 15266.15516282618,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 15266.15516282618,
      "tokens/completion": 789.296875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 137.6475269794464
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.9917812943458557,
      "epoch": 0.056,
      "grad_norm": 0.015130940878153589,
      "importance_ratio": 0.9915910959243774,
      "learning_rate": 5e-06,
      "loss": -0.0016,
      "mismatch_kl": 0.006494010798633099,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 42,
      "timing/generation_ms": 8552.51188017428,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8552.51188017428,
      "tokens/completion": 427.00390625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 58.246270418167114
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7529230117797852,
      "epoch": 0.05733333333333333,
      "grad_norm": 0.017225340266775354,
      "importance_ratio": 0.9983583092689514,
      "learning_rate": 5e-06,
      "loss": 0.0017,
      "mismatch_kl": 0.005849814508110285,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 43,
      "timing/generation_ms": 5776.03021170944,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 5776.03021170944,
      "tokens/completion": 292.6484375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 138.62879586219788
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.9116057753562927,
      "epoch": 0.058666666666666666,
      "grad_norm": 0.013240792131345649,
      "importance_ratio": 0.993713915348053,
      "learning_rate": 5e-06,
      "loss": -0.006,
      "mismatch_kl": 0.00599726801738143,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 44,
      "timing/generation_ms": 4909.729053266346,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 4909.729053266346,
      "tokens/completion": 252.2890625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 53.461458683013916
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6952740550041199,
      "epoch": 0.06,
      "grad_norm": 0.007271643900369788,
      "importance_ratio": 0.9978048205375671,
      "learning_rate": 5e-06,
      "loss": -0.0094,
      "mismatch_kl": 0.004028764553368092,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 45,
      "timing/generation_ms": 12042.251928709447,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 12042.251928709447,
      "tokens/completion": 668.03125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 77.72424340248108
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8735002279281616,
      "epoch": 0.06133333333333333,
      "grad_norm": 0.00817327643152143,
      "importance_ratio": 1.0016076564788818,
      "learning_rate": 5e-06,
      "loss": 0.0002,
      "mismatch_kl": 0.004535754211246967,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 46,
      "timing/generation_ms": 8553.523855283856,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8553.523855283856,
      "tokens/completion": 459.87109375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 104.68091750144958
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7288662195205688,
      "epoch": 0.06266666666666666,
      "grad_norm": 0.016180435920793518,
      "importance_ratio": 1.0001567602157593,
      "learning_rate": 5e-06,
      "loss": 0.0002,
      "mismatch_kl": 0.006666674744337797,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 47,
      "timing/generation_ms": 7466.575676575303,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 7466.575676575303,
      "tokens/completion": 361.484375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 59.11225175857544
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6449630856513977,
      "epoch": 0.064,
      "grad_norm": 0.004581873635760183,
      "importance_ratio": 1.0026441812515259,
      "learning_rate": 5e-06,
      "loss": 0.0588,
      "mismatch_kl": 0.059744831174612045,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 48,
      "timing/generation_ms": 14945.35976741463,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 14945.35976741463,
      "tokens/completion": 1044.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 182.3247947692871
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8048098683357239,
      "epoch": 0.06533333333333333,
      "grad_norm": 0.0052364810032066635,
      "importance_ratio": 0.9973055720329285,
      "learning_rate": 5e-06,
      "loss": 0.0347,
      "mismatch_kl": 0.0451083704829216,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 49,
      "timing/generation_ms": 28440.53523708135,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 28440.53523708135,
      "tokens/completion": 1630.4296875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 221.0147523880005
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7735000252723694,
      "epoch": 0.06666666666666667,
      "grad_norm": 0.015103596816141955,
      "importance_ratio": 0.9899436831474304,
      "learning_rate": 5e-06,
      "loss": -0.0022,
      "mismatch_kl": 0.008240272291004658,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 50,
      "timing/generation_ms": 6061.147706583142,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 6061.147706583142,
      "tokens/completion": 331.859375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 108.05560183525085
    }
  ],
  "logging_steps": 1,
  "max_steps": 750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}