{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2733333333333333,
  "eval_steps": 50,
  "global_step": 205,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.3932524919509888,
      "epoch": 0.0013333333333333333,
      "grad_norm": 0.016694727116637192,
      "importance_ratio": 0.9986082315444946,
      "learning_rate": 0.0,
      "loss": -0.0189,
      "mismatch_kl": 0.004300346598029137,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 1,
      "timing/generation_ms": 11961.050138808787,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 11961.050138808787,
      "tokens/completion": 551.78125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 248.72634100914001
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.0695139169692993,
      "epoch": 0.0026666666666666666,
      "grad_norm": 0.008567213424127631,
      "importance_ratio": 0.9980430603027344,
      "learning_rate": 1.0000000000000002e-06,
      "loss": -0.0055,
      "mismatch_kl": 0.0036789600271731615,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 2,
      "timing/generation_ms": 11398.794241249561,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 11398.794241249561,
      "tokens/completion": 647.02734375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 67.39928388595581
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.2690891027450562,
      "epoch": 0.004,
      "grad_norm": 0.007856590727089238,
      "importance_ratio": 0.9990478157997131,
      "learning_rate": 2.0000000000000003e-06,
      "loss": -0.0147,
      "mismatch_kl": 0.00404919171705842,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 3,
      "timing/generation_ms": 13145.053108222783,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 13145.053108222783,
      "tokens/completion": 695.94140625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 188.99010276794434
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.5635457634925842,
      "epoch": 0.005333333333333333,
      "grad_norm": 0.008427354641048032,
      "importance_ratio": 0.9995828866958618,
      "learning_rate": 3e-06,
      "loss": -0.0056,
      "mismatch_kl": 0.0024689023848623037,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 4,
      "timing/generation_ms": 12098.999472334981,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 12098.999472334981,
      "tokens/completion": 634.3515625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 52.7923378944397
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8588207364082336,
      "epoch": 0.006666666666666667,
      "grad_norm": 0.015271082061520619,
      "importance_ratio": 0.9998404383659363,
      "learning_rate": 4.000000000000001e-06,
      "loss": -0.0201,
      "mismatch_kl": 0.003175633493810892,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 5,
      "timing/generation_ms": 9795.204265974462,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 9795.204265974462,
      "tokens/completion": 595.30078125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 56.867586612701416
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.0917283296585083,
      "epoch": 0.008,
      "grad_norm": 0.015440441848262498,
      "importance_ratio": 1.0006937980651855,
      "learning_rate": 5e-06,
      "loss": -0.0046,
      "mismatch_kl": 0.003965948708355427,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 6,
      "timing/generation_ms": 3492.4034476280212,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 3492.4034476280212,
      "tokens/completion": 176.77734375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 61.55981087684631
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.750698983669281,
      "epoch": 0.009333333333333334,
      "grad_norm": 0.008842566430176115,
      "importance_ratio": 1.0032514333724976,
      "learning_rate": 5e-06,
      "loss": 0.0042,
      "mismatch_kl": 0.0037081094924360514,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 7,
      "timing/generation_ms": 12012.088196352124,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 12012.088196352124,
      "tokens/completion": 664.06640625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 80.06084942817688
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6958726644515991,
      "epoch": 0.010666666666666666,
      "grad_norm": 0.020865513665125984,
      "importance_ratio": 0.9998727440834045,
      "learning_rate": 5e-06,
      "loss": -0.0015,
      "mismatch_kl": 0.003091922029852867,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 8,
      "timing/generation_ms": 7164.519478566945,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 7164.519478566945,
      "tokens/completion": 376.96484375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 122.57408618927002
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.647992730140686,
      "epoch": 0.012,
      "grad_norm": 0.010516528439614162,
      "importance_ratio": 0.9973449110984802,
      "learning_rate": 5e-06,
      "loss": 0.0348,
      "mismatch_kl": 0.002668753731995821,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 9,
      "timing/generation_ms": 9473.532978445292,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 9473.532978445292,
      "tokens/completion": 589.9375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 111.60580968856812
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.9175997972488403,
      "epoch": 0.013333333333333334,
      "grad_norm": 0.017217377658999368,
      "importance_ratio": 0.9963379502296448,
      "learning_rate": 5e-06,
      "loss": -0.0133,
      "mismatch_kl": 0.003761034458875656,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 10,
      "timing/generation_ms": 8803.215935826302,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8803.215935826302,
      "tokens/completion": 432.890625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 137.27361369132996
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6505714654922485,
      "epoch": 0.014666666666666666,
      "grad_norm": 0.0034942507757306364,
      "importance_ratio": 0.9997450113296509,
      "learning_rate": 5e-06,
      "loss": 0.0567,
      "mismatch_kl": 0.025293370708823204,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 11,
      "timing/generation_ms": 28037.367599084973,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 28037.367599084973,
      "tokens/completion": 1677.38671875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 147.27029275894165
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.623925507068634,
      "epoch": 0.016,
      "grad_norm": 0.004363286027787366,
      "importance_ratio": 0.9998379349708557,
      "learning_rate": 5e-06,
      "loss": 0.037,
      "mismatch_kl": 0.027607521042227745,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 12,
      "timing/generation_ms": 30658.961144275963,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 30658.961144275963,
      "tokens/completion": 1772.48046875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 228.39264035224915
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 1.2309722900390625,
      "epoch": 0.017333333333333333,
      "grad_norm": 0.01910079735377139,
      "importance_ratio": 0.9967860579490662,
      "learning_rate": 5e-06,
      "loss": -0.0146,
      "mismatch_kl": 0.004334039054811001,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 13,
      "timing/generation_ms": 7481.697678565979,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 7481.697678565979,
      "tokens/completion": 458.546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 125.09760117530823
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.583360493183136,
      "epoch": 0.018666666666666668,
      "grad_norm": 0.006859469099074894,
      "importance_ratio": 0.9988465905189514,
      "learning_rate": 5e-06,
      "loss": -0.0041,
      "mismatch_kl": 0.0028068351093679667,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 14,
      "timing/generation_ms": 8050.086663104594,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8050.086663104594,
      "tokens/completion": 466.06640625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 117.39565086364746
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7860226035118103,
      "epoch": 0.02,
      "grad_norm": 0.011283066327858677,
      "importance_ratio": 1.002608299255371,
      "learning_rate": 5e-06,
      "loss": -0.0035,
      "mismatch_kl": 0.004051415715366602,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 15,
      "timing/generation_ms": 9803.531439974904,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 9803.531439974904,
      "tokens/completion": 522.2109375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 48.61639094352722
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7184260487556458,
      "epoch": 0.021333333333333333,
      "grad_norm": 0.010228445907240152,
      "importance_ratio": 1.000801920890808,
      "learning_rate": 5e-06,
      "loss": -0.0066,
      "mismatch_kl": 0.006085229571908712,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 16,
      "timing/generation_ms": 8376.314821653068,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 8376.314821653068,
      "tokens/completion": 458.83984375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 69.11118984222412
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.18061073124408722,
      "epoch": 0.02266666666666667,
      "grad_norm": 0.0036474713562644418,
      "importance_ratio": 0.9987739324569702,
      "learning_rate": 5e-06,
      "loss": 0.0657,
      "mismatch_kl": 0.025802385061979294,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 17,
      "timing/generation_ms": 16682.96501878649,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 16682.96501878649,
      "tokens/completion": 1178.22265625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 110.8058807849884
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.3768082559108734,
      "epoch": 0.024,
      "grad_norm": 0.007994642717131743,
      "importance_ratio": 0.9989356398582458,
      "learning_rate": 5e-06,
      "loss": 0.0198,
      "mismatch_kl": 0.0024773485492914915,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 18,
      "timing/generation_ms": 45841.41308255494,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 45841.41308255494,
      "tokens/completion": 2401.60546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 182.70060086250305
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.9849978685379028,
      "epoch": 0.025333333333333333,
      "grad_norm": 0.007975010652496835,
      "importance_ratio": 0.9994485974311829,
      "learning_rate": 5e-06,
      "loss": -0.0032,
      "mismatch_kl": 0.007306213956326246,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 19,
      "timing/generation_ms": 21281.952754594386,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 21281.952754594386,
      "tokens/completion": 1127.03515625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 118.257479429245
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.783280074596405,
      "epoch": 0.02666666666666667,
      "grad_norm": 0.01013309688610727,
      "importance_ratio": 1.0076329708099365,
      "learning_rate": 5e-06,
      "loss": -0.002,
      "mismatch_kl": 0.008437588810920715,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 20,
      "timing/generation_ms": 11283.36211759597,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 11283.36211759597,
      "tokens/completion": 603.92578125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 50.433815717697144
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.18535619974136353,
      "epoch": 0.028,
      "grad_norm": 0.12592122275182266,
      "importance_ratio": 0.994857132434845,
      "learning_rate": 5e-06,
      "loss": 0.057,
      "mismatch_kl": 0.004472589120268822,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 21,
      "timing/generation_ms": 69204.76855803281,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 69204.76855803281,
      "tokens/completion": 3062.171875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 335.8162593841553
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.7172983884811401,
      "epoch": 0.029333333333333333,
      "grad_norm": 0.011698600330274578,
      "importance_ratio": 1.0030107498168945,
      "learning_rate": 5e-06,
      "loss": -0.0094,
      "mismatch_kl": 0.03951645269989967,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 22,
      "timing/generation_ms": 16505.55451028049,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 16505.55451028049,
      "tokens/completion": 675.60546875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 61.02479434013367
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.8583077192306519,
      "epoch": 0.030666666666666665,
      "grad_norm": 0.02332906550498323,
      "importance_ratio": 1.0737003087997437,
      "learning_rate": 5e-06,
      "loss": 0.0468,
      "mismatch_kl": 0.21222208440303802,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 23,
      "timing/generation_ms": 47965.167357586324,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 47965.167357586324,
      "tokens/completion": 2437.57421875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 184.88851642608643
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.6403871178627014,
      "epoch": 0.032,
      "grad_norm": 0.0064139472738614185,
      "importance_ratio": 1.0027199983596802,
      "learning_rate": 5e-06,
      "loss": 0.0079,
      "mismatch_kl": 0.029356306418776512,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 24,
      "timing/generation_ms": 25231.056010350585,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 25231.056010350585,
      "tokens/completion": 1253.125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 127.16959929466248
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.26308295130729675,
      "epoch": 0.03333333333333333,
      "grad_norm": 0.004856521131545869,
      "importance_ratio": 0.99989914894104,
      "learning_rate": 5e-06,
      "loss": 0.0162,
      "mismatch_kl": 0.006057343445718288,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 25,
      "timing/generation_ms": 44386.24160736799,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 44386.24160736799,
      "tokens/completion": 2212.2421875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 173.18823885917664
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.12470932304859161,
      "epoch": 0.034666666666666665,
      "grad_norm": 0.001678447935003649,
      "importance_ratio": 1.0007412433624268,
      "learning_rate": 5e-06,
      "loss": 0.0462,
      "mismatch_kl": 0.001119845313951373,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 26,
      "timing/generation_ms": 100999.46400336921,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 100999.46400336921,
      "tokens/completion": 3716.6796875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 303.84296584129333
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.17583802342414856,
      "epoch": 0.036,
      "grad_norm": 0.002584350761592735,
      "importance_ratio": 1.001440405845642,
      "learning_rate": 5e-06,
      "loss": 0.0264,
      "mismatch_kl": 0.0013389256782829762,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 27,
      "timing/generation_ms": 55200.44738613069,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 55200.44738613069,
      "tokens/completion": 2656.7265625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 294.736074924469
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.19836626946926117,
      "epoch": 0.037333333333333336,
      "grad_norm": 0.005548904662699889,
      "importance_ratio": 1.0022764205932617,
      "learning_rate": 5e-06,
      "loss": 0.0251,
      "mismatch_kl": 0.0019016863079741597,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 28,
      "timing/generation_ms": 57617.69188474864,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 57617.69188474864,
      "tokens/completion": 2797.6171875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 228.97359490394592
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.37241131067276,
      "epoch": 0.03866666666666667,
      "grad_norm": 0.02006388030524017,
      "importance_ratio": 1.053019404411316,
      "learning_rate": 5e-06,
      "loss": 0.0557,
      "mismatch_kl": 0.9634742736816406,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 29,
      "timing/generation_ms": 41741.05513561517,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 41741.05513561517,
      "tokens/completion": 2055.87890625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 207.62974190711975
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.25762397050857544,
      "epoch": 0.04,
      "grad_norm": 0.006806951429177103,
      "importance_ratio": 0.983231246471405,
      "learning_rate": 5e-06,
      "loss": 0.0364,
      "mismatch_kl": 0.06448693573474884,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 30,
      "timing/generation_ms": 29489.30747061968,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 29489.30747061968,
      "tokens/completion": 1709.59765625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 175.62516474723816
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.019520161673426628,
      "epoch": 0.04133333333333333,
      "grad_norm": 0.0005178617259035183,
      "importance_ratio": 0.9998506307601929,
      "learning_rate": 5e-06,
      "loss": 0.0014,
      "mismatch_kl": 0.0017281156033277512,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 31,
      "timing/generation_ms": 255150.22794622928,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 255150.22794622928,
      "tokens/completion": 6100.89453125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 537.7091252803802
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.005344062577933073,
      "epoch": 0.042666666666666665,
      "grad_norm": 0.00042076548606043374,
      "importance_ratio": 1.0000818967819214,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 0.00012820436677429825,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 32,
      "timing/generation_ms": 252645.98809182644,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 252645.98809182644,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 564.6809096336365
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.0041460455395281315,
      "epoch": 0.044,
      "grad_norm": 0.0004905946483254039,
      "importance_ratio": 1.0000282526016235,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 6.918103463249281e-05,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 33,
      "timing/generation_ms": 262179.48642838746,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 262179.48642838746,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 574.2838616371155
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.005189419258385897,
      "epoch": 0.04533333333333334,
      "grad_norm": 0.0003380219234355203,
      "importance_ratio": 1.0000487565994263,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 7.488115079468116e-05,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 34,
      "timing/generation_ms": 257649.44062847644,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 257649.44062847644,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 616.5528900623322
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.005219260696321726,
      "epoch": 0.04666666666666667,
      "grad_norm": 0.0006402287013777213,
      "importance_ratio": 1.0000388622283936,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 0.00010059373016702011,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 35,
      "timing/generation_ms": 263093.6838546768,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 263093.6838546768,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 567.3024535179138
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.008569693192839622,
      "epoch": 0.048,
      "grad_norm": 0.0005047742243801816,
      "importance_ratio": 1.0000779628753662,
      "learning_rate": 5e-06,
      "loss": 0.0004,
      "mismatch_kl": 0.0001211672934005037,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 36,
      "timing/generation_ms": 242657.4441930279,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 242657.4441930279,
      "tokens/completion": 6123.421875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 543.5283715724945
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.011535107158124447,
      "epoch": 0.04933333333333333,
      "grad_norm": 0.0004641880444433118,
      "importance_ratio": 1.0000940561294556,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 0.00016296253306791186,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 37,
      "timing/generation_ms": 253055.44871557504,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 253055.44871557504,
      "tokens/completion": 6100.4375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 529.3097188472748
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.008278747089207172,
      "epoch": 0.050666666666666665,
      "grad_norm": 0.0015602978869027017,
      "importance_ratio": 1.000083565711975,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 0.00012404406152199954,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 38,
      "timing/generation_ms": 259809.8956849426,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 259809.8956849426,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 572.6026647090912
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.0070807370357215405,
      "epoch": 0.052,
      "grad_norm": 0.0004621624833577141,
      "importance_ratio": 1.000075340270996,
      "learning_rate": 5e-06,
      "loss": -0.0,
      "mismatch_kl": 0.00010999527876265347,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 39,
      "timing/generation_ms": 266124.4311518967,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 266124.4311518967,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 645.3593230247498
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.00655187526717782,
      "epoch": 0.05333333333333334,
      "grad_norm": 0.00032702966921445734,
      "importance_ratio": 1.0000351667404175,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 0.00014068085874896497,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 40,
      "timing/generation_ms": 262011.0893426463,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 262011.0893426463,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 515.61732006073
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.0039160363376140594,
      "epoch": 0.05466666666666667,
      "grad_norm": 0.0003724535269895079,
      "importance_ratio": 1.0000481605529785,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 7.484626985387877e-05,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 41,
      "timing/generation_ms": 255759.41647868603,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 255759.41647868603,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 565.8597645759583
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.01127232052385807,
      "epoch": 0.056,
      "grad_norm": 0.0008175801103252065,
      "importance_ratio": 1.0000771284103394,
      "learning_rate": 5e-06,
      "loss": 0.0068,
      "mismatch_kl": 0.00016380040324293077,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 42,
      "timing/generation_ms": 238812.61033378541,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 238812.61033378541,
      "tokens/completion": 6073.61328125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 484.4759180545807
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.01085229218006134,
      "epoch": 0.05733333333333333,
      "grad_norm": 0.0004839828768652627,
      "importance_ratio": 1.0000557899475098,
      "learning_rate": 5e-06,
      "loss": 0.0063,
      "mismatch_kl": 0.00013297870464157313,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 43,
      "timing/generation_ms": 256715.18344525248,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 256715.18344525248,
      "tokens/completion": 6078.20703125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 537.6344306468964
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.005195128731429577,
      "epoch": 0.058666666666666666,
      "grad_norm": 0.00023276391851811837,
      "importance_ratio": 1.0000344514846802,
      "learning_rate": 5e-06,
      "loss": 0.0023,
      "mismatch_kl": 8.078882819972932e-05,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 44,
      "timing/generation_ms": 245682.50108975917,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 245682.50108975917,
      "tokens/completion": 6098.1015625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 491.3542585372925
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.0033533975947648287,
      "epoch": 0.06,
      "grad_norm": 0.00016439514868896496,
      "importance_ratio": 1.00002920627594,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 7.133631879696622e-05,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 45,
      "timing/generation_ms": 261392.2018893063,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 261392.2018893063,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 638.6866817474365
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.009633159264922142,
      "epoch": 0.06133333333333333,
      "grad_norm": 0.0005837700251924664,
      "importance_ratio": 1.000110149383545,
      "learning_rate": 5e-06,
      "loss": -0.0005,
      "mismatch_kl": 0.00014644436305388808,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 46,
      "timing/generation_ms": 259352.97147464007,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 259352.97147464007,
      "tokens/completion": 6100.9375,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 726.6395015716553
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.008214793168008327,
      "epoch": 0.06266666666666666,
      "grad_norm": 0.0003491652028248511,
      "importance_ratio": 1.0000574588775635,
      "learning_rate": 5e-06,
      "loss": -0.0005,
      "mismatch_kl": 0.00012681909720413387,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 47,
      "timing/generation_ms": 251731.6782604903,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 251731.6782604903,
      "tokens/completion": 6120.80078125,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 504.8533480167389
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.012482496909797192,
      "epoch": 0.064,
      "grad_norm": 0.0008089181923655795,
      "importance_ratio": 1.0000419616699219,
      "learning_rate": 5e-06,
      "loss": 0.003,
      "mismatch_kl": 0.00024501114967279136,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 48,
      "timing/generation_ms": 260055.6455301121,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 260055.6455301121,
      "tokens/completion": 6038.9921875,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 520.350103855133
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.008223201148211956,
      "epoch": 0.06533333333333333,
      "grad_norm": 0.0005775216775221585,
      "importance_ratio": 1.0000702142715454,
      "learning_rate": 5e-06,
      "loss": -0.0,
      "mismatch_kl": 0.0001139239757321775,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 49,
      "timing/generation_ms": 262634.82890836895,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 262634.82890836895,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 694.4226834774017
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.006501559168100357,
      "epoch": 0.06666666666666667,
      "grad_norm": 0.0004452247469025534,
      "importance_ratio": 1.000080943107605,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "mismatch_kl": 0.00019989976135548204,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 50,
      "timing/generation_ms": 252373.39910119772,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 252373.39910119772,
      "tokens/completion": 6144.0,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 633.9480290412903
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.011557838879525661,
      "epoch": 0.068,
      "grad_norm": 0.00040538021426552616,
      "importance_ratio": 1.0000510215759277,
      "learning_rate": 5e-06,
      "loss": 0.0163,
      "mismatch_kl": 0.00014912446204107255,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 51,
      "timing/generation_ms": 231235.03853101283,
      "timing/scoring_ms": 0.0,
      "timing/total_ms": 231235.03853101283,
      "tokens/completion": 5880.91015625,
      "tokens/masked_fraction": 0.0,
      "wall_clock/generate_s": 553.8161387443542
    },
    {
      "advantage/absmean": 0.12451171875,
      "entropy": 0.008280275389552116,
      "epoch": 0.06933333333333333,
      "grad_norm": 0.0006837160840031847,
      "importance_ratio": 1.0000361204147339,
      "learning_rate": 5e-06,
      "loss": -0.0009,
      "mismatch_kl": 0.00011032609472749755,
      "reward": 0.12451171875,
      "reward/std": 0.1738164722919464,
      "step": 52,
      "timing/generation_ms": 268335.500174202,
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 268335.500174202, | |
| "tokens/completion": 6076.33984375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 532.5728721618652 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.008777043782174587, | |
| "epoch": 0.07066666666666667, | |
| "grad_norm": 0.00047547446087476704, | |
| "importance_ratio": 1.0000946521759033, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0, | |
| "mismatch_kl": 0.0001269574131583795, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 53, | |
| "timing/generation_ms": 256683.97525977343, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 256683.97525977343, | |
| "tokens/completion": 6144.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 506.92905497550964 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.011497734114527702, | |
| "epoch": 0.072, | |
| "grad_norm": 0.00027828097052508087, | |
| "importance_ratio": 1.000109076499939, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0042, | |
| "mismatch_kl": 0.00013832931290380657, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 54, | |
| "timing/generation_ms": 245946.20873313397, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 245946.20873313397, | |
| "tokens/completion": 6032.51953125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 505.11912751197815 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.010809739120304585, | |
| "epoch": 0.07333333333333333, | |
| "grad_norm": 0.0007032954488382401, | |
| "importance_ratio": 1.0000889301300049, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "mismatch_kl": 0.00015762390103191137, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 55, | |
| "timing/generation_ms": 264072.7631729096, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 264072.7631729096, | |
| "tokens/completion": 6144.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 523.6702523231506 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.009559578262269497, | |
| "epoch": 0.07466666666666667, | |
| "grad_norm": 0.0010708393934808242, | |
| "importance_ratio": 1.0000908374786377, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0051, | |
| "mismatch_kl": 0.00013747472257819027, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 56, | |
| "timing/generation_ms": 250449.08253196627, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 250449.08253196627, | |
| "tokens/completion": 6098.72265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 556.8832399845123 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.011213499121367931, | |
| "epoch": 0.076, | |
| "grad_norm": 0.00044938202555849837, | |
| "importance_ratio": 1.0000908374786377, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "mismatch_kl": 0.00015059650468174368, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 57, | |
| "timing/generation_ms": 263455.5452140048, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 263455.5452140048, | |
| "tokens/completion": 6144.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 632.40900182724 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.005081878509372473, | |
| "epoch": 0.07733333333333334, | |
| "grad_norm": 0.0003246328757380694, | |
| "importance_ratio": 1.0000656843185425, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "mismatch_kl": 0.00019094608433078974, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 58, | |
| "timing/generation_ms": 256806.45045358688, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 256806.45045358688, | |
| "tokens/completion": 6144.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 503.00778365135193 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.01805613562464714, | |
| "epoch": 0.07866666666666666, | |
| "grad_norm": 0.0007634702119519025, | |
| "importance_ratio": 1.0001803636550903, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0025, | |
| "mismatch_kl": 0.00021581076725851744, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 59, | |
| "timing/generation_ms": 254470.52423935384, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 254470.52423935384, | |
| "tokens/completion": 6079.921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 499.350706577301 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.047813381999731064, | |
| "epoch": 0.08, | |
| "grad_norm": 0.0034811244478141165, | |
| "importance_ratio": 1.0005850791931152, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0385, | |
| "mismatch_kl": 0.0006162600475363433, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 60, | |
| "timing/generation_ms": 122059.79803204536, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 122059.79803204536, | |
| "tokens/completion": 4056.4140625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 406.85777831077576 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.006517002824693918, | |
| "epoch": 0.08133333333333333, | |
| "grad_norm": 0.00045405486723584484, | |
| "importance_ratio": 1.0000643730163574, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "mismatch_kl": 8.087344031082466e-05, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 61, | |
| "timing/generation_ms": 262080.00864181668, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 262080.00864181668, | |
| "tokens/completion": 6144.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 513.6219637393951 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.0059960088692605495, | |
| "epoch": 0.08266666666666667, | |
| "grad_norm": 0.0003004741817689029, | |
| "importance_ratio": 1.0000419616699219, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "mismatch_kl": 7.99954796093516e-05, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 62, | |
| "timing/generation_ms": 261857.35533758998, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 261857.35533758998, | |
| "tokens/completion": 6144.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 537.6526563167572 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.012267248705029488, | |
| "epoch": 0.084, | |
| "grad_norm": 0.0018275298082432536, | |
| "importance_ratio": 1.0001516342163086, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0273, | |
| "mismatch_kl": 0.00015860867279116064, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 63, | |
| "timing/generation_ms": 223553.63579373807, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 223553.63579373807, | |
| "tokens/completion": 5578.8046875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 449.565260887146 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.017613664269447327, | |
| "epoch": 0.08533333333333333, | |
| "grad_norm": 0.0013137454797814432, | |
| "importance_ratio": 1.0001808404922485, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0296, | |
| "mismatch_kl": 0.00018238124903291464, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 64, | |
| "timing/generation_ms": 197715.4450826347, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 197715.4450826347, | |
| "tokens/completion": 5301.74609375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 467.5368883609772 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.012099393643438816, | |
| "epoch": 0.08666666666666667, | |
| "grad_norm": 0.00029163323031709923, | |
| "importance_ratio": 1.0000910758972168, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0101, | |
| "mismatch_kl": 0.0001367869263049215, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 65, | |
| "timing/generation_ms": 253292.40265209228, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 253292.40265209228, | |
| "tokens/completion": 5987.40234375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 504.62310814857483 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.016472794115543365, | |
| "epoch": 0.088, | |
| "grad_norm": 0.000537146473230196, | |
| "importance_ratio": 1.0002104043960571, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0046, | |
| "mismatch_kl": 0.00019632629118859768, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 66, | |
| "timing/generation_ms": 244626.61108747125, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 244626.61108747125, | |
| "tokens/completion": 5880.29296875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 546.9820070266724 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.03573580086231232, | |
| "epoch": 0.08933333333333333, | |
| "grad_norm": 0.0018214337047260279, | |
| "importance_ratio": 1.0006996393203735, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0366, | |
| "mismatch_kl": 0.0005711132544092834, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 67, | |
| "timing/generation_ms": 171141.10032841563, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 171141.10032841563, | |
| "tokens/completion": 4912.99609375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 387.35487270355225 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.024245120584964752, | |
| "epoch": 0.09066666666666667, | |
| "grad_norm": 0.0007171125744050383, | |
| "importance_ratio": 1.0004810094833374, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0327, | |
| "mismatch_kl": 0.0003458830469753593, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 68, | |
| "timing/generation_ms": 175763.37515283376, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 175763.37515283376, | |
| "tokens/completion": 5039.39453125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 399.21359062194824 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.025269493460655212, | |
| "epoch": 0.092, | |
| "grad_norm": 0.0004443143666122359, | |
| "importance_ratio": 1.000417947769165, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0151, | |
| "mismatch_kl": 0.000321421044645831, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 69, | |
| "timing/generation_ms": 250666.16093274206, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 250666.16093274206, | |
| "tokens/completion": 5965.16796875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 632.227735042572 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.024551477283239365, | |
| "epoch": 0.09333333333333334, | |
| "grad_norm": 0.0015252781439401258, | |
| "importance_ratio": 1.0006314516067505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0348, | |
| "mismatch_kl": 0.0005003436817787588, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 70, | |
| "timing/generation_ms": 191529.1232522577, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 191529.1232522577, | |
| "tokens/completion": 5294.87890625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 407.7219111919403 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.02018953487277031, | |
| "epoch": 0.09466666666666666, | |
| "grad_norm": 0.0011570903491081794, | |
| "importance_ratio": 1.0002988576889038, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0237, | |
| "mismatch_kl": 0.00033742599771358073, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 71, | |
| "timing/generation_ms": 210619.99121960253, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 210619.99121960253, | |
| "tokens/completion": 5332.65625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 494.4582040309906 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.08414055407047272, | |
| "epoch": 0.096, | |
| "grad_norm": 0.005691985408928669, | |
| "importance_ratio": 1.002629280090332, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0631, | |
| "mismatch_kl": 0.0030276263132691383, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 72, | |
| "timing/generation_ms": 26491.081130690873, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 26491.081130690873, | |
| "tokens/completion": 1684.4921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 175.0816376209259 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.08044799417257309, | |
| "epoch": 0.09733333333333333, | |
| "grad_norm": 0.0067108539111987095, | |
| "importance_ratio": 1.0022099018096924, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0512, | |
| "mismatch_kl": 0.0033263727091252804, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 73, | |
| "timing/generation_ms": 26663.206906057894, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 26663.206906057894, | |
| "tokens/completion": 1624.47265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 179.0183322429657 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.14499743282794952, | |
| "epoch": 0.09866666666666667, | |
| "grad_norm": 0.010377228969329702, | |
| "importance_ratio": 1.0045510530471802, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0301, | |
| "mismatch_kl": 0.03058871254324913, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 74, | |
| "timing/generation_ms": 11363.965434022248, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 11363.965434022248, | |
| "tokens/completion": 733.40234375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 162.93997645378113 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.15485742688179016, | |
| "epoch": 0.1, | |
| "grad_norm": 0.037501291580980145, | |
| "importance_ratio": 1.0262236595153809, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0478, | |
| "mismatch_kl": 0.5780022144317627, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 75, | |
| "timing/generation_ms": 31973.80775306374, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 31973.80775306374, | |
| "tokens/completion": 1854.69921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 206.36020827293396 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2712324857711792, | |
| "epoch": 0.10133333333333333, | |
| "grad_norm": 0.021496155900656944, | |
| "importance_ratio": 0.747008204460144, | |
| "learning_rate": 5e-06, | |
| "loss": -0.001, | |
| "mismatch_kl": 4.077150344848633, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 76, | |
| "timing/generation_ms": 19520.673436112702, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 19520.673436112702, | |
| "tokens/completion": 1019.1015625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 104.34236979484558 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3253353238105774, | |
| "epoch": 0.10266666666666667, | |
| "grad_norm": 0.014127787785753907, | |
| "importance_ratio": 0.5209717154502869, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0074, | |
| "mismatch_kl": 11.41779899597168, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 77, | |
| "timing/generation_ms": 33620.65821047872, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 33620.65821047872, | |
| "tokens/completion": 1925.72265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 257.44123911857605 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3993019163608551, | |
| "epoch": 0.104, | |
| "grad_norm": 0.009151033649610016, | |
| "importance_ratio": 0.29432952404022217, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0157, | |
| "mismatch_kl": 11.372162818908691, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 78, | |
| "timing/generation_ms": 11082.484270446002, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 11082.484270446002, | |
| "tokens/completion": 828.0546875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 115.73264193534851 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.1168494001030922, | |
| "epoch": 0.10533333333333333, | |
| "grad_norm": 0.006117265962728229, | |
| "importance_ratio": 0.1935732513666153, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0017, | |
| "mismatch_kl": 21.00209617614746, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 79, | |
| "timing/generation_ms": 36345.630533993244, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 36345.630533993244, | |
| "tokens/completion": 2084.80859375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 232.0772545337677 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4559866189956665, | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 0.02899073922789841, | |
| "importance_ratio": 0.9647712111473083, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0109, | |
| "mismatch_kl": 0.1562381535768509, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 80, | |
| "timing/generation_ms": 3708.529833704233, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 3708.529833704233, | |
| "tokens/completion": 172.21484375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 79.40927720069885 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.18381687998771667, | |
| "epoch": 0.108, | |
| "grad_norm": 0.03870938318729351, | |
| "importance_ratio": 0.9867123365402222, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0003, | |
| "mismatch_kl": 0.09630821645259857, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 81, | |
| "timing/generation_ms": 7641.556458547711, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 7641.556458547711, | |
| "tokens/completion": 342.55078125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 28.48853635787964 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.28385868668556213, | |
| "epoch": 0.10933333333333334, | |
| "grad_norm": 0.024463462093216322, | |
| "importance_ratio": 0.9961410760879517, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0027, | |
| "mismatch_kl": 0.046350929886102676, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 82, | |
| "timing/generation_ms": 14151.478135958314, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14151.478135958314, | |
| "tokens/completion": 640.5703125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 51.07678151130676 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.25215646624565125, | |
| "epoch": 0.11066666666666666, | |
| "grad_norm": 0.025956305888591907, | |
| "importance_ratio": 0.9893953204154968, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0024, | |
| "mismatch_kl": 0.06097816303372383, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 83, | |
| "timing/generation_ms": 9802.852495573461, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 9802.852495573461, | |
| "tokens/completion": 486.23828125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 135.5597288608551 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.18832416832447052, | |
| "epoch": 0.112, | |
| "grad_norm": 0.05162272724580775, | |
| "importance_ratio": 0.9795369505882263, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0063, | |
| "mismatch_kl": 0.09001336991786957, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 84, | |
| "timing/generation_ms": 8744.545813649893, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 8744.545813649893, | |
| "tokens/completion": 422.9921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 202.02377605438232 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.20757851004600525, | |
| "epoch": 0.11333333333333333, | |
| "grad_norm": 0.029849605436009424, | |
| "importance_ratio": 0.9847032427787781, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0003, | |
| "mismatch_kl": 0.08596009016036987, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 85, | |
| "timing/generation_ms": 6916.043497622013, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 6916.043497622013, | |
| "tokens/completion": 315.65625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 26.646199941635132 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.19341044127941132, | |
| "epoch": 0.11466666666666667, | |
| "grad_norm": 0.023761811444065736, | |
| "importance_ratio": 0.9906992316246033, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0037, | |
| "mismatch_kl": 0.04626338183879852, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 86, | |
| "timing/generation_ms": 10513.352582231164, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10513.352582231164, | |
| "tokens/completion": 565.625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 43.092281341552734 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2988993227481842, | |
| "epoch": 0.116, | |
| "grad_norm": 0.08410779443510906, | |
| "importance_ratio": 0.9899005889892578, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0182, | |
| "mismatch_kl": 0.048949241638183594, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 87, | |
| "timing/generation_ms": 6337.426606565714, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 6337.426606565714, | |
| "tokens/completion": 288.53125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 29.87082028388977 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24379415810108185, | |
| "epoch": 0.11733333333333333, | |
| "grad_norm": 0.033951546211805725, | |
| "importance_ratio": 0.9842061996459961, | |
| "learning_rate": 5e-06, | |
| "loss": -0.001, | |
| "mismatch_kl": 0.05609630420804024, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 88, | |
| "timing/generation_ms": 12948.228243738413, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12948.228243738413, | |
| "tokens/completion": 572.8359375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 158.39017939567566 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.46492651104927063, | |
| "epoch": 0.11866666666666667, | |
| "grad_norm": 0.05385249484621595, | |
| "importance_ratio": 0.9755511283874512, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0005, | |
| "mismatch_kl": 0.16615039110183716, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 89, | |
| "timing/generation_ms": 11146.457904949784, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 11146.457904949784, | |
| "tokens/completion": 531.22265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 125.18756413459778 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.26703542470932007, | |
| "epoch": 0.12, | |
| "grad_norm": 0.02367206113805114, | |
| "importance_ratio": 0.9910291433334351, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0072, | |
| "mismatch_kl": 0.041237972676754, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 90, | |
| "timing/generation_ms": 12296.578384935856, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12296.578384935856, | |
| "tokens/completion": 619.4375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 123.89916157722473 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.5690855979919434, | |
| "epoch": 0.12133333333333333, | |
| "grad_norm": 0.030434949636985786, | |
| "importance_ratio": 0.9436249136924744, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0044, | |
| "mismatch_kl": 0.4027661979198456, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 91, | |
| "timing/generation_ms": 17300.37511046976, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 17300.37511046976, | |
| "tokens/completion": 803.75, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 68.73723077774048 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2795153260231018, | |
| "epoch": 0.12266666666666666, | |
| "grad_norm": 0.033606081779905164, | |
| "importance_ratio": 0.9910190105438232, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0021, | |
| "mismatch_kl": 0.048360757529735565, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 92, | |
| "timing/generation_ms": 10146.174241788685, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10146.174241788685, | |
| "tokens/completion": 409.20703125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 115.50342917442322 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.25254565477371216, | |
| "epoch": 0.124, | |
| "grad_norm": 0.02170917112603325, | |
| "importance_ratio": 0.9928799867630005, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0035, | |
| "mismatch_kl": 0.03083646297454834, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 93, | |
| "timing/generation_ms": 14904.53880932182, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14904.53880932182, | |
| "tokens/completion": 689.578125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 136.12913012504578 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.25149497389793396, | |
| "epoch": 0.12533333333333332, | |
| "grad_norm": 0.049807356598740776, | |
| "importance_ratio": 0.990451455116272, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0058, | |
| "mismatch_kl": 0.03808113560080528, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 94, | |
| "timing/generation_ms": 8459.820285439491, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 8459.820285439491, | |
| "tokens/completion": 413.421875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 34.11598253250122 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.21991755068302155, | |
| "epoch": 0.12666666666666668, | |
| "grad_norm": 0.02577498970131942, | |
| "importance_ratio": 0.9890254139900208, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0012, | |
| "mismatch_kl": 0.05755931884050369, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 95, | |
| "timing/generation_ms": 5316.206902265549, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 5316.206902265549, | |
| "tokens/completion": 254.72265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 129.7372589111328 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4472619593143463, | |
| "epoch": 0.128, | |
| "grad_norm": 0.040975406412791814, | |
| "importance_ratio": 0.9842396378517151, | |
| "learning_rate": 5e-06, | |
| "loss": -0.003, | |
| "mismatch_kl": 0.14270469546318054, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 96, | |
| "timing/generation_ms": 6448.528000153601, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 6448.528000153601, | |
| "tokens/completion": 303.2421875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 152.90578722953796 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24905133247375488, | |
| "epoch": 0.12933333333333333, | |
| "grad_norm": 0.0336787422018486, | |
| "importance_ratio": 0.9942489862442017, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0073, | |
| "mismatch_kl": 0.03845536336302757, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 97, | |
| "timing/generation_ms": 10672.863409854472, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10672.863409854472, | |
| "tokens/completion": 522.453125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 58.958009481430054 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.37947529554367065, | |
| "epoch": 0.13066666666666665, | |
| "grad_norm": 0.03256153448253783, | |
| "importance_ratio": 0.9943234324455261, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0033, | |
| "mismatch_kl": 0.0457632839679718, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 98, | |
| "timing/generation_ms": 7797.16813378036, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 7797.16813378036, | |
| "tokens/completion": 321.6484375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 57.01115918159485 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3295568525791168, | |
| "epoch": 0.132, | |
| "grad_norm": 0.025070691541196687, | |
| "importance_ratio": 0.9886187314987183, | |
| "learning_rate": 5e-06, | |
| "loss": 0.002, | |
| "mismatch_kl": 0.055542413145303726, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 99, | |
| "timing/generation_ms": 12520.016725175083, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12520.016725175083, | |
| "tokens/completion": 560.515625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 134.89474534988403 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3819415867328644, | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 0.029430906337480585, | |
| "importance_ratio": 0.9973994493484497, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0014, | |
| "mismatch_kl": 0.03809521347284317, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 100, | |
| "timing/generation_ms": 7522.873256355524, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 7522.873256355524, | |
| "tokens/completion": 381.24609375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 42.47270226478577 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3258141577243805, | |
| "epoch": 0.13466666666666666, | |
| "grad_norm": 0.06302493851707891, | |
| "importance_ratio": 0.995746374130249, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0032, | |
| "mismatch_kl": 0.05126583203673363, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 101, | |
| "timing/generation_ms": 6897.25607726723, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 6897.25607726723, | |
| "tokens/completion": 331.53515625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 101.3789484500885 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.8970124125480652, | |
| "epoch": 0.136, | |
| "grad_norm": 0.03515811902568956, | |
| "importance_ratio": 0.8364270925521851, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0067, | |
| "mismatch_kl": 1.5947057008743286, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 102, | |
| "timing/generation_ms": 12960.892278701067, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12960.892278701067, | |
| "tokens/completion": 679.25390625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 92.91760039329529 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2645859122276306, | |
| "epoch": 0.13733333333333334, | |
| "grad_norm": 0.03015986556668391, | |
| "importance_ratio": 0.9922869205474854, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0033, | |
| "mismatch_kl": 0.032752275466918945, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 103, | |
| "timing/generation_ms": 12081.96578361094, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12081.96578361094, | |
| "tokens/completion": 635.26171875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 221.86856937408447 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.40493857860565186, | |
| "epoch": 0.13866666666666666, | |
| "grad_norm": 0.029340951142688608, | |
| "importance_ratio": 0.9976834058761597, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0075, | |
| "mismatch_kl": 0.039802681654691696, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 104, | |
| "timing/generation_ms": 8452.124254778028, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 8452.124254778028, | |
| "tokens/completion": 392.85546875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 78.09920930862427 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.42381417751312256, | |
| "epoch": 0.14, | |
| "grad_norm": 0.03251134797029109, | |
| "importance_ratio": 0.9939345121383667, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0025, | |
| "mismatch_kl": 0.045791786164045334, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 105, | |
| "timing/generation_ms": 11178.499449044466, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 11178.499449044466, | |
| "tokens/completion": 480.08984375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 36.62562108039856 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2739037871360779, | |
| "epoch": 0.14133333333333334, | |
| "grad_norm": 0.0476499263024248, | |
| "importance_ratio": 0.9929625988006592, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0024, | |
| "mismatch_kl": 0.036298882216215134, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 106, | |
| "timing/generation_ms": 10698.151003569365, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10698.151003569365, | |
| "tokens/completion": 521.33203125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 130.2317771911621 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2831694781780243, | |
| "epoch": 0.14266666666666666, | |
| "grad_norm": 0.048559683162439526, | |
| "importance_ratio": 0.9895249605178833, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0018, | |
| "mismatch_kl": 0.04853809252381325, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 107, | |
| "timing/generation_ms": 10670.390761457384, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10670.390761457384, | |
| "tokens/completion": 504.16015625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 154.62130737304688 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4273696541786194, | |
| "epoch": 0.144, | |
| "grad_norm": 0.04246003800252577, | |
| "importance_ratio": 0.9897579550743103, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0004, | |
| "mismatch_kl": 0.05487997457385063, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 108, | |
| "timing/generation_ms": 5255.264617502689, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 5255.264617502689, | |
| "tokens/completion": 253.4296875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 62.357131004333496 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3309624195098877, | |
| "epoch": 0.14533333333333334, | |
| "grad_norm": 0.020612894864024223, | |
| "importance_ratio": 0.994171679019928, | |
| "learning_rate": 5e-06, | |
| "loss": 0.004, | |
| "mismatch_kl": 0.028750188648700714, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 109, | |
| "timing/generation_ms": 17462.82579470426, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 17462.82579470426, | |
| "tokens/completion": 909.28515625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 84.52479147911072 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.45720767974853516, | |
| "epoch": 0.14666666666666667, | |
| "grad_norm": 0.048825755999723545, | |
| "importance_ratio": 0.9917762279510498, | |
| "learning_rate": 5e-06, | |
| "loss": -0.003, | |
| "mismatch_kl": 0.03884867951273918, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 110, | |
| "timing/generation_ms": 10527.64255553484, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10527.64255553484, | |
| "tokens/completion": 457.21875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 118.98395490646362 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3448692262172699, | |
| "epoch": 0.148, | |
| "grad_norm": 0.02391536511668303, | |
| "importance_ratio": 0.9938703775405884, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0118, | |
| "mismatch_kl": 0.03092486597597599, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 111, | |
| "timing/generation_ms": 11426.006315276027, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 11426.006315276027, | |
| "tokens/completion": 603.828125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 44.38506889343262 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4210182726383209, | |
| "epoch": 0.14933333333333335, | |
| "grad_norm": 0.017744426750614804, | |
| "importance_ratio": 0.9841266870498657, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0031, | |
| "mismatch_kl": 0.15376684069633484, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 112, | |
| "timing/generation_ms": 15345.524672418833, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 15345.524672418833, | |
| "tokens/completion": 679.61328125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 77.3697247505188 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3890233039855957, | |
| "epoch": 0.15066666666666667, | |
| "grad_norm": 0.042319164028374844, | |
| "importance_ratio": 0.9905653595924377, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0067, | |
| "mismatch_kl": 0.03776917979121208, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 113, | |
| "timing/generation_ms": 8361.73670180142, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 8361.73670180142, | |
| "tokens/completion": 386.69921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 33.98000693321228 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3506433963775635, | |
| "epoch": 0.152, | |
| "grad_norm": 0.022347419652582003, | |
| "importance_ratio": 0.9932938814163208, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0024, | |
| "mismatch_kl": 0.03900053724646568, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 114, | |
| "timing/generation_ms": 10107.008518651128, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10107.008518651128, | |
| "tokens/completion": 531.8671875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 156.0705955028534 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.28853052854537964, | |
| "epoch": 0.15333333333333332, | |
| "grad_norm": 0.02467560875646059, | |
| "importance_ratio": 0.9956313967704773, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0077, | |
| "mismatch_kl": 0.021128181368112564, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 115, | |
| "timing/generation_ms": 13438.352120108902, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 13438.352120108902, | |
| "tokens/completion": 638.3359375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 51.55745196342468 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.5352842807769775, | |
| "epoch": 0.15466666666666667, | |
| "grad_norm": 0.0500581678773726, | |
| "importance_ratio": 0.9921436905860901, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0035, | |
| "mismatch_kl": 0.0745246633887291, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 116, | |
| "timing/generation_ms": 6379.514851607382, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 6379.514851607382, | |
| "tokens/completion": 304.5625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 38.366251945495605 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.36106666922569275, | |
| "epoch": 0.156, | |
| "grad_norm": 0.063234851546128, | |
| "importance_ratio": 0.9977811574935913, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0007, | |
| "mismatch_kl": 0.029981082305312157, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 117, | |
| "timing/generation_ms": 7269.031744450331, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 7269.031744450331, | |
| "tokens/completion": 359.06640625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 27.440030097961426 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3367100656032562, | |
| "epoch": 0.15733333333333333, | |
| "grad_norm": 0.059808565066134974, | |
| "importance_ratio": 0.988777220249176, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0044, | |
| "mismatch_kl": 0.044747766107320786, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 118, | |
| "timing/generation_ms": 9969.640973955393, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 9969.640973955393, | |
| "tokens/completion": 485.625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 82.32884860038757 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.35986092686653137, | |
| "epoch": 0.15866666666666668, | |
| "grad_norm": 0.020285418443392603, | |
| "importance_ratio": 0.9924752116203308, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0042, | |
| "mismatch_kl": 0.031399309635162354, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 119, | |
| "timing/generation_ms": 15499.55965206027, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 15499.55965206027, | |
| "tokens/completion": 796.76171875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 56.515456199645996 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.40867432951927185, | |
| "epoch": 0.16, | |
| "grad_norm": 0.018892048843934344, | |
| "importance_ratio": 0.9954840540885925, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0094, | |
| "mismatch_kl": 0.030410781502723694, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 120, | |
| "timing/generation_ms": 13046.93166166544, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 13046.93166166544, | |
| "tokens/completion": 672.06640625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 51.22301483154297 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.25322413444519043, | |
| "epoch": 0.16133333333333333, | |
| "grad_norm": 0.019402594506856746, | |
| "importance_ratio": 0.9968504309654236, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0018, | |
| "mismatch_kl": 0.020855166018009186, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 121, | |
| "timing/generation_ms": 33212.274321354926, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 33212.274321354926, | |
| "tokens/completion": 1494.39453125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 196.6885223388672 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3203243613243103, | |
| "epoch": 0.16266666666666665, | |
| "grad_norm": 0.016032102577421704, | |
| "importance_ratio": 0.9980469942092896, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0013, | |
| "mismatch_kl": 0.01909617707133293, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 122, | |
| "timing/generation_ms": 21461.640139110386, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 21461.640139110386, | |
| "tokens/completion": 1059.1953125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 84.59676575660706 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.5010811686515808, | |
| "epoch": 0.164, | |
| "grad_norm": 0.02848759848639813, | |
| "importance_ratio": 1.0016131401062012, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0097, | |
| "mismatch_kl": 0.02760869450867176, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 123, | |
| "timing/generation_ms": 9319.45723388344, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 9319.45723388344, | |
| "tokens/completion": 433.1015625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 36.64540505409241 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4700590968132019, | |
| "epoch": 0.16533333333333333, | |
| "grad_norm": 0.025031920446653932, | |
| "importance_ratio": 0.9973174929618835, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0072, | |
| "mismatch_kl": 0.03977029770612717, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 124, | |
| "timing/generation_ms": 9967.066356912255, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 9967.066356912255, | |
| "tokens/completion": 478.1328125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 137.7500193119049 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4410494863986969, | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 0.02102977498791798, | |
| "importance_ratio": 0.9927030801773071, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0044, | |
| "mismatch_kl": 0.05027690902352333, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 125, | |
| "timing/generation_ms": 13226.7307927832, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 13226.7307927832, | |
| "tokens/completion": 666.65234375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 119.67769002914429 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2941017150878906, | |
| "epoch": 0.168, | |
| "grad_norm": 0.01764622195762912, | |
| "importance_ratio": 0.9970736503601074, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0039, | |
| "mismatch_kl": 0.025975050404667854, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 126, | |
| "timing/generation_ms": 30093.59989501536, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 30093.59989501536, | |
| "tokens/completion": 1403.23046875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 111.32783484458923 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.37822288274765015, | |
| "epoch": 0.16933333333333334, | |
| "grad_norm": 0.03205413439415866, | |
| "importance_ratio": 0.9921689629554749, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0015, | |
| "mismatch_kl": 0.10021175444126129, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 127, | |
| "timing/generation_ms": 25918.55046711862, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 25918.55046711862, | |
| "tokens/completion": 1132.37890625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 101.07530164718628 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.46506795287132263, | |
| "epoch": 0.17066666666666666, | |
| "grad_norm": 0.026459518059964743, | |
| "importance_ratio": 0.995638906955719, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0065, | |
| "mismatch_kl": 0.03533043712377548, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 128, | |
| "timing/generation_ms": 8870.356048457325, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 8870.356048457325, | |
| "tokens/completion": 477.8046875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 41.62081718444824 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4616319537162781, | |
| "epoch": 0.172, | |
| "grad_norm": 0.029689428333274717, | |
| "importance_ratio": 0.992743194103241, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0116, | |
| "mismatch_kl": 0.043640002608299255, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 129, | |
| "timing/generation_ms": 17582.845278084278, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 17582.845278084278, | |
| "tokens/completion": 896.60546875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 103.23663401603699 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.318230539560318, | |
| "epoch": 0.17333333333333334, | |
| "grad_norm": 0.021848886677287266, | |
| "importance_ratio": 1.0002652406692505, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0028, | |
| "mismatch_kl": 0.032250385731458664, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 130, | |
| "timing/generation_ms": 12501.79857108742, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12501.79857108742, | |
| "tokens/completion": 636.82421875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 46.11354732513428 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2867668569087982, | |
| "epoch": 0.17466666666666666, | |
| "grad_norm": 0.0152850963716213, | |
| "importance_ratio": 0.9975439310073853, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0004, | |
| "mismatch_kl": 0.03095307946205139, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 131, | |
| "timing/generation_ms": 21872.447106055915, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 21872.447106055915, | |
| "tokens/completion": 1016.09765625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 75.5360016822815 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.6940531134605408, | |
| "epoch": 0.176, | |
| "grad_norm": 0.027620607135447624, | |
| "importance_ratio": 0.9872549176216125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0013, | |
| "mismatch_kl": 0.14033383131027222, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 132, | |
| "timing/generation_ms": 11405.475388281047, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 11405.475388281047, | |
| "tokens/completion": 487.51953125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 55.63127040863037 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.36297503113746643, | |
| "epoch": 0.17733333333333334, | |
| "grad_norm": 0.029171908888413572, | |
| "importance_ratio": 0.9953750967979431, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0051, | |
| "mismatch_kl": 0.035398464649915695, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 133, | |
| "timing/generation_ms": 17919.221241027117, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 17919.221241027117, | |
| "tokens/completion": 900.453125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 170.36363244056702 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2939022481441498, | |
| "epoch": 0.17866666666666667, | |
| "grad_norm": 0.02565678896444847, | |
| "importance_ratio": 0.99770587682724, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0013, | |
| "mismatch_kl": 0.019702836871147156, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 134, | |
| "timing/generation_ms": 26027.854280546308, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 26027.854280546308, | |
| "tokens/completion": 1189.94921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 208.00876903533936 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.27582186460494995, | |
| "epoch": 0.18, | |
| "grad_norm": 0.025995432419046362, | |
| "importance_ratio": 0.9993173480033875, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0001, | |
| "mismatch_kl": 0.023949675261974335, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 135, | |
| "timing/generation_ms": 19027.399071492255, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 19027.399071492255, | |
| "tokens/completion": 910.98828125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 69.73441195487976 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.31243762373924255, | |
| "epoch": 0.18133333333333335, | |
| "grad_norm": 0.021978924242567442, | |
| "importance_ratio": 0.9992286562919617, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0016, | |
| "mismatch_kl": 0.024040305987000465, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 136, | |
| "timing/generation_ms": 14330.211003310978, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14330.211003310978, | |
| "tokens/completion": 671.7265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 53.44596743583679 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24772067368030548, | |
| "epoch": 0.18266666666666667, | |
| "grad_norm": 0.022707662268209423, | |
| "importance_ratio": 0.9990280866622925, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0023, | |
| "mismatch_kl": 0.022532925009727478, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 137, | |
| "timing/generation_ms": 35249.2256751284, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 35249.2256751284, | |
| "tokens/completion": 1598.390625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 216.32258987426758 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.38041970133781433, | |
| "epoch": 0.184, | |
| "grad_norm": 0.046110003811864524, | |
| "importance_ratio": 0.9846709370613098, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0024, | |
| "mismatch_kl": 0.1807573288679123, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 138, | |
| "timing/generation_ms": 10808.89296438545, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 10808.89296438545, | |
| "tokens/completion": 505.0625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 72.23299145698547 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3362736403942108, | |
| "epoch": 0.18533333333333332, | |
| "grad_norm": 0.057037876570506886, | |
| "importance_ratio": 0.9923868179321289, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0033, | |
| "mismatch_kl": 0.0626266598701477, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 139, | |
| "timing/generation_ms": 8591.852098703384, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 8591.852098703384, | |
| "tokens/completion": 445.6875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 83.33036708831787 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2598806321620941, | |
| "epoch": 0.18666666666666668, | |
| "grad_norm": 0.021433898880701664, | |
| "importance_ratio": 0.9913464784622192, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0022, | |
| "mismatch_kl": 0.04193839803338051, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 140, | |
| "timing/generation_ms": 22836.472398601472, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 22836.472398601472, | |
| "tokens/completion": 1069.79296875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 123.7300488948822 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.27669745683670044, | |
| "epoch": 0.188, | |
| "grad_norm": 0.040287051430344514, | |
| "importance_ratio": 0.9890030026435852, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0006, | |
| "mismatch_kl": 0.03683684393763542, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 141, | |
| "timing/generation_ms": 22967.52266585827, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 22967.52266585827, | |
| "tokens/completion": 1105.08203125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 119.94411706924438 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.32473960518836975, | |
| "epoch": 0.18933333333333333, | |
| "grad_norm": 0.03235428789871377, | |
| "importance_ratio": 0.9974983334541321, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0005, | |
| "mismatch_kl": 0.021878903731703758, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 142, | |
| "timing/generation_ms": 20083.584303036332, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 20083.584303036332, | |
| "tokens/completion": 1026.375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 182.45814514160156 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.32302016019821167, | |
| "epoch": 0.19066666666666668, | |
| "grad_norm": 0.02364068100843913, | |
| "importance_ratio": 1.000141978263855, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0026, | |
| "mismatch_kl": 0.027520477771759033, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 143, | |
| "timing/generation_ms": 13226.199164055288, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 13226.199164055288, | |
| "tokens/completion": 630.8828125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 68.72126913070679 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4063912034034729, | |
| "epoch": 0.192, | |
| "grad_norm": 0.016855205380348858, | |
| "importance_ratio": 0.9972877502441406, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0044, | |
| "mismatch_kl": 0.02402544766664505, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 144, | |
| "timing/generation_ms": 18624.562999233603, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 18624.562999233603, | |
| "tokens/completion": 916.34765625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 63.37579298019409 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3544447124004364, | |
| "epoch": 0.19333333333333333, | |
| "grad_norm": 0.03420133721717633, | |
| "importance_ratio": 0.9964665174484253, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0075, | |
| "mismatch_kl": 0.020806703716516495, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 145, | |
| "timing/generation_ms": 18501.724537461996, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 18501.724537461996, | |
| "tokens/completion": 914.03515625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 63.586211919784546 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.31913280487060547, | |
| "epoch": 0.19466666666666665, | |
| "grad_norm": 0.025814291552238237, | |
| "importance_ratio": 0.9976394176483154, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0017, | |
| "mismatch_kl": 0.02318250946700573, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 146, | |
| "timing/generation_ms": 17320.88227570057, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 17320.88227570057, | |
| "tokens/completion": 802.69921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 161.1075360774994 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3060760796070099, | |
| "epoch": 0.196, | |
| "grad_norm": 0.024041285955131858, | |
| "importance_ratio": 0.9983845353126526, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0044, | |
| "mismatch_kl": 0.021491888910531998, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 147, | |
| "timing/generation_ms": 20764.05915338546, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 20764.05915338546, | |
| "tokens/completion": 1029.03125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 80.10747575759888 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4907422661781311, | |
| "epoch": 0.19733333333333333, | |
| "grad_norm": 0.01969056173140591, | |
| "importance_ratio": 0.9921115040779114, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0019, | |
| "mismatch_kl": 0.09054939448833466, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 148, | |
| "timing/generation_ms": 14571.548252366483, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14571.548252366483, | |
| "tokens/completion": 646.578125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 94.1196072101593 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2544015944004059, | |
| "epoch": 0.19866666666666666, | |
| "grad_norm": 0.020070961466503938, | |
| "importance_ratio": 0.998515784740448, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0002, | |
| "mismatch_kl": 0.019744453951716423, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 149, | |
| "timing/generation_ms": 23987.087721936405, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 23987.087721936405, | |
| "tokens/completion": 1105.234375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 87.52198696136475 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.28248143196105957, | |
| "epoch": 0.2, | |
| "grad_norm": 0.0191634545508177, | |
| "importance_ratio": 0.9957163333892822, | |
| "learning_rate": 5e-06, | |
| "loss": -0.004, | |
| "mismatch_kl": 0.018821164965629578, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 150, | |
| "timing/generation_ms": 20559.32307895273, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 20559.32307895273, | |
| "tokens/completion": 1016.2265625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 131.24922895431519 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4225759208202362, | |
| "epoch": 0.20133333333333334, | |
| "grad_norm": 0.01854881603951969, | |
| "importance_ratio": 0.9962813854217529, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0017, | |
| "mismatch_kl": 0.025664212182164192, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 151, | |
| "timing/generation_ms": 22859.651166945696, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 22859.651166945696, | |
| "tokens/completion": 1112.96484375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 170.4989137649536 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2856869399547577, | |
| "epoch": 0.20266666666666666, | |
| "grad_norm": 0.018394448039889547, | |
| "importance_ratio": 0.9985631704330444, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0018, | |
| "mismatch_kl": 0.024066420271992683, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 152, | |
| "timing/generation_ms": 37744.059775955975, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 37744.059775955975, | |
| "tokens/completion": 1768.79296875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 248.44115471839905 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2514509856700897, | |
| "epoch": 0.204, | |
| "grad_norm": 0.023912470711877663, | |
| "importance_ratio": 0.9981127381324768, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0029, | |
| "mismatch_kl": 0.020759448409080505, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 153, | |
| "timing/generation_ms": 25985.86314264685, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 25985.86314264685, | |
| "tokens/completion": 1309.546875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 143.50284838676453 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.30002838373184204, | |
| "epoch": 0.20533333333333334, | |
| "grad_norm": 0.018497092206319014, | |
| "importance_ratio": 0.9994171857833862, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0022, | |
| "mismatch_kl": 0.015115631744265556, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 154, | |
| "timing/generation_ms": 20836.18642948568, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 20836.18642948568, | |
| "tokens/completion": 972.66796875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 112.54808211326599 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3808918297290802, | |
| "epoch": 0.20666666666666667, | |
| "grad_norm": 0.014750747901418159, | |
| "importance_ratio": 0.9998784065246582, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0023, | |
| "mismatch_kl": 0.0203760527074337, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 155, | |
| "timing/generation_ms": 28712.269487790763, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 28712.269487790763, | |
| "tokens/completion": 1384.42578125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 116.96515583992004 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.404234915971756, | |
| "epoch": 0.208, | |
| "grad_norm": 0.02774018143964054, | |
| "importance_ratio": 0.9903627038002014, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0022, | |
| "mismatch_kl": 0.09949617087841034, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 156, | |
| "timing/generation_ms": 15220.996337942779, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 15220.996337942779, | |
| "tokens/completion": 733.44921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 80.95505475997925 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2150656282901764, | |
| "epoch": 0.20933333333333334, | |
| "grad_norm": 0.012574265789504322, | |
| "importance_ratio": 0.9968655109405518, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0043, | |
| "mismatch_kl": 0.01895724982023239, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 157, | |
| "timing/generation_ms": 46771.82784862816, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 46771.82784862816, | |
| "tokens/completion": 2055.46875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 183.42079520225525 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2713158428668976, | |
| "epoch": 0.21066666666666667, | |
| "grad_norm": 0.03512934826143982, | |
| "importance_ratio": 0.9985222220420837, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0028, | |
| "mismatch_kl": 0.01624884642660618, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 158, | |
| "timing/generation_ms": 20947.266034781933, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 20947.266034781933, | |
| "tokens/completion": 1009.90234375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 87.24977517127991 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.32832008600234985, | |
| "epoch": 0.212, | |
| "grad_norm": 0.02405397079489038, | |
| "importance_ratio": 0.9991105198860168, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0056, | |
| "mismatch_kl": 0.016867484897375107, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 159, | |
| "timing/generation_ms": 21430.58088142425, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 21430.58088142425, | |
| "tokens/completion": 1012.43359375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 87.2035722732544 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.32067254185676575, | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 0.030583585605830663, | |
| "importance_ratio": 1.0010290145874023, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0029, | |
| "mismatch_kl": 0.01957845501601696, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 160, | |
| "timing/generation_ms": 12068.631175905466, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 12068.631175905466, | |
| "tokens/completion": 585.69921875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 46.4997832775116 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.20440350472927094, | |
| "epoch": 0.21466666666666667, | |
| "grad_norm": 0.009198384471964699, | |
| "importance_ratio": 0.9953656196594238, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0052, | |
| "mismatch_kl": 0.024851609021425247, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 161, | |
| "timing/generation_ms": 64061.363669112325, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 64061.363669112325, | |
| "tokens/completion": 2746.5390625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 252.9020836353302 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2289305031299591, | |
| "epoch": 0.216, | |
| "grad_norm": 0.017027620442399836, | |
| "importance_ratio": 0.9964645504951477, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0005, | |
| "mismatch_kl": 0.02016555331647396, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 162, | |
| "timing/generation_ms": 29072.1739763394, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 29072.1739763394, | |
| "tokens/completion": 1294.0546875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 187.8606402873993 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.23871932923793793, | |
| "epoch": 0.21733333333333332, | |
| "grad_norm": 0.026046585403665903, | |
| "importance_ratio": 0.998152494430542, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0052, | |
| "mismatch_kl": 0.016869615763425827, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 163, | |
| "timing/generation_ms": 33103.609337471426, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 33103.609337471426, | |
| "tokens/completion": 1545.50390625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 139.85770416259766 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.28158116340637207, | |
| "epoch": 0.21866666666666668, | |
| "grad_norm": 0.015259806348832568, | |
| "importance_ratio": 0.9982590079307556, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0053, | |
| "mismatch_kl": 0.022746765986084938, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 164, | |
| "timing/generation_ms": 26944.41274832934, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 26944.41274832934, | |
| "tokens/completion": 1337.65625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 109.10997653007507 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3031062185764313, | |
| "epoch": 0.22, | |
| "grad_norm": 0.016960115464425836, | |
| "importance_ratio": 0.9974260926246643, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0023, | |
| "mismatch_kl": 0.02418132871389389, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 165, | |
| "timing/generation_ms": 26665.55192042142, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 26665.55192042142, | |
| "tokens/completion": 1298.09765625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 233.19409203529358 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.30360692739486694, | |
| "epoch": 0.22133333333333333, | |
| "grad_norm": 0.03976443826488329, | |
| "importance_ratio": 0.9983341097831726, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0064, | |
| "mismatch_kl": 0.02314077690243721, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 166, | |
| "timing/generation_ms": 14128.881074488163, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14128.881074488163, | |
| "tokens/completion": 701.61328125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 55.524725914001465 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2678433656692505, | |
| "epoch": 0.22266666666666668, | |
| "grad_norm": 0.03342438517457818, | |
| "importance_ratio": 0.9922596216201782, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0023, | |
| "mismatch_kl": 0.035250429064035416, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 167, | |
| "timing/generation_ms": 21135.669719427824, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 21135.669719427824, | |
| "tokens/completion": 1019.171875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 149.8279891014099 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.20458683371543884, | |
| "epoch": 0.224, | |
| "grad_norm": 0.022088093083212943, | |
| "importance_ratio": 0.9954257011413574, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0018, | |
| "mismatch_kl": 0.023710263893008232, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 168, | |
| "timing/generation_ms": 59294.02190912515, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 59294.02190912515, | |
| "tokens/completion": 2536.8828125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 207.61119556427002 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.4547651410102844, | |
| "epoch": 0.22533333333333333, | |
| "grad_norm": 0.03804278639742813, | |
| "importance_ratio": 0.9720731973648071, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0026, | |
| "mismatch_kl": 0.2540355324745178, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 169, | |
| "timing/generation_ms": 14632.340895012021, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14632.340895012021, | |
| "tokens/completion": 634.8203125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 66.74064421653748 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.26701289415359497, | |
| "epoch": 0.22666666666666666, | |
| "grad_norm": 0.03041084967586165, | |
| "importance_ratio": 0.9971191883087158, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0024, | |
| "mismatch_kl": 0.02894790843129158, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 170, | |
| "timing/generation_ms": 21908.162399195135, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 21908.162399195135, | |
| "tokens/completion": 1060.19140625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 285.11374616622925 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3831964433193207, | |
| "epoch": 0.228, | |
| "grad_norm": 0.020277373003486452, | |
| "importance_ratio": 0.9703661799430847, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0013, | |
| "mismatch_kl": 0.288127064704895, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 171, | |
| "timing/generation_ms": 21739.85463846475, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 21739.85463846475, | |
| "tokens/completion": 1042.390625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 126.53577995300293 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3104299008846283, | |
| "epoch": 0.22933333333333333, | |
| "grad_norm": 0.05268300034795112, | |
| "importance_ratio": 0.9946843981742859, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0045, | |
| "mismatch_kl": 0.028223995119333267, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 172, | |
| "timing/generation_ms": 18181.49754870683, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 18181.49754870683, | |
| "tokens/completion": 876.87890625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 102.08800101280212 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.32149240374565125, | |
| "epoch": 0.23066666666666666, | |
| "grad_norm": 0.019198595379338976, | |
| "importance_ratio": 0.9882834553718567, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0031, | |
| "mismatch_kl": 0.09531966596841812, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 173, | |
| "timing/generation_ms": 26753.23315896094, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 26753.23315896094, | |
| "tokens/completion": 1199.828125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 243.50505256652832 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2507164180278778, | |
| "epoch": 0.232, | |
| "grad_norm": 0.0248134202199756, | |
| "importance_ratio": 0.9970893263816833, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0063, | |
| "mismatch_kl": 0.033440057188272476, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 174, | |
| "timing/generation_ms": 32734.658079221845, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 32734.658079221845, | |
| "tokens/completion": 1582.765625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 236.81393718719482 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2938965857028961, | |
| "epoch": 0.23333333333333334, | |
| "grad_norm": 0.023295024031541062, | |
| "importance_ratio": 0.9996641874313354, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0014, | |
| "mismatch_kl": 0.030382564291357994, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 175, | |
| "timing/generation_ms": 18484.799866564572, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 18484.799866564572, | |
| "tokens/completion": 869.8203125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 89.94726347923279 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24128344655036926, | |
| "epoch": 0.23466666666666666, | |
| "grad_norm": 0.021681137287839845, | |
| "importance_ratio": 0.995689868927002, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0024, | |
| "mismatch_kl": 0.025076182559132576, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 176, | |
| "timing/generation_ms": 16699.054242111742, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 16699.054242111742, | |
| "tokens/completion": 831.890625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 76.11790347099304 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.26724985241889954, | |
| "epoch": 0.236, | |
| "grad_norm": 0.015254325506305103, | |
| "importance_ratio": 0.992223858833313, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0003, | |
| "mismatch_kl": 0.02879425697028637, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 177, | |
| "timing/generation_ms": 30596.904239617288, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 30596.904239617288, | |
| "tokens/completion": 1407.20703125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 199.58447432518005 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.28972604870796204, | |
| "epoch": 0.23733333333333334, | |
| "grad_norm": 0.01945907676336341, | |
| "importance_ratio": 0.9937379956245422, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0002, | |
| "mismatch_kl": 0.026391636580228806, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 178, | |
| "timing/generation_ms": 22168.457314372063, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 22168.457314372063, | |
| "tokens/completion": 1017.8515625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 198.82207107543945 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2955513298511505, | |
| "epoch": 0.23866666666666667, | |
| "grad_norm": 0.034061359790196394, | |
| "importance_ratio": 0.9955794811248779, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0017, | |
| "mismatch_kl": 0.026111198589205742, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 179, | |
| "timing/generation_ms": 17585.104428231716, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 17585.104428231716, | |
| "tokens/completion": 836.7421875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 98.93776655197144 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.33897051215171814, | |
| "epoch": 0.24, | |
| "grad_norm": 0.026732099750916328, | |
| "importance_ratio": 0.9968024492263794, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0016, | |
| "mismatch_kl": 0.03142106905579567, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 180, | |
| "timing/generation_ms": 14579.319617711008, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 14579.319617711008, | |
| "tokens/completion": 657.60546875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 48.83777070045471 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.27722474932670593, | |
| "epoch": 0.24133333333333334, | |
| "grad_norm": 0.02190113915349276, | |
| "importance_ratio": 0.9932956099510193, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0039, | |
| "mismatch_kl": 0.039353836327791214, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 181, | |
| "timing/generation_ms": 16838.846164755523, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 16838.846164755523, | |
| "tokens/completion": 837.53125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 90.39262366294861 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.21952733397483826, | |
| "epoch": 0.24266666666666667, | |
| "grad_norm": 0.019030162680243098, | |
| "importance_ratio": 0.9920942783355713, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0007, | |
| "mismatch_kl": 0.03863741457462311, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 182, | |
| "timing/generation_ms": 19943.43529921025, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 19943.43529921025, | |
| "tokens/completion": 959.51953125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 68.7491762638092 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.37819504737854004, | |
| "epoch": 0.244, | |
| "grad_norm": 0.030600275992650774, | |
| "importance_ratio": 0.9981564879417419, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0061, | |
| "mismatch_kl": 0.0258224718272686, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 183, | |
| "timing/generation_ms": 19337.73651625961, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 19337.73651625961, | |
| "tokens/completion": 909.80078125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 67.45709013938904 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24391266703605652, | |
| "epoch": 0.24533333333333332, | |
| "grad_norm": 0.020045952746227204, | |
| "importance_ratio": 0.9952253103256226, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0035, | |
| "mismatch_kl": 0.022540580481290817, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 184, | |
| "timing/generation_ms": 29042.017024941742, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 29042.017024941742, | |
| "tokens/completion": 1416.3046875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 224.1438853740692 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2780689597129822, | |
| "epoch": 0.24666666666666667, | |
| "grad_norm": 0.0286906981880458, | |
| "importance_ratio": 0.9939864277839661, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0002, | |
| "mismatch_kl": 0.028331460431218147, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 185, | |
| "timing/generation_ms": 13990.399835631251, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 13990.399835631251, | |
| "tokens/completion": 712.27734375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 65.08906888961792 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2785170078277588, | |
| "epoch": 0.248, | |
| "grad_norm": 0.019455372327007777, | |
| "importance_ratio": 0.9962543249130249, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0021, | |
| "mismatch_kl": 0.030258335173130035, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 186, | |
| "timing/generation_ms": 29046.93407472223, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 29046.93407472223, | |
| "tokens/completion": 1342.078125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 117.269207239151 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.29877498745918274, | |
| "epoch": 0.24933333333333332, | |
| "grad_norm": 0.041522981103745076, | |
| "importance_ratio": 0.9973271489143372, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0005, | |
| "mismatch_kl": 0.027791054919362068, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 187, | |
| "timing/generation_ms": 27519.34172678739, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 27519.34172678739, | |
| "tokens/completion": 1335.86328125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 109.74448680877686 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2548399567604065, | |
| "epoch": 0.25066666666666665, | |
| "grad_norm": 0.01914209458227723, | |
| "importance_ratio": 0.9980031251907349, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0056, | |
| "mismatch_kl": 0.023154988884925842, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 188, | |
| "timing/generation_ms": 18434.748891741037, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 18434.748891741037, | |
| "tokens/completion": 841.21484375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 100.93693470954895 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.36281952261924744, | |
| "epoch": 0.252, | |
| "grad_norm": 0.04366345528631447, | |
| "importance_ratio": 0.997806966304779, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0104, | |
| "mismatch_kl": 0.0235320795327425, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 189, | |
| "timing/generation_ms": 25268.099238164723, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 25268.099238164723, | |
| "tokens/completion": 1256.1484375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 102.91489505767822 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.22508475184440613, | |
| "epoch": 0.25333333333333335, | |
| "grad_norm": 0.01385345071504184, | |
| "importance_ratio": 0.9968878626823425, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0107, | |
| "mismatch_kl": 0.02765449695289135, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 190, | |
| "timing/generation_ms": 37916.601489298046, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 37916.601489298046, | |
| "tokens/completion": 1717.34765625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 139.42678880691528 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.40229278802871704, | |
| "epoch": 0.25466666666666665, | |
| "grad_norm": 0.02875613000959139, | |
| "importance_ratio": 0.9828155040740967, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0055, | |
| "mismatch_kl": 0.19772163033485413, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 191, | |
| "timing/generation_ms": 32680.235791951418, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 32680.235791951418, | |
| "tokens/completion": 1459.58203125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 144.90490436553955 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2181045562028885, | |
| "epoch": 0.256, | |
| "grad_norm": 0.019693707478772454, | |
| "importance_ratio": 0.9942646026611328, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0029, | |
| "mismatch_kl": 0.03511533513665199, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 192, | |
| "timing/generation_ms": 36065.32556284219, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 36065.32556284219, | |
| "tokens/completion": 1708.7734375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 126.33067202568054 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.2962771952152252, | |
| "epoch": 0.25733333333333336, | |
| "grad_norm": 0.02416381381264868, | |
| "importance_ratio": 0.9941651821136475, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0024, | |
| "mismatch_kl": 0.0343640111386776, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 193, | |
| "timing/generation_ms": 36326.69063284993, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 36326.69063284993, | |
| "tokens/completion": 1645.30859375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 146.5855736732483 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.22655896842479706, | |
| "epoch": 0.25866666666666666, | |
| "grad_norm": 0.024160165001251035, | |
| "importance_ratio": 0.995488166809082, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0023, | |
| "mismatch_kl": 0.023622261360287666, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 194, | |
| "timing/generation_ms": 40274.337109178305, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 40274.337109178305, | |
| "tokens/completion": 1910.0, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 137.63950419425964 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24619098007678986, | |
| "epoch": 0.26, | |
| "grad_norm": 0.008997397579246655, | |
| "importance_ratio": 0.9905009865760803, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0047, | |
| "mismatch_kl": 0.06482454389333725, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 195, | |
| "timing/generation_ms": 107369.31251455098, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 107369.31251455098, | |
| "tokens/completion": 3881.7421875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 291.5552787780762 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24800750613212585, | |
| "epoch": 0.2613333333333333, | |
| "grad_norm": 0.041355633656673725, | |
| "importance_ratio": 0.996856689453125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0027, | |
| "mismatch_kl": 0.023481056094169617, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 196, | |
| "timing/generation_ms": 23556.342590600252, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 23556.342590600252, | |
| "tokens/completion": 801.36328125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 66.23490047454834 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.20097197592258453, | |
| "epoch": 0.26266666666666666, | |
| "grad_norm": 0.01639665709788699, | |
| "importance_ratio": 0.995540201663971, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0009, | |
| "mismatch_kl": 0.02512766607105732, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 197, | |
| "timing/generation_ms": 54791.293187998235, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 54791.293187998235, | |
| "tokens/completion": 2467.2578125, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 184.51049184799194 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.24079304933547974, | |
| "epoch": 0.264, | |
| "grad_norm": 0.033558115100562454, | |
| "importance_ratio": 0.9966259002685547, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0129, | |
| "mismatch_kl": 0.02248232252895832, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 198, | |
| "timing/generation_ms": 38877.40421388298, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 38877.40421388298, | |
| "tokens/completion": 1947.15625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 256.89259123802185 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.22992920875549316, | |
| "epoch": 0.2653333333333333, | |
| "grad_norm": 0.019833326998120116, | |
| "importance_ratio": 0.996269166469574, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0002, | |
| "mismatch_kl": 0.02254408784210682, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 199, | |
| "timing/generation_ms": 22910.992676392198, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 22910.992676392198, | |
| "tokens/completion": 1146.32421875, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 153.08721899986267 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.21609917283058167, | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 0.017782941960253474, | |
| "importance_ratio": 0.9933099746704102, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0047, | |
| "mismatch_kl": 0.028513798490166664, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 200, | |
| "timing/generation_ms": 28995.982899330556, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 28995.982899330556, | |
| "tokens/completion": 1354.24609375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 139.1398515701294 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.3927169740200043, | |
| "epoch": 0.268, | |
| "grad_norm": 0.08540874966055562, | |
| "importance_ratio": 0.9711376428604126, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0081, | |
| "mismatch_kl": 0.2314944714307785, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 201, | |
| "timing/generation_ms": 31200.909822247922, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 31200.909822247922, | |
| "tokens/completion": 1405.9765625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 178.80973744392395 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.235797718167305, | |
| "epoch": 0.2693333333333333, | |
| "grad_norm": 0.01568085371274426, | |
| "importance_ratio": 0.9909575581550598, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0079, | |
| "mismatch_kl": 0.039374206215143204, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 202, | |
| "timing/generation_ms": 42998.49198944867, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 42998.49198944867, | |
| "tokens/completion": 1907.31640625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 141.76219058036804 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.23127324879169464, | |
| "epoch": 0.27066666666666667, | |
| "grad_norm": 0.02007459981352103, | |
| "importance_ratio": 0.9912987947463989, | |
| "learning_rate": 5e-06, | |
| "loss": -0.001, | |
| "mismatch_kl": 0.03943263366818428, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 203, | |
| "timing/generation_ms": 37774.500319734216, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 37774.500319734216, | |
| "tokens/completion": 1693.734375, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 302.7908329963684 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.22054153680801392, | |
| "epoch": 0.272, | |
| "grad_norm": 0.021761300841866088, | |
| "importance_ratio": 0.9904981851577759, | |
| "learning_rate": 5e-06, | |
| "loss": -0.0026, | |
| "mismatch_kl": 0.037401266396045685, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 204, | |
| "timing/generation_ms": 42541.27501603216, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 42541.27501603216, | |
| "tokens/completion": 1937.69140625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 233.74011135101318 | |
| }, | |
| { | |
| "advantage/absmean": 0.12451171875, | |
| "entropy": 0.22628618776798248, | |
| "epoch": 0.2733333333333333, | |
| "grad_norm": 0.011121419921268808, | |
| "importance_ratio": 0.9924519658088684, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0013, | |
| "mismatch_kl": 0.03573086857795715, | |
| "reward": 0.12451171875, | |
| "reward/std": 0.1738164722919464, | |
| "step": 205, | |
| "timing/generation_ms": 35010.2855078876, | |
| "timing/scoring_ms": 0.0, | |
| "timing/total_ms": 35010.2855078876, | |
| "tokens/completion": 1629.62890625, | |
| "tokens/masked_fraction": 0.0, | |
| "wall_clock/generate_s": 137.56320452690125 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |