diff --git "a/checkpoint-5000/trainer_state.json" "b/checkpoint-5000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-5000/trainer_state.json" @@ -0,0 +1,44252 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.39720368605020656, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 7.936507936507936e-06, + "loss": 1.0932, + "theoretical_loss": 14.920781838632275, + "tokens_seen": 262144 + }, + { + "epoch": 0.0, + "learning_rate": 1.5873015873015872e-05, + "loss": 1.0937, + "theoretical_loss": 12.718594708127029, + "tokens_seen": 524288 + }, + { + "epoch": 0.0, + "learning_rate": 2.380952380952381e-05, + "loss": 1.0519, + "theoretical_loss": 11.615184291350435, + "tokens_seen": 786432 + }, + { + "epoch": 0.0, + "learning_rate": 3.1746031746031745e-05, + "loss": 1.0058, + "theoretical_loss": 10.904893169100655, + "tokens_seen": 1048576 + }, + { + "epoch": 0.0, + "learning_rate": 3.968253968253968e-05, + "loss": 0.976, + "theoretical_loss": 10.392029026407034, + "tokens_seen": 1310720 + }, + { + "epoch": 0.0, + "learning_rate": 4.761904761904762e-05, + "loss": 0.9521, + "theoretical_loss": 9.996134261483984, + "tokens_seen": 1572864 + }, + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 0.9364, + "theoretical_loss": 9.67682184172525, + "tokens_seen": 1835008 + }, + { + "epoch": 0.0, + "learning_rate": 6.349206349206349e-05, + "loss": 0.9245, + "theoretical_loss": 9.41114487355416, + "tokens_seen": 2097152 + }, + { + "epoch": 0.0, + "learning_rate": 7.142857142857142e-05, + "loss": 0.9103, + "theoretical_loss": 9.184905895151996, + "tokens_seen": 2359296 + }, + { + "epoch": 0.0, + "learning_rate": 7.936507936507937e-05, + "loss": 0.8933, + "theoretical_loss": 8.988754572553061, + "tokens_seen": 2621440 + }, + { + "epoch": 0.0, + "learning_rate": 8.73015873015873e-05, + "loss": 0.8721, + "theoretical_loss": 8.816230875422118, + "tokens_seen": 2883584 + }, + { + "epoch": 0.0, + "learning_rate": 9.523809523809524e-05, + "loss": 0.8552, + "theoretical_loss": 8.66269920037918, + "tokens_seen": 3145728 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.43383753299713135, + "objective/train/docs_used": 8371, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 8.528481483459473, + "objective/train/original_loss": 8.528482437133789, + "objective/train/theoretical_loss": 8.591947747254773, + "objective/train/tokens_used": 23736800, + "objective/train/value_avg": -0.4384765625, + "objective/train/value_loss": 0.1883670836687088, + "objective/train/value_max": -0.4375, + "objective/train/value_min": -0.441650390625, + "objective/train/value_reward_corr": -0.01855261992160691, + "objective/train/value_std": 0.0006289482116699219, + "objective/train/weight_avg": 1.5432828664779663, + "objective/train/weighted_lm_loss": 13.15995979309082, + "objective/train/weights_max": 1.555271863937378, + "objective/train/weights_min": 1.1650478839874268, + "theoretical_loss": 8.591947747254773, + "tokens_seen": 3276800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010317460317460317, + "loss": 0.8283, + "theoretical_loss": 8.524729102289708, + "tokens_seen": 3407872 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001111111111111111, + "loss": 0.8015, + "theoretical_loss": 8.399716359763914, + "tokens_seen": 3670016 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011904761904761905, + "loss": 0.7709, + "theoretical_loss": 8.285641004895568, + "tokens_seen": 3932160 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012698412698412698, + "loss": 0.7402, + "theoretical_loss": 8.180907195283321, + "tokens_seen": 4194304 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001349206349206349, + "loss": 0.7102, + "theoretical_loss": 8.084233979345122, + "tokens_seen": 4456448 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014285714285714284, + "loss": 0.6751, + "theoretical_loss": 7.9945788049155055, + "tokens_seen": 4718592 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001507936507936508, + "loss": 0.6503, + "theoretical_loss": 7.911082722632908, + "tokens_seen": 4980736 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015873015873015873, + "loss": 0.6183, + "theoretical_loss": 7.83303033759787, + "tokens_seen": 5242880 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016666666666666666, + "loss": 0.5919, + "theoretical_loss": 7.759820016443023, + "tokens_seen": 5505024 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001746031746031746, + "loss": 0.5614, + "theoretical_loss": 7.690941370375033, + "tokens_seen": 5767168 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018253968253968252, + "loss": 0.5314, + "theoretical_loss": 7.6259579939239845, + "tokens_seen": 6029312 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019047619047619048, + "loss": 0.5048, + "theoretical_loss": 7.564494061943624, + "tokens_seen": 6291456 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.19523106515407562, + "objective/train/docs_used": 9704, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 7.315228462219238, + "objective/train/original_loss": 7.315227508544922, + "objective/train/theoretical_loss": 7.5062238006917354, + "objective/train/tokens_used": 27013600, + "objective/train/value_avg": -0.2044677734375, + "objective/train/value_loss": 0.03861678019165993, + "objective/train/value_max": -0.1988525390625, + "objective/train/value_min": -0.251220703125, + "objective/train/value_reward_corr": -0.023237293515685216, + "objective/train/value_std": 0.01251983642578125, + "objective/train/weight_avg": 1.2158902883529663, + "objective/train/weighted_lm_loss": 8.87794303894043, + "objective/train/weights_max": 1.285593867301941, + "objective/train/weights_min": 0.46789029240608215, + "theoretical_loss": 7.5062238006917354, + "tokens_seen": 6553600 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001984126984126984, + "loss": 0.4798, + "theoretical_loss": 7.5062238006917354, + "tokens_seen": 6553600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00020634920634920634, + "loss": 0.4564, + "theoretical_loss": 7.45086312850561, + "tokens_seen": 6815744 + }, + { + "epoch": 0.0, + "learning_rate": 0.00021428571428571427, + "loss": 0.4397, + "theoretical_loss": 7.398162954262078, + "tokens_seen": 7077888 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002222222222222222, + "loss": 0.4264, + "theoretical_loss": 7.347903756717382, + "tokens_seen": 7340032 + }, + { + "epoch": 0.0, + "learning_rate": 0.00023015873015873016, + "loss": 0.401, + "theoretical_loss": 7.299891163694537, + "tokens_seen": 7602176 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002380952380952381, + "loss": 0.3912, + "theoretical_loss": 7.253952319156202, + "tokens_seen": 7864320 + }, + { + "epoch": 0.0, + "learning_rate": 0.000246031746031746, + "loss": 0.3754, + "theoretical_loss": 7.2099328765932205, + "tokens_seen": 8126464 + }, + { + "epoch": 0.0, + "learning_rate": 0.00025396825396825396, + "loss": 0.366, + "theoretical_loss": 7.167694494355343, + "tokens_seen": 8388608 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002619047619047619, + "loss": 0.3575, + "theoretical_loss": 7.127112736305475, + "tokens_seen": 8650752 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002698412698412698, + "loss": 0.3477, + "theoretical_loss": 7.0880753020982725, + "tokens_seen": 8912896 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002777777777777778, + "loss": 0.3462, + "theoretical_loss": 7.050480527300383, + "tokens_seen": 9175040 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002857142857142857, + "loss": 0.3397, + "theoretical_loss": 7.014236105786485, + "tokens_seen": 9437184 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002936507936507937, + "loss": 0.3387, + "theoretical_loss": 6.979257996300014, + "tokens_seen": 9699328 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.035979945212602615, + "objective/train/docs_used": 11015, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.488903999328613, + "objective/train/original_loss": 6.488903522491455, + "objective/train/theoretical_loss": 6.962219571538605, + "objective/train/tokens_used": 30290400, + "objective/train/value_avg": -0.044891357421875, + "objective/train/value_loss": 0.002101501217111945, + "objective/train/value_max": -0.038330078125, + "objective/train/value_min": -0.092529296875, + "objective/train/value_reward_corr": -0.002001422374817204, + "objective/train/value_std": 0.0086822509765625, + "objective/train/weight_avg": 1.0370042324066162, + "objective/train/weighted_lm_loss": 6.7182722091674805, + "objective/train/weights_max": 1.0969452857971191, + "objective/train/weights_min": 0.3830130398273468, + "theoretical_loss": 6.962219571538605, + "tokens_seen": 9830400 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003015873015873016, + "loss": 0.3357, + "theoretical_loss": 6.945469482441503, + "tokens_seen": 9961472 + }, + { + "epoch": 0.0, + "learning_rate": 0.00030952380952380956, + "loss": 0.332, + "theoretical_loss": 6.912800361140576, + "tokens_seen": 10223616 + }, + { + "epoch": 0.0, + "learning_rate": 0.00031746031746031746, + "loss": 0.3301, + "theoretical_loss": 6.881186239250335, + "tokens_seen": 10485760 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003253968253968254, + "loss": 0.3283, + "theoretical_loss": 6.8505679215514235, + "tokens_seen": 10747904 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003333333333333333, + "loss": 0.3248, + "theoretical_loss": 6.8208908763759295, + "tokens_seen": 11010048 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003412698412698413, + "loss": 0.3232, + "theoretical_loss": 6.79210476741633, + "tokens_seen": 11272192 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003492063492063492, + "loss": 0.3221, + "theoretical_loss": 6.76416304219278, + "tokens_seen": 11534336 + }, + { + "epoch": 0.0, + "learning_rate": 0.00035714285714285714, + "loss": 0.3173, + "theoretical_loss": 6.737022569206117, + "tokens_seen": 11796480 + }, + { + "epoch": 0.0, + "learning_rate": 0.00036507936507936505, + "loss": 0.3175, + "theoretical_loss": 6.710643317075979, + "tokens_seen": 12058624 + }, + { + "epoch": 0.0, + "learning_rate": 0.000373015873015873, + "loss": 0.3144, + "theoretical_loss": 6.684988070009584, + "tokens_seen": 12320768 + }, + { + "epoch": 0.0, + "learning_rate": 0.00038095238095238096, + "loss": 0.3176, + "theoretical_loss": 6.660022174811009, + "tokens_seen": 12582912 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003888888888888889, + "loss": 0.3123, + "theoretical_loss": 6.6357133153579175, + "tokens_seen": 12845056 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.02608620934188366, + "objective/train/docs_used": 12170, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.028903007507324, + "objective/train/original_loss": 6.028903961181641, + "objective/train/theoretical_loss": 6.612031311070119, + "objective/train/tokens_used": 33567200, + "objective/train/value_avg": -0.03485107421875, + "objective/train/value_loss": 0.0012108575319871306, + "objective/train/value_max": -0.0136871337890625, + "objective/train/value_min": -0.05511474609375, + "objective/train/value_reward_corr": -0.03430143501280518, + "objective/train/value_std": 0.00576019287109375, + "objective/train/weight_avg": 1.0266841650009155, + "objective/train/weighted_lm_loss": 6.190183162689209, + "objective/train/weights_max": 1.05666184425354, + "objective/train/weights_min": 0.3770461976528168, + "theoretical_loss": 6.612031311070119, + "tokens_seen": 13107200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003968253968253968, + "loss": 0.3097, + "theoretical_loss": 6.612031311070119, + "tokens_seen": 13107200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004047619047619048, + "loss": 0.3104, + "theoretical_loss": 6.588947936394168, + "tokens_seen": 13369344 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004126984126984127, + "loss": 0.3022, + "theoretical_loss": 6.566436758747731, + "tokens_seen": 13631488 + }, + { + "epoch": 0.0, + "learning_rate": 0.00042063492063492065, + "loss": 0.3027, + "theoretical_loss": 6.544472992721121, + "tokens_seen": 13893632 + }, + { + "epoch": 0.0, + "learning_rate": 0.00042857142857142855, + "loss": 0.3044, + "theoretical_loss": 6.523033368632323, + "tokens_seen": 14155776 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004365079365079365, + "loss": 0.3036, + "theoretical_loss": 6.502096013785574, + "tokens_seen": 14417920 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004444444444444444, + "loss": 0.3003, + "theoretical_loss": 6.481640344999435, + "tokens_seen": 14680064 + }, + { + "epoch": 0.0, + "learning_rate": 0.00045238095238095237, + "loss": 0.3021, + "theoretical_loss": 6.461646971154669, + "tokens_seen": 14942208 + }, + { + "epoch": 0.0, + "learning_rate": 0.00046031746031746033, + "loss": 0.3006, + "theoretical_loss": 6.442097604670096, + "tokens_seen": 15204352 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004682539682539683, + "loss": 0.2922, + "theoretical_loss": 6.422974980950157, + "tokens_seen": 15466496 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004761904761904762, + "loss": 0.2958, + "theoretical_loss": 6.404262784964672, + "tokens_seen": 15728640 + }, + { + "epoch": 0.0, + "learning_rate": 0.00048412698412698415, + "loss": 0.2952, + "theoretical_loss": 6.3859455842220765, + "tokens_seen": 15990784 + }, + { + "epoch": 0.0, + "learning_rate": 0.000492063492063492, + "loss": 0.2945, + "theoretical_loss": 6.368008767484675, + "tokens_seen": 16252928 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.028485985472798347, + "objective/train/docs_used": 13382, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.559032440185547, + "objective/train/original_loss": 5.559032917022705, + "objective/train/theoretical_loss": 6.359178647759789, + "objective/train/tokens_used": 36844000, + "objective/train/value_avg": -0.03570556640625, + "objective/train/value_loss": 0.001410536002367735, + "objective/train/value_max": -0.01111602783203125, + "objective/train/value_min": -0.0867919921875, + "objective/train/value_reward_corr": 0.08617836470205797, + "objective/train/value_std": 0.01192474365234375, + "objective/train/weight_avg": 1.0291692018508911, + "objective/train/weighted_lm_loss": 5.7365899085998535, + "objective/train/weights_max": 1.0881431102752686, + "objective/train/weights_min": 0.378071665763855, + "theoretical_loss": 6.359178647759789, + "tokens_seen": 16384000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005, + "loss": 0.293, + "theoretical_loss": 6.350438488650175, + "tokens_seen": 16515072 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005079365079365079, + "loss": 0.2876, + "theoretical_loss": 6.333221615289645, + "tokens_seen": 16777216 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005158730158730159, + "loss": 0.2909, + "theoretical_loss": 6.316345681389436, + "tokens_seen": 17039360 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005238095238095238, + "loss": 0.2851, + "theoretical_loss": 6.2997988438948465, + "tokens_seen": 17301504 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005317460317460317, + "loss": 0.2807, + "theoretical_loss": 6.283569842697203, + "tokens_seen": 17563648 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005396825396825396, + "loss": 0.2855, + "theoretical_loss": 6.26764796374462, + "tokens_seen": 17825792 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005476190476190477, + "loss": 0.2822, + "theoretical_loss": 6.25202300499066, + "tokens_seen": 18087936 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005555555555555556, + "loss": 0.2818, + "theoretical_loss": 6.236685244924882, + "tokens_seen": 18350080 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005634920634920635, + "loss": 0.2847, + "theoretical_loss": 6.2216254134558024, + "tokens_seen": 18612224 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005714285714285714, + "loss": 0.2838, + "theoretical_loss": 6.206834664939976, + "tokens_seen": 18874368 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005793650793650794, + "loss": 0.2875, + "theoretical_loss": 6.192304553171669, + "tokens_seen": 19136512 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005873015873015874, + "loss": 0.2852, + "theoretical_loss": 6.178027008165916, + "tokens_seen": 19398656 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.01716114766895771, + "objective/train/docs_used": 14656, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.421322822570801, + "objective/train/original_loss": 5.421322822570801, + "objective/train/theoretical_loss": 6.163994314584031, + "objective/train/tokens_used": 40120800, + "objective/train/value_avg": -0.0253753662109375, + "objective/train/value_loss": 0.0010442916536703706, + "objective/train/value_max": -0.0090179443359375, + "objective/train/value_min": -0.04986572265625, + "objective/train/value_reward_corr": -0.02662585421402189, + "objective/train/value_std": 0.0084381103515625, + "objective/train/weight_avg": 1.0176551342010498, + "objective/train/weighted_lm_loss": 5.523168087005615, + "objective/train/weights_max": 1.051129937171936, + "objective/train/weights_min": 0.37920987606048584, + "theoretical_loss": 6.163994314584031, + "tokens_seen": 19660800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005952380952380953, + "loss": 0.283, + "theoretical_loss": 6.163994314584031, + "tokens_seen": 19660800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006031746031746032, + "loss": 0.2769, + "theoretical_loss": 6.150199091665225, + "tokens_seen": 19922944 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006111111111111112, + "loss": 0.2773, + "theoretical_loss": 6.136634274540901, + "tokens_seen": 20185088 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006190476190476191, + "loss": 0.2813, + "theoretical_loss": 6.123293096819758, + "tokens_seen": 20447232 + }, + { + "epoch": 0.01, + "learning_rate": 0.000626984126984127, + "loss": 0.2788, + "theoretical_loss": 6.1101690743422505, + "tokens_seen": 20709376 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006349206349206349, + "loss": 0.2779, + "theoretical_loss": 6.097255990012153, + "tokens_seen": 20971520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006428571428571429, + "loss": 0.2727, + "theoretical_loss": 6.084547879621354, + "tokens_seen": 21233664 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006507936507936508, + "loss": 0.2732, + "theoretical_loss": 6.072039018591484, + "tokens_seen": 21495808 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006587301587301587, + "loss": 0.2743, + "theoretical_loss": 6.059723909562683, + "tokens_seen": 21757952 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006666666666666666, + "loss": 0.2703, + "theoretical_loss": 6.047597270765904, + "tokens_seen": 22020096 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006746031746031747, + "loss": 0.2718, + "theoretical_loss": 6.035654025120612, + "tokens_seen": 22282240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006825396825396826, + "loss": 0.269, + "theoretical_loss": 6.023889290004692, + "tokens_seen": 22544384 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006904761904761905, + "loss": 0.269, + "theoretical_loss": 6.012298367647816, + "tokens_seen": 22806528 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.018673593178391457, + "objective/train/docs_used": 15907, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.2787933349609375, + "objective/train/original_loss": 5.2787933349609375, + "objective/train/theoretical_loss": 6.006566666513313, + "objective/train/tokens_used": 43397600, + "objective/train/value_avg": -0.0252227783203125, + "objective/train/value_loss": 0.0007268089102581143, + "objective/train/value_max": -0.00916290283203125, + "objective/train/value_min": -0.059967041015625, + "objective/train/value_reward_corr": -0.009804935861391459, + "objective/train/value_std": 0.0091552734375, + "objective/train/weight_avg": 1.0190335512161255, + "objective/train/weighted_lm_loss": 5.3884196281433105, + "objective/train/weights_max": 1.0590507984161377, + "objective/train/weights_min": 0.37753555178642273, + "theoretical_loss": 6.006566666513313, + "tokens_seen": 22937600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006984126984126984, + "loss": 0.266, + "theoretical_loss": 6.000876736103618, + "tokens_seen": 23068672 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007063492063492064, + "loss": 0.2679, + "theoretical_loss": 5.989620040759641, + "tokens_seen": 23330816 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007142857142857143, + "loss": 0.2638, + "theoretical_loss": 5.978524086347409, + "tokens_seen": 23592960 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007222222222222222, + "loss": 0.2676, + "theoretical_loss": 5.967584829417934, + "tokens_seen": 23855104 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007301587301587301, + "loss": 0.2632, + "theoretical_loss": 5.956798371250791, + "tokens_seen": 24117248 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007380952380952381, + "loss": 0.2593, + "theoretical_loss": 5.9461609511673625, + "tokens_seen": 24379392 + }, + { + "epoch": 0.01, + "learning_rate": 0.000746031746031746, + "loss": 0.2628, + "theoretical_loss": 5.935668940221127, + "tokens_seen": 24641536 + }, + { + "epoch": 0.01, + "learning_rate": 0.000753968253968254, + "loss": 0.261, + "theoretical_loss": 5.92531883523999, + "tokens_seen": 24903680 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007619047619047619, + "loss": 0.2601, + "theoretical_loss": 5.915107253197538, + "tokens_seen": 25165824 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007698412698412699, + "loss": 0.2627, + "theoretical_loss": 5.905030925891829, + "tokens_seen": 25427968 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007777777777777778, + "loss": 0.2632, + "theoretical_loss": 5.895086694911951, + "tokens_seen": 25690112 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007857142857142857, + "loss": 0.261, + "theoretical_loss": 5.88527150687402, + "tokens_seen": 25952256 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.014981591142714024, + "objective/train/docs_used": 17033, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.067455291748047, + "objective/train/original_loss": 5.067455291748047, + "objective/train/theoretical_loss": 5.8755824089096285, + "objective/train/tokens_used": 46674400, + "objective/train/value_avg": -0.0223236083984375, + "objective/train/value_loss": 0.001089372206479311, + "objective/train/value_max": -0.007785797119140625, + "objective/train/value_min": -0.08538818359375, + "objective/train/value_reward_corr": 0.11159853979639632, + "objective/train/value_std": 0.008026123046875, + "objective/train/weight_avg": 1.0154664516448975, + "objective/train/weighted_lm_loss": 5.153756618499756, + "objective/train/weights_max": 1.0678131580352783, + "objective/train/weights_min": 0.37597760558128357, + "theoretical_loss": 5.8755824089096285, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007936507936507937, + "loss": 0.2609, + "theoretical_loss": 5.8755824089096285, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008015873015873017, + "loss": 0.2559, + "theoretical_loss": 5.866016544391016, + "tokens_seen": 26476544 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008095238095238096, + "loss": 0.2535, + "theoretical_loss": 5.856571148878293, + "tokens_seen": 26738688 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008174603174603175, + "loss": 0.2534, + "theoretical_loss": 5.847243546275179, + "tokens_seen": 27000832 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008253968253968254, + "loss": 0.2527, + "theoretical_loss": 5.838031145180573, + "tokens_seen": 27262976 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008333333333333334, + "loss": 0.2561, + "theoretical_loss": 5.82893143542425, + "tokens_seen": 27525120 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008412698412698413, + "loss": 0.2535, + "theoretical_loss": 5.81994198477569, + "tokens_seen": 27787264 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008492063492063492, + "loss": 0.2519, + "theoretical_loss": 5.811060435815881, + "tokens_seen": 28049408 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008571428571428571, + "loss": 0.2538, + "theoretical_loss": 5.802284502962563, + "tokens_seen": 28311552 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008650793650793651, + "loss": 0.2515, + "theoretical_loss": 5.793611969640068, + "tokens_seen": 28573696 + }, + { + "epoch": 0.01, + "learning_rate": 0.000873015873015873, + "loss": 0.2494, + "theoretical_loss": 5.785040685585437, + "tokens_seen": 28835840 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008809523809523809, + "loss": 0.2496, + "theoretical_loss": 5.7765685642831155, + "tokens_seen": 29097984 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008888888888888888, + "loss": 0.2526, + "theoretical_loss": 5.768193580520972, + "tokens_seen": 29360128 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.015116632916033268, + "objective/train/docs_used": 18174, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.077436923980713, + "objective/train/original_loss": 5.0774359703063965, + "objective/train/theoretical_loss": 5.7640418985258295, + "objective/train/tokens_used": 49951200, + "objective/train/value_avg": -0.0243377685546875, + "objective/train/value_loss": 0.0008194705005735159, + "objective/train/value_max": -0.006877899169921875, + "objective/train/value_min": -0.139404296875, + "objective/train/value_reward_corr": 0.07501882751184434, + "objective/train/value_std": 0.01377105712890625, + "objective/train/weight_avg": 1.0155125856399536, + "objective/train/weighted_lm_loss": 5.159419536590576, + "objective/train/weights_max": 1.1385329961776733, + "objective/train/weights_min": 0.3745060861110687, + "theoretical_loss": 5.7640418985258295, + "tokens_seen": 29491200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008968253968253968, + "loss": 0.249, + "theoretical_loss": 5.759913768060882, + "tokens_seen": 29622272 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009047619047619047, + "loss": 0.2492, + "theoretical_loss": 5.7517272174175496, + "tokens_seen": 29884416 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009126984126984126, + "loss": 0.2506, + "theoretical_loss": 5.743632073739626, + "tokens_seen": 30146560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009206349206349207, + "loss": 0.2489, + "theoretical_loss": 5.735626534787584, + "tokens_seen": 30408704 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009285714285714287, + "loss": 0.2431, + "theoretical_loss": 5.727708849003127, + "tokens_seen": 30670848 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009365079365079366, + "loss": 0.2469, + "theoretical_loss": 5.719877313665254, + "tokens_seen": 30932992 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009444444444444445, + "loss": 0.2464, + "theoretical_loss": 5.712130273128388, + "tokens_seen": 31195136 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009523809523809524, + "loss": 0.2492, + "theoretical_loss": 5.704466117138258, + "tokens_seen": 31457280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009603174603174604, + "loss": 0.2453, + "theoretical_loss": 5.696883279221504, + "tokens_seen": 31719424 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009682539682539683, + "loss": 0.243, + "theoretical_loss": 5.689380235145171, + "tokens_seen": 31981568 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009761904761904762, + "loss": 0.2464, + "theoretical_loss": 5.6819555014425305, + "tokens_seen": 32243712 + }, + { + "epoch": 0.01, + "learning_rate": 0.000984126984126984, + "loss": 0.2427, + "theoretical_loss": 5.674607634001871, + "tokens_seen": 32505856 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.014621232636272907, + "objective/train/docs_used": 19239, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.8382439613342285, + "objective/train/original_loss": 4.838243007659912, + "objective/train/theoretical_loss": 5.667335226715059, + "objective/train/tokens_used": 53228000, + "objective/train/value_avg": -0.02044677734375, + "objective/train/value_loss": 0.0005950998747721314, + "objective/train/value_max": -0.006191253662109375, + "objective/train/value_min": -0.11181640625, + "objective/train/value_reward_corr": 0.10875102217088843, + "objective/train/value_std": 0.00927734375, + "objective/train/weight_avg": 1.0149059295654297, + "objective/train/weighted_lm_loss": 4.914798736572266, + "objective/train/weights_max": 1.1008343696594238, + "objective/train/weights_min": 0.37406063079833984, + "theoretical_loss": 5.667335226715059, + "tokens_seen": 32768000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000992063492063492, + "loss": 0.249, + "theoretical_loss": 5.667335226715059, + "tokens_seen": 32768000 + }, + { + "epoch": 0.01, + "learning_rate": 0.001, + "loss": 0.2444, + "theoretical_loss": 5.6601369101828904, + "tokens_seen": 33030144 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009999197560584176, + "loss": 0.244, + "theoretical_loss": 5.6530113504744435, + "tokens_seen": 33292288 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009998395121168352, + "loss": 0.2429, + "theoretical_loss": 5.645957247937725, + "tokens_seen": 33554432 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009997592681752529, + "loss": 0.2414, + "theoretical_loss": 5.638973336059157, + "tokens_seen": 33816576 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009996790242336705, + "loss": 0.2359, + "theoretical_loss": 5.632058380369512, + "tokens_seen": 34078720 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009995987802920879, + "loss": 0.2366, + "theoretical_loss": 5.625211177394046, + "tokens_seen": 34340864 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009995185363505057, + "loss": 0.2361, + "theoretical_loss": 5.618430553644782, + "tokens_seen": 34603008 + }, + { + "epoch": 0.01, + "learning_rate": 0.000999438292408923, + "loss": 0.2379, + "theoretical_loss": 5.611715364652864, + "tokens_seen": 34865152 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009993580484673407, + "loss": 0.2324, + "theoretical_loss": 5.605064494039176, + "tokens_seen": 35127296 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009992778045257583, + "loss": 0.2315, + "theoretical_loss": 5.598476852621397, + "tokens_seen": 35389440 + }, + { + "epoch": 0.01, + "learning_rate": 0.000999197560584176, + "loss": 0.2364, + "theoretical_loss": 5.591951377555809, + "tokens_seen": 35651584 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009991173166425935, + "loss": 0.2376, + "theoretical_loss": 5.585487031512276, + "tokens_seen": 35913728 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.008301016874611378, + "objective/train/docs_used": 20470, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.471249103546143, + "objective/train/original_loss": 4.471248626708984, + "objective/train/theoretical_loss": 5.582277464488612, + "objective/train/tokens_used": 56504800, + "objective/train/value_avg": -0.017913818359375, + "objective/train/value_loss": 0.00041489681461825967, + "objective/train/value_max": -0.00595855712890625, + "objective/train/value_min": -0.11065673828125, + "objective/train/value_reward_corr": 0.3373516244538544, + "objective/train/value_std": 0.0095672607421875, + "objective/train/weight_avg": 1.0085070133209229, + "objective/train/weighted_lm_loss": 4.517763614654541, + "objective/train/weights_max": 1.0755970478057861, + "objective/train/weights_min": 0.7941007018089294, + "theoretical_loss": 5.582277464488612, + "tokens_seen": 36044800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009990370727010112, + "loss": 0.2363, + "theoretical_loss": 5.579082801880871, + "tokens_seen": 36175872 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009989568287594288, + "loss": 0.234, + "theoretical_loss": 5.572737700008718, + "tokens_seen": 36438016 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009988765848178462, + "loss": 0.2357, + "theoretical_loss": 5.56645076046569, + "tokens_seen": 36700160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009987963408762638, + "loss": 0.2324, + "theoretical_loss": 5.5602210403376775, + "tokens_seen": 36962304 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009987160969346814, + "loss": 0.2362, + "theoretical_loss": 5.554047618546193, + "tokens_seen": 37224448 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998635852993099, + "loss": 0.2376, + "theoretical_loss": 5.547929595193182, + "tokens_seen": 37486592 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009985556090515166, + "loss": 0.2321, + "theoretical_loss": 5.5418660909298945, + "tokens_seen": 37748736 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009984753651099342, + "loss": 0.2291, + "theoretical_loss": 5.535856246348814, + "tokens_seen": 38010880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009983951211683518, + "loss": 0.2304, + "theoretical_loss": 5.529899221397624, + "tokens_seen": 38273024 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009983148772267695, + "loss": 0.2279, + "theoretical_loss": 5.523994194814273, + "tokens_seen": 38535168 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998234633285187, + "loss": 0.229, + "theoretical_loss": 5.518140363582252, + "tokens_seen": 38797312 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009981543893436047, + "loss": 0.2318, + "theoretical_loss": 5.512336942405216, + "tokens_seen": 39059456 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.0074028936214745045, + "objective/train/docs_used": 21760, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.182587146759033, + "objective/train/original_loss": 4.182587623596191, + "objective/train/theoretical_loss": 5.506583163200142, + "objective/train/tokens_used": 59781600, + "objective/train/value_avg": -0.0196990966796875, + "objective/train/value_loss": 0.0009806023444980383, + "objective/train/value_max": -0.00634002685546875, + "objective/train/value_min": -0.1546630859375, + "objective/train/value_reward_corr": 0.24312695151689936, + "objective/train/value_std": 0.0124359130859375, + "objective/train/weight_avg": 1.0078480243682861, + "objective/train/weighted_lm_loss": 4.2220258712768555, + "objective/train/weights_max": 1.1438496112823486, + "objective/train/weights_min": 0.3742261826992035, + "theoretical_loss": 5.506583163200142, + "tokens_seen": 39321600 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998074145402022, + "loss": 0.2256, + "theoretical_loss": 5.506583163200142, + "tokens_seen": 39321600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009979939014604397, + "loss": 0.2304, + "theoretical_loss": 5.5008782746082625, + "tokens_seen": 39583744 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009979136575188573, + "loss": 0.2272, + "theoretical_loss": 5.495221541523011, + "tokens_seen": 39845888 + }, + { + "epoch": 0.01, + "learning_rate": 0.000997833413577275, + "loss": 0.2288, + "theoretical_loss": 5.489612244634316, + "tokens_seen": 40108032 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009977531696356925, + "loss": 0.2282, + "theoretical_loss": 5.48404967998854, + "tokens_seen": 40370176 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009976729256941101, + "loss": 0.2278, + "theoretical_loss": 5.478533158563456, + "tokens_seen": 40632320 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009975926817525277, + "loss": 0.2312, + "theoretical_loss": 5.473062005857637, + "tokens_seen": 40894464 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009975124378109451, + "loss": 0.2301, + "theoretical_loss": 5.467635561493681, + "tokens_seen": 41156608 + }, + { + "epoch": 0.01, + "learning_rate": 0.000997432193869363, + "loss": 0.2279, + "theoretical_loss": 5.462253178834744, + "tokens_seen": 41418752 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009973519499277804, + "loss": 0.2244, + "theoretical_loss": 5.456914224613812, + "tokens_seen": 41680896 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009972717059861982, + "loss": 0.2276, + "theoretical_loss": 5.451618078575256, + "tokens_seen": 41943040 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009971914620446156, + "loss": 0.2234, + "theoretical_loss": 5.446364133128155, + "tokens_seen": 42205184 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009971112181030332, + "loss": 0.2237, + "theoretical_loss": 5.44115179301095, + "tokens_seen": 42467328 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.012641006149351597, + "objective/train/docs_used": 22938, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.515160083770752, + "objective/train/original_loss": 4.51516056060791, + "objective/train/theoretical_loss": 5.438561042223066, + "objective/train/tokens_used": 63058400, + "objective/train/value_avg": -0.01983642578125, + "objective/train/value_loss": 0.0004896495374850929, + "objective/train/value_max": -0.00661468505859375, + "objective/train/value_min": -0.0869140625, + "objective/train/value_reward_corr": 0.07471170255551907, + "objective/train/value_std": 0.0099029541015625, + "objective/train/weight_avg": 1.012885570526123, + "objective/train/weighted_lm_loss": 4.5774455070495605, + "objective/train/weights_max": 1.0841727256774902, + "objective/train/weights_min": 0.7922973036766052, + "theoretical_loss": 5.438561042223066, + "tokens_seen": 42598400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009970309741614508, + "loss": 0.2268, + "theoretical_loss": 5.435980474966981, + "tokens_seen": 42729472 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009969507302198684, + "loss": 0.2231, + "theoretical_loss": 5.430849607430501, + "tokens_seen": 42991616 + }, + { + "epoch": 0.01, + "learning_rate": 0.000996870486278286, + "loss": 0.2219, + "theoretical_loss": 5.425758630222747, + "tokens_seen": 43253760 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009967902423367037, + "loss": 0.2263, + "theoretical_loss": 5.42070699425771, + "tokens_seen": 43515904 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009967099983951213, + "loss": 0.2268, + "theoretical_loss": 5.415694161257225, + "tokens_seen": 43778048 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009966297544535387, + "loss": 0.2216, + "theoretical_loss": 5.410719603475034, + "tokens_seen": 44040192 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009965495105119565, + "loss": 0.224, + "theoretical_loss": 5.405782803429483, + "tokens_seen": 44302336 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009964692665703739, + "loss": 0.2221, + "theoretical_loss": 5.400883253644551, + "tokens_seen": 44564480 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009963890226287915, + "loss": 0.226, + "theoretical_loss": 5.396020456398885, + "tokens_seen": 44826624 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009963087786872091, + "loss": 0.2213, + "theoretical_loss": 5.391193923482547, + "tokens_seen": 45088768 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009962285347456267, + "loss": 0.2216, + "theoretical_loss": 5.386403175961223, + "tokens_seen": 45350912 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009961482908040443, + "loss": 0.2215, + "theoretical_loss": 5.381647743947578, + "tokens_seen": 45613056 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.011346699669957161, + "objective/train/docs_used": 24173, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.3351922035217285, + "objective/train/original_loss": 4.335192680358887, + "objective/train/theoretical_loss": 5.37692716637954, + "objective/train/tokens_used": 66335200, + "objective/train/value_avg": -0.02081298828125, + "objective/train/value_loss": 0.0009375278605148196, + "objective/train/value_max": -0.006389617919921875, + "objective/train/value_min": -0.09466552734375, + "objective/train/value_reward_corr": 0.22103111537465234, + "objective/train/value_std": 0.01107025146484375, + "objective/train/weight_avg": 1.0117790699005127, + "objective/train/weighted_lm_loss": 4.3885650634765625, + "objective/train/weights_max": 1.0966105461120605, + "objective/train/weights_min": 0.37649428844451904, + "theoretical_loss": 5.37692716637954, + "tokens_seen": 45875200 + }, + { + "epoch": 0.01, + "learning_rate": 0.000996068046862462, + "loss": 0.2257, + "theoretical_loss": 5.37692716637954, + "tokens_seen": 45875200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009959878029208796, + "loss": 0.2205, + "theoretical_loss": 5.372240990805237, + "tokens_seen": 46137344 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009959075589792972, + "loss": 0.2197, + "theoretical_loss": 5.367588773174377, + "tokens_seen": 46399488 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009958273150377146, + "loss": 0.225, + "theoretical_loss": 5.36297007763582, + "tokens_seen": 46661632 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009957470710961322, + "loss": 0.2232, + "theoretical_loss": 5.358384476341126, + "tokens_seen": 46923776 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009956668271545498, + "loss": 0.2187, + "theoretical_loss": 5.353831549253895, + "tokens_seen": 47185920 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009955865832129674, + "loss": 0.2213, + "theoretical_loss": 5.349310883964664, + "tokens_seen": 47448064 + }, + { + "epoch": 0.01, + "learning_rate": 0.000995506339271385, + "loss": 0.2195, + "theoretical_loss": 5.344822075511196, + "tokens_seen": 47710208 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009954260953298026, + "loss": 0.2177, + "theoretical_loss": 5.340364726203955, + "tokens_seen": 47972352 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009953458513882203, + "loss": 0.2156, + "theoretical_loss": 5.3359384454566055, + "tokens_seen": 48234496 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009952656074466376, + "loss": 0.2146, + "theoretical_loss": 5.331542849621357, + "tokens_seen": 48496640 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009951853635050555, + "loss": 0.2172, + "theoretical_loss": 5.327177561828993, + "tokens_seen": 48758784 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009951051195634729, + "loss": 0.219, + "theoretical_loss": 5.32284221183342, + "tokens_seen": 49020928 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.0004111192829441279, + "objective/train/docs_used": 25394, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.30299186706543, + "objective/train/original_loss": 4.30299186706543, + "objective/train/theoretical_loss": 5.3206856495812715, + "objective/train/tokens_used": 69612000, + "objective/train/value_avg": -0.018402099609375, + "objective/train/value_loss": 0.001803424907848239, + "objective/train/value_max": -0.00555419921875, + "objective/train/value_min": -0.13427734375, + "objective/train/value_reward_corr": 0.352072119180548, + "objective/train/value_std": 0.01030731201171875, + "objective/train/weight_avg": 1.0012028217315674, + "objective/train/weighted_lm_loss": 4.307867527008057, + "objective/train/weights_max": 1.0850714445114136, + "objective/train/weights_min": 0.37296921014785767, + "theoretical_loss": 5.3206856495812715, + "tokens_seen": 49152000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009950248756218905, + "loss": 0.2198, + "theoretical_loss": 5.318536435860599, + "tokens_seen": 49283072 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994944631680308, + "loss": 0.2143, + "theoretical_loss": 5.314259876461705, + "tokens_seen": 49545216 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009948643877387257, + "loss": 0.2153, + "theoretical_loss": 5.310012182370359, + "tokens_seen": 49807360 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009947841437971433, + "loss": 0.2137, + "theoretical_loss": 5.305793008363841, + "tokens_seen": 50069504 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994703899855561, + "loss": 0.2132, + "theoretical_loss": 5.301602015128104, + "tokens_seen": 50331648 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009946236559139785, + "loss": 0.2117, + "theoretical_loss": 5.297438869126498, + "tokens_seen": 50593792 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009945434119723962, + "loss": 0.2191, + "theoretical_loss": 5.293303242472074, + "tokens_seen": 50855936 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009944631680308138, + "loss": 0.2133, + "theoretical_loss": 5.289194812803347, + "tokens_seen": 51118080 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009943829240892312, + "loss": 0.2112, + "theoretical_loss": 5.285113263163414, + "tokens_seen": 51380224 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994302680147649, + "loss": 0.2129, + "theoretical_loss": 5.2810582818823235, + "tokens_seen": 51642368 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009942224362060664, + "loss": 0.215, + "theoretical_loss": 5.27702956246258, + "tokens_seen": 51904512 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994142192264484, + "loss": 0.2124, + "theoretical_loss": 5.273026803467695, + "tokens_seen": 52166656 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.01028979942202568, + "objective/train/docs_used": 26541, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.072467803955078, + "objective/train/original_loss": 4.072467803955078, + "objective/train/theoretical_loss": 5.269049708413682, + "objective/train/tokens_used": 72888800, + "objective/train/value_avg": -0.016021728515625, + "objective/train/value_loss": 0.00028009802917949855, + "objective/train/value_max": -0.004608154296875, + "objective/train/value_min": -0.0889892578125, + "objective/train/value_reward_corr": 0.4423854028983263, + "objective/train/value_std": 0.008819580078125, + "objective/train/weight_avg": 1.0104297399520874, + "objective/train/weighted_lm_loss": 4.120135307312012, + "objective/train/weights_max": 1.0731053352355957, + "objective/train/weights_min": 0.8264122009277344, + "theoretical_loss": 5.269049708413682, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009940619483229016, + "loss": 0.2105, + "theoretical_loss": 5.269049708413682, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009939817043813192, + "loss": 0.2123, + "theoretical_loss": 5.265097985663418, + "tokens_seen": 52690944 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009939014604397368, + "loss": 0.2115, + "theoretical_loss": 5.261171348323755, + "tokens_seen": 52953088 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009938212164981545, + "loss": 0.2084, + "theoretical_loss": 5.257269514145324, + "tokens_seen": 53215232 + }, + { + "epoch": 0.02, + "learning_rate": 0.000993740972556572, + "loss": 0.2062, + "theoretical_loss": 5.2533922054249365, + "tokens_seen": 53477376 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009936607286149895, + "loss": 0.2101, + "theoretical_loss": 5.2495391489104986, + "tokens_seen": 53739520 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009935804846734073, + "loss": 0.2142, + "theoretical_loss": 5.24571007570837, + "tokens_seen": 54001664 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009935002407318247, + "loss": 0.2097, + "theoretical_loss": 5.2419047211930865, + "tokens_seen": 54263808 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009934199967902423, + "loss": 0.206, + "theoretical_loss": 5.238122824919387, + "tokens_seen": 54525952 + }, + { + "epoch": 0.02, + "learning_rate": 0.00099333975284866, + "loss": 0.2103, + "theoretical_loss": 5.234364130536457, + "tokens_seen": 54788096 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009932595089070775, + "loss": 0.21, + "theoretical_loss": 5.230628385704337, + "tokens_seen": 55050240 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009931792649654951, + "loss": 0.2083, + "theoretical_loss": 5.2269153420124255, + "tokens_seen": 55312384 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009930990210239128, + "loss": 0.2085, + "theoretical_loss": 5.223224754900014, + "tokens_seen": 55574528 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.01192543189972639, + "objective/train/docs_used": 27758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.997652769088745, + "objective/train/original_loss": 3.997653007507324, + "objective/train/theoretical_loss": 5.221387807169494, + "objective/train/tokens_used": 76165600, + "objective/train/value_avg": -0.0164794921875, + "objective/train/value_loss": 0.0003872321976814419, + "objective/train/value_max": -0.004451751708984375, + "objective/train/value_min": -0.07354736328125, + "objective/train/value_reward_corr": 0.07308995924059121, + "objective/train/value_std": 0.006809234619140625, + "objective/train/weight_avg": 1.0121105909347534, + "objective/train/weighted_lm_loss": 4.049195289611816, + "objective/train/weights_max": 1.0763195753097534, + "objective/train/weights_min": 0.37184974551200867, + "theoretical_loss": 5.221387807169494, + "tokens_seen": 55705600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009930187770823304, + "loss": 0.2098, + "theoretical_loss": 5.219556383578795, + "tokens_seen": 55836672 + }, + { + "epoch": 0.02, + "learning_rate": 0.000992938533140748, + "loss": 0.2069, + "theoretical_loss": 5.215909990957291, + "tokens_seen": 56098816 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009928582891991654, + "loss": 0.2075, + "theoretical_loss": 5.212285343567135, + "tokens_seen": 56360960 + }, + { + "epoch": 0.02, + "learning_rate": 0.000992778045257583, + "loss": 0.2067, + "theoretical_loss": 5.208682211491157, + "tokens_seen": 56623104 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009926978013160006, + "loss": 0.2043, + "theoretical_loss": 5.205100368293225, + "tokens_seen": 56885248 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009926175573744182, + "loss": 0.2042, + "theoretical_loss": 5.201539590949796, + "tokens_seen": 57147392 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009925373134328358, + "loss": 0.206, + "theoretical_loss": 5.1979996597831, + "tokens_seen": 57409536 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009924570694912534, + "loss": 0.2041, + "theoretical_loss": 5.1944803583959525, + "tokens_seen": 57671680 + }, + { + "epoch": 0.02, + "learning_rate": 0.000992376825549671, + "loss": 0.2063, + "theoretical_loss": 5.190981473608112, + "tokens_seen": 57933824 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009922965816080887, + "loss": 0.2063, + "theoretical_loss": 5.18750279539416, + "tokens_seen": 58195968 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009922163376665063, + "loss": 0.2021, + "theoretical_loss": 5.184044116822849, + "tokens_seen": 58458112 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009921360937249237, + "loss": 0.2067, + "theoretical_loss": 5.1806052339978965, + "tokens_seen": 58720256 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.008248094469308853, + "objective/train/docs_used": 29071, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7817437648773193, + "objective/train/original_loss": 3.7817440032958984, + "objective/train/theoretical_loss": 5.1771859460001455, + "objective/train/tokens_used": 79442400, + "objective/train/value_avg": -0.0163726806640625, + "objective/train/value_loss": 0.00034792307997122407, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.0994873046875, + "objective/train/value_reward_corr": 0.447085684972137, + "objective/train/value_std": 0.00957489013671875, + "objective/train/weight_avg": 1.0084155797958374, + "objective/train/weighted_lm_loss": 3.8198161125183105, + "objective/train/weights_max": 1.0903370380401611, + "objective/train/weights_min": 0.37122613191604614, + "theoretical_loss": 5.1771859460001455, + "tokens_seen": 58982400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009920558497833415, + "loss": 0.1986, + "theoretical_loss": 5.1771859460001455, + "tokens_seen": 58982400 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991975605841759, + "loss": 0.2047, + "theoretical_loss": 5.1737860548311065, + "tokens_seen": 59244544 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009918953619001765, + "loss": 0.2018, + "theoretical_loss": 5.170405365357794, + "tokens_seen": 59506688 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009918151179585941, + "loss": 0.2003, + "theoretical_loss": 5.167043685258852, + "tokens_seen": 59768832 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009917348740170117, + "loss": 0.2018, + "theoretical_loss": 5.163700824971922, + "tokens_seen": 60030976 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009916546300754293, + "loss": 0.2067, + "theoretical_loss": 5.160376597642223, + "tokens_seen": 60293120 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991574386133847, + "loss": 0.2066, + "theoretical_loss": 5.157070819072301, + "tokens_seen": 60555264 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009914941421922646, + "loss": 0.2065, + "theoretical_loss": 5.153783307672935, + "tokens_seen": 60817408 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991413898250682, + "loss": 0.2048, + "theoretical_loss": 5.150513884415149, + "tokens_seen": 61079552 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009913336543090998, + "loss": 0.2018, + "theoretical_loss": 5.14726237278331, + "tokens_seen": 61341696 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009912534103675172, + "loss": 0.2058, + "theoretical_loss": 5.144028598729285, + "tokens_seen": 61603840 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009911731664259348, + "loss": 0.2015, + "theoretical_loss": 5.140812390627624, + "tokens_seen": 61865984 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009910929224843524, + "loss": 0.2039, + "theoretical_loss": 5.137613579231737, + "tokens_seen": 62128128 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.010975364595651627, + "objective/train/docs_used": 30276, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.172832489013672, + "objective/train/original_loss": 4.17283296585083, + "objective/train/theoretical_loss": 5.136020645048806, + "objective/train/tokens_used": 82719200, + "objective/train/value_avg": -0.016754150390625, + "objective/train/value_loss": 0.0010546700796112418, + "objective/train/value_max": -0.004718780517578125, + "objective/train/value_min": -0.08447265625, + "objective/train/value_reward_corr": 0.2364809296876953, + "objective/train/value_std": 0.007312774658203125, + "objective/train/weight_avg": 1.0114343166351318, + "objective/train/weighted_lm_loss": 4.22389554977417, + "objective/train/weights_max": 1.077765703201294, + "objective/train/weights_min": 0.372718870639801, + "theoretical_loss": 5.136020645048806, + "tokens_seen": 62259200 + }, + { + "epoch": 0.02, + "learning_rate": 0.00099101267854277, + "loss": 0.2036, + "theoretical_loss": 5.134431997631053, + "tokens_seen": 62390272 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009909324346011876, + "loss": 0.1996, + "theoretical_loss": 5.1312674812091235, + "tokens_seen": 62652416 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009908521906596053, + "loss": 0.1987, + "theoretical_loss": 5.128119867602646, + "tokens_seen": 62914560 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009907719467180229, + "loss": 0.2017, + "theoretical_loss": 5.124988996661393, + "tokens_seen": 63176704 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009906917027764405, + "loss": 0.1973, + "theoretical_loss": 5.121874710409012, + "tokens_seen": 63438848 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990611458834858, + "loss": 0.1999, + "theoretical_loss": 5.118776853004677, + "tokens_seen": 63700992 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009905312148932755, + "loss": 0.1977, + "theoretical_loss": 5.115695270705579, + "tokens_seen": 63963136 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990450970951693, + "loss": 0.1956, + "theoretical_loss": 5.112629811830217, + "tokens_seen": 64225280 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009903707270101107, + "loss": 0.1989, + "theoretical_loss": 5.10958032672248, + "tokens_seen": 64487424 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009902904830685283, + "loss": 0.1946, + "theoretical_loss": 5.106546667716508, + "tokens_seen": 64749568 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990210239126946, + "loss": 0.2015, + "theoretical_loss": 5.103528689102281, + "tokens_seen": 65011712 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009901299951853636, + "loss": 0.1959, + "theoretical_loss": 5.100526247091967, + "tokens_seen": 65273856 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.007889259606599808, + "objective/train/docs_used": 31526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.06732177734375, + "objective/train/original_loss": 4.06732177734375, + "objective/train/theoretical_loss": 5.097539199786951, + "objective/train/tokens_used": 85996000, + "objective/train/value_avg": -0.0160980224609375, + "objective/train/value_loss": 0.0005863794358447194, + "objective/train/value_max": -0.00464630126953125, + "objective/train/value_min": -0.1026611328125, + "objective/train/value_reward_corr": 0.33268385277114015, + "objective/train/value_std": 0.00913238525390625, + "objective/train/weight_avg": 1.0081636905670166, + "objective/train/weighted_lm_loss": 4.102910041809082, + "objective/train/weights_max": 1.1006338596343994, + "objective/train/weights_min": 0.49862930178642273, + "theoretical_loss": 5.097539199786951, + "tokens_seen": 65536000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009900497512437812, + "loss": 0.199, + "theoretical_loss": 5.097539199786951, + "tokens_seen": 65536000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009899695073021988, + "loss": 0.2, + "theoretical_loss": 5.094567407145588, + "tokens_seen": 65798144 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009898892633606162, + "loss": 0.1976, + "theoretical_loss": 5.09161073095161, + "tokens_seen": 66060288 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989809019419034, + "loss": 0.1955, + "theoretical_loss": 5.0886690347832015, + "tokens_seen": 66322432 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009897287754774514, + "loss": 0.1975, + "theoretical_loss": 5.0857421839827275, + "tokens_seen": 66584576 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989648531535869, + "loss": 0.193, + "theoretical_loss": 5.082830045627072, + "tokens_seen": 66846720 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009895682875942866, + "loss": 0.1985, + "theoretical_loss": 5.079932488498602, + "tokens_seen": 67108864 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009894880436527042, + "loss": 0.1952, + "theoretical_loss": 5.077049383056725, + "tokens_seen": 67371008 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009894077997111218, + "loss": 0.1976, + "theoretical_loss": 5.074180601410026, + "tokens_seen": 67633152 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009893275557695395, + "loss": 0.1896, + "theoretical_loss": 5.0713260172889845, + "tokens_seen": 67895296 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989247311827957, + "loss": 0.1925, + "theoretical_loss": 5.068485506019231, + "tokens_seen": 68157440 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009891670678863745, + "loss": 0.1949, + "theoretical_loss": 5.06565894449535, + "tokens_seen": 68419584 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009890868239447923, + "loss": 0.1939, + "theoretical_loss": 5.06284621115523, + "tokens_seen": 68681728 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.008360575884580612, + "objective/train/docs_used": 32629, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.77909517288208, + "objective/train/original_loss": 3.77909517288208, + "objective/train/theoretical_loss": 5.061444992481711, + "objective/train/tokens_used": 89272800, + "objective/train/value_avg": -0.016082763671875, + "objective/train/value_loss": 0.0004964357358403504, + "objective/train/value_max": -0.00473785400390625, + "objective/train/value_min": -0.08770751953125, + "objective/train/value_reward_corr": 0.21250019484264865, + "objective/train/value_std": 0.00782012939453125, + "objective/train/weight_avg": 1.0085920095443726, + "objective/train/weighted_lm_loss": 3.811274290084839, + "objective/train/weights_max": 1.0717307329177856, + "objective/train/weights_min": 0.37172776460647583, + "theoretical_loss": 5.061444992481711, + "tokens_seen": 68812800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009890065800032097, + "loss": 0.1985, + "theoretical_loss": 5.060047185954893, + "tokens_seen": 68943872 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009889263360616273, + "loss": 0.1918, + "theoretical_loss": 5.057261750343864, + "tokens_seen": 69206016 + }, + { + "epoch": 0.02, + "learning_rate": 0.000988846092120045, + "loss": 0.1924, + "theoretical_loss": 5.0544897872410095, + "tokens_seen": 69468160 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009887658481784625, + "loss": 0.191, + "theoretical_loss": 5.051731181010866, + "tokens_seen": 69730304 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009886856042368801, + "loss": 0.1899, + "theoretical_loss": 5.048985817440432, + "tokens_seen": 69992448 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009886053602952978, + "loss": 0.1937, + "theoretical_loss": 5.046253583716425, + "tokens_seen": 70254592 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009885251163537154, + "loss": 0.1905, + "theoretical_loss": 5.043534368402973, + "tokens_seen": 70516736 + }, + { + "epoch": 0.02, + "learning_rate": 0.000988444872412133, + "loss": 0.1894, + "theoretical_loss": 5.040828061419762, + "tokens_seen": 70778880 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009883646284705506, + "loss": 0.1918, + "theoretical_loss": 5.038134554020587, + "tokens_seen": 71041024 + }, + { + "epoch": 0.02, + "learning_rate": 0.000988284384528968, + "loss": 0.1892, + "theoretical_loss": 5.03545373877234, + "tokens_seen": 71303168 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009882041405873858, + "loss": 0.191, + "theoretical_loss": 5.032785509534391, + "tokens_seen": 71565312 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009881238966458032, + "loss": 0.1886, + "theoretical_loss": 5.030129761438376, + "tokens_seen": 71827456 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.008393102325499058, + "objective/train/docs_used": 33987, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8430488109588623, + "objective/train/original_loss": 3.8430490493774414, + "objective/train/theoretical_loss": 5.0274863908683685, + "objective/train/tokens_used": 92549600, + "objective/train/value_avg": -0.014801025390625, + "objective/train/value_loss": 0.0004412951529957354, + "objective/train/value_max": -0.004314422607421875, + "objective/train/value_min": -0.091064453125, + "objective/train/value_reward_corr": 0.1426956661504004, + "objective/train/value_std": 0.006847381591796875, + "objective/train/weight_avg": 1.0085957050323486, + "objective/train/weighted_lm_loss": 3.876188039779663, + "objective/train/weights_max": 1.0953395366668701, + "objective/train/weights_min": 0.3716682195663452, + "theoretical_loss": 5.0274863908683685, + "tokens_seen": 72089600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009880436527042208, + "loss": 0.1877, + "theoretical_loss": 5.0274863908683685, + "tokens_seen": 72089600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009879634087626384, + "loss": 0.1913, + "theoretical_loss": 5.024855295441432, + "tokens_seen": 72351744 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987883164821056, + "loss": 0.1894, + "theoretical_loss": 5.022236373988544, + "tokens_seen": 72613888 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009878029208794737, + "loss": 0.1873, + "theoretical_loss": 5.01962952653588, + "tokens_seen": 72876032 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009877226769378913, + "loss": 0.1886, + "theoretical_loss": 5.017034654286462, + "tokens_seen": 73138176 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987642432996309, + "loss": 0.1903, + "theoretical_loss": 5.0144516596021385, + "tokens_seen": 73400320 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009875621890547263, + "loss": 0.183, + "theoretical_loss": 5.011880445985916, + "tokens_seen": 73662464 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987481945113144, + "loss": 0.1888, + "theoretical_loss": 5.009320918064615, + "tokens_seen": 73924608 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009874017011715615, + "loss": 0.1886, + "theoretical_loss": 5.006772981571855, + "tokens_seen": 74186752 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009873214572299791, + "loss": 0.1868, + "theoretical_loss": 5.004236543331345, + "tokens_seen": 74448896 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009872412132883967, + "loss": 0.1858, + "theoretical_loss": 5.001711511240506, + "tokens_seen": 74711040 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009871609693468144, + "loss": 0.1867, + "theoretical_loss": 4.999197794254371, + "tokens_seen": 74973184 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987080725405232, + "loss": 0.1865, + "theoretical_loss": 4.9966953023697975, + "tokens_seen": 75235328 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.007199964951723814, + "objective/train/docs_used": 35047, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.54990816116333, + "objective/train/original_loss": 3.54990816116333, + "objective/train/theoretical_loss": 4.995448237995667, + "objective/train/tokens_used": 95826400, + "objective/train/value_avg": -0.01419830322265625, + "objective/train/value_loss": 0.0003746067523024976, + "objective/train/value_max": -0.003337860107421875, + "objective/train/value_min": -0.08599853515625, + "objective/train/value_reward_corr": 0.27188860037013274, + "objective/train/value_std": 0.007312774658203125, + "objective/train/weight_avg": 1.0073779821395874, + "objective/train/weighted_lm_loss": 3.5777623653411865, + "objective/train/weights_max": 1.0898046493530273, + "objective/train/weights_min": 0.3704579770565033, + "theoretical_loss": 4.995448237995667, + "tokens_seen": 75366400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009870004814636496, + "loss": 0.1861, + "theoretical_loss": 4.994203946609964, + "tokens_seen": 75497472 + }, + { + "epoch": 0.02, + "learning_rate": 0.000986920237522067, + "loss": 0.1833, + "theoretical_loss": 4.991723639009154, + "tokens_seen": 75759616 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009868399935804848, + "loss": 0.1844, + "theoretical_loss": 4.989254292597813, + "tokens_seen": 76021760 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009867597496389022, + "loss": 0.1873, + "theoretical_loss": 4.986795821387878, + "tokens_seen": 76283904 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009866795056973198, + "loss": 0.1853, + "theoretical_loss": 4.984348140358374, + "tokens_seen": 76546048 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009865992617557374, + "loss": 0.1862, + "theoretical_loss": 4.981911165441273, + "tokens_seen": 76808192 + }, + { + "epoch": 0.02, + "learning_rate": 0.000986519017814155, + "loss": 0.1847, + "theoretical_loss": 4.979484813507599, + "tokens_seen": 77070336 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009864387738725726, + "loss": 0.1831, + "theoretical_loss": 4.977069002353792, + "tokens_seen": 77332480 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009863585299309903, + "loss": 0.1819, + "theoretical_loss": 4.974663650688306, + "tokens_seen": 77594624 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009862782859894079, + "loss": 0.1851, + "theoretical_loss": 4.972268678118454, + "tokens_seen": 77856768 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009861980420478253, + "loss": 0.1813, + "theoretical_loss": 4.969884005137479, + "tokens_seen": 78118912 + }, + { + "epoch": 0.02, + "learning_rate": 0.000986117798106243, + "loss": 0.1859, + "theoretical_loss": 4.967509553111862, + "tokens_seen": 78381056 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.005118685308843851, + "objective/train/docs_used": 36308, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8223462104797363, + "objective/train/original_loss": 3.8223462104797363, + "objective/train/theoretical_loss": 4.96514524426884, + "objective/train/tokens_used": 99103200, + "objective/train/value_avg": -0.0140838623046875, + "objective/train/value_loss": 0.00042888522148132324, + "objective/train/value_max": -0.00323486328125, + "objective/train/value_min": -0.122314453125, + "objective/train/value_reward_corr": 0.37372443614015616, + "objective/train/value_std": 0.00824737548828125, + "objective/train/weight_avg": 1.0053234100341797, + "objective/train/weighted_lm_loss": 3.844878673553467, + "objective/train/weights_max": 1.107279658317566, + "objective/train/weights_min": 0.3930663764476776, + "theoretical_loss": 4.96514524426884, + "tokens_seen": 78643200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009860375541646605, + "loss": 0.1816, + "theoretical_loss": 4.96514524426884, + "tokens_seen": 78643200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009859573102230783, + "loss": 0.184, + "theoretical_loss": 4.962791001684167, + "tokens_seen": 78905344 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009858770662814957, + "loss": 0.1822, + "theoretical_loss": 4.960446749270055, + "tokens_seen": 79167488 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009857968223399133, + "loss": 0.184, + "theoretical_loss": 4.958112411763365, + "tokens_seen": 79429632 + }, + { + "epoch": 0.02, + "learning_rate": 0.000985716578398331, + "loss": 0.1829, + "theoretical_loss": 4.955787914713962, + "tokens_seen": 79691776 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009856363344567486, + "loss": 0.1834, + "theoretical_loss": 4.953473184473312, + "tokens_seen": 79953920 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009855560905151662, + "loss": 0.183, + "theoretical_loss": 4.951168148183246, + "tokens_seen": 80216064 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009854758465735838, + "loss": 0.1818, + "theoretical_loss": 4.948872733764926, + "tokens_seen": 80478208 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009853956026320014, + "loss": 0.1799, + "theoretical_loss": 4.946586869908014, + "tokens_seen": 80740352 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009853153586904188, + "loss": 0.1828, + "theoretical_loss": 4.944310486060004, + "tokens_seen": 81002496 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009852351147488366, + "loss": 0.1825, + "theoretical_loss": 4.942043512415751, + "tokens_seen": 81264640 + }, + { + "epoch": 0.02, + "learning_rate": 0.000985154870807254, + "loss": 0.1838, + "theoretical_loss": 4.939785879907176, + "tokens_seen": 81526784 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009850746268656716, + "loss": 0.1813, + "theoretical_loss": 4.937537520193139, + "tokens_seen": 81788928 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.004108819644898176, + "objective/train/docs_used": 37569, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.581354856491089, + "objective/train/original_loss": 3.5813543796539307, + "objective/train/theoretical_loss": 4.936416796473786, + "objective/train/tokens_used": 102380000, + "objective/train/value_avg": -0.0154571533203125, + "objective/train/value_loss": 0.001122858258895576, + "objective/train/value_max": -0.00327301025390625, + "objective/train/value_min": -0.1788330078125, + "objective/train/value_reward_corr": 0.38812370857659007, + "objective/train/value_std": 0.0128631591796875, + "objective/train/weight_avg": 1.004586100578308, + "objective/train/weighted_lm_loss": 3.600618600845337, + "objective/train/weights_max": 1.1683320999145508, + "objective/train/weights_min": 0.37106189131736755, + "theoretical_loss": 4.936416796473786, + "tokens_seen": 81920000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009849943829240892, + "loss": 0.1815, + "theoretical_loss": 4.93529836564949, + "tokens_seen": 82051072 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009849141389825069, + "loss": 0.1813, + "theoretical_loss": 4.933068349359283, + "tokens_seen": 82313216 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009848338950409245, + "loss": 0.1783, + "theoretical_loss": 4.93084740510316, + "tokens_seen": 82575360 + }, + { + "epoch": 0.03, + "learning_rate": 0.000984753651099342, + "loss": 0.1779, + "theoretical_loss": 4.928635467349885, + "tokens_seen": 82837504 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009846734071577597, + "loss": 0.1768, + "theoretical_loss": 4.92643247124705, + "tokens_seen": 83099648 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009845931632161773, + "loss": 0.1805, + "theoretical_loss": 4.924238352611924, + "tokens_seen": 83361792 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009845129192745947, + "loss": 0.1793, + "theoretical_loss": 4.922053047922455, + "tokens_seen": 83623936 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009844326753330123, + "loss": 0.1758, + "theoretical_loss": 4.919876494308432, + "tokens_seen": 83886080 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098435243139143, + "loss": 0.1795, + "theoretical_loss": 4.917708629542775, + "tokens_seen": 84148224 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009842721874498475, + "loss": 0.1775, + "theoretical_loss": 4.915549392032985, + "tokens_seen": 84410368 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009841919435082652, + "loss": 0.1797, + "theoretical_loss": 4.913398720812719, + "tokens_seen": 84672512 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009841116995666828, + "loss": 0.176, + "theoretical_loss": 4.9112565555335115, + "tokens_seen": 84934656 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.009054622612893581, + "objective/train/docs_used": 38678, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4969615936279297, + "objective/train/original_loss": 3.496961832046509, + "objective/train/theoretical_loss": 4.909122836456632, + "objective/train/tokens_used": 105656800, + "objective/train/value_avg": -0.013458251953125, + "objective/train/value_loss": 0.00028301688143983483, + "objective/train/value_max": -0.0036067962646484375, + "objective/train/value_min": -0.09588623046875, + "objective/train/value_reward_corr": 0.19258322737032904, + "objective/train/value_std": 0.00646209716796875, + "objective/train/weight_avg": 1.0091947317123413, + "objective/train/weighted_lm_loss": 3.5327301025390625, + "objective/train/weights_max": 1.0849108695983887, + "objective/train/weights_min": 0.8251521587371826, + "theoretical_loss": 4.909122836456632, + "tokens_seen": 85196800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009840314556251004, + "loss": 0.1783, + "theoretical_loss": 4.909122836456632, + "tokens_seen": 85196800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009839512116835178, + "loss": 0.1773, + "theoretical_loss": 4.906997504445066, + "tokens_seen": 85458944 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009838709677419356, + "loss": 0.1789, + "theoretical_loss": 4.904880500955633, + "tokens_seen": 85721088 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983790723800353, + "loss": 0.1752, + "theoretical_loss": 4.90277176803123, + "tokens_seen": 85983232 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009837104798587706, + "loss": 0.1746, + "theoretical_loss": 4.9006712482931984, + "tokens_seen": 86245376 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009836302359171882, + "loss": 0.1762, + "theoretical_loss": 4.89857888493381, + "tokens_seen": 86507520 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009835499919756058, + "loss": 0.1782, + "theoretical_loss": 4.896494621708882, + "tokens_seen": 86769664 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009834697480340234, + "loss": 0.1751, + "theoretical_loss": 4.8944184029305, + "tokens_seen": 87031808 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983389504092441, + "loss": 0.1761, + "theoretical_loss": 4.892350173459863, + "tokens_seen": 87293952 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009833092601508587, + "loss": 0.1734, + "theoretical_loss": 4.890289878700239, + "tokens_seen": 87556096 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009832290162092763, + "loss": 0.1735, + "theoretical_loss": 4.888237464590028, + "tokens_seen": 87818240 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983148772267694, + "loss": 0.1766, + "theoretical_loss": 4.8861928775959464, + "tokens_seen": 88080384 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009830685283261113, + "loss": 0.1752, + "theoretical_loss": 4.884156064706302, + "tokens_seen": 88342528 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.008355207741260529, + "objective/train/docs_used": 39491, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4316039085388184, + "objective/train/original_loss": 3.4316043853759766, + "objective/train/theoretical_loss": 4.8831405571254844, + "objective/train/tokens_used": 108933600, + "objective/train/value_avg": -0.01488494873046875, + "objective/train/value_loss": 0.0006291866302490234, + "objective/train/value_max": -0.0031604766845703125, + "objective/train/value_min": -0.15185546875, + "objective/train/value_reward_corr": 0.39128727304023586, + "objective/train/value_std": 0.00885772705078125, + "objective/train/weight_avg": 1.0086404085159302, + "objective/train/weighted_lm_loss": 3.462775230407715, + "objective/train/weights_max": 1.1140156984329224, + "objective/train/weights_min": 0.3899836838245392, + "theoretical_loss": 4.8831405571254844, + "tokens_seen": 88473600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009829882843845291, + "loss": 0.1745, + "theoretical_loss": 4.882126973424384, + "tokens_seen": 88604672 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009829080404429465, + "loss": 0.1729, + "theoretical_loss": 4.880105551761961, + "tokens_seen": 88866816 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009828277965013641, + "loss": 0.1784, + "theoretical_loss": 4.87809174823286, + "tokens_seen": 89128960 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009827475525597817, + "loss": 0.1722, + "theoretical_loss": 4.876085511846673, + "tokens_seen": 89391104 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009826673086181994, + "loss": 0.1772, + "theoretical_loss": 4.874086792102535, + "tokens_seen": 89653248 + }, + { + "epoch": 0.03, + "learning_rate": 0.000982587064676617, + "loss": 0.1766, + "theoretical_loss": 4.872095538983015, + "tokens_seen": 89915392 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009825068207350346, + "loss": 0.1784, + "theoretical_loss": 4.870111702948094, + "tokens_seen": 90177536 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009824265767934522, + "loss": 0.1736, + "theoretical_loss": 4.868135234929232, + "tokens_seen": 90439680 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009823463328518696, + "loss": 0.1743, + "theoretical_loss": 4.866166086323535, + "tokens_seen": 90701824 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009822660889102874, + "loss": 0.1779, + "theoretical_loss": 4.864204208988003, + "tokens_seen": 90963968 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009821858449687048, + "loss": 0.1733, + "theoretical_loss": 4.86224955523387, + "tokens_seen": 91226112 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009821056010271224, + "loss": 0.1766, + "theoretical_loss": 4.860302077821023, + "tokens_seen": 91488256 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.006558452267199755, + "objective/train/docs_used": 40748, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2776377201080322, + "objective/train/original_loss": 3.2776379585266113, + "objective/train/theoretical_loss": 4.858361729952518, + "objective/train/tokens_used": 112210400, + "objective/train/value_avg": -0.0146636962890625, + "objective/train/value_loss": 0.0004739796568173915, + "objective/train/value_max": -0.00359344482421875, + "objective/train/value_min": -0.0782470703125, + "objective/train/value_reward_corr": 0.21044601071028804, + "objective/train/value_std": 0.007022857666015625, + "objective/train/weight_avg": 1.0067769289016724, + "objective/train/weighted_lm_loss": 3.300762176513672, + "objective/train/weights_max": 1.0740199089050293, + "objective/train/weights_min": 0.3733791708946228, + "theoretical_loss": 4.858361729952518, + "tokens_seen": 91750400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098202535708554, + "loss": 0.1726, + "theoretical_loss": 4.858361729952518, + "tokens_seen": 91750400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009819451131439577, + "loss": 0.1719, + "theoretical_loss": 4.856428465269159, + "tokens_seen": 92012544 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009818648692023753, + "loss": 0.1728, + "theoretical_loss": 4.8545022378441836, + "tokens_seen": 92274688 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009817846252607929, + "loss": 0.174, + "theoretical_loss": 4.852583002178001, + "tokens_seen": 92536832 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009817043813192105, + "loss": 0.1715, + "theoretical_loss": 4.8506707131930344, + "tokens_seen": 92798976 + }, + { + "epoch": 0.03, + "learning_rate": 0.000981624137377628, + "loss": 0.1724, + "theoretical_loss": 4.848765326228618, + "tokens_seen": 93061120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009815438934360455, + "loss": 0.1693, + "theoretical_loss": 4.846866797035984, + "tokens_seen": 93323264 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009814636494944631, + "loss": 0.1727, + "theoretical_loss": 4.844975081773322, + "tokens_seen": 93585408 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009813834055528807, + "loss": 0.1724, + "theoretical_loss": 4.843090137000904, + "tokens_seen": 93847552 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009813031616112983, + "loss": 0.1726, + "theoretical_loss": 4.841211919676287, + "tokens_seen": 94109696 + }, + { + "epoch": 0.03, + "learning_rate": 0.000981222917669716, + "loss": 0.1679, + "theoretical_loss": 4.839340387149586, + "tokens_seen": 94371840 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009811426737281336, + "loss": 0.1715, + "theoretical_loss": 4.837475497158817, + "tokens_seen": 94633984 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009810624297865512, + "loss": 0.1691, + "theoretical_loss": 4.835617207825303, + "tokens_seen": 94896128 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.0007320477161556482, + "objective/train/docs_used": 42039, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1431312561035156, + "objective/train/original_loss": 3.1431312561035156, + "objective/train/theoretical_loss": 4.8346905254216255, + "objective/train/tokens_used": 115487200, + "objective/train/value_avg": -0.01483154296875, + "objective/train/value_loss": 0.0013606772990897298, + "objective/train/value_max": -0.0031108856201171875, + "objective/train/value_min": -0.1416015625, + "objective/train/value_reward_corr": 0.5555549113783049, + "objective/train/value_std": 0.0105743408203125, + "objective/train/weight_avg": 0.9998704791069031, + "objective/train/weighted_lm_loss": 3.1514244079589844, + "objective/train/weights_max": 1.0741158723831177, + "objective/train/weights_min": 0.3726819157600403, + "theoretical_loss": 4.8346905254216255, + "tokens_seen": 95027200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009809821858449686, + "loss": 0.1675, + "theoretical_loss": 4.83376547764915, + "tokens_seen": 95158272 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009809019419033864, + "loss": 0.1731, + "theoretical_loss": 4.831920265504792, + "tokens_seen": 95420416 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009808216979618038, + "loss": 0.1728, + "theoretical_loss": 4.830081530636594, + "tokens_seen": 95682560 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009807414540202216, + "loss": 0.1722, + "theoretical_loss": 4.8282492326545245, + "tokens_seen": 95944704 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980661210078639, + "loss": 0.1718, + "theoretical_loss": 4.826423331529884, + "tokens_seen": 96206848 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009805809661370566, + "loss": 0.1696, + "theoretical_loss": 4.824603787591102, + "tokens_seen": 96468992 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009805007221954742, + "loss": 0.1729, + "theoretical_loss": 4.822790561519591, + "tokens_seen": 96731136 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009804204782538919, + "loss": 0.1719, + "theoretical_loss": 4.8209836143456535, + "tokens_seen": 96993280 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009803402343123095, + "loss": 0.1678, + "theoretical_loss": 4.81918290744446, + "tokens_seen": 97255424 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980259990370727, + "loss": 0.1712, + "theoretical_loss": 4.817388402532074, + "tokens_seen": 97517568 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009801797464291447, + "loss": 0.1768, + "theoretical_loss": 4.815600061661536, + "tokens_seen": 97779712 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980099502487562, + "loss": 0.1706, + "theoretical_loss": 4.813817847219008, + "tokens_seen": 98041856 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.006740586832165718, + "objective/train/docs_used": 43181, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3436176776885986, + "objective/train/original_loss": 3.3436179161071777, + "objective/train/theoretical_loss": 4.812041721919962, + "objective/train/tokens_used": 118764000, + "objective/train/value_avg": -0.013214111328125, + "objective/train/value_loss": 0.0005143888993188739, + "objective/train/value_max": -0.0032100677490234375, + "objective/train/value_min": -0.17919921875, + "objective/train/value_reward_corr": 0.2184640658155713, + "objective/train/value_std": 0.008026123046875, + "objective/train/weight_avg": 1.0069705247879028, + "objective/train/weighted_lm_loss": 3.366760492324829, + "objective/train/weights_max": 1.1044021844863892, + "objective/train/weights_min": 0.3713989555835724, + "theoretical_loss": 4.812041721919962, + "tokens_seen": 98304000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098001925854598, + "loss": 0.1654, + "theoretical_loss": 4.812041721919962, + "tokens_seen": 98304000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009799390146043973, + "loss": 0.1666, + "theoretical_loss": 4.810271648805427, + "tokens_seen": 98566144 + }, + { + "epoch": 0.03, + "learning_rate": 0.000979858770662815, + "loss": 0.1702, + "theoretical_loss": 4.8085075912383015, + "tokens_seen": 98828288 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009797785267212325, + "loss": 0.1683, + "theoretical_loss": 4.806749512899687, + "tokens_seen": 99090432 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009796982827796502, + "loss": 0.1686, + "theoretical_loss": 4.804997377785307, + "tokens_seen": 99352576 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009796180388380678, + "loss": 0.1674, + "theoretical_loss": 4.8032511502019535, + "tokens_seen": 99614720 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009795377948964854, + "loss": 0.1677, + "theoretical_loss": 4.801510794763988, + "tokens_seen": 99876864 + }, + { + "epoch": 0.03, + "learning_rate": 0.000979457550954903, + "loss": 0.1666, + "theoretical_loss": 4.799776276389897, + "tokens_seen": 100139008 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009793773070133206, + "loss": 0.1693, + "theoretical_loss": 4.798047560298882, + "tokens_seen": 100401152 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009792970630717382, + "loss": 0.1695, + "theoretical_loss": 4.796324612007515, + "tokens_seen": 100663296 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009792168191301556, + "loss": 0.1668, + "theoretical_loss": 4.794607397326421, + "tokens_seen": 100925440 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009791365751885732, + "loss": 0.1685, + "theoretical_loss": 4.792895882357019, + "tokens_seen": 101187584 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009790563312469908, + "loss": 0.1679, + "theoretical_loss": 4.791190033488302, + "tokens_seen": 101449728 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.005793612916022539, + "objective/train/docs_used": 44310, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2112574577331543, + "objective/train/original_loss": 3.2112574577331543, + "objective/train/theoretical_loss": 4.790339223416113, + "objective/train/tokens_used": 122040800, + "objective/train/value_avg": -0.01383209228515625, + "objective/train/value_loss": 0.0006092512048780918, + "objective/train/value_max": -0.0031719207763671875, + "objective/train/value_min": -0.1739501953125, + "objective/train/value_reward_corr": 0.3657779485356697, + "objective/train/value_std": 0.0101318359375, + "objective/train/weight_avg": 1.0060677528381348, + "objective/train/weighted_lm_loss": 3.2284328937530518, + "objective/train/weights_max": 1.1621463298797607, + "objective/train/weights_min": 0.3706558644771576, + "theoretical_loss": 4.790339223416113, + "tokens_seen": 101580800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009789760873054085, + "loss": 0.1651, + "theoretical_loss": 4.7894898173936635, + "tokens_seen": 101711872 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978895843363826, + "loss": 0.1639, + "theoretical_loss": 4.787795201027757, + "tokens_seen": 101974016 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009788155994222437, + "loss": 0.1675, + "theoretical_loss": 4.786106151623423, + "tokens_seen": 102236160 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009787353554806613, + "loss": 0.1616, + "theoretical_loss": 4.784422636688622, + "tokens_seen": 102498304 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978655111539079, + "loss": 0.1641, + "theoretical_loss": 4.782744624003442, + "tokens_seen": 102760448 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009785748675974963, + "loss": 0.167, + "theoretical_loss": 4.781072081617127, + "tokens_seen": 103022592 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978494623655914, + "loss": 0.166, + "theoretical_loss": 4.779404977845148, + "tokens_seen": 103284736 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009784143797143315, + "loss": 0.1642, + "theoretical_loss": 4.777743281266321, + "tokens_seen": 103546880 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009783341357727491, + "loss": 0.1611, + "theoretical_loss": 4.776086960719956, + "tokens_seen": 103809024 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009782538918311668, + "loss": 0.1675, + "theoretical_loss": 4.774435985303043, + "tokens_seen": 104071168 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009781736478895844, + "loss": 0.1605, + "theoretical_loss": 4.772790324367482, + "tokens_seen": 104333312 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978093403948002, + "loss": 0.1659, + "theoretical_loss": 4.771149947517346, + "tokens_seen": 104595456 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.006030434276908636, + "objective/train/docs_used": 45444, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.323787212371826, + "objective/train/original_loss": 3.323786973953247, + "objective/train/theoretical_loss": 4.769514824606178, + "objective/train/tokens_used": 125317600, + "objective/train/value_avg": -0.01244354248046875, + "objective/train/value_loss": 0.0006740050157532096, + "objective/train/value_max": -0.00269317626953125, + "objective/train/value_min": -0.1654052734375, + "objective/train/value_reward_corr": 0.42685833190639455, + "objective/train/value_std": 0.008056640625, + "objective/train/weight_avg": 1.0063230991363525, + "objective/train/weighted_lm_loss": 3.346615791320801, + "objective/train/weights_max": 1.1140156984329224, + "objective/train/weights_min": 0.3717136085033417, + "theoretical_loss": 4.769514824606178, + "tokens_seen": 104857600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009780131600064196, + "loss": 0.1615, + "theoretical_loss": 4.769514824606178, + "tokens_seen": 104857600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009779329160648372, + "loss": 0.1595, + "theoretical_loss": 4.7678849257343305, + "tokens_seen": 105119744 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009778526721232546, + "loss": 0.1621, + "theoretical_loss": 4.766260221246329, + "tokens_seen": 105381888 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009777724281816724, + "loss": 0.1613, + "theoretical_loss": 4.764640681728281, + "tokens_seen": 105644032 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009776921842400898, + "loss": 0.1644, + "theoretical_loss": 4.763026278005314, + "tokens_seen": 105906176 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009776119402985074, + "loss": 0.1608, + "theoretical_loss": 4.761416981139046, + "tokens_seen": 106168320 + }, + { + "epoch": 0.03, + "learning_rate": 0.000977531696356925, + "loss": 0.1626, + "theoretical_loss": 4.75981276242509, + "tokens_seen": 106430464 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009774514524153427, + "loss": 0.162, + "theoretical_loss": 4.758213593390595, + "tokens_seen": 106692608 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009773712084737603, + "loss": 0.1606, + "theoretical_loss": 4.756619445791808, + "tokens_seen": 106954752 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009772909645321779, + "loss": 0.1626, + "theoretical_loss": 4.755030291611678, + "tokens_seen": 107216896 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009772107205905955, + "loss": 0.1627, + "theoretical_loss": 4.753446103057492, + "tokens_seen": 107479040 + }, + { + "epoch": 0.03, + "learning_rate": 0.000977130476649013, + "loss": 0.1607, + "theoretical_loss": 4.751866852558529, + "tokens_seen": 107741184 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009770502327074307, + "loss": 0.1627, + "theoretical_loss": 4.7502925127637585, + "tokens_seen": 108003328 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.0075781201012432575, + "objective/train/docs_used": 46682, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.372586250305176, + "objective/train/original_loss": 3.372586727142334, + "objective/train/theoretical_loss": 4.749507175892756, + "objective/train/tokens_used": 128594400, + "objective/train/value_avg": -0.013763427734375, + "objective/train/value_loss": 0.0003684050461743027, + "objective/train/value_max": -0.0030040740966796875, + "objective/train/value_min": -0.1995849609375, + "objective/train/value_reward_corr": 0.3359668813555781, + "objective/train/value_std": 0.0087127685546875, + "objective/train/weight_avg": 1.007756233215332, + "objective/train/weighted_lm_loss": 3.40148663520813, + "objective/train/weights_max": 1.1865150928497314, + "objective/train/weights_min": 0.3717136085033417, + "theoretical_loss": 4.749507175892756, + "tokens_seen": 108134400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009769699887658481, + "loss": 0.1621, + "theoretical_loss": 4.7487230565395535, + "tokens_seen": 108265472 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009768897448242657, + "loss": 0.1611, + "theoretical_loss": 4.747158456967452, + "tokens_seen": 108527616 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009768095008826833, + "loss": 0.162, + "theoretical_loss": 4.745598687341927, + "tokens_seen": 108789760 + }, + { + "epoch": 0.03, + "learning_rate": 0.000976729256941101, + "loss": 0.1631, + "theoretical_loss": 4.744043721168196, + "tokens_seen": 109051904 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009766490129995186, + "loss": 0.1598, + "theoretical_loss": 4.74249353216006, + "tokens_seen": 109314048 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009765687690579362, + "loss": 0.1589, + "theoretical_loss": 4.740948094237761, + "tokens_seen": 109576192 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009764885251163538, + "loss": 0.1612, + "theoretical_loss": 4.739407381525874, + "tokens_seen": 109838336 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009764082811747713, + "loss": 0.1607, + "theoretical_loss": 4.7378713683512235, + "tokens_seen": 110100480 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009763280372331889, + "loss": 0.1598, + "theoretical_loss": 4.7363400292408215, + "tokens_seen": 110362624 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009762477932916065, + "loss": 0.1606, + "theoretical_loss": 4.734813338919842, + "tokens_seen": 110624768 + }, + { + "epoch": 0.03, + "learning_rate": 0.000976167549350024, + "loss": 0.1582, + "theoretical_loss": 4.733291272309609, + "tokens_seen": 110886912 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009760873054084417, + "loss": 0.1579, + "theoretical_loss": 4.731773804525616, + "tokens_seen": 111149056 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.004441777244210243, + "objective/train/docs_used": 47788, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1115875244140625, + "objective/train/original_loss": 3.1115875244140625, + "objective/train/theoretical_loss": 4.73026091087557, + "objective/train/tokens_used": 131871200, + "objective/train/value_avg": -0.01221466064453125, + "objective/train/value_loss": 0.0004770817467942834, + "objective/train/value_max": -0.00266265869140625, + "objective/train/value_min": -0.118408203125, + "objective/train/value_reward_corr": 0.48949773977616423, + "objective/train/value_std": 0.007781982421875, + "objective/train/weight_avg": 1.0046648979187012, + "objective/train/weighted_lm_loss": 3.124300003051758, + "objective/train/weights_max": 1.0862188339233398, + "objective/train/weights_min": 0.3779332637786865, + "theoretical_loss": 4.73026091087557, + "tokens_seen": 111411200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009760070614668593, + "loss": 0.1564, + "theoretical_loss": 4.73026091087557, + "tokens_seen": 111411200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009759268175252769, + "loss": 0.1583, + "theoretical_loss": 4.728752566857459, + "tokens_seen": 111673344 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009758465735836945, + "loss": 0.1586, + "theoretical_loss": 4.72724874815764, + "tokens_seen": 111935488 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009757663296421121, + "loss": 0.1579, + "theoretical_loss": 4.725749430648958, + "tokens_seen": 112197632 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009756860857005296, + "loss": 0.1593, + "theoretical_loss": 4.724254590388881, + "tokens_seen": 112459776 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009756058417589472, + "loss": 0.1575, + "theoretical_loss": 4.722764203617663, + "tokens_seen": 112721920 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009755255978173648, + "loss": 0.159, + "theoretical_loss": 4.7212782467565235, + "tokens_seen": 112984064 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009754453538757823, + "loss": 0.1581, + "theoretical_loss": 4.719796696405858, + "tokens_seen": 113246208 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009753651099342, + "loss": 0.1561, + "theoretical_loss": 4.718319529343462, + "tokens_seen": 113508352 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009752848659926175, + "loss": 0.1577, + "theoretical_loss": 4.716846722522781, + "tokens_seen": 113770496 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009752046220510353, + "loss": 0.155, + "theoretical_loss": 4.715378253071181, + "tokens_seen": 114032640 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009751243781094528, + "loss": 0.1538, + "theoretical_loss": 4.713914098288242, + "tokens_seen": 114294784 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009750441341678703, + "loss": 0.1555, + "theoretical_loss": 4.712454235644064, + "tokens_seen": 114556928 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.008879084140062332, + "objective/train/docs_used": 48994, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0145037174224854, + "objective/train/original_loss": 3.0145039558410645, + "objective/train/theoretical_loss": 4.711725906879634, + "objective/train/tokens_used": 135148000, + "objective/train/value_avg": -0.0135955810546875, + "objective/train/value_loss": 0.0002984044549521059, + "objective/train/value_max": -0.0026531219482421875, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.3232125163801465, + "objective/train/value_std": 0.0081634521484375, + "objective/train/weight_avg": 1.0090208053588867, + "objective/train/weighted_lm_loss": 3.04550838470459, + "objective/train/weights_max": 1.097188949584961, + "objective/train/weights_min": 0.3715803325176239, + "theoretical_loss": 4.711725906879634, + "tokens_seen": 114688000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000974963890226288, + "loss": 0.1568, + "theoretical_loss": 4.710998642777606, + "tokens_seen": 114819072 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009748836462847055, + "loss": 0.1594, + "theoretical_loss": 4.709547297495034, + "tokens_seen": 115081216 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009748034023431231, + "loss": 0.1531, + "theoretical_loss": 4.708100177768094, + "tokens_seen": 115343360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009747231584015407, + "loss": 0.156, + "theoretical_loss": 4.7066572617325075, + "tokens_seen": 115605504 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009746429144599583, + "loss": 0.159, + "theoretical_loss": 4.705218527686375, + "tokens_seen": 115867648 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009745626705183758, + "loss": 0.1583, + "theoretical_loss": 4.703783954088612, + "tokens_seen": 116129792 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009744824265767935, + "loss": 0.159, + "theoretical_loss": 4.702353519557398, + "tokens_seen": 116391936 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009744021826352111, + "loss": 0.1551, + "theoretical_loss": 4.700927202868639, + "tokens_seen": 116654080 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009743219386936286, + "loss": 0.1544, + "theoretical_loss": 4.699504982954452, + "tokens_seen": 116916224 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009742416947520463, + "loss": 0.1559, + "theoretical_loss": 4.698086838901676, + "tokens_seen": 117178368 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009741614508104638, + "loss": 0.154, + "theoretical_loss": 4.696672749950385, + "tokens_seen": 117440512 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009740812068688814, + "loss": 0.1536, + "theoretical_loss": 4.695262695492428, + "tokens_seen": 117702656 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005216541700065136, + "objective/train/docs_used": 50256, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.161985158920288, + "objective/train/original_loss": 3.161985397338867, + "objective/train/theoretical_loss": 4.693856655069986, + "objective/train/tokens_used": 138424800, + "objective/train/value_avg": -0.013153076171875, + "objective/train/value_loss": 0.0008696132572367787, + "objective/train/value_max": -0.00238800048828125, + "objective/train/value_min": -0.26220703125, + "objective/train/value_reward_corr": 0.28899940519313116, + "objective/train/value_std": 0.008697509765625, + "objective/train/weight_avg": 1.0055670738220215, + "objective/train/weighted_lm_loss": 3.1820406913757324, + "objective/train/weights_max": 1.1238499879837036, + "objective/train/weights_min": 0.22635191679000854, + "theoretical_loss": 4.693856655069986, + "tokens_seen": 117964800 + }, + { + "epoch": 0.04, + "learning_rate": 0.000974000962927299, + "loss": 0.1545, + "theoretical_loss": 4.693856655069986, + "tokens_seen": 117964800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009739207189857165, + "loss": 0.1559, + "theoretical_loss": 4.692454608374145, + "tokens_seen": 118226944 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009738404750441343, + "loss": 0.1526, + "theoretical_loss": 4.6910565352434785, + "tokens_seen": 118489088 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009737602311025518, + "loss": 0.1531, + "theoretical_loss": 4.689662415662662, + "tokens_seen": 118751232 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009736799871609694, + "loss": 0.1525, + "theoretical_loss": 4.688272229761087, + "tokens_seen": 119013376 + }, + { + "epoch": 0.04, + "learning_rate": 0.000973599743219387, + "loss": 0.1542, + "theoretical_loss": 4.686885957811503, + "tokens_seen": 119275520 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009735194992778046, + "loss": 0.1505, + "theoretical_loss": 4.685503580228671, + "tokens_seen": 119537664 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009734392553362221, + "loss": 0.1508, + "theoretical_loss": 4.684125077568028, + "tokens_seen": 119799808 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009733590113946397, + "loss": 0.1548, + "theoretical_loss": 4.682750430524376, + "tokens_seen": 120061952 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009732787674530573, + "loss": 0.1532, + "theoretical_loss": 4.6813796199305795, + "tokens_seen": 120324096 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009731985235114748, + "loss": 0.1521, + "theoretical_loss": 4.68001262675628, + "tokens_seen": 120586240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009731182795698925, + "loss": 0.1479, + "theoretical_loss": 4.678649432106627, + "tokens_seen": 120848384 + }, + { + "epoch": 0.04, + "learning_rate": 0.00097303803562831, + "loss": 0.1518, + "theoretical_loss": 4.677290017221017, + "tokens_seen": 121110528 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.006032022647559643, + "objective/train/docs_used": 51395, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9272947311401367, + "objective/train/original_loss": 2.9272947311401367, + "objective/train/theoretical_loss": 4.676611721363443, + "objective/train/tokens_used": 141701600, + "objective/train/value_avg": -0.01284027099609375, + "objective/train/value_loss": 0.000364342937245965, + "objective/train/value_max": -0.001987457275390625, + "objective/train/value_min": -0.30517578125, + "objective/train/value_reward_corr": 0.3808824724269897, + "objective/train/value_std": 0.010894775390625, + "objective/train/weight_avg": 1.0062044858932495, + "objective/train/weighted_lm_loss": 2.945836067199707, + "objective/train/weights_max": 1.2145042419433594, + "objective/train/weights_min": 0.3716398775577545, + "theoretical_loss": 4.676611721363443, + "tokens_seen": 121241600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009729577916867277, + "loss": 0.1482, + "theoretical_loss": 4.675934363471857, + "tokens_seen": 121372672 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009728775477451453, + "loss": 0.1491, + "theoretical_loss": 4.674582452363334, + "tokens_seen": 121634816 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009727973038035629, + "loss": 0.1481, + "theoretical_loss": 4.673234265530201, + "tokens_seen": 121896960 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009727170598619804, + "loss": 0.1474, + "theoretical_loss": 4.671889784736576, + "tokens_seen": 122159104 + }, + { + "epoch": 0.04, + "learning_rate": 0.000972636815920398, + "loss": 0.148, + "theoretical_loss": 4.670548991874758, + "tokens_seen": 122421248 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009725565719788156, + "loss": 0.1472, + "theoretical_loss": 4.669211868964052, + "tokens_seen": 122683392 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009724763280372332, + "loss": 0.1496, + "theoretical_loss": 4.66787839814961, + "tokens_seen": 122945536 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009723960840956508, + "loss": 0.1483, + "theoretical_loss": 4.666548561701285, + "tokens_seen": 123207680 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009723158401540683, + "loss": 0.1466, + "theoretical_loss": 4.665222342012491, + "tokens_seen": 123469824 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009722355962124861, + "loss": 0.1473, + "theoretical_loss": 4.663899721599093, + "tokens_seen": 123731968 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009721553522709036, + "loss": 0.1437, + "theoretical_loss": 4.6625806830982865, + "tokens_seen": 123994112 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009720751083293211, + "loss": 0.1438, + "theoretical_loss": 4.661265209267507, + "tokens_seen": 124256256 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005084542091935873, + "objective/train/docs_used": 52572, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7900288105010986, + "objective/train/original_loss": 2.7900285720825195, + "objective/train/theoretical_loss": 4.659953282983348, + "objective/train/tokens_used": 144978400, + "objective/train/value_avg": -0.01073455810546875, + "objective/train/value_loss": 0.00028898261371068656, + "objective/train/value_max": -0.001483917236328125, + "objective/train/value_min": -0.2239990234375, + "objective/train/value_reward_corr": 0.39944171540418233, + "objective/train/value_std": 0.008880615234375, + "objective/train/weight_avg": 1.00521981716156, + "objective/train/weighted_lm_loss": 2.8061976432800293, + "objective/train/weights_max": 1.1481295824050903, + "objective/train/weights_min": 0.3976996839046478, + "theoretical_loss": 4.659953282983348, + "tokens_seen": 124518400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009719948643877388, + "loss": 0.1466, + "theoretical_loss": 4.659953282983348, + "tokens_seen": 124518400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009719146204461563, + "loss": 0.1488, + "theoretical_loss": 4.658644887240481, + "tokens_seen": 124780544 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009718343765045739, + "loss": 0.1485, + "theoretical_loss": 4.657340005150602, + "tokens_seen": 125042688 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009717541325629915, + "loss": 0.1492, + "theoretical_loss": 4.656038619941382, + "tokens_seen": 125304832 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009716738886214091, + "loss": 0.1457, + "theoretical_loss": 4.654740714955429, + "tokens_seen": 125566976 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009715936446798266, + "loss": 0.145, + "theoretical_loss": 4.653446273649259, + "tokens_seen": 125829120 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009715134007382443, + "loss": 0.1478, + "theoretical_loss": 4.652155279592286, + "tokens_seen": 126091264 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009714331567966619, + "loss": 0.1463, + "theoretical_loss": 4.650867716465819, + "tokens_seen": 126353408 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009713529128550794, + "loss": 0.1463, + "theoretical_loss": 4.6495835680620665, + "tokens_seen": 126615552 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009712726689134971, + "loss": 0.1432, + "theoretical_loss": 4.648302818283158, + "tokens_seen": 126877696 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009711924249719146, + "loss": 0.1477, + "theoretical_loss": 4.64702545114017, + "tokens_seen": 127139840 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009711121810303323, + "loss": 0.1452, + "theoretical_loss": 4.645751450752172, + "tokens_seen": 127401984 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009710319370887498, + "loss": 0.1419, + "theoretical_loss": 4.644480801345268, + "tokens_seen": 127664128 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0065161967650055885, + "objective/train/docs_used": 53741, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8748562335968018, + "objective/train/original_loss": 2.8748559951782227, + "objective/train/theoretical_loss": 4.6438467283594225, + "objective/train/tokens_used": 148255200, + "objective/train/value_avg": -0.0144805908203125, + "objective/train/value_loss": 0.0003499473095871508, + "objective/train/value_max": -0.00269317626953125, + "objective/train/value_min": -0.36376953125, + "objective/train/value_reward_corr": 0.4529447418859527, + "objective/train/value_std": 0.01155853271484375, + "objective/train/weight_avg": 1.0066853761672974, + "objective/train/weighted_lm_loss": 2.8947949409484863, + "objective/train/weights_max": 1.2178692817687988, + "objective/train/weights_min": 0.37203988432884216, + "theoretical_loss": 4.6438467283594225, + "tokens_seen": 127795200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009709516931471673, + "loss": 0.1469, + "theoretical_loss": 4.643213487251664, + "tokens_seen": 127926272 + }, + { + "epoch": 0.04, + "learning_rate": 0.000970871449205585, + "loss": 0.1431, + "theoretical_loss": 4.641949492908737, + "tokens_seen": 128188416 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009707912052640026, + "loss": 0.1445, + "theoretical_loss": 4.640688802858113, + "tokens_seen": 128450560 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009707109613224202, + "loss": 0.1478, + "theoretical_loss": 4.63943140174476, + "tokens_seen": 128712704 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009706307173808378, + "loss": 0.1449, + "theoretical_loss": 4.6381772743160905, + "tokens_seen": 128974848 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009705504734392554, + "loss": 0.1467, + "theoretical_loss": 4.636926405421065, + "tokens_seen": 129236992 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009704702294976729, + "loss": 0.1425, + "theoretical_loss": 4.635678780009318, + "tokens_seen": 129499136 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009703899855560905, + "loss": 0.1433, + "theoretical_loss": 4.634434383130284, + "tokens_seen": 129761280 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009703097416145081, + "loss": 0.1452, + "theoretical_loss": 4.633193199932336, + "tokens_seen": 130023424 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009702294976729256, + "loss": 0.145, + "theoretical_loss": 4.631955215661932, + "tokens_seen": 130285568 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009701492537313433, + "loss": 0.1436, + "theoretical_loss": 4.630720415662774, + "tokens_seen": 130547712 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009700690097897609, + "loss": 0.1374, + "theoretical_loss": 4.629488785374969, + "tokens_seen": 130809856 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005413413047790527, + "objective/train/docs_used": 54897, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7717833518981934, + "objective/train/original_loss": 2.7717831134796143, + "objective/train/theoretical_loss": 4.628260310334209, + "objective/train/tokens_used": 151532000, + "objective/train/value_avg": -0.012939453125, + "objective/train/value_loss": 0.0007990457233972847, + "objective/train/value_max": -0.002124786376953125, + "objective/train/value_min": -0.2451171875, + "objective/train/value_reward_corr": 0.2380011948215656, + "objective/train/value_std": 0.01189422607421875, + "objective/train/weight_avg": 1.0057445764541626, + "objective/train/weighted_lm_loss": 2.7883291244506836, + "objective/train/weights_max": 1.2066720724105835, + "objective/train/weights_min": 0.22692078351974487, + "theoretical_loss": 4.628260310334209, + "tokens_seen": 131072000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009699887658481786, + "loss": 0.1404, + "theoretical_loss": 4.628260310334209, + "tokens_seen": 131072000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009699085219065961, + "loss": 0.1406, + "theoretical_loss": 4.6270349761709495, + "tokens_seen": 131334144 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009698282779650137, + "loss": 0.1373, + "theoretical_loss": 4.625812768609601, + "tokens_seen": 131596288 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009697480340234313, + "loss": 0.1405, + "theoretical_loss": 4.62459367346773, + "tokens_seen": 131858432 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009696677900818488, + "loss": 0.1395, + "theoretical_loss": 4.623377676655271, + "tokens_seen": 132120576 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009695875461402664, + "loss": 0.1398, + "theoretical_loss": 4.622164764173735, + "tokens_seen": 132382720 + }, + { + "epoch": 0.04, + "learning_rate": 0.000969507302198684, + "loss": 0.1402, + "theoretical_loss": 4.62095492211544, + "tokens_seen": 132644864 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009694270582571016, + "loss": 0.1464, + "theoretical_loss": 4.6197481366627455, + "tokens_seen": 132907008 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009693468143155191, + "loss": 0.1408, + "theoretical_loss": 4.618544394087287, + "tokens_seen": 133169152 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009692665703739369, + "loss": 0.1341, + "theoretical_loss": 4.617343680749233, + "tokens_seen": 133431296 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009691863264323544, + "loss": 0.1365, + "theoretical_loss": 4.61614598309653, + "tokens_seen": 133693440 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009691060824907719, + "loss": 0.1375, + "theoretical_loss": 4.614951287664179, + "tokens_seen": 133955584 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009690258385491896, + "loss": 0.139, + "theoretical_loss": 4.613759581073502, + "tokens_seen": 134217728 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005763855762779713, + "objective/train/docs_used": 56110, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6633012294769287, + "objective/train/original_loss": 2.663301467895508, + "objective/train/theoretical_loss": 4.613164844436379, + "objective/train/tokens_used": 154808800, + "objective/train/value_avg": -0.01210784912109375, + "objective/train/value_loss": 0.00031003085314296186, + "objective/train/value_max": -0.0025310516357421875, + "objective/train/value_min": -0.236572265625, + "objective/train/value_reward_corr": 0.1698134460465261, + "objective/train/value_std": 0.0075225830078125, + "objective/train/weight_avg": 1.0059170722961426, + "objective/train/weighted_lm_loss": 2.677839756011963, + "objective/train/weights_max": 1.1739808320999146, + "objective/train/weights_min": 0.7241759300231934, + "theoretical_loss": 4.613164844436379, + "tokens_seen": 134348800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009689455946076071, + "loss": 0.1382, + "theoretical_loss": 4.612570850031418, + "tokens_seen": 134479872 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009688653506660247, + "loss": 0.1358, + "theoretical_loss": 4.611385081329736, + "tokens_seen": 134742016 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009687851067244423, + "loss": 0.1363, + "theoretical_loss": 4.610202261844444, + "tokens_seen": 135004160 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009687048627828599, + "loss": 0.1375, + "theoretical_loss": 4.6090223785350135, + "tokens_seen": 135266304 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009686246188412776, + "loss": 0.1364, + "theoretical_loss": 4.607845418443706, + "tokens_seen": 135528448 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009685443748996951, + "loss": 0.1384, + "theoretical_loss": 4.606671368694888, + "tokens_seen": 135790592 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009684641309581127, + "loss": 0.1369, + "theoretical_loss": 4.6055002164943595, + "tokens_seen": 136052736 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009683838870165303, + "loss": 0.1321, + "theoretical_loss": 4.604331949128672, + "tokens_seen": 136314880 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009683036430749479, + "loss": 0.1365, + "theoretical_loss": 4.603166553964474, + "tokens_seen": 136577024 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009682233991333654, + "loss": 0.1396, + "theoretical_loss": 4.60200401844785, + "tokens_seen": 136839168 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009681431551917831, + "loss": 0.133, + "theoretical_loss": 4.6008443301036746, + "tokens_seen": 137101312 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009680629112502006, + "loss": 0.1364, + "theoretical_loss": 4.5996874765349585, + "tokens_seen": 137363456 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0023926738649606705, + "objective/train/docs_used": 57180, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8781347274780273, + "objective/train/original_loss": 2.8781347274780273, + "objective/train/theoretical_loss": 4.598533445422221, + "objective/train/tokens_used": 158085600, + "objective/train/value_avg": -0.0099029541015625, + "objective/train/value_loss": 0.00033420659019611776, + "objective/train/value_max": -0.0016613006591796875, + "objective/train/value_min": -0.1622314453125, + "objective/train/value_reward_corr": 0.23598254350018186, + "objective/train/value_std": 0.00702667236328125, + "objective/train/weight_avg": 1.0025557279586792, + "objective/train/weighted_lm_loss": 2.8859775066375732, + "objective/train/weights_max": 1.1505653858184814, + "objective/train/weights_min": 0.7221318483352661, + "theoretical_loss": 4.598533445422221, + "tokens_seen": 137625600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009679826673086181, + "loss": 0.1361, + "theoretical_loss": 4.598533445422221, + "tokens_seen": 137625600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009679024233670359, + "loss": 0.1347, + "theoretical_loss": 4.597382224522855, + "tokens_seen": 137887744 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009678221794254534, + "loss": 0.1362, + "theoretical_loss": 4.596233801670502, + "tokens_seen": 138149888 + }, + { + "epoch": 0.04, + "learning_rate": 0.000967741935483871, + "loss": 0.1346, + "theoretical_loss": 4.595088164774435, + "tokens_seen": 138412032 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009676616915422886, + "loss": 0.1367, + "theoretical_loss": 4.593945301818941, + "tokens_seen": 138674176 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009675814476007062, + "loss": 0.1307, + "theoretical_loss": 4.592805200862726, + "tokens_seen": 138936320 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009675012036591238, + "loss": 0.133, + "theoretical_loss": 4.591667850038302, + "tokens_seen": 139198464 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009674209597175413, + "loss": 0.1317, + "theoretical_loss": 4.590533237551401, + "tokens_seen": 139460608 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009673407157759589, + "loss": 0.1315, + "theoretical_loss": 4.589401351680385, + "tokens_seen": 139722752 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009672604718343765, + "loss": 0.1337, + "theoretical_loss": 4.588272180775659, + "tokens_seen": 139984896 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009671802278927941, + "loss": 0.1296, + "theoretical_loss": 4.587145713259102, + "tokens_seen": 140247040 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009670999839512117, + "loss": 0.1301, + "theoretical_loss": 4.5860219376234905, + "tokens_seen": 140509184 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009670197400096294, + "loss": 0.1319, + "theoretical_loss": 4.584900842431934, + "tokens_seen": 140771328 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.006780576426535845, + "objective/train/docs_used": 58312, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.43525767326355, + "objective/train/original_loss": 2.43525767326355, + "objective/train/theoretical_loss": 4.584341296447742, + "objective/train/tokens_used": 161362400, + "objective/train/value_avg": -0.010894775390625, + "objective/train/value_loss": 0.00027257209876552224, + "objective/train/value_max": -0.0015916824340820312, + "objective/train/value_min": -0.367919921875, + "objective/train/value_reward_corr": 0.26859711054421476, + "objective/train/value_std": 0.00720977783203125, + "objective/train/weight_avg": 1.0069042444229126, + "objective/train/weighted_lm_loss": 2.4517366886138916, + "objective/train/weights_max": 1.1828418970108032, + "objective/train/weights_min": 0.3722556531429291, + "theoretical_loss": 4.584341296447742, + "tokens_seen": 140902400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009669394960680469, + "loss": 0.1284, + "theoretical_loss": 4.583782416317316, + "tokens_seen": 141033472 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009668592521264644, + "loss": 0.1302, + "theoretical_loss": 4.582666647981739, + "tokens_seen": 141295616 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009667790081848821, + "loss": 0.128, + "theoretical_loss": 4.581553526195974, + "tokens_seen": 141557760 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009666987642432996, + "loss": 0.1304, + "theoretical_loss": 4.580443039798922, + "tokens_seen": 141819904 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009666185203017172, + "loss": 0.1305, + "theoretical_loss": 4.57933517769707, + "tokens_seen": 142082048 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009665382763601348, + "loss": 0.1314, + "theoretical_loss": 4.578229928863959, + "tokens_seen": 142344192 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009664580324185524, + "loss": 0.1286, + "theoretical_loss": 4.57712728233966, + "tokens_seen": 142606336 + }, + { + "epoch": 0.04, + "learning_rate": 0.00096637778847697, + "loss": 0.127, + "theoretical_loss": 4.576027227230245, + "tokens_seen": 142868480 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009662975445353877, + "loss": 0.1315, + "theoretical_loss": 4.574929752707274, + "tokens_seen": 143130624 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009662173005938052, + "loss": 0.128, + "theoretical_loss": 4.573834848007284, + "tokens_seen": 143392768 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009661370566522228, + "loss": 0.1269, + "theoretical_loss": 4.572742502431272, + "tokens_seen": 143654912 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009660568127106404, + "loss": 0.1264, + "theoretical_loss": 4.571652705344202, + "tokens_seen": 143917056 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.003318200586363673, + "objective/train/docs_used": 59583, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.476109027862549, + "objective/train/original_loss": 2.476109027862549, + "objective/train/theoretical_loss": 4.570565446174504, + "objective/train/tokens_used": 164639200, + "objective/train/value_avg": -0.0129547119140625, + "objective/train/value_loss": 0.0007415720028802752, + "objective/train/value_max": -0.0013942718505859375, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.4452921464992741, + "objective/train/value_std": 0.01247406005859375, + "objective/train/weight_avg": 1.0036523342132568, + "objective/train/weighted_lm_loss": 2.485182285308838, + "objective/train/weights_max": 1.1576626300811768, + "objective/train/weights_min": 0.37203988432884216, + "theoretical_loss": 4.570565446174504, + "tokens_seen": 144179200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009659765687690579, + "loss": 0.1281, + "theoretical_loss": 4.570565446174504, + "tokens_seen": 144179200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009658963248274756, + "loss": 0.1276, + "theoretical_loss": 4.569480714413578, + "tokens_seen": 144441344 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009658160808858931, + "loss": 0.1301, + "theoretical_loss": 4.568398499615305, + "tokens_seen": 144703488 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009657358369443107, + "loss": 0.1291, + "theoretical_loss": 4.56731879139557, + "tokens_seen": 144965632 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009656555930027284, + "loss": 0.1264, + "theoretical_loss": 4.566241579431776, + "tokens_seen": 145227776 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009655753490611459, + "loss": 0.1285, + "theoretical_loss": 4.565166853462371, + "tokens_seen": 145489920 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009654951051195635, + "loss": 0.13, + "theoretical_loss": 4.564094603286375, + "tokens_seen": 145752064 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009654148611779811, + "loss": 0.1248, + "theoretical_loss": 4.5630248187629245, + "tokens_seen": 146014208 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009653346172363987, + "loss": 0.1261, + "theoretical_loss": 4.561957489810798, + "tokens_seen": 146276352 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009652543732948162, + "loss": 0.1243, + "theoretical_loss": 4.5608926064079665, + "tokens_seen": 146538496 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009651741293532339, + "loss": 0.1281, + "theoretical_loss": 4.559830158591139, + "tokens_seen": 146800640 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009650938854116514, + "loss": 0.1229, + "theoretical_loss": 4.558770136455316, + "tokens_seen": 147062784 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009650136414700689, + "loss": 0.1253, + "theoretical_loss": 4.557712530153342, + "tokens_seen": 147324928 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.002189334249123931, + "objective/train/docs_used": 60747, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.354844570159912, + "objective/train/original_loss": 2.354844570159912, + "objective/train/theoretical_loss": 4.557184629878522, + "objective/train/tokens_used": 167916000, + "objective/train/value_avg": -0.00820159912109375, + "objective/train/value_loss": 0.0003651907609310001, + "objective/train/value_max": -0.001239776611328125, + "objective/train/value_min": -0.23779296875, + "objective/train/value_reward_corr": 0.19971720203272725, + "objective/train/value_std": 0.00504302978515625, + "objective/train/weight_avg": 1.0023622512817383, + "objective/train/weighted_lm_loss": 2.3594002723693848, + "objective/train/weights_max": 1.057178020477295, + "objective/train/weights_min": 0.6187294125556946, + "theoretical_loss": 4.557184629878522, + "tokens_seen": 147456000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009649333975284866, + "loss": 0.1229, + "theoretical_loss": 4.556657329895469, + "tokens_seen": 147587072 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009648531535869042, + "loss": 0.1265, + "theoretical_loss": 4.5556045259489135, + "tokens_seen": 147849216 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009647729096453219, + "loss": 0.1269, + "theoretical_loss": 4.554554108637437, + "tokens_seen": 148111360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009646926657037394, + "loss": 0.1242, + "theoretical_loss": 4.553506068340907, + "tokens_seen": 148373504 + }, + { + "epoch": 0.05, + "learning_rate": 0.000964612421762157, + "loss": 0.1229, + "theoretical_loss": 4.552460395494878, + "tokens_seen": 148635648 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009645321778205746, + "loss": 0.1247, + "theoretical_loss": 4.55141708059017, + "tokens_seen": 148897792 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009644519338789921, + "loss": 0.1208, + "theoretical_loss": 4.5503761141724555, + "tokens_seen": 149159936 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009643716899374097, + "loss": 0.1224, + "theoretical_loss": 4.549337486841843, + "tokens_seen": 149422080 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009642914459958273, + "loss": 0.1231, + "theoretical_loss": 4.548301189252473, + "tokens_seen": 149684224 + }, + { + "epoch": 0.05, + "learning_rate": 0.000964211202054245, + "loss": 0.124, + "theoretical_loss": 4.54726721211211, + "tokens_seen": 149946368 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009641309581126625, + "loss": 0.1215, + "theoretical_loss": 4.546235546181743, + "tokens_seen": 150208512 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009640507141710802, + "loss": 0.1179, + "theoretical_loss": 4.545206182275189, + "tokens_seen": 150470656 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004590987227857113, + "objective/train/docs_used": 61752, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.4665567874908447, + "objective/train/original_loss": 2.466557025909424, + "objective/train/theoretical_loss": 4.5441791112587016, + "objective/train/tokens_used": 171192800, + "objective/train/value_avg": -0.01053619384765625, + "objective/train/value_loss": 0.0002645233762450516, + "objective/train/value_max": -0.00133514404296875, + "objective/train/value_min": -0.203369140625, + "objective/train/value_reward_corr": 0.45074871885232737, + "objective/train/value_std": 0.0080718994140625, + "objective/train/weight_avg": 1.004720687866211, + "objective/train/weighted_lm_loss": 2.478607177734375, + "objective/train/weights_max": 1.1166024208068848, + "objective/train/weights_min": 0.6107982993125916, + "theoretical_loss": 4.5441791112587016, + "tokens_seen": 150732800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009639704702294977, + "loss": 0.1199, + "theoretical_loss": 4.5441791112587016, + "tokens_seen": 150732800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009638902262879152, + "loss": 0.1258, + "theoretical_loss": 4.5431543240505725, + "tokens_seen": 150994944 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009638099823463329, + "loss": 0.123, + "theoretical_loss": 4.5421318116207585, + "tokens_seen": 151257088 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009637297384047504, + "loss": 0.1184, + "theoretical_loss": 4.541111564990485, + "tokens_seen": 151519232 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009636494944631681, + "loss": 0.1201, + "theoretical_loss": 4.540093575231879, + "tokens_seen": 151781376 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009635692505215856, + "loss": 0.1185, + "theoretical_loss": 4.539077833467582, + "tokens_seen": 152043520 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009634890065800032, + "loss": 0.12, + "theoretical_loss": 4.538064330870389, + "tokens_seen": 152305664 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009634087626384209, + "loss": 0.1218, + "theoretical_loss": 4.537053058662869, + "tokens_seen": 152567808 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009633285186968385, + "loss": 0.1205, + "theoretical_loss": 4.536044008117005, + "tokens_seen": 152829952 + }, + { + "epoch": 0.05, + "learning_rate": 0.000963248274755256, + "loss": 0.124, + "theoretical_loss": 4.535037170553833, + "tokens_seen": 153092096 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009631680308136736, + "loss": 0.1195, + "theoretical_loss": 4.534032537343078, + "tokens_seen": 153354240 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009630877868720912, + "loss": 0.1228, + "theoretical_loss": 4.533030099902803, + "tokens_seen": 153616384 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009630075429305087, + "loss": 0.1237, + "theoretical_loss": 4.53202984969905, + "tokens_seen": 153878528 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004563726019114256, + "objective/train/docs_used": 63027, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6050822734832764, + "objective/train/original_loss": 2.6050822734832764, + "objective/train/theoretical_loss": 4.531530542157043, + "objective/train/tokens_used": 174469600, + "objective/train/value_avg": -0.00949859619140625, + "objective/train/value_loss": 0.0006905548507347703, + "objective/train/value_max": -0.0010986328125, + "objective/train/value_min": -0.29541015625, + "objective/train/value_reward_corr": 0.3721653551526731, + "objective/train/value_std": 0.00933074951171875, + "objective/train/weight_avg": 1.004849910736084, + "objective/train/weighted_lm_loss": 2.618398904800415, + "objective/train/weights_max": 1.2023754119873047, + "objective/train/weights_min": 0.36947569251060486, + "theoretical_loss": 4.531530542157043, + "tokens_seen": 154009600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009629272989889264, + "loss": 0.1251, + "theoretical_loss": 4.531031778245499, + "tokens_seen": 154140672 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009628470550473439, + "loss": 0.1225, + "theoretical_loss": 4.530035877103115, + "tokens_seen": 154402816 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009627668111057615, + "loss": 0.1218, + "theoretical_loss": 4.529042137879809, + "tokens_seen": 154664960 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009626865671641792, + "loss": 0.1192, + "theoretical_loss": 4.528050552230092, + "tokens_seen": 154927104 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009626063232225967, + "loss": 0.1212, + "theoretical_loss": 4.527061111854746, + "tokens_seen": 155189248 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009625260792810143, + "loss": 0.1221, + "theoretical_loss": 4.526073808500481, + "tokens_seen": 155451392 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009624458353394319, + "loss": 0.1189, + "theoretical_loss": 4.525088633959613, + "tokens_seen": 155713536 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009623655913978495, + "loss": 0.1184, + "theoretical_loss": 4.524105580069728, + "tokens_seen": 155975680 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009622853474562671, + "loss": 0.1185, + "theoretical_loss": 4.523124638713361, + "tokens_seen": 156237824 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009622051035146847, + "loss": 0.1208, + "theoretical_loss": 4.522145801817673, + "tokens_seen": 156499968 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009621248595731022, + "loss": 0.1158, + "theoretical_loss": 4.521169061354129, + "tokens_seen": 156762112 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009620446156315198, + "loss": 0.1177, + "theoretical_loss": 4.520194409338185, + "tokens_seen": 157024256 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004033830948174, + "objective/train/docs_used": 64097, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5532474517822266, + "objective/train/original_loss": 2.5532474517822266, + "objective/train/theoretical_loss": 4.519221837828971, + "objective/train/tokens_used": 177746400, + "objective/train/value_avg": -0.01045989990234375, + "objective/train/value_loss": 0.00029761483892798424, + "objective/train/value_max": -0.0013408660888671875, + "objective/train/value_min": -0.1632080078125, + "objective/train/value_reward_corr": 0.47638880122386174, + "objective/train/value_std": 0.006755828857421875, + "objective/train/weight_avg": 1.0041792392730713, + "objective/train/weighted_lm_loss": 2.564786434173584, + "objective/train/weights_max": 1.0706846714019775, + "objective/train/weights_min": 0.7207310795783997, + "theoretical_loss": 4.519221837828971, + "tokens_seen": 157286400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009619643716899374, + "loss": 0.1201, + "theoretical_loss": 4.519221837828971, + "tokens_seen": 157286400 + }, + { + "epoch": 0.05, + "learning_rate": 0.000961884127748355, + "loss": 0.1197, + "theoretical_loss": 4.51825133892898, + "tokens_seen": 157548544 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009618038838067727, + "loss": 0.1175, + "theoretical_loss": 4.517282904783764, + "tokens_seen": 157810688 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009617236398651902, + "loss": 0.1204, + "theoretical_loss": 4.516316527581621, + "tokens_seen": 158072832 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009616433959236078, + "loss": 0.1225, + "theoretical_loss": 4.515352199553295, + "tokens_seen": 158334976 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009615631519820254, + "loss": 0.1203, + "theoretical_loss": 4.514389912971679, + "tokens_seen": 158597120 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009614829080404429, + "loss": 0.1203, + "theoretical_loss": 4.513429660151513, + "tokens_seen": 158859264 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009614026640988605, + "loss": 0.1171, + "theoretical_loss": 4.51247143344909, + "tokens_seen": 159121408 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009613224201572781, + "loss": 0.12, + "theoretical_loss": 4.511515225261961, + "tokens_seen": 159383552 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009612421762156957, + "loss": 0.1183, + "theoretical_loss": 4.5105610280286506, + "tokens_seen": 159645696 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009611619322741134, + "loss": 0.1196, + "theoretical_loss": 4.509608834228365, + "tokens_seen": 159907840 + }, + { + "epoch": 0.05, + "learning_rate": 0.000961081688332531, + "loss": 0.1155, + "theoretical_loss": 4.508658636380705, + "tokens_seen": 160169984 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009610014443909485, + "loss": 0.1206, + "theoretical_loss": 4.507710427045389, + "tokens_seen": 160432128 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0053785815834999084, + "objective/train/docs_used": 65316, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.3940212726593018, + "objective/train/original_loss": 2.3940210342407227, + "objective/train/theoretical_loss": 4.507237065755672, + "objective/train/tokens_used": 181023200, + "objective/train/value_avg": -0.00850677490234375, + "objective/train/value_loss": 0.00017240087618120015, + "objective/train/value_max": -0.0016107559204101562, + "objective/train/value_min": -0.1729736328125, + "objective/train/value_reward_corr": 0.16862680553399217, + "objective/train/value_std": 0.005374908447265625, + "objective/train/weight_avg": 1.005456805229187, + "objective/train/weighted_lm_loss": 2.4071598052978516, + "objective/train/weights_max": 1.0916688442230225, + "objective/train/weights_min": 0.370330810546875, + "theoretical_loss": 4.507237065755672, + "tokens_seen": 160563200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009609212004493661, + "loss": 0.1181, + "theoretical_loss": 4.50676419882197, + "tokens_seen": 160694272 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009608409565077837, + "loss": 0.1183, + "theoretical_loss": 4.505819944349556, + "tokens_seen": 160956416 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009607607125662012, + "loss": 0.1155, + "theoretical_loss": 4.504877656306535, + "tokens_seen": 161218560 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009606804686246189, + "loss": 0.1189, + "theoretical_loss": 4.503937327410306, + "tokens_seen": 161480704 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009606002246830364, + "loss": 0.1185, + "theoretical_loss": 4.502998950417004, + "tokens_seen": 161742848 + }, + { + "epoch": 0.05, + "learning_rate": 0.000960519980741454, + "loss": 0.1188, + "theoretical_loss": 4.502062518121232, + "tokens_seen": 162004992 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009604397367998717, + "loss": 0.1163, + "theoretical_loss": 4.501128023355796, + "tokens_seen": 162267136 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009603594928582893, + "loss": 0.1164, + "theoretical_loss": 4.500195458991443, + "tokens_seen": 162529280 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009602792489167068, + "loss": 0.1192, + "theoretical_loss": 4.499264817936593, + "tokens_seen": 162791424 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009601990049751244, + "loss": 0.1209, + "theoretical_loss": 4.498336093137089, + "tokens_seen": 163053568 + }, + { + "epoch": 0.05, + "learning_rate": 0.000960118761033542, + "loss": 0.1193, + "theoretical_loss": 4.49740927757593, + "tokens_seen": 163315712 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009600385170919595, + "loss": 0.1164, + "theoretical_loss": 4.496484364273021, + "tokens_seen": 163577856 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.00311545399017632, + "objective/train/docs_used": 66569, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.244389295578003, + "objective/train/original_loss": 2.244389533996582, + "objective/train/theoretical_loss": 4.495561346284925, + "objective/train/tokens_used": 184300000, + "objective/train/value_avg": -0.00873565673828125, + "objective/train/value_loss": 0.00021333472977858037, + "objective/train/value_max": -0.0010862350463867188, + "objective/train/value_min": -0.260009765625, + "objective/train/value_reward_corr": 0.3512147373222607, + "objective/train/value_std": 0.00701904296875, + "objective/train/weight_avg": 1.0032168626785278, + "objective/train/weighted_lm_loss": 2.2516353130340576, + "objective/train/weights_max": 1.1541095972061157, + "objective/train/weights_min": 0.3705597221851349, + "theoretical_loss": 4.495561346284925, + "tokens_seen": 163840000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009599582731503772, + "loss": 0.1161, + "theoretical_loss": 4.495561346284925, + "tokens_seen": 163840000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009598780292087947, + "loss": 0.1176, + "theoretical_loss": 4.494640216704598, + "tokens_seen": 164102144 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009597977852672124, + "loss": 0.1193, + "theoretical_loss": 4.493720968661158, + "tokens_seen": 164364288 + }, + { + "epoch": 0.05, + "learning_rate": 0.00095971754132563, + "loss": 0.116, + "theoretical_loss": 4.492803595319623, + "tokens_seen": 164626432 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009596372973840475, + "loss": 0.1149, + "theoretical_loss": 4.49188808988068, + "tokens_seen": 164888576 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009595570534424652, + "loss": 0.1118, + "theoretical_loss": 4.490974445580429, + "tokens_seen": 165150720 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009594768095008827, + "loss": 0.113, + "theoretical_loss": 4.490062655690153, + "tokens_seen": 165412864 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009593965655593003, + "loss": 0.113, + "theoretical_loss": 4.489152713516077, + "tokens_seen": 165675008 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009593163216177179, + "loss": 0.1102, + "theoretical_loss": 4.488244612399129, + "tokens_seen": 165937152 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009592360776761355, + "loss": 0.1119, + "theoretical_loss": 4.487338345714707, + "tokens_seen": 166199296 + }, + { + "epoch": 0.05, + "learning_rate": 0.000959155833734553, + "loss": 0.1138, + "theoretical_loss": 4.486433906872448, + "tokens_seen": 166461440 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009590755897929706, + "loss": 0.1152, + "theoretical_loss": 4.485531289315997, + "tokens_seen": 166723584 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009589953458513882, + "loss": 0.1099, + "theoretical_loss": 4.484630486522775, + "tokens_seen": 166985728 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.002676489297300577, + "objective/train/docs_used": 67770, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.380342483520508, + "objective/train/original_loss": 2.380342483520508, + "objective/train/theoretical_loss": 4.48418076363325, + "objective/train/tokens_used": 187576800, + "objective/train/value_avg": -0.01169586181640625, + "objective/train/value_loss": 0.0005142788286320865, + "objective/train/value_max": -0.0011920928955078125, + "objective/train/value_min": -0.332763671875, + "objective/train/value_reward_corr": 0.49718080147400245, + "objective/train/value_std": 0.01336669921875, + "objective/train/weight_avg": 1.0029011964797974, + "objective/train/weighted_lm_loss": 2.385775327682495, + "objective/train/weights_max": 1.2041622400283813, + "objective/train/weights_min": 0.39096081256866455, + "theoretical_loss": 4.48418076363325, + "tokens_seen": 167116800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009589151019098058, + "loss": 0.117, + "theoretical_loss": 4.483731492003757, + "tokens_seen": 167247872 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009588348579682235, + "loss": 0.113, + "theoretical_loss": 4.482834299303246, + "tokens_seen": 167510016 + }, + { + "epoch": 0.05, + "learning_rate": 0.000958754614026641, + "loss": 0.1132, + "theoretical_loss": 4.481938901998647, + "tokens_seen": 167772160 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009586743700850587, + "loss": 0.1124, + "theoretical_loss": 4.481045293700248, + "tokens_seen": 168034304 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009585941261434762, + "loss": 0.1135, + "theoretical_loss": 4.480153468051001, + "tokens_seen": 168296448 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009585138822018937, + "loss": 0.1152, + "theoretical_loss": 4.4792634187263065, + "tokens_seen": 168558592 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009584336382603114, + "loss": 0.1142, + "theoretical_loss": 4.4783751394337905, + "tokens_seen": 168820736 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009583533943187289, + "loss": 0.1093, + "theoretical_loss": 4.4774886239131, + "tokens_seen": 169082880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009582731503771465, + "loss": 0.116, + "theoretical_loss": 4.476603865935683, + "tokens_seen": 169345024 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009581929064355642, + "loss": 0.1161, + "theoretical_loss": 4.475720859304583, + "tokens_seen": 169607168 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009581126624939818, + "loss": 0.1115, + "theoretical_loss": 4.474839597854226, + "tokens_seen": 169869312 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009580324185523993, + "loss": 0.1149, + "theoretical_loss": 4.473960075450218, + "tokens_seen": 170131456 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.005700128152966499, + "objective/train/docs_used": 69022, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.486732006072998, + "objective/train/original_loss": 2.486732006072998, + "objective/train/theoretical_loss": 4.473082285989134, + "objective/train/tokens_used": 190853600, + "objective/train/value_avg": -0.0110015869140625, + "objective/train/value_loss": 0.0002670148969627917, + "objective/train/value_max": -0.0014438629150390625, + "objective/train/value_min": -0.30224609375, + "objective/train/value_reward_corr": 0.4285667679812899, + "objective/train/value_std": 0.01032257080078125, + "objective/train/weight_avg": 1.0058281421661377, + "objective/train/weighted_lm_loss": 2.500220775604248, + "objective/train/weights_max": 1.1895605325698853, + "objective/train/weights_min": 0.3700695335865021, + "theoretical_loss": 4.473082285989134, + "tokens_seen": 170393600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009579521746108169, + "loss": 0.118, + "theoretical_loss": 4.473082285989134, + "tokens_seen": 170393600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009578719306692345, + "loss": 0.1144, + "theoretical_loss": 4.472206223398325, + "tokens_seen": 170655744 + }, + { + "epoch": 0.05, + "learning_rate": 0.000957791686727652, + "loss": 0.1134, + "theoretical_loss": 4.471331881635698, + "tokens_seen": 170917888 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009577114427860697, + "loss": 0.113, + "theoretical_loss": 4.470459254689533, + "tokens_seen": 171180032 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009576311988444872, + "loss": 0.1129, + "theoretical_loss": 4.469588336578277, + "tokens_seen": 171442176 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009575509549029048, + "loss": 0.1111, + "theoretical_loss": 4.468719121350343, + "tokens_seen": 171704320 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009574707109613225, + "loss": 0.1102, + "theoretical_loss": 4.467851603083923, + "tokens_seen": 171966464 + }, + { + "epoch": 0.05, + "learning_rate": 0.00095739046701974, + "loss": 0.1133, + "theoretical_loss": 4.466985775886784, + "tokens_seen": 172228608 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009573102230781577, + "loss": 0.1121, + "theoretical_loss": 4.466121633896087, + "tokens_seen": 172490752 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009572299791365752, + "loss": 0.1152, + "theoretical_loss": 4.465259171278182, + "tokens_seen": 172752896 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009571497351949928, + "loss": 0.1114, + "theoretical_loss": 4.464398382228435, + "tokens_seen": 173015040 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009570694912534104, + "loss": 0.1146, + "theoretical_loss": 4.463539260971023, + "tokens_seen": 173277184 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956989247311828, + "loss": 0.1145, + "theoretical_loss": 4.462681801758762, + "tokens_seen": 173539328 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0046385833993554115, + "objective/train/docs_used": 70257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.40885853767395, + "objective/train/original_loss": 2.408858299255371, + "objective/train/theoretical_loss": 4.462253693631352, + "objective/train/tokens_used": 194130400, + "objective/train/value_avg": -0.0102081298828125, + "objective/train/value_loss": 0.00034067677916027606, + "objective/train/value_max": -0.0015306472778320312, + "objective/train/value_min": -0.171630859375, + "objective/train/value_reward_corr": 0.4651631801087514, + "objective/train/value_std": 0.008270263671875, + "objective/train/weight_avg": 1.0047951936721802, + "objective/train/weighted_lm_loss": 2.420976161956787, + "objective/train/weights_max": 1.1363425254821777, + "objective/train/weights_min": 0.5437347888946533, + "theoretical_loss": 4.462253693631352, + "tokens_seen": 173670400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009569090033702455, + "loss": 0.1156, + "theoretical_loss": 4.461825998872914, + "tokens_seen": 173801472 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009568287594286632, + "loss": 0.1096, + "theoretical_loss": 4.460971846623005, + "tokens_seen": 174063616 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009567485154870808, + "loss": 0.1121, + "theoretical_loss": 4.460119339346643, + "tokens_seen": 174325760 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009566682715454983, + "loss": 0.1099, + "theoretical_loss": 4.45926847140934, + "tokens_seen": 174587904 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956588027603916, + "loss": 0.1113, + "theoretical_loss": 4.45841923720433, + "tokens_seen": 174850048 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009565077836623335, + "loss": 0.1171, + "theoretical_loss": 4.4575716311523905, + "tokens_seen": 175112192 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009564275397207511, + "loss": 0.1131, + "theoretical_loss": 4.456725647701669, + "tokens_seen": 175374336 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009563472957791687, + "loss": 0.1123, + "theoretical_loss": 4.455881281327508, + "tokens_seen": 175636480 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009562670518375863, + "loss": 0.1149, + "theoretical_loss": 4.4550385265322685, + "tokens_seen": 175898624 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009561868078960038, + "loss": 0.1116, + "theoretical_loss": 4.45419737784516, + "tokens_seen": 176160768 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009561065639544214, + "loss": 0.1141, + "theoretical_loss": 4.45335782982207, + "tokens_seen": 176422912 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956026320012839, + "loss": 0.1142, + "theoretical_loss": 4.452519877045393, + "tokens_seen": 176685056 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004688139073550701, + "objective/train/docs_used": 71338, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.032062292098999, + "objective/train/original_loss": 2.032062292098999, + "objective/train/theoretical_loss": 4.451683514123864, + "objective/train/tokens_used": 197407200, + "objective/train/value_avg": -0.01114654541015625, + "objective/train/value_loss": 0.00021585206559393555, + "objective/train/value_max": -0.00131988525390625, + "objective/train/value_min": -0.267822265625, + "objective/train/value_reward_corr": 0.5101379035115723, + "objective/train/value_std": 0.008941650390625, + "objective/train/weight_avg": 1.004790186882019, + "objective/train/weighted_lm_loss": 2.041867971420288, + "objective/train/weights_max": 1.1229585409164429, + "objective/train/weights_min": 0.36935028433799744, + "theoretical_loss": 4.451683514123864, + "tokens_seen": 176947200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009559460760712567, + "loss": 0.1141, + "theoretical_loss": 4.451683514123864, + "tokens_seen": 176947200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009558658321296743, + "loss": 0.1138, + "theoretical_loss": 4.450848735692391, + "tokens_seen": 177209344 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009557855881880918, + "loss": 0.1133, + "theoretical_loss": 4.450015536411886, + "tokens_seen": 177471488 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009557053442465095, + "loss": 0.1106, + "theoretical_loss": 4.449183910969108, + "tokens_seen": 177733632 + }, + { + "epoch": 0.05, + "learning_rate": 0.000955625100304927, + "loss": 0.1089, + "theoretical_loss": 4.448353854076494, + "tokens_seen": 177995776 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009555448563633445, + "loss": 0.1118, + "theoretical_loss": 4.4475253604719995, + "tokens_seen": 178257920 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009554646124217622, + "loss": 0.1077, + "theoretical_loss": 4.446698424918937, + "tokens_seen": 178520064 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009553843684801797, + "loss": 0.1118, + "theoretical_loss": 4.44587304220582, + "tokens_seen": 178782208 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009553041245385973, + "loss": 0.1074, + "theoretical_loss": 4.4450492071462, + "tokens_seen": 179044352 + }, + { + "epoch": 0.05, + "learning_rate": 0.000955223880597015, + "loss": 0.1145, + "theoretical_loss": 4.444226914578513, + "tokens_seen": 179306496 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009551436366554326, + "loss": 0.1149, + "theoretical_loss": 4.4434061593659235, + "tokens_seen": 179568640 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009550633927138501, + "loss": 0.116, + "theoretical_loss": 4.442586936396171, + "tokens_seen": 179830784 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009549831487722677, + "loss": 0.1145, + "theoretical_loss": 4.441769240581412, + "tokens_seen": 180092928 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0043923812918365, + "objective/train/docs_used": 72550, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.181901216506958, + "objective/train/original_loss": 2.181901454925537, + "objective/train/theoretical_loss": 4.441360963773846, + "objective/train/tokens_used": 200684000, + "objective/train/value_avg": -0.010162353515625, + "objective/train/value_loss": 0.00025610229931771755, + "objective/train/value_max": -0.0012645721435546875, + "objective/train/value_min": -0.2271728515625, + "objective/train/value_reward_corr": 0.5822503996716603, + "objective/train/value_std": 0.00917816162109375, + "objective/train/weight_avg": 1.0045078992843628, + "objective/train/weighted_lm_loss": 2.191445827484131, + "objective/train/weights_max": 1.0695090293884277, + "objective/train/weights_min": 0.3694573640823364, + "theoretical_loss": 4.441360963773846, + "tokens_seen": 180224000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009549029048306853, + "loss": 0.1193, + "theoretical_loss": 4.440953066858077, + "tokens_seen": 180355072 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009548226608891028, + "loss": 0.1133, + "theoretical_loss": 4.4401384101867105, + "tokens_seen": 180617216 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009547424169475205, + "loss": 0.1127, + "theoretical_loss": 4.439325265551826, + "tokens_seen": 180879360 + }, + { + "epoch": 0.05, + "learning_rate": 0.000954662173005938, + "loss": 0.1135, + "theoretical_loss": 4.438513627961757, + "tokens_seen": 181141504 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009545819290643557, + "loss": 0.1131, + "theoretical_loss": 4.437703492448509, + "tokens_seen": 181403648 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009545016851227733, + "loss": 0.1149, + "theoretical_loss": 4.436894854067614, + "tokens_seen": 181665792 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009544214411811908, + "loss": 0.1132, + "theoretical_loss": 4.436087707897984, + "tokens_seen": 181927936 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009543411972396085, + "loss": 0.1131, + "theoretical_loss": 4.435282049041769, + "tokens_seen": 182190080 + }, + { + "epoch": 0.06, + "learning_rate": 0.000954260953298026, + "loss": 0.1115, + "theoretical_loss": 4.434477872624212, + "tokens_seen": 182452224 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009541807093564436, + "loss": 0.1139, + "theoretical_loss": 4.433675173793507, + "tokens_seen": 182714368 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009541004654148612, + "loss": 0.1125, + "theoretical_loss": 4.43287394772066, + "tokens_seen": 182976512 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009540202214732788, + "loss": 0.1156, + "theoretical_loss": 4.43207418959935, + "tokens_seen": 183238656 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.0008087606402114034, + "objective/train/docs_used": 73723, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.2917051315307617, + "objective/train/original_loss": 2.2917051315307617, + "objective/train/theoretical_loss": 4.431275894645784, + "objective/train/tokens_used": 203960800, + "objective/train/value_avg": -0.01041412353515625, + "objective/train/value_loss": 0.0009768769377842546, + "objective/train/value_max": -0.0010118484497070312, + "objective/train/value_min": -0.2391357421875, + "objective/train/value_reward_corr": 0.8343898472090797, + "objective/train/value_std": 0.0133514404296875, + "objective/train/weight_avg": 0.9996550679206848, + "objective/train/weighted_lm_loss": 2.28629732131958, + "objective/train/weights_max": 1.0813897848129272, + "objective/train/weights_min": 0.3707435429096222, + "theoretical_loss": 4.431275894645784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009539399775316963, + "loss": 0.1081, + "theoretical_loss": 4.431275894645784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.06, + "learning_rate": 0.000953859733590114, + "loss": 0.1118, + "theoretical_loss": 4.43047905809857, + "tokens_seen": 183762944 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009537794896485315, + "loss": 0.1102, + "theoretical_loss": 4.42968367521857, + "tokens_seen": 184025088 + }, + { + "epoch": 0.06, + "learning_rate": 0.000953699245706949, + "loss": 0.1121, + "theoretical_loss": 4.428889741288771, + "tokens_seen": 184287232 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009536190017653668, + "loss": 0.1135, + "theoretical_loss": 4.428097251614145, + "tokens_seen": 184549376 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009535387578237843, + "loss": 0.1115, + "theoretical_loss": 4.427306201521524, + "tokens_seen": 184811520 + }, + { + "epoch": 0.06, + "learning_rate": 0.000953458513882202, + "loss": 0.1107, + "theoretical_loss": 4.426516586359458, + "tokens_seen": 185073664 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009533782699406195, + "loss": 0.1126, + "theoretical_loss": 4.425728401498089, + "tokens_seen": 185335808 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009532980259990371, + "loss": 0.1136, + "theoretical_loss": 4.424941642329019, + "tokens_seen": 185597952 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009532177820574547, + "loss": 0.1118, + "theoretical_loss": 4.42415630426518, + "tokens_seen": 185860096 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009531375381158722, + "loss": 0.1099, + "theoretical_loss": 4.423372382740707, + "tokens_seen": 186122240 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009530572941742898, + "loss": 0.1075, + "theoretical_loss": 4.422589873210806, + "tokens_seen": 186384384 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009529770502327075, + "loss": 0.1079, + "theoretical_loss": 4.4218087711516345, + "tokens_seen": 186646528 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0038929590955376625, + "objective/train/docs_used": 74904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.182293653488159, + "objective/train/original_loss": 2.18229341506958, + "objective/train/theoretical_loss": 4.421418746515567, + "objective/train/tokens_used": 207237600, + "objective/train/value_avg": -0.01084136962890625, + "objective/train/value_loss": 0.0002643898769747466, + "objective/train/value_max": -0.0011692047119140625, + "objective/train/value_min": -0.2230224609375, + "objective/train/value_reward_corr": 0.4553354681669406, + "objective/train/value_std": 0.0082855224609375, + "objective/train/weight_avg": 1.0040152072906494, + "objective/train/weighted_lm_loss": 2.1902849674224854, + "objective/train/weights_max": 1.0839669704437256, + "objective/train/weights_min": 0.3749292194843292, + "theoretical_loss": 4.421418746515567, + "tokens_seen": 186777600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009528968062911251, + "loss": 0.1131, + "theoretical_loss": 4.421029072060167, + "tokens_seen": 186908672 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009528165623495426, + "loss": 0.1097, + "theoretical_loss": 4.420250771454078, + "tokens_seen": 187170816 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009527363184079603, + "loss": 0.1131, + "theoretical_loss": 4.419473864871613, + "tokens_seen": 187432960 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009526560744663778, + "loss": 0.1132, + "theoretical_loss": 4.4186983478714685, + "tokens_seen": 187695104 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009525758305247953, + "loss": 0.1127, + "theoretical_loss": 4.417924216032667, + "tokens_seen": 187957248 + }, + { + "epoch": 0.06, + "learning_rate": 0.000952495586583213, + "loss": 0.111, + "theoretical_loss": 4.417151464954437, + "tokens_seen": 188219392 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009524153426416305, + "loss": 0.1102, + "theoretical_loss": 4.416380090256095, + "tokens_seen": 188481536 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009523350987000481, + "loss": 0.1144, + "theoretical_loss": 4.415610087576923, + "tokens_seen": 188743680 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009522548547584658, + "loss": 0.1116, + "theoretical_loss": 4.414841452576049, + "tokens_seen": 189005824 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009521746108168834, + "loss": 0.1083, + "theoretical_loss": 4.414074180932333, + "tokens_seen": 189267968 + }, + { + "epoch": 0.06, + "learning_rate": 0.000952094366875301, + "loss": 0.1086, + "theoretical_loss": 4.413308268344249, + "tokens_seen": 189530112 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009520141229337185, + "loss": 0.1115, + "theoretical_loss": 4.412543710529766, + "tokens_seen": 189792256 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0025555319152772427, + "objective/train/docs_used": 76077, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.207807779312134, + "objective/train/original_loss": 2.207808017730713, + "objective/train/theoretical_loss": 4.411780503226238, + "objective/train/tokens_used": 210514400, + "objective/train/value_avg": -0.0090789794921875, + "objective/train/value_loss": 0.0002816997584886849, + "objective/train/value_max": -0.0007886886596679688, + "objective/train/value_min": -0.25732421875, + "objective/train/value_reward_corr": 0.4408398455844697, + "objective/train/value_std": 0.0070037841796875, + "objective/train/weight_avg": 1.0026865005493164, + "objective/train/weighted_lm_loss": 2.213961362838745, + "objective/train/weights_max": 1.1967458724975586, + "objective/train/weights_min": 0.38571086525917053, + "theoretical_loss": 4.411780503226238, + "tokens_seen": 190054400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009519338789921361, + "loss": 0.111, + "theoretical_loss": 4.411780503226238, + "tokens_seen": 190054400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009518536350505537, + "loss": 0.1131, + "theoretical_loss": 4.4110186421902835, + "tokens_seen": 190316544 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009517733911089713, + "loss": 0.1077, + "theoretical_loss": 4.4102581231976785, + "tokens_seen": 190578688 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009516931471673888, + "loss": 0.1121, + "theoretical_loss": 4.409498942043237, + "tokens_seen": 190840832 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009516129032258065, + "loss": 0.1111, + "theoretical_loss": 4.408741094540707, + "tokens_seen": 191102976 + }, + { + "epoch": 0.06, + "learning_rate": 0.000951532659284224, + "loss": 0.1087, + "theoretical_loss": 4.407984576522653, + "tokens_seen": 191365120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009514524153426416, + "loss": 0.111, + "theoretical_loss": 4.407229383840347, + "tokens_seen": 191627264 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009513721714010593, + "loss": 0.1098, + "theoretical_loss": 4.406475512363663, + "tokens_seen": 191889408 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009512919274594768, + "loss": 0.1062, + "theoretical_loss": 4.405722957980962, + "tokens_seen": 192151552 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009512116835178944, + "loss": 0.1097, + "theoretical_loss": 4.40497171659899, + "tokens_seen": 192413696 + }, + { + "epoch": 0.06, + "learning_rate": 0.000951131439576312, + "loss": 0.1064, + "theoretical_loss": 4.404221784142768, + "tokens_seen": 192675840 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009510511956347296, + "loss": 0.1104, + "theoretical_loss": 4.403473156555487, + "tokens_seen": 192937984 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009509709516931472, + "loss": 0.1077, + "theoretical_loss": 4.402725829798397, + "tokens_seen": 193200128 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0021290904842317104, + "objective/train/docs_used": 77266, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.261606454849243, + "objective/train/original_loss": 2.261606454849243, + "objective/train/theoretical_loss": 4.40235265297398, + "objective/train/tokens_used": 213791200, + "objective/train/value_avg": -0.00881195068359375, + "objective/train/value_loss": 0.00027175722061656415, + "objective/train/value_max": -0.0007319450378417969, + "objective/train/value_min": -0.278076171875, + "objective/train/value_reward_corr": 0.45116222074040196, + "objective/train/value_std": 0.00730133056640625, + "objective/train/weight_avg": 1.0022554397583008, + "objective/train/weighted_lm_loss": 2.266122341156006, + "objective/train/weights_max": 1.106561303138733, + "objective/train/weights_min": 0.36955323815345764, + "theoretical_loss": 4.40235265297398, + "tokens_seen": 193331200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009508907077515648, + "loss": 0.1074, + "theoretical_loss": 4.4019797998507135, + "tokens_seen": 193462272 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009508104638099823, + "loss": 0.1048, + "theoretical_loss": 4.401235062709502, + "tokens_seen": 193724416 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009507302198684, + "loss": 0.1085, + "theoretical_loss": 4.400491614389582, + "tokens_seen": 193986560 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009506499759268176, + "loss": 0.1047, + "theoretical_loss": 4.3997494509234185, + "tokens_seen": 194248704 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009505697319852351, + "loss": 0.108, + "theoretical_loss": 4.399008568361027, + "tokens_seen": 194510848 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009504894880436528, + "loss": 0.11, + "theoretical_loss": 4.398268962769867, + "tokens_seen": 194772992 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009504092441020703, + "loss": 0.1121, + "theoretical_loss": 4.397530630234744, + "tokens_seen": 195035136 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009503290001604879, + "loss": 0.1068, + "theoretical_loss": 4.396793566857708, + "tokens_seen": 195297280 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009502487562189055, + "loss": 0.1059, + "theoretical_loss": 4.396057768757957, + "tokens_seen": 195559424 + }, + { + "epoch": 0.06, + "learning_rate": 0.000950168512277323, + "loss": 0.1079, + "theoretical_loss": 4.395323232071737, + "tokens_seen": 195821568 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009500882683357406, + "loss": 0.1078, + "theoretical_loss": 4.394589952952247, + "tokens_seen": 196083712 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009500080243941583, + "loss": 0.1036, + "theoretical_loss": 4.393857927569534, + "tokens_seen": 196345856 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.005338339600712061, + "objective/train/docs_used": 78313, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9282069206237793, + "objective/train/original_loss": 1.9282069206237793, + "objective/train/theoretical_loss": 4.393127152110409, + "objective/train/tokens_used": 217068000, + "objective/train/value_avg": -0.01195526123046875, + "objective/train/value_loss": 0.0004946094704791903, + "objective/train/value_max": -0.0013303756713867188, + "objective/train/value_min": -0.52392578125, + "objective/train/value_reward_corr": 0.5750984043308986, + "objective/train/value_std": 0.014373779296875, + "objective/train/weight_avg": 1.0055615901947021, + "objective/train/weighted_lm_loss": 1.9375295639038086, + "objective/train/weights_max": 1.6414918899536133, + "objective/train/weights_min": 0.4095984101295471, + "theoretical_loss": 4.393127152110409, + "tokens_seen": 196608000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009499277804525759, + "loss": 0.1065, + "theoretical_loss": 4.393127152110409, + "tokens_seen": 196608000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009498475365109934, + "loss": 0.1074, + "theoretical_loss": 4.392397622778343, + "tokens_seen": 196870144 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009497672925694111, + "loss": 0.107, + "theoretical_loss": 4.391669335793372, + "tokens_seen": 197132288 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009496870486278286, + "loss": 0.1047, + "theoretical_loss": 4.39094228739201, + "tokens_seen": 197394432 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009496068046862462, + "loss": 0.1091, + "theoretical_loss": 4.390216473827143, + "tokens_seen": 197656576 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009495265607446638, + "loss": 0.103, + "theoretical_loss": 4.389491891367953, + "tokens_seen": 197918720 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009494463168030813, + "loss": 0.108, + "theoretical_loss": 4.388768536299808, + "tokens_seen": 198180864 + }, + { + "epoch": 0.06, + "learning_rate": 0.000949366072861499, + "loss": 0.1101, + "theoretical_loss": 4.388046404924184, + "tokens_seen": 198443008 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009492858289199166, + "loss": 0.1064, + "theoretical_loss": 4.387325493558566, + "tokens_seen": 198705152 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009492055849783342, + "loss": 0.1078, + "theoretical_loss": 4.386605798536362, + "tokens_seen": 198967296 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009491253410367518, + "loss": 0.1102, + "theoretical_loss": 4.385887316206812, + "tokens_seen": 199229440 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009490450970951693, + "loss": 0.1037, + "theoretical_loss": 4.385170042934896, + "tokens_seen": 199491584 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009489648531535869, + "loss": 0.1051, + "theoretical_loss": 4.384453975101251, + "tokens_seen": 199753728 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0038920550141483545, + "objective/train/docs_used": 79488, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9931830167770386, + "objective/train/original_loss": 1.993182897567749, + "objective/train/theoretical_loss": 4.384096392096988, + "objective/train/tokens_used": 220344800, + "objective/train/value_avg": -0.00962066650390625, + "objective/train/value_loss": 0.00014405451656784862, + "objective/train/value_max": -0.0009326934814453125, + "objective/train/value_min": -0.26123046875, + "objective/train/value_reward_corr": 0.6021631165702985, + "objective/train/value_std": 0.00872039794921875, + "objective/train/weight_avg": 1.0039596557617188, + "objective/train/weighted_lm_loss": 2.001870632171631, + "objective/train/weights_max": 1.085601806640625, + "objective/train/weights_min": 0.3811125159263611, + "theoretical_loss": 4.384096392096988, + "tokens_seen": 199884800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009488846092120045, + "loss": 0.1077, + "theoretical_loss": 4.38373910910208, + "tokens_seen": 200015872 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009488043652704221, + "loss": 0.1058, + "theoretical_loss": 4.383025441349063, + "tokens_seen": 200278016 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009487241213288396, + "loss": 0.1104, + "theoretical_loss": 4.382312968269276, + "tokens_seen": 200540160 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009486438773872573, + "loss": 0.1058, + "theoretical_loss": 4.381601686305098, + "tokens_seen": 200802304 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009485636334456749, + "loss": 0.1045, + "theoretical_loss": 4.38089159191413, + "tokens_seen": 201064448 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009484833895040924, + "loss": 0.1023, + "theoretical_loss": 4.380182681569111, + "tokens_seen": 201326592 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009484031455625101, + "loss": 0.1068, + "theoretical_loss": 4.379474951757829, + "tokens_seen": 201588736 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009483229016209276, + "loss": 0.107, + "theoretical_loss": 4.378768398983042, + "tokens_seen": 201850880 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009482426576793453, + "loss": 0.1039, + "theoretical_loss": 4.378063019762392, + "tokens_seen": 202113024 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009481624137377628, + "loss": 0.1046, + "theoretical_loss": 4.377358810628324, + "tokens_seen": 202375168 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009480821697961804, + "loss": 0.1048, + "theoretical_loss": 4.3766557681280025, + "tokens_seen": 202637312 + }, + { + "epoch": 0.06, + "learning_rate": 0.000948001925854598, + "loss": 0.1058, + "theoretical_loss": 4.375953888823233, + "tokens_seen": 202899456 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.001105781877413392, + "objective/train/docs_used": 80605, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.189178228378296, + "objective/train/original_loss": 2.189178228378296, + "objective/train/theoretical_loss": 4.375253169290376, + "objective/train/tokens_used": 223621600, + "objective/train/value_avg": -0.01064300537109375, + "objective/train/value_loss": 0.00040862776222638786, + "objective/train/value_max": -0.0009965896606445312, + "objective/train/value_min": -0.306640625, + "objective/train/value_reward_corr": 0.6113931009977059, + "objective/train/value_std": 0.01380157470703125, + "objective/train/weight_avg": 1.0012942552566528, + "objective/train/weighted_lm_loss": 2.192763328552246, + "objective/train/weights_max": 1.2806123495101929, + "objective/train/weights_min": 0.37096279859542847, + "theoretical_loss": 4.375253169290376, + "tokens_seen": 203161600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009479216819130155, + "loss": 0.105, + "theoretical_loss": 4.375253169290376, + "tokens_seen": 203161600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009478414379714331, + "loss": 0.1059, + "theoretical_loss": 4.374553606120274, + "tokens_seen": 203423744 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009477611940298508, + "loss": 0.1047, + "theoretical_loss": 4.373855195918162, + "tokens_seen": 203685888 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009476809500882684, + "loss": 0.1076, + "theoretical_loss": 4.3731579353036, + "tokens_seen": 203948032 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009476007061466859, + "loss": 0.104, + "theoretical_loss": 4.372461820910382, + "tokens_seen": 204210176 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009475204622051036, + "loss": 0.1065, + "theoretical_loss": 4.371766849386468, + "tokens_seen": 204472320 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009474402182635211, + "loss": 0.1068, + "theoretical_loss": 4.3710730173939005, + "tokens_seen": 204734464 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009473599743219387, + "loss": 0.1063, + "theoretical_loss": 4.370380321608731, + "tokens_seen": 204996608 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009472797303803563, + "loss": 0.108, + "theoretical_loss": 4.369688758720937, + "tokens_seen": 205258752 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009471994864387738, + "loss": 0.1043, + "theoretical_loss": 4.368998325434355, + "tokens_seen": 205520896 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009471192424971916, + "loss": 0.1023, + "theoretical_loss": 4.3683090184666, + "tokens_seen": 205783040 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009470389985556091, + "loss": 0.1021, + "theoretical_loss": 4.367620834548987, + "tokens_seen": 206045184 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009469587546140267, + "loss": 0.1042, + "theoretical_loss": 4.3669337704264635, + "tokens_seen": 206307328 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0008768333937041461, + "objective/train/docs_used": 81724, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.068549871444702, + "objective/train/original_loss": 2.068549871444702, + "objective/train/theoretical_loss": 4.366590657274865, + "objective/train/tokens_used": 226898400, + "objective/train/value_avg": -0.0103302001953125, + "objective/train/value_loss": 0.00021303060930222273, + "objective/train/value_max": -0.0006880760192871094, + "objective/train/value_min": -0.205078125, + "objective/train/value_reward_corr": 0.6560487348637545, + "objective/train/value_std": 0.01080322265625, + "objective/train/weight_avg": 1.0009812116622925, + "objective/train/weighted_lm_loss": 2.0716254711151123, + "objective/train/weights_max": 1.1215885877609253, + "objective/train/weights_min": 0.8218191862106323, + "theoretical_loss": 4.366590657274865, + "tokens_seen": 206438400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009468785106724443, + "loss": 0.1075, + "theoretical_loss": 4.366247822857533, + "tokens_seen": 206569472 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009467982667308619, + "loss": 0.1054, + "theoretical_loss": 4.365562988614176, + "tokens_seen": 206831616 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009467180227892794, + "loss": 0.1035, + "theoretical_loss": 4.364879264481787, + "tokens_seen": 207093760 + }, + { + "epoch": 0.06, + "learning_rate": 0.000946637778847697, + "loss": 0.1039, + "theoretical_loss": 4.364196647259092, + "tokens_seen": 207355904 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009465575349061146, + "loss": 0.1061, + "theoretical_loss": 4.363515133758084, + "tokens_seen": 207618048 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009464772909645321, + "loss": 0.1078, + "theoretical_loss": 4.3628347208039475, + "tokens_seen": 207880192 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009463970470229499, + "loss": 0.1021, + "theoretical_loss": 4.362155405234985, + "tokens_seen": 208142336 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009463168030813674, + "loss": 0.1093, + "theoretical_loss": 4.361477183902554, + "tokens_seen": 208404480 + }, + { + "epoch": 0.06, + "learning_rate": 0.000946236559139785, + "loss": 0.1055, + "theoretical_loss": 4.360800053670989, + "tokens_seen": 208666624 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009461563151982026, + "loss": 0.1034, + "theoretical_loss": 4.360124011417536, + "tokens_seen": 208928768 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009460760712566201, + "loss": 0.105, + "theoretical_loss": 4.359449054032282, + "tokens_seen": 209190912 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009459958273150377, + "loss": 0.1048, + "theoretical_loss": 4.358775178418089, + "tokens_seen": 209453056 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.001143173431046307, + "objective/train/docs_used": 82917, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0207271575927734, + "objective/train/original_loss": 2.0207271575927734, + "objective/train/theoretical_loss": 4.358102381490517, + "objective/train/tokens_used": 230175200, + "objective/train/value_avg": -0.009368896484375, + "objective/train/value_loss": 0.00035423377994447947, + "objective/train/value_max": -0.0006718635559082031, + "objective/train/value_min": -0.259033203125, + "objective/train/value_reward_corr": 0.4731410524064374, + "objective/train/value_std": 0.00994110107421875, + "objective/train/weight_avg": 1.001305103302002, + "objective/train/weighted_lm_loss": 2.0232884883880615, + "objective/train/weights_max": 1.2774591445922852, + "objective/train/weights_min": 0.3702460527420044, + "theoretical_loss": 4.358102381490517, + "tokens_seen": 209715200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009459155833734553, + "loss": 0.1056, + "theoretical_loss": 4.358102381490517, + "tokens_seen": 209715200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009458353394318729, + "loss": 0.1055, + "theoretical_loss": 4.3574306601777675, + "tokens_seen": 209977344 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009457550954902905, + "loss": 0.1016, + "theoretical_loss": 4.356760011420608, + "tokens_seen": 210239488 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009456748515487081, + "loss": 0.1089, + "theoretical_loss": 4.3560904321723095, + "tokens_seen": 210501632 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009455946076071257, + "loss": 0.106, + "theoretical_loss": 4.355421919398576, + "tokens_seen": 210763776 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009455143636655433, + "loss": 0.1057, + "theoretical_loss": 4.354754470077481, + "tokens_seen": 211025920 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009454341197239609, + "loss": 0.1032, + "theoretical_loss": 4.354088081199402, + "tokens_seen": 211288064 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009453538757823784, + "loss": 0.1041, + "theoretical_loss": 4.3534227497669535, + "tokens_seen": 211550208 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009452736318407961, + "loss": 0.1035, + "theoretical_loss": 4.352758472794923, + "tokens_seen": 211812352 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009451933878992136, + "loss": 0.1058, + "theoretical_loss": 4.352095247310208, + "tokens_seen": 212074496 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009451131439576312, + "loss": 0.1048, + "theoretical_loss": 4.351433070351748, + "tokens_seen": 212336640 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009450329000160488, + "loss": 0.1064, + "theoretical_loss": 4.350771938970466, + "tokens_seen": 212598784 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009449526560744663, + "loss": 0.1028, + "theoretical_loss": 4.350111850229202, + "tokens_seen": 212860928 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0043486496433615685, + "objective/train/docs_used": 84140, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9397648572921753, + "objective/train/original_loss": 1.9397649765014648, + "objective/train/theoretical_loss": 4.349782195933957, + "objective/train/tokens_used": 233452000, + "objective/train/value_avg": -0.01163482666015625, + "objective/train/value_loss": 0.00020028821018058807, + "objective/train/value_max": -0.0011072158813476562, + "objective/train/value_min": -0.1822509765625, + "objective/train/value_reward_corr": 0.6988513016032423, + "objective/train/value_std": 0.0160675048828125, + "objective/train/weight_avg": 1.0044445991516113, + "objective/train/weighted_lm_loss": 1.948439121246338, + "objective/train/weights_max": 1.1115705966949463, + "objective/train/weights_min": 0.3973115086555481, + "theoretical_loss": 4.349782195933957, + "tokens_seen": 212992000 + }, + { + "epoch": 0.06, + "learning_rate": 0.000944872412132884, + "loss": 0.1017, + "theoretical_loss": 4.34945280120265, + "tokens_seen": 213123072 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009447921681913016, + "loss": 0.103, + "theoretical_loss": 4.348794788977298, + "tokens_seen": 213385216 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009447119242497192, + "loss": 0.1039, + "theoretical_loss": 4.348137810651366, + "tokens_seen": 213647360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009446316803081368, + "loss": 0.102, + "theoretical_loss": 4.347481863334738, + "tokens_seen": 213909504 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009445514363665544, + "loss": 0.1045, + "theoretical_loss": 4.346826944148912, + "tokens_seen": 214171648 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009444711924249719, + "loss": 0.1049, + "theoretical_loss": 4.3461730502269305, + "tokens_seen": 214433792 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009443909484833896, + "loss": 0.1036, + "theoretical_loss": 4.345520178713323, + "tokens_seen": 214695936 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009443107045418071, + "loss": 0.1077, + "theoretical_loss": 4.344868326764045, + "tokens_seen": 214958080 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009442304606002246, + "loss": 0.1054, + "theoretical_loss": 4.344217491546422, + "tokens_seen": 215220224 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009441502166586424, + "loss": 0.1037, + "theoretical_loss": 4.343567670239084, + "tokens_seen": 215482368 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009440699727170599, + "loss": 0.1059, + "theoretical_loss": 4.342918860031914, + "tokens_seen": 215744512 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009439897287754775, + "loss": 0.1007, + "theoretical_loss": 4.342271058125983, + "tokens_seen": 216006656 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0013779407599940896, + "objective/train/docs_used": 85191, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.144592761993408, + "objective/train/original_loss": 2.1445932388305664, + "objective/train/theoretical_loss": 4.341624261733497, + "objective/train/tokens_used": 236728800, + "objective/train/value_avg": -0.01018524169921875, + "objective/train/value_loss": 0.0004004819202236831, + "objective/train/value_max": -0.0008134841918945312, + "objective/train/value_min": -0.294677734375, + "objective/train/value_reward_corr": 0.4947933347916151, + "objective/train/value_std": 0.01059722900390625, + "objective/train/weight_avg": 1.0015608072280884, + "objective/train/weighted_lm_loss": 2.14778208732605, + "objective/train/weights_max": 1.2878975868225098, + "objective/train/weights_min": 0.3711496591567993, + "theoretical_loss": 4.341624261733497, + "tokens_seen": 216268800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009439094848338951, + "loss": 0.1053, + "theoretical_loss": 4.341624261733497, + "tokens_seen": 216268800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009438292408923127, + "loss": 0.1064, + "theoretical_loss": 4.340978468077735, + "tokens_seen": 216530944 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009437489969507302, + "loss": 0.1038, + "theoretical_loss": 4.340333674392992, + "tokens_seen": 216793088 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009436687530091478, + "loss": 0.1046, + "theoretical_loss": 4.339689877924531, + "tokens_seen": 217055232 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009435885090675654, + "loss": 0.1038, + "theoretical_loss": 4.3390470759285105, + "tokens_seen": 217317376 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009435082651259829, + "loss": 0.1039, + "theoretical_loss": 4.338405265671941, + "tokens_seen": 217579520 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009434280211844006, + "loss": 0.0991, + "theoretical_loss": 4.337764444432625, + "tokens_seen": 217841664 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009433477772428182, + "loss": 0.1038, + "theoretical_loss": 4.337124609499101, + "tokens_seen": 218103808 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009432675333012359, + "loss": 0.1003, + "theoretical_loss": 4.336485758170589, + "tokens_seen": 218365952 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009431872893596534, + "loss": 0.106, + "theoretical_loss": 4.335847887756934, + "tokens_seen": 218628096 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009431070454180709, + "loss": 0.1021, + "theoretical_loss": 4.335210995578553, + "tokens_seen": 218890240 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009430268014764886, + "loss": 0.1041, + "theoretical_loss": 4.334575078966383, + "tokens_seen": 219152384 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009429465575349061, + "loss": 0.1049, + "theoretical_loss": 4.333940135261823, + "tokens_seen": 219414528 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.002295425161719322, + "objective/train/docs_used": 86440, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9799681901931763, + "objective/train/original_loss": 1.9799679517745972, + "objective/train/theoretical_loss": 4.3336230274219325, + "objective/train/tokens_used": 240005600, + "objective/train/value_avg": -0.00746917724609375, + "objective/train/value_loss": 0.00019181027892045677, + "objective/train/value_max": -0.0007948875427246094, + "objective/train/value_min": -0.2081298828125, + "objective/train/value_reward_corr": 0.306679385928593, + "objective/train/value_std": 0.00490570068359375, + "objective/train/weight_avg": 1.002386212348938, + "objective/train/weighted_lm_loss": 1.9838651418685913, + "objective/train/weights_max": 1.0839669704437256, + "objective/train/weights_min": 0.37081003189086914, + "theoretical_loss": 4.3336230274219325, + "tokens_seen": 219545600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009428663135933237, + "loss": 0.1019, + "theoretical_loss": 4.333306161816684, + "tokens_seen": 219676672 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009427860696517413, + "loss": 0.1057, + "theoretical_loss": 4.332673155993131, + "tokens_seen": 219938816 + }, + { + "epoch": 0.07, + "learning_rate": 0.000942705825710159, + "loss": 0.1016, + "theoretical_loss": 4.332041115163636, + "tokens_seen": 220200960 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009426255817685765, + "loss": 0.1049, + "theoretical_loss": 4.331410036710925, + "tokens_seen": 220463104 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009425453378269941, + "loss": 0.1027, + "theoretical_loss": 4.330779918027919, + "tokens_seen": 220725248 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009424650938854117, + "loss": 0.1047, + "theoretical_loss": 4.330150756517692, + "tokens_seen": 220987392 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009423848499438292, + "loss": 0.1025, + "theoretical_loss": 4.3295225495934115, + "tokens_seen": 221249536 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009423046060022469, + "loss": 0.0982, + "theoretical_loss": 4.328895294678292, + "tokens_seen": 221511680 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009422243620606644, + "loss": 0.103, + "theoretical_loss": 4.32826898920554, + "tokens_seen": 221773824 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009421441181190821, + "loss": 0.1057, + "theoretical_loss": 4.3276436306183115, + "tokens_seen": 222035968 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009420638741774996, + "loss": 0.1003, + "theoretical_loss": 4.327019216369651, + "tokens_seen": 222298112 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009419836302359171, + "loss": 0.1012, + "theoretical_loss": 4.32639574392245, + "tokens_seen": 222560256 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0020489406306296587, + "objective/train/docs_used": 87601, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8337969779968262, + "objective/train/original_loss": 1.8337969779968262, + "objective/train/theoretical_loss": 4.325773210749392, + "objective/train/tokens_used": 243282400, + "objective/train/value_avg": -0.0086669921875, + "objective/train/value_loss": 0.000297574297292158, + "objective/train/value_max": -0.0009889602661132812, + "objective/train/value_min": -0.6943359375, + "objective/train/value_reward_corr": 0.5675373911477959, + "objective/train/value_std": 0.0108795166015625, + "objective/train/weight_avg": 1.0021827220916748, + "objective/train/weighted_lm_loss": 1.838815450668335, + "objective/train/weights_max": 1.4390860795974731, + "objective/train/weights_min": 0.37089207768440247, + "theoretical_loss": 4.325773210749392, + "tokens_seen": 222822400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009419033862943349, + "loss": 0.0976, + "theoretical_loss": 4.325773210749392, + "tokens_seen": 222822400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009418231423527524, + "loss": 0.1022, + "theoretical_loss": 4.325151614332908, + "tokens_seen": 223084544 + }, + { + "epoch": 0.07, + "learning_rate": 0.00094174289841117, + "loss": 0.1034, + "theoretical_loss": 4.3245309521651265, + "tokens_seen": 223346688 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009416626544695876, + "loss": 0.103, + "theoretical_loss": 4.323911221747817, + "tokens_seen": 223608832 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009415824105280052, + "loss": 0.1048, + "theoretical_loss": 4.323292420592356, + "tokens_seen": 223870976 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009415021665864227, + "loss": 0.1024, + "theoretical_loss": 4.322674546219666, + "tokens_seen": 224133120 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009414219226448404, + "loss": 0.1028, + "theoretical_loss": 4.322057596160174, + "tokens_seen": 224395264 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009413416787032579, + "loss": 0.0994, + "theoretical_loss": 4.321441567953762, + "tokens_seen": 224657408 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009412614347616754, + "loss": 0.1017, + "theoretical_loss": 4.320826459149725, + "tokens_seen": 224919552 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009411811908200932, + "loss": 0.1075, + "theoretical_loss": 4.3202122673067125, + "tokens_seen": 225181696 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009411009468785107, + "loss": 0.1007, + "theoretical_loss": 4.319598989992695, + "tokens_seen": 225443840 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009410207029369283, + "loss": 0.1049, + "theoretical_loss": 4.318986624784908, + "tokens_seen": 225705984 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009409404589953459, + "loss": 0.1034, + "theoretical_loss": 4.318375169269813, + "tokens_seen": 225968128 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0023550805635750294, + "objective/train/docs_used": 88830, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9909827709197998, + "objective/train/original_loss": 1.9909827709197998, + "objective/train/theoretical_loss": 4.3180697818953035, + "objective/train/tokens_used": 246559200, + "objective/train/value_avg": -0.00814056396484375, + "objective/train/value_loss": 0.00033672992140054703, + "objective/train/value_max": -0.0005397796630859375, + "objective/train/value_min": -0.321533203125, + "objective/train/value_reward_corr": 0.5563445957193008, + "objective/train/value_std": 0.010406494140625, + "objective/train/weight_avg": 1.0025025606155396, + "objective/train/weighted_lm_loss": 1.9960408210754395, + "objective/train/weights_max": 1.258882761001587, + "objective/train/weights_min": 0.3709288537502289, + "theoretical_loss": 4.3180697818953035, + "tokens_seen": 226099200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009408602150537635, + "loss": 0.1014, + "theoretical_loss": 4.317764621043046, + "tokens_seen": 226230272 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009407799711121811, + "loss": 0.0995, + "theoretical_loss": 4.317154977709375, + "tokens_seen": 226492416 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009406997271705986, + "loss": 0.1001, + "theoretical_loss": 4.3165462368826555, + "tokens_seen": 226754560 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009406194832290162, + "loss": 0.0995, + "theoretical_loss": 4.315938396185782, + "tokens_seen": 227016704 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009405392392874338, + "loss": 0.1001, + "theoretical_loss": 4.315331453250648, + "tokens_seen": 227278848 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009404589953458514, + "loss": 0.0994, + "theoretical_loss": 4.314725405718099, + "tokens_seen": 227540992 + }, + { + "epoch": 0.07, + "learning_rate": 0.000940378751404269, + "loss": 0.0997, + "theoretical_loss": 4.314120251237887, + "tokens_seen": 227803136 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009402985074626867, + "loss": 0.103, + "theoretical_loss": 4.31351598746863, + "tokens_seen": 228065280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009402182635211042, + "loss": 0.1013, + "theoretical_loss": 4.312912612077767, + "tokens_seen": 228327424 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009401380195795217, + "loss": 0.1022, + "theoretical_loss": 4.312310122741512, + "tokens_seen": 228589568 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009400577756379394, + "loss": 0.1028, + "theoretical_loss": 4.311708517144817, + "tokens_seen": 228851712 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009399775316963569, + "loss": 0.1024, + "theoretical_loss": 4.311107792981323, + "tokens_seen": 229113856 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0026854872703552246, + "objective/train/docs_used": 90061, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.144618511199951, + "objective/train/original_loss": 2.144618511199951, + "objective/train/theoretical_loss": 4.310507947953321, + "objective/train/tokens_used": 249836000, + "objective/train/value_avg": -0.00982666015625, + "objective/train/value_loss": 0.00028081017080694437, + "objective/train/value_max": -0.0007295608520507812, + "objective/train/value_min": -0.66845703125, + "objective/train/value_reward_corr": 0.6199593894084428, + "objective/train/value_std": 0.01155853271484375, + "objective/train/weight_avg": 1.0028172731399536, + "objective/train/weighted_lm_loss": 2.1502115726470947, + "objective/train/weights_max": 1.3387657403945923, + "objective/train/weights_min": 0.37122613191604614, + "theoretical_loss": 4.310507947953321, + "tokens_seen": 229376000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009398972877547745, + "loss": 0.1031, + "theoretical_loss": 4.310507947953321, + "tokens_seen": 229376000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009398170438131921, + "loss": 0.1045, + "theoretical_loss": 4.309908979771709, + "tokens_seen": 229638144 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009397367998716097, + "loss": 0.1044, + "theoretical_loss": 4.3093108861559495, + "tokens_seen": 229900288 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009396565559300272, + "loss": 0.1022, + "theoretical_loss": 4.308713664834029, + "tokens_seen": 230162432 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009395763119884449, + "loss": 0.0958, + "theoretical_loss": 4.308117313542413, + "tokens_seen": 230424576 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009394960680468625, + "loss": 0.1022, + "theoretical_loss": 4.30752183002601, + "tokens_seen": 230686720 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009394158241052801, + "loss": 0.099, + "theoretical_loss": 4.3069272120381275, + "tokens_seen": 230948864 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009393355801636977, + "loss": 0.102, + "theoretical_loss": 4.30633345734043, + "tokens_seen": 231211008 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009392553362221152, + "loss": 0.1004, + "theoretical_loss": 4.3057405637029, + "tokens_seen": 231473152 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009391750922805329, + "loss": 0.1015, + "theoretical_loss": 4.305148528903798, + "tokens_seen": 231735296 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009390948483389504, + "loss": 0.1023, + "theoretical_loss": 4.304557350729623, + "tokens_seen": 231997440 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009390146043973679, + "loss": 0.1002, + "theoretical_loss": 4.303967026975072, + "tokens_seen": 232259584 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009389343604557857, + "loss": 0.1024, + "theoretical_loss": 4.303377555442998, + "tokens_seen": 232521728 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0007922460208646953, + "objective/train/docs_used": 91258, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9420995712280273, + "objective/train/original_loss": 1.9420994520187378, + "objective/train/theoretical_loss": 4.303083138576003, + "objective/train/tokens_used": 253112800, + "objective/train/value_avg": -0.006786346435546875, + "objective/train/value_loss": 0.0005959350382909179, + "objective/train/value_max": -0.0007352828979492188, + "objective/train/value_min": -0.293701171875, + "objective/train/value_reward_corr": 0.39993119929541304, + "objective/train/value_std": 0.007472991943359375, + "objective/train/weight_avg": 1.0010305643081665, + "objective/train/weighted_lm_loss": 1.9428025484085083, + "objective/train/weights_max": 1.215839147567749, + "objective/train/weights_min": 0.2238616794347763, + "theoretical_loss": 4.303083138576003, + "tokens_seen": 232652800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009388541165142032, + "loss": 0.098, + "theoretical_loss": 4.302788933944375, + "tokens_seen": 232783872 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009387738725726208, + "loss": 0.1014, + "theoretical_loss": 4.302201160298255, + "tokens_seen": 233046016 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009386936286310384, + "loss": 0.1019, + "theoretical_loss": 4.301614232331733, + "tokens_seen": 233308160 + }, + { + "epoch": 0.07, + "learning_rate": 0.000938613384689456, + "loss": 0.101, + "theoretical_loss": 4.301028147879904, + "tokens_seen": 233570304 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009385331407478735, + "loss": 0.0983, + "theoretical_loss": 4.300442904785831, + "tokens_seen": 233832448 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009384528968062911, + "loss": 0.1014, + "theoretical_loss": 4.299858500900495, + "tokens_seen": 234094592 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009383726528647087, + "loss": 0.1024, + "theoretical_loss": 4.2992749340827725, + "tokens_seen": 234356736 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009382924089231262, + "loss": 0.1028, + "theoretical_loss": 4.298692202199386, + "tokens_seen": 234618880 + }, + { + "epoch": 0.07, + "learning_rate": 0.000938212164981544, + "loss": 0.101, + "theoretical_loss": 4.298110303124871, + "tokens_seen": 234881024 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009381319210399615, + "loss": 0.1022, + "theoretical_loss": 4.29752923474154, + "tokens_seen": 235143168 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009380516770983792, + "loss": 0.1006, + "theoretical_loss": 4.29694899493944, + "tokens_seen": 235405312 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009379714331567967, + "loss": 0.1017, + "theoretical_loss": 4.2963695816163225, + "tokens_seen": 235667456 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0031256088986992836, + "objective/train/docs_used": 92471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.102999448776245, + "objective/train/original_loss": 2.102999687194824, + "objective/train/theoretical_loss": 4.295790992677603, + "objective/train/tokens_used": 256389600, + "objective/train/value_avg": -0.01329803466796875, + "objective/train/value_loss": 0.0007132674218155444, + "objective/train/value_max": -0.0005130767822265625, + "objective/train/value_min": -0.8544921875, + "objective/train/value_reward_corr": 0.6135290805544735, + "objective/train/value_std": 0.0253448486328125, + "objective/train/weight_avg": 1.0034579038619995, + "objective/train/weighted_lm_loss": 2.107316493988037, + "objective/train/weights_max": 1.5163893699645996, + "objective/train/weights_min": 0.39436399936676025, + "theoretical_loss": 4.295790992677603, + "tokens_seen": 235929600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009378911892152143, + "loss": 0.0997, + "theoretical_loss": 4.295790992677603, + "tokens_seen": 235929600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009378109452736319, + "loss": 0.1009, + "theoretical_loss": 4.2952132260363225, + "tokens_seen": 236191744 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009377307013320494, + "loss": 0.0996, + "theoretical_loss": 4.294636279613117, + "tokens_seen": 236453888 + }, + { + "epoch": 0.07, + "learning_rate": 0.000937650457390467, + "loss": 0.1036, + "theoretical_loss": 4.294060151336178, + "tokens_seen": 236716032 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009375702134488846, + "loss": 0.1019, + "theoretical_loss": 4.293484839141217, + "tokens_seen": 236978176 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009374899695073022, + "loss": 0.1, + "theoretical_loss": 4.29291034097143, + "tokens_seen": 237240320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009374097255657198, + "loss": 0.097, + "theoretical_loss": 4.2923366547774595, + "tokens_seen": 237502464 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009373294816241375, + "loss": 0.1002, + "theoretical_loss": 4.2917637785173675, + "tokens_seen": 237764608 + }, + { + "epoch": 0.07, + "learning_rate": 0.000937249237682555, + "loss": 0.1004, + "theoretical_loss": 4.291191710156591, + "tokens_seen": 238026752 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009371689937409725, + "loss": 0.1003, + "theoretical_loss": 4.290620447667912, + "tokens_seen": 238288896 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009370887497993902, + "loss": 0.0979, + "theoretical_loss": 4.290049989031424, + "tokens_seen": 238551040 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009370085058578077, + "loss": 0.1033, + "theoretical_loss": 4.289480332234493, + "tokens_seen": 238813184 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009369282619162254, + "loss": 0.1012, + "theoretical_loss": 4.288911475271731, + "tokens_seen": 239075328 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.001886105164885521, + "objective/train/docs_used": 93570, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0680296421051025, + "objective/train/original_loss": 2.0680294036865234, + "objective/train/theoretical_loss": 4.28862734610345, + "objective/train/tokens_used": 259666400, + "objective/train/value_avg": -0.008026123046875, + "objective/train/value_loss": 0.0002583606983534992, + "objective/train/value_max": -0.0006852149963378906, + "objective/train/value_min": -0.2454833984375, + "objective/train/value_reward_corr": 0.48885293063683233, + "objective/train/value_std": 0.007061004638671875, + "objective/train/weight_avg": 1.0020033121109009, + "objective/train/weighted_lm_loss": 2.0723109245300293, + "objective/train/weights_max": 1.1007682085037231, + "objective/train/weights_min": 0.3832702934741974, + "theoretical_loss": 4.28862734610345, + "tokens_seen": 239206400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009368480179746429, + "loss": 0.1002, + "theoretical_loss": 4.288343416144952, + "tokens_seen": 239337472 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009367677740330605, + "loss": 0.1023, + "theoretical_loss": 4.287776152863146, + "tokens_seen": 239599616 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009366875300914782, + "loss": 0.1016, + "theoretical_loss": 4.287209683442444, + "tokens_seen": 239861760 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009366072861498957, + "loss": 0.1004, + "theoretical_loss": 4.286644005906081, + "tokens_seen": 240123904 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009365270422083133, + "loss": 0.101, + "theoretical_loss": 4.286079118284368, + "tokens_seen": 240386048 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009364467982667309, + "loss": 0.1034, + "theoretical_loss": 4.285515018614655, + "tokens_seen": 240648192 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009363665543251485, + "loss": 0.096, + "theoretical_loss": 4.2849517049412995, + "tokens_seen": 240910336 + }, + { + "epoch": 0.07, + "learning_rate": 0.000936286310383566, + "loss": 0.0974, + "theoretical_loss": 4.284389175315636, + "tokens_seen": 241172480 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009362060664419837, + "loss": 0.0995, + "theoretical_loss": 4.283827427795939, + "tokens_seen": 241434624 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009361258225004012, + "loss": 0.0987, + "theoretical_loss": 4.283266460447394, + "tokens_seen": 241696768 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009360455785588187, + "loss": 0.0967, + "theoretical_loss": 4.282706271342066, + "tokens_seen": 241958912 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009359653346172365, + "loss": 0.1028, + "theoretical_loss": 4.282146858558866, + "tokens_seen": 242221056 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.001827435800805688, + "objective/train/docs_used": 94878, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0017893314361572, + "objective/train/original_loss": 2.001789093017578, + "objective/train/theoretical_loss": 4.281588220183519, + "objective/train/tokens_used": 262943200, + "objective/train/value_avg": -0.01013946533203125, + "objective/train/value_loss": 0.0004723604361061007, + "objective/train/value_max": -0.0006642341613769531, + "objective/train/value_min": -0.4013671875, + "objective/train/value_reward_corr": 0.613649177640665, + "objective/train/value_std": 0.0154876708984375, + "objective/train/weight_avg": 1.0020326375961304, + "objective/train/weighted_lm_loss": 2.0051186084747314, + "objective/train/weights_max": 1.3551794290542603, + "objective/train/weights_min": 0.37200865149497986, + "theoretical_loss": 4.281588220183519, + "tokens_seen": 242483200 + }, + { + "epoch": 0.07, + "learning_rate": 0.000935885090675654, + "loss": 0.0977, + "theoretical_loss": 4.281588220183519, + "tokens_seen": 242483200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009358048467340716, + "loss": 0.0995, + "theoretical_loss": 4.281030354308533, + "tokens_seen": 242745344 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009357246027924892, + "loss": 0.0989, + "theoretical_loss": 4.280473259033169, + "tokens_seen": 243007488 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009356443588509068, + "loss": 0.0982, + "theoretical_loss": 4.27991693246341, + "tokens_seen": 243269632 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009355641149093244, + "loss": 0.0986, + "theoretical_loss": 4.279361372711923, + "tokens_seen": 243531776 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009354838709677419, + "loss": 0.1022, + "theoretical_loss": 4.278806577898042, + "tokens_seen": 243793920 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009354036270261595, + "loss": 0.0979, + "theoretical_loss": 4.278252546147724, + "tokens_seen": 244056064 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009353233830845771, + "loss": 0.0995, + "theoretical_loss": 4.277699275593523, + "tokens_seen": 244318208 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009352431391429948, + "loss": 0.0949, + "theoretical_loss": 4.277146764374566, + "tokens_seen": 244580352 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009351628952014123, + "loss": 0.0999, + "theoretical_loss": 4.276595010636514, + "tokens_seen": 244842496 + }, + { + "epoch": 0.07, + "learning_rate": 0.00093508265125983, + "loss": 0.0993, + "theoretical_loss": 4.276044012531534, + "tokens_seen": 245104640 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009350024073182475, + "loss": 0.1013, + "theoretical_loss": 4.275493768218274, + "tokens_seen": 245366784 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009349221633766651, + "loss": 0.0975, + "theoretical_loss": 4.274944275861828, + "tokens_seen": 245628928 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.002574330661445856, + "objective/train/docs_used": 96014, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0550389289855957, + "objective/train/original_loss": 2.055039405822754, + "objective/train/theoretical_loss": 4.274669811095759, + "objective/train/tokens_used": 266220000, + "objective/train/value_avg": -0.01256561279296875, + "objective/train/value_loss": 0.0013382832985371351, + "objective/train/value_max": -0.0008039474487304688, + "objective/train/value_min": -0.810546875, + "objective/train/value_reward_corr": 0.6458940332158234, + "objective/train/value_std": 0.0258026123046875, + "objective/train/weight_avg": 1.003141164779663, + "objective/train/weighted_lm_loss": 2.06048846244812, + "objective/train/weights_max": 1.4646140336990356, + "objective/train/weights_min": 0.36990442872047424, + "theoretical_loss": 4.274669811095759, + "tokens_seen": 245760000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009348419194350827, + "loss": 0.098, + "theoretical_loss": 4.274395533633712, + "tokens_seen": 245891072 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009347616754935002, + "loss": 0.096, + "theoretical_loss": 4.273847539711825, + "tokens_seen": 246153216 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009346814315519178, + "loss": 0.1013, + "theoretical_loss": 4.273300292280435, + "tokens_seen": 246415360 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009346011876103354, + "loss": 0.0989, + "theoretical_loss": 4.272753789530134, + "tokens_seen": 246677504 + }, + { + "epoch": 0.07, + "learning_rate": 0.000934520943668753, + "loss": 0.096, + "theoretical_loss": 4.272208029657822, + "tokens_seen": 246939648 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009344406997271707, + "loss": 0.0971, + "theoretical_loss": 4.271663010866669, + "tokens_seen": 247201792 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009343604557855883, + "loss": 0.1003, + "theoretical_loss": 4.2711187313660925, + "tokens_seen": 247463936 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009342802118440058, + "loss": 0.0983, + "theoretical_loss": 4.270575189371727, + "tokens_seen": 247726080 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009341999679024234, + "loss": 0.0991, + "theoretical_loss": 4.270032383105398, + "tokens_seen": 247988224 + }, + { + "epoch": 0.08, + "learning_rate": 0.000934119723960841, + "loss": 0.0973, + "theoretical_loss": 4.269490310795089, + "tokens_seen": 248250368 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009340394800192585, + "loss": 0.1025, + "theoretical_loss": 4.268948970674917, + "tokens_seen": 248512512 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009339592360776762, + "loss": 0.1013, + "theoretical_loss": 4.268408360985109, + "tokens_seen": 248774656 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.00040733805508352816, + "objective/train/docs_used": 97104, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9560189247131348, + "objective/train/original_loss": 1.9560186862945557, + "objective/train/theoretical_loss": 4.267868479971966, + "objective/train/tokens_used": 269496800, + "objective/train/value_avg": -0.010101318359375, + "objective/train/value_loss": 0.0005951999919489026, + "objective/train/value_max": -0.0006189346313476562, + "objective/train/value_min": -0.81201171875, + "objective/train/value_reward_corr": 0.654036095652701, + "objective/train/value_std": 0.0165863037109375, + "objective/train/weight_avg": 1.0006780624389648, + "objective/train/weighted_lm_loss": 1.9563541412353516, + "objective/train/weights_max": 1.3073004484176636, + "objective/train/weights_min": 0.4099235534667969, + "theoretical_loss": 4.267868479971966, + "tokens_seen": 249036800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009338789921360937, + "loss": 0.0996, + "theoretical_loss": 4.267868479971966, + "tokens_seen": 249036800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009337987481945113, + "loss": 0.099, + "theoretical_loss": 4.267329325887841, + "tokens_seen": 249298944 + }, + { + "epoch": 0.08, + "learning_rate": 0.000933718504252929, + "loss": 0.0923, + "theoretical_loss": 4.266790896991109, + "tokens_seen": 249561088 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009336382603113465, + "loss": 0.1004, + "theoretical_loss": 4.266253191546146, + "tokens_seen": 249823232 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009335580163697641, + "loss": 0.0992, + "theoretical_loss": 4.265716207823292, + "tokens_seen": 250085376 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009334777724281817, + "loss": 0.0991, + "theoretical_loss": 4.2651799440988345, + "tokens_seen": 250347520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009333975284865993, + "loss": 0.0974, + "theoretical_loss": 4.2646443986549745, + "tokens_seen": 250609664 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009333172845450168, + "loss": 0.099, + "theoretical_loss": 4.264109569779803, + "tokens_seen": 250871808 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009332370406034345, + "loss": 0.1013, + "theoretical_loss": 4.263575455767277, + "tokens_seen": 251133952 + }, + { + "epoch": 0.08, + "learning_rate": 0.000933156796661852, + "loss": 0.0997, + "theoretical_loss": 4.263042054917186, + "tokens_seen": 251396096 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009330765527202696, + "loss": 0.0991, + "theoretical_loss": 4.262509365535134, + "tokens_seen": 251658240 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009329963087786873, + "loss": 0.0967, + "theoretical_loss": 4.261977385932512, + "tokens_seen": 251920384 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009329160648371048, + "loss": 0.0985, + "theoretical_loss": 4.261446114426466, + "tokens_seen": 252182528 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002480955794453621, + "objective/train/docs_used": 98257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.974042534828186, + "objective/train/original_loss": 1.974042534828186, + "objective/train/theoretical_loss": 4.261180743685337, + "objective/train/tokens_used": 272773600, + "objective/train/value_avg": -0.00753021240234375, + "objective/train/value_loss": 0.00019367090135347098, + "objective/train/value_max": -0.0005908012390136719, + "objective/train/value_min": -0.181640625, + "objective/train/value_reward_corr": 0.6212227796200605, + "objective/train/value_std": 0.008819580078125, + "objective/train/weight_avg": 1.0025759935379028, + "objective/train/weighted_lm_loss": 1.9780480861663818, + "objective/train/weights_max": 1.16512930393219, + "objective/train/weights_min": 0.7835448384284973, + "theoretical_loss": 4.261180743685337, + "tokens_seen": 252313600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009328358208955225, + "loss": 0.0973, + "theoretical_loss": 4.260915549339879, + "tokens_seen": 252444672 + }, + { + "epoch": 0.08, + "learning_rate": 0.00093275557695394, + "loss": 0.097, + "theoretical_loss": 4.2603856890013425, + "tokens_seen": 252706816 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009326753330123576, + "loss": 0.0991, + "theoretical_loss": 4.25985653174513, + "tokens_seen": 252968960 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009325950890707752, + "loss": 0.0948, + "theoretical_loss": 4.259328075911173, + "tokens_seen": 253231104 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009325148451291927, + "loss": 0.0989, + "theoretical_loss": 4.258800319845038, + "tokens_seen": 253493248 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009324346011876103, + "loss": 0.0974, + "theoretical_loss": 4.258273261897896, + "tokens_seen": 253755392 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009323543572460279, + "loss": 0.097, + "theoretical_loss": 4.257746900426506, + "tokens_seen": 254017536 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009322741133044456, + "loss": 0.0975, + "theoretical_loss": 4.25722123379318, + "tokens_seen": 254279680 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009321938693628631, + "loss": 0.1009, + "theoretical_loss": 4.256696260365768, + "tokens_seen": 254541824 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009321136254212808, + "loss": 0.0996, + "theoretical_loss": 4.256171978517629, + "tokens_seen": 254803968 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009320333814796983, + "loss": 0.0952, + "theoretical_loss": 4.255648386627607, + "tokens_seen": 255066112 + }, + { + "epoch": 0.08, + "learning_rate": 0.000931953137538116, + "loss": 0.0987, + "theoretical_loss": 4.255125483080007, + "tokens_seen": 255328256 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0014403994427993894, + "objective/train/docs_used": 99403, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.000425100326538, + "objective/train/original_loss": 2.000425338745117, + "objective/train/theoretical_loss": 4.254603266264572, + "objective/train/tokens_used": 276050400, + "objective/train/value_avg": -0.00920867919921875, + "objective/train/value_loss": 0.0005131934303790331, + "objective/train/value_max": -0.0005726814270019531, + "objective/train/value_min": -0.29736328125, + "objective/train/value_reward_corr": 0.55161832441128, + "objective/train/value_std": 0.011138916015625, + "objective/train/weight_avg": 1.0016599893569946, + "objective/train/weighted_lm_loss": 2.0043511390686035, + "objective/train/weights_max": 1.3048828840255737, + "objective/train/weights_min": 0.3703209161758423, + "theoretical_loss": 4.254603266264572, + "tokens_seen": 255590400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009318728935965335, + "loss": 0.0987, + "theoretical_loss": 4.254603266264572, + "tokens_seen": 255590400 + }, + { + "epoch": 0.08, + "learning_rate": 0.000931792649654951, + "loss": 0.0957, + "theoretical_loss": 4.254081734576458, + "tokens_seen": 255852544 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009317124057133687, + "loss": 0.0968, + "theoretical_loss": 4.253560886416212, + "tokens_seen": 256114688 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009316321617717862, + "loss": 0.0972, + "theoretical_loss": 4.253040720189746, + "tokens_seen": 256376832 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009315519178302038, + "loss": 0.0973, + "theoretical_loss": 4.252521234308315, + "tokens_seen": 256638976 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009314716738886215, + "loss": 0.0967, + "theoretical_loss": 4.2520024271884935, + "tokens_seen": 256901120 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009313914299470391, + "loss": 0.0981, + "theoretical_loss": 4.251484297252151, + "tokens_seen": 257163264 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009313111860054566, + "loss": 0.1018, + "theoretical_loss": 4.250966842926434, + "tokens_seen": 257425408 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009312309420638742, + "loss": 0.0991, + "theoretical_loss": 4.250450062643734, + "tokens_seen": 257687552 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009311506981222918, + "loss": 0.0955, + "theoretical_loss": 4.249933954841672, + "tokens_seen": 257949696 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009310704541807093, + "loss": 0.1025, + "theoretical_loss": 4.2494185179630755, + "tokens_seen": 258211840 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930990210239127, + "loss": 0.0993, + "theoretical_loss": 4.24890375045595, + "tokens_seen": 258473984 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009309099662975445, + "loss": 0.0974, + "theoretical_loss": 4.248389650773463, + "tokens_seen": 258736128 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0018602707423269749, + "objective/train/docs_used": 100739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8192548751831055, + "objective/train/original_loss": 1.8192548751831055, + "objective/train/theoretical_loss": 4.24813285088448, + "objective/train/tokens_used": 279327200, + "objective/train/value_avg": -0.00965118408203125, + "objective/train/value_loss": 0.00019530275312718004, + "objective/train/value_max": -0.0006694793701171875, + "objective/train/value_min": -0.4150390625, + "objective/train/value_reward_corr": 0.596449167871979, + "objective/train/value_std": 0.01064300537109375, + "objective/train/weight_avg": 1.0019567012786865, + "objective/train/weighted_lm_loss": 1.8218411207199097, + "objective/train/weights_max": 1.1794391870498657, + "objective/train/weights_min": 0.8217251300811768, + "theoretical_loss": 4.24813285088448, + "tokens_seen": 258867200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009308297223559621, + "loss": 0.099, + "theoretical_loss": 4.24787621737392, + "tokens_seen": 258998272 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009307494784143798, + "loss": 0.0972, + "theoretical_loss": 4.247363448720739, + "tokens_seen": 259260416 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009306692344727973, + "loss": 0.0988, + "theoretical_loss": 4.246851343282432, + "tokens_seen": 259522560 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930588990531215, + "loss": 0.1011, + "theoretical_loss": 4.246339899532582, + "tokens_seen": 259784704 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009305087465896325, + "loss": 0.0971, + "theoretical_loss": 4.245829115949818, + "tokens_seen": 260046848 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009304285026480501, + "loss": 0.1012, + "theoretical_loss": 4.245318991017802, + "tokens_seen": 260308992 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009303482587064677, + "loss": 0.0982, + "theoretical_loss": 4.244809523225195, + "tokens_seen": 260571136 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009302680147648853, + "loss": 0.1, + "theoretical_loss": 4.244300711065646, + "tokens_seen": 260833280 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009301877708233028, + "loss": 0.0938, + "theoretical_loss": 4.243792553037767, + "tokens_seen": 261095424 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009301075268817204, + "loss": 0.0983, + "theoretical_loss": 4.243285047645106, + "tokens_seen": 261357568 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930027282940138, + "loss": 0.1027, + "theoretical_loss": 4.242778193396136, + "tokens_seen": 261619712 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009299470389985556, + "loss": 0.0991, + "theoretical_loss": 4.242271988804228, + "tokens_seen": 261881856 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.003497189376503229, + "objective/train/docs_used": 101934, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0956976413726807, + "objective/train/original_loss": 2.0956978797912598, + "objective/train/theoretical_loss": 4.241766432387629, + "objective/train/tokens_used": 282604000, + "objective/train/value_avg": -0.0075836181640625, + "objective/train/value_loss": 0.00010112895688507706, + "objective/train/value_max": -0.00064849853515625, + "objective/train/value_min": -0.1431884765625, + "objective/train/value_reward_corr": 0.3128660984446543, + "objective/train/value_std": 0.005107879638671875, + "objective/train/weight_avg": 1.0035475492477417, + "objective/train/weighted_lm_loss": 2.104475498199463, + "objective/train/weights_max": 1.1146957874298096, + "objective/train/weights_min": 0.901778519153595, + "theoretical_loss": 4.241766432387629, + "tokens_seen": 262144000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009298667950569733, + "loss": 0.0955, + "theoretical_loss": 4.241766432387629, + "tokens_seen": 262144000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009297865511153908, + "loss": 0.0972, + "theoretical_loss": 4.241261522669445, + "tokens_seen": 262406144 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009297063071738084, + "loss": 0.0996, + "theoretical_loss": 4.240757258177617, + "tokens_seen": 262668288 + }, + { + "epoch": 0.08, + "learning_rate": 0.000929626063232226, + "loss": 0.0959, + "theoretical_loss": 4.240253637444903, + "tokens_seen": 262930432 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009295458192906435, + "loss": 0.0955, + "theoretical_loss": 4.239750659008854, + "tokens_seen": 263192576 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009294655753490611, + "loss": 0.0971, + "theoretical_loss": 4.2392483214117975, + "tokens_seen": 263454720 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009293853314074787, + "loss": 0.0999, + "theoretical_loss": 4.238746623200815, + "tokens_seen": 263716864 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009293050874658963, + "loss": 0.0985, + "theoretical_loss": 4.238245562927722, + "tokens_seen": 263979008 + }, + { + "epoch": 0.08, + "learning_rate": 0.000929224843524314, + "loss": 0.0991, + "theoretical_loss": 4.237745139149047, + "tokens_seen": 264241152 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009291445995827316, + "loss": 0.0964, + "theoretical_loss": 4.237245350426015, + "tokens_seen": 264503296 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009290643556411491, + "loss": 0.0942, + "theoretical_loss": 4.236746195324523, + "tokens_seen": 264765440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009289841116995667, + "loss": 0.1008, + "theoretical_loss": 4.2362476724151215, + "tokens_seen": 265027584 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009289038677579843, + "loss": 0.0977, + "theoretical_loss": 4.235749780272998, + "tokens_seen": 265289728 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0023133521899580956, + "objective/train/docs_used": 103026, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7841287851333618, + "objective/train/original_loss": 1.7841286659240723, + "objective/train/theoretical_loss": 4.235501070295674, + "objective/train/tokens_used": 285880800, + "objective/train/value_avg": -0.007236480712890625, + "objective/train/value_loss": 0.00020630829385481775, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.1910400390625, + "objective/train/value_reward_corr": 0.3257111556668698, + "objective/train/value_std": 0.00516510009765625, + "objective/train/weight_avg": 1.0024032592773438, + "objective/train/weighted_lm_loss": 1.7887521982192993, + "objective/train/weights_max": 1.092002034187317, + "objective/train/weights_min": 0.36931154131889343, + "theoretical_loss": 4.235501070295674, + "tokens_seen": 265420800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009288236238164018, + "loss": 0.1003, + "theoretical_loss": 4.235252517477956, + "tokens_seen": 265551872 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009287433798748195, + "loss": 0.0946, + "theoretical_loss": 4.23475588261439, + "tokens_seen": 265814016 + }, + { + "epoch": 0.08, + "learning_rate": 0.000928663135933237, + "loss": 0.0971, + "theoretical_loss": 4.234259874271275, + "tokens_seen": 266076160 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009285828919916546, + "loss": 0.0972, + "theoretical_loss": 4.23376449104214, + "tokens_seen": 266338304 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009285026480500723, + "loss": 0.1004, + "theoretical_loss": 4.233269731525055, + "tokens_seen": 266600448 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009284224041084899, + "loss": 0.0981, + "theoretical_loss": 4.232775594322605, + "tokens_seen": 266862592 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009283421601669074, + "loss": 0.0975, + "theoretical_loss": 4.232282078041876, + "tokens_seen": 267124736 + }, + { + "epoch": 0.08, + "learning_rate": 0.000928261916225325, + "loss": 0.0978, + "theoretical_loss": 4.231789181294436, + "tokens_seen": 267386880 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009281816722837426, + "loss": 0.1, + "theoretical_loss": 4.231296902696314, + "tokens_seen": 267649024 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009281014283421601, + "loss": 0.0954, + "theoretical_loss": 4.230805240867982, + "tokens_seen": 267911168 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009280211844005778, + "loss": 0.0972, + "theoretical_loss": 4.230314194434336, + "tokens_seen": 268173312 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009279409404589953, + "loss": 0.0976, + "theoretical_loss": 4.229823762024681, + "tokens_seen": 268435456 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.001721393782645464, + "objective/train/docs_used": 104173, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.79710054397583, + "objective/train/original_loss": 1.79710054397583, + "objective/train/theoretical_loss": 4.2293339422727065, + "objective/train/tokens_used": 289157600, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.00033259327756240964, + "objective/train/value_max": -0.0006589889526367188, + "objective/train/value_min": -0.2215576171875, + "objective/train/value_reward_corr": 0.6127857051291834, + "objective/train/value_std": 0.0121307373046875, + "objective/train/weight_avg": 1.0018748044967651, + "objective/train/weighted_lm_loss": 1.7999058961868286, + "objective/train/weights_max": 1.2480190992355347, + "objective/train/weights_min": 0.40334662795066833, + "theoretical_loss": 4.2293339422727065, + "tokens_seen": 268697600 + }, + { + "epoch": 0.08, + "learning_rate": 0.000927860696517413, + "loss": 0.0974, + "theoretical_loss": 4.2293339422727065, + "tokens_seen": 268697600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009277804525758306, + "loss": 0.0988, + "theoretical_loss": 4.228844733816474, + "tokens_seen": 268959744 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009277002086342481, + "loss": 0.0948, + "theoretical_loss": 4.228356135298394, + "tokens_seen": 269221888 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009276199646926658, + "loss": 0.0941, + "theoretical_loss": 4.227868145365211, + "tokens_seen": 269484032 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009275397207510833, + "loss": 0.0966, + "theoretical_loss": 4.227380762667987, + "tokens_seen": 269746176 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009274594768095009, + "loss": 0.0953, + "theoretical_loss": 4.226893985862076, + "tokens_seen": 270008320 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009273792328679185, + "loss": 0.099, + "theoretical_loss": 4.226407813607116, + "tokens_seen": 270270464 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009272989889263361, + "loss": 0.0969, + "theoretical_loss": 4.2259222445670055, + "tokens_seen": 270532608 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009272187449847536, + "loss": 0.0936, + "theoretical_loss": 4.225437277409885, + "tokens_seen": 270794752 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009271385010431712, + "loss": 0.0955, + "theoretical_loss": 4.224952910808122, + "tokens_seen": 271056896 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009270582571015889, + "loss": 0.0976, + "theoretical_loss": 4.224469143438294, + "tokens_seen": 271319040 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009269780131600064, + "loss": 0.1001, + "theoretical_loss": 4.223985973981171, + "tokens_seen": 271581184 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009268977692184241, + "loss": 0.0996, + "theoretical_loss": 4.223503401121693, + "tokens_seen": 271843328 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002592921955510974, + "objective/train/docs_used": 105443, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8855316638946533, + "objective/train/original_loss": 1.8855319023132324, + "objective/train/theoretical_loss": 4.223262338006254, + "objective/train/tokens_used": 292434400, + "objective/train/value_avg": -0.00720977783203125, + "objective/train/value_loss": 0.00012640572094824165, + "objective/train/value_max": -0.00036835670471191406, + "objective/train/value_min": -0.2186279296875, + "objective/train/value_reward_corr": 0.5305388442877321, + "objective/train/value_std": 0.00785064697265625, + "objective/train/weight_avg": 1.0026540756225586, + "objective/train/weighted_lm_loss": 1.8909047842025757, + "objective/train/weights_max": 1.1803033351898193, + "objective/train/weights_min": 0.4927148222923279, + "theoretical_loss": 4.223262338006254, + "tokens_seen": 271974400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009268175252768416, + "loss": 0.0944, + "theoretical_loss": 4.223021423548962, + "tokens_seen": 272105472 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009267372813352593, + "loss": 0.0964, + "theoretical_loss": 4.222540039956215, + "tokens_seen": 272367616 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009266570373936768, + "loss": 0.0953, + "theoretical_loss": 4.222059249040814, + "tokens_seen": 272629760 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009265767934520943, + "loss": 0.0991, + "theoretical_loss": 4.2215790495042285, + "tokens_seen": 272891904 + }, + { + "epoch": 0.08, + "learning_rate": 0.000926496549510512, + "loss": 0.0986, + "theoretical_loss": 4.221099440052014, + "tokens_seen": 273154048 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009264163055689295, + "loss": 0.0959, + "theoretical_loss": 4.220620419393799, + "tokens_seen": 273416192 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009263360616273471, + "loss": 0.0982, + "theoretical_loss": 4.220141986243268, + "tokens_seen": 273678336 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009262558176857648, + "loss": 0.0984, + "theoretical_loss": 4.219664139318145, + "tokens_seen": 273940480 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009261755737441824, + "loss": 0.0973, + "theoretical_loss": 4.219186877340174, + "tokens_seen": 274202624 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009260953298025999, + "loss": 0.0947, + "theoretical_loss": 4.218710199035108, + "tokens_seen": 274464768 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009260150858610175, + "loss": 0.0947, + "theoretical_loss": 4.218234103132686, + "tokens_seen": 274726912 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009259348419194351, + "loss": 0.0974, + "theoretical_loss": 4.217758588366623, + "tokens_seen": 274989056 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002839514520019293, + "objective/train/docs_used": 106703, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8725626468658447, + "objective/train/original_loss": 1.8725626468658447, + "objective/train/theoretical_loss": 4.217283653474588, + "objective/train/tokens_used": 295711200, + "objective/train/value_avg": -0.01064300537109375, + "objective/train/value_loss": 0.00048408194561488926, + "objective/train/value_max": -0.0005173683166503906, + "objective/train/value_min": -0.8681640625, + "objective/train/value_reward_corr": 0.614751588044035, + "objective/train/value_std": 0.016937255859375, + "objective/train/weight_avg": 1.003056287765503, + "objective/train/weighted_lm_loss": 1.8782776594161987, + "objective/train/weights_max": 1.6007287502288818, + "objective/train/weights_min": 0.3700864911079407, + "theoretical_loss": 4.217283653474588, + "tokens_seen": 275251200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009258545979778526, + "loss": 0.094, + "theoretical_loss": 4.217283653474588, + "tokens_seen": 275251200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009257743540362703, + "loss": 0.0967, + "theoretical_loss": 4.216809297198195, + "tokens_seen": 275513344 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009256941100946878, + "loss": 0.0972, + "theoretical_loss": 4.21633551828298, + "tokens_seen": 275775488 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009256138661531054, + "loss": 0.0984, + "theoretical_loss": 4.215862315478388, + "tokens_seen": 276037632 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009255336222115231, + "loss": 0.0994, + "theoretical_loss": 4.2153896875377574, + "tokens_seen": 276299776 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009254533782699407, + "loss": 0.0979, + "theoretical_loss": 4.214917633218304, + "tokens_seen": 276561920 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009253731343283583, + "loss": 0.098, + "theoretical_loss": 4.214446151281106, + "tokens_seen": 276824064 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009252928903867758, + "loss": 0.0947, + "theoretical_loss": 4.213975240491084, + "tokens_seen": 277086208 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009252126464451934, + "loss": 0.0965, + "theoretical_loss": 4.213504899616995, + "tokens_seen": 277348352 + }, + { + "epoch": 0.08, + "learning_rate": 0.000925132402503611, + "loss": 0.0967, + "theoretical_loss": 4.213035127431402, + "tokens_seen": 277610496 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009250521585620286, + "loss": 0.0971, + "theoretical_loss": 4.212565922710677, + "tokens_seen": 277872640 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009249719146204461, + "loss": 0.0967, + "theoretical_loss": 4.21209728423497, + "tokens_seen": 278134784 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009248916706788639, + "loss": 0.0989, + "theoretical_loss": 4.2116292107882, + "tokens_seen": 278396928 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0019138733623549342, + "objective/train/docs_used": 107929, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.952666163444519, + "objective/train/original_loss": 1.9526660442352295, + "objective/train/theoretical_loss": 4.211395385571668, + "objective/train/tokens_used": 298988000, + "objective/train/value_avg": -0.00894927978515625, + "objective/train/value_loss": 0.0007483014487661421, + "objective/train/value_max": -0.00033664703369140625, + "objective/train/value_min": -0.8779296875, + "objective/train/value_reward_corr": 0.5763799754051704, + "objective/train/value_std": 0.0159759521484375, + "objective/train/weight_avg": 1.002224087715149, + "objective/train/weighted_lm_loss": 1.956479549407959, + "objective/train/weights_max": 1.5199860334396362, + "objective/train/weights_min": 0.37053996324539185, + "theoretical_loss": 4.211395385571668, + "tokens_seen": 278528000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009248114267372814, + "loss": 0.0977, + "theoretical_loss": 4.211161701158042, + "tokens_seen": 278659072 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009247311827956989, + "loss": 0.0988, + "theoretical_loss": 4.2106947541359085, + "tokens_seen": 278921216 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009246509388541166, + "loss": 0.0962, + "theoretical_loss": 4.210228368516935, + "tokens_seen": 279183360 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009245706949125341, + "loss": 0.1018, + "theoretical_loss": 4.209762543099966, + "tokens_seen": 279445504 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009244904509709517, + "loss": 0.0973, + "theoretical_loss": 4.209297276687541, + "tokens_seen": 279707648 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009244102070293693, + "loss": 0.1001, + "theoretical_loss": 4.2088325680858745, + "tokens_seen": 279969792 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009243299630877869, + "loss": 0.0956, + "theoretical_loss": 4.208368416104849, + "tokens_seen": 280231936 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009242497191462045, + "loss": 0.0991, + "theoretical_loss": 4.207904819557995, + "tokens_seen": 280494080 + }, + { + "epoch": 0.09, + "learning_rate": 0.000924169475204622, + "loss": 0.0967, + "theoretical_loss": 4.207441777262477, + "tokens_seen": 280756224 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009240892312630397, + "loss": 0.0966, + "theoretical_loss": 4.206979288039081, + "tokens_seen": 281018368 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009240089873214573, + "loss": 0.0953, + "theoretical_loss": 4.206517350712199, + "tokens_seen": 281280512 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009239287433798749, + "loss": 0.0974, + "theoretical_loss": 4.206055964109813, + "tokens_seen": 281542656 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.002816372085362673, + "objective/train/docs_used": 109222, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8340928554534912, + "objective/train/original_loss": 1.8340927362442017, + "objective/train/theoretical_loss": 4.205595127063485, + "objective/train/tokens_used": 302264800, + "objective/train/value_avg": -0.00853729248046875, + "objective/train/value_loss": 0.0001849338150350377, + "objective/train/value_max": -0.0005483627319335938, + "objective/train/value_min": -0.266357421875, + "objective/train/value_reward_corr": 0.5370670101322965, + "objective/train/value_std": 0.0100250244140625, + "objective/train/weight_avg": 1.0029041767120361, + "objective/train/weighted_lm_loss": 1.8380697965621948, + "objective/train/weights_max": 1.208823800086975, + "objective/train/weights_min": 0.3825574517250061, + "theoretical_loss": 4.205595127063485, + "tokens_seen": 281804800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009238484994382924, + "loss": 0.0958, + "theoretical_loss": 4.205595127063485, + "tokens_seen": 281804800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009237682554967101, + "loss": 0.0947, + "theoretical_loss": 4.205134838408337, + "tokens_seen": 282066944 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009236880115551276, + "loss": 0.0966, + "theoretical_loss": 4.20467509698304, + "tokens_seen": 282329088 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009236077676135451, + "loss": 0.0962, + "theoretical_loss": 4.204215901629803, + "tokens_seen": 282591232 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009235275236719628, + "loss": 0.0969, + "theoretical_loss": 4.203757251194353, + "tokens_seen": 282853376 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009234472797303803, + "loss": 0.096, + "theoretical_loss": 4.203299144525923, + "tokens_seen": 283115520 + }, + { + "epoch": 0.09, + "learning_rate": 0.000923367035788798, + "loss": 0.0966, + "theoretical_loss": 4.202841580477241, + "tokens_seen": 283377664 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009232867918472156, + "loss": 0.0995, + "theoretical_loss": 4.202384557904513, + "tokens_seen": 283639808 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009232065479056332, + "loss": 0.096, + "theoretical_loss": 4.201928075667411, + "tokens_seen": 283901952 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009231263039640507, + "loss": 0.0955, + "theoretical_loss": 4.201472132629057, + "tokens_seen": 284164096 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009230460600224683, + "loss": 0.1011, + "theoretical_loss": 4.201016727656012, + "tokens_seen": 284426240 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009229658160808859, + "loss": 0.0984, + "theoretical_loss": 4.2005618596182615, + "tokens_seen": 284688384 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009228855721393035, + "loss": 0.0986, + "theoretical_loss": 4.200107527389202, + "tokens_seen": 284950528 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.00423348369076848, + "objective/train/docs_used": 110428, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.866652250289917, + "objective/train/original_loss": 1.866652488708496, + "objective/train/theoretical_loss": 4.1998805618517965, + "objective/train/tokens_used": 305541600, + "objective/train/value_avg": -0.00862884521484375, + "objective/train/value_loss": 0.00014239439042285085, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.1759033203125, + "objective/train/value_reward_corr": 0.4883459042398012, + "objective/train/value_std": 0.0081787109375, + "objective/train/weight_avg": 1.0043009519577026, + "objective/train/weighted_lm_loss": 1.8749480247497559, + "objective/train/weights_max": 1.1676921844482422, + "objective/train/weights_min": 0.3836681544780731, + "theoretical_loss": 4.1998805618517965, + "tokens_seen": 285081600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009228053281977211, + "loss": 0.0967, + "theoretical_loss": 4.199653729845626, + "tokens_seen": 285212672 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009227250842561386, + "loss": 0.0968, + "theoretical_loss": 4.199200465867714, + "tokens_seen": 285474816 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009226448403145564, + "loss": 0.0961, + "theoretical_loss": 4.198747734339013, + "tokens_seen": 285736960 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009225645963729739, + "loss": 0.0945, + "theoretical_loss": 4.198295534146429, + "tokens_seen": 285999104 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009224843524313914, + "loss": 0.0952, + "theoretical_loss": 4.197843864180214, + "tokens_seen": 286261248 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009224041084898091, + "loss": 0.0955, + "theoretical_loss": 4.197392723333951, + "tokens_seen": 286523392 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009223238645482266, + "loss": 0.098, + "theoretical_loss": 4.196942110504538, + "tokens_seen": 286785536 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009222436206066442, + "loss": 0.0955, + "theoretical_loss": 4.196492024592183, + "tokens_seen": 287047680 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009221633766650618, + "loss": 0.0951, + "theoretical_loss": 4.196042464500382, + "tokens_seen": 287309824 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009220831327234794, + "loss": 0.0948, + "theoretical_loss": 4.195593429135916, + "tokens_seen": 287571968 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009220028887818969, + "loss": 0.1016, + "theoretical_loss": 4.195144917408828, + "tokens_seen": 287834112 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009219226448403146, + "loss": 0.095, + "theoretical_loss": 4.194696928232417, + "tokens_seen": 288096256 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.003713709767907858, + "objective/train/docs_used": 111591, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8504618406295776, + "objective/train/original_loss": 1.850461721420288, + "objective/train/theoretical_loss": 4.194249460523222, + "objective/train/tokens_used": 308818400, + "objective/train/value_avg": -0.0093841552734375, + "objective/train/value_loss": 0.0001626669109100476, + "objective/train/value_max": -0.00070953369140625, + "objective/train/value_min": -0.247802734375, + "objective/train/value_reward_corr": 0.5734128643584034, + "objective/train/value_std": 0.01049041748046875, + "objective/train/weight_avg": 1.0037906169891357, + "objective/train/weighted_lm_loss": 1.857253909111023, + "objective/train/weights_max": 1.1863479614257812, + "objective/train/weights_min": 0.3687554597854614, + "theoretical_loss": 4.194249460523222, + "tokens_seen": 288358400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009218424008987322, + "loss": 0.0936, + "theoretical_loss": 4.194249460523222, + "tokens_seen": 288358400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009217621569571497, + "loss": 0.0967, + "theoretical_loss": 4.193802513201015, + "tokens_seen": 288620544 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009216819130155674, + "loss": 0.0998, + "theoretical_loss": 4.193356085188778, + "tokens_seen": 288882688 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009216016690739849, + "loss": 0.0987, + "theoretical_loss": 4.1929101754127025, + "tokens_seen": 289144832 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009215214251324026, + "loss": 0.0962, + "theoretical_loss": 4.192464782802167, + "tokens_seen": 289406976 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009214411811908201, + "loss": 0.096, + "theoretical_loss": 4.192019906289733, + "tokens_seen": 289669120 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009213609372492377, + "loss": 0.099, + "theoretical_loss": 4.1915755448111245, + "tokens_seen": 289931264 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009212806933076553, + "loss": 0.0974, + "theoretical_loss": 4.191131697305222, + "tokens_seen": 290193408 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009212004493660728, + "loss": 0.0959, + "theoretical_loss": 4.1906883627140505, + "tokens_seen": 290455552 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009211202054244905, + "loss": 0.0929, + "theoretical_loss": 4.19024553998276, + "tokens_seen": 290717696 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009210399614829081, + "loss": 0.0956, + "theoretical_loss": 4.189803228059623, + "tokens_seen": 290979840 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009209597175413257, + "loss": 0.096, + "theoretical_loss": 4.189361425896016, + "tokens_seen": 291241984 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009208794735997432, + "loss": 0.0929, + "theoretical_loss": 4.188920132446411, + "tokens_seen": 291504128 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0008069810573942959, + "objective/train/docs_used": 112844, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8364742994308472, + "objective/train/original_loss": 1.8364744186401367, + "objective/train/theoretical_loss": 4.188699676163473, + "objective/train/tokens_used": 312095200, + "objective/train/value_avg": -0.008056640625, + "objective/train/value_loss": 0.00030392000917345285, + "objective/train/value_max": -0.0004012584686279297, + "objective/train/value_min": -0.474609375, + "objective/train/value_reward_corr": 0.44846135234212026, + "objective/train/value_std": 0.00936126708984375, + "objective/train/weight_avg": 1.0009410381317139, + "objective/train/weighted_lm_loss": 1.8375290632247925, + "objective/train/weights_max": 1.5633509159088135, + "objective/train/weights_min": 0.3711496591567993, + "theoretical_loss": 4.188699676163473, + "tokens_seen": 291635200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009207992296581609, + "loss": 0.0937, + "theoretical_loss": 4.188479346668359, + "tokens_seen": 291766272 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009207189857165784, + "loss": 0.0943, + "theoretical_loss": 4.188039067522484, + "tokens_seen": 292028416 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009206387417749959, + "loss": 0.0952, + "theoretical_loss": 4.18759929397247, + "tokens_seen": 292290560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009205584978334136, + "loss": 0.0959, + "theoretical_loss": 4.187160024985044, + "tokens_seen": 292552704 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009204782538918311, + "loss": 0.0975, + "theoretical_loss": 4.1867212595299685, + "tokens_seen": 292814848 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009203980099502489, + "loss": 0.0961, + "theoretical_loss": 4.186282996580034, + "tokens_seen": 293076992 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009203177660086664, + "loss": 0.0984, + "theoretical_loss": 4.185845235111037, + "tokens_seen": 293339136 + }, + { + "epoch": 0.09, + "learning_rate": 0.000920237522067084, + "loss": 0.0962, + "theoretical_loss": 4.185407974101779, + "tokens_seen": 293601280 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009201572781255016, + "loss": 0.095, + "theoretical_loss": 4.184971212534048, + "tokens_seen": 293863424 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009200770341839191, + "loss": 0.096, + "theoretical_loss": 4.184534949392611, + "tokens_seen": 294125568 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009199967902423367, + "loss": 0.0951, + "theoretical_loss": 4.184099183665199, + "tokens_seen": 294387712 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009199165463007543, + "loss": 0.0985, + "theoretical_loss": 4.1836639143425, + "tokens_seen": 294649856 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.002410832792520523, + "objective/train/docs_used": 114046, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9235732555389404, + "objective/train/original_loss": 1.92357337474823, + "objective/train/theoretical_loss": 4.1832291404181445, + "objective/train/tokens_used": 315372000, + "objective/train/value_avg": -0.00838470458984375, + "objective/train/value_loss": 0.00013771136582363397, + "objective/train/value_max": -0.0008459091186523438, + "objective/train/value_min": -0.28466796875, + "objective/train/value_reward_corr": 0.5819581613531781, + "objective/train/value_std": 0.0088043212890625, + "objective/train/weight_avg": 1.0024785995483398, + "objective/train/weighted_lm_loss": 1.928615689277649, + "objective/train/weights_max": 1.112996220588684, + "objective/train/weights_min": 0.6144814491271973, + "theoretical_loss": 4.1832291404181445, + "tokens_seen": 294912000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009198363023591719, + "loss": 0.0955, + "theoretical_loss": 4.1832291404181445, + "tokens_seen": 294912000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009197560584175894, + "loss": 0.0958, + "theoretical_loss": 4.182794860888696, + "tokens_seen": 295174144 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009196758144760072, + "loss": 0.0974, + "theoretical_loss": 4.18236107475364, + "tokens_seen": 295436288 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009195955705344247, + "loss": 0.0974, + "theoretical_loss": 4.18192778101537, + "tokens_seen": 295698432 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009195153265928422, + "loss": 0.0943, + "theoretical_loss": 4.181494978679181, + "tokens_seen": 295960576 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009194350826512599, + "loss": 0.0952, + "theoretical_loss": 4.181062666753256, + "tokens_seen": 296222720 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009193548387096774, + "loss": 0.0945, + "theoretical_loss": 4.180630844248653, + "tokens_seen": 296484864 + }, + { + "epoch": 0.09, + "learning_rate": 0.000919274594768095, + "loss": 0.0977, + "theoretical_loss": 4.180199510179299, + "tokens_seen": 296747008 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009191943508265126, + "loss": 0.0951, + "theoretical_loss": 4.179768663561975, + "tokens_seen": 297009152 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009191141068849302, + "loss": 0.0925, + "theoretical_loss": 4.1793383034163085, + "tokens_seen": 297271296 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009190338629433478, + "loss": 0.0942, + "theoretical_loss": 4.178908428764759, + "tokens_seen": 297533440 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009189536190017654, + "loss": 0.0948, + "theoretical_loss": 4.17847903863261, + "tokens_seen": 297795584 + }, + { + "epoch": 0.09, + "learning_rate": 0.000918873375060183, + "loss": 0.0953, + "theoretical_loss": 4.178050132047958, + "tokens_seen": 298057728 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0007046199752949178, + "objective/train/docs_used": 115247, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8377935886383057, + "objective/train/original_loss": 1.8377935886383057, + "objective/train/theoretical_loss": 4.1778358597829905, + "objective/train/tokens_used": 318648800, + "objective/train/value_avg": -0.0088348388671875, + "objective/train/value_loss": 0.0002799044887069613, + "objective/train/value_max": -0.00047469139099121094, + "objective/train/value_min": -0.80615234375, + "objective/train/value_reward_corr": 0.4904552822474981, + "objective/train/value_std": 0.01067352294921875, + "objective/train/weight_avg": 1.0008306503295898, + "objective/train/weighted_lm_loss": 1.8391979932785034, + "objective/train/weights_max": 1.235874056816101, + "objective/train/weights_min": 0.37098827958106995, + "theoretical_loss": 4.1778358597829905, + "tokens_seen": 298188800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009187931311186006, + "loss": 0.0941, + "theoretical_loss": 4.177621708041703, + "tokens_seen": 298319872 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009187128871770182, + "loss": 0.0969, + "theoretical_loss": 4.177193765647534, + "tokens_seen": 298582016 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009186326432354357, + "loss": 0.0977, + "theoretical_loss": 4.176766303901922, + "tokens_seen": 298844160 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009185523992938534, + "loss": 0.0959, + "theoretical_loss": 4.17633932184411, + "tokens_seen": 299106304 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009184721553522709, + "loss": 0.0955, + "theoretical_loss": 4.1759128185161005, + "tokens_seen": 299368448 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009183919114106885, + "loss": 0.0921, + "theoretical_loss": 4.175486792962646, + "tokens_seen": 299630592 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009183116674691061, + "loss": 0.0956, + "theoretical_loss": 4.175061244231237, + "tokens_seen": 299892736 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009182314235275236, + "loss": 0.0972, + "theoretical_loss": 4.174636171372097, + "tokens_seen": 300154880 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009181511795859412, + "loss": 0.0917, + "theoretical_loss": 4.174211573438166, + "tokens_seen": 300417024 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009180709356443589, + "loss": 0.0939, + "theoretical_loss": 4.173787449485094, + "tokens_seen": 300679168 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009179906917027765, + "loss": 0.098, + "theoretical_loss": 4.17336379857123, + "tokens_seen": 300941312 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009179104477611941, + "loss": 0.0945, + "theoretical_loss": 4.172940619757611, + "tokens_seen": 301203456 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0009184295777231455, + "objective/train/docs_used": 116418, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7909634113311768, + "objective/train/original_loss": 1.7909634113311768, + "objective/train/theoretical_loss": 4.172517912107954, + "objective/train/tokens_used": 321925600, + "objective/train/value_avg": -0.0111846923828125, + "objective/train/value_loss": 0.00032702440512366593, + "objective/train/value_max": -0.0006213188171386719, + "objective/train/value_min": -0.42529296875, + "objective/train/value_reward_corr": 0.6038215415930122, + "objective/train/value_std": 0.013702392578125, + "objective/train/weight_avg": 1.0010701417922974, + "objective/train/weighted_lm_loss": 1.792219877243042, + "objective/train/weights_max": 1.2377305030822754, + "objective/train/weights_min": 0.3794703483581543, + "theoretical_loss": 4.172517912107954, + "tokens_seen": 301465600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009178302038196117, + "loss": 0.0937, + "theoretical_loss": 4.172517912107954, + "tokens_seen": 301465600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009177499598780292, + "loss": 0.0927, + "theoretical_loss": 4.172095674688645, + "tokens_seen": 301727744 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009176697159364468, + "loss": 0.0956, + "theoretical_loss": 4.171673906568729, + "tokens_seen": 301989888 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009175894719948644, + "loss": 0.0972, + "theoretical_loss": 4.171252606819899, + "tokens_seen": 302252032 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009175092280532819, + "loss": 0.0951, + "theoretical_loss": 4.170831774516489, + "tokens_seen": 302514176 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009174289841116997, + "loss": 0.0955, + "theoretical_loss": 4.170411408735461, + "tokens_seen": 302776320 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009173487401701172, + "loss": 0.0954, + "theoretical_loss": 4.169991508556398, + "tokens_seen": 303038464 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009172684962285348, + "loss": 0.0944, + "theoretical_loss": 4.169572073061493, + "tokens_seen": 303300608 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009171882522869524, + "loss": 0.0975, + "theoretical_loss": 4.16915310133554, + "tokens_seen": 303562752 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009171080083453699, + "loss": 0.0963, + "theoretical_loss": 4.1687345924659205, + "tokens_seen": 303824896 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009170277644037875, + "loss": 0.094, + "theoretical_loss": 4.168316545542602, + "tokens_seen": 304087040 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009169475204622051, + "loss": 0.0935, + "theoretical_loss": 4.167898959658121, + "tokens_seen": 304349184 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009168672765206227, + "loss": 0.0902, + "theoretical_loss": 4.167481833907576, + "tokens_seen": 304611328 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0005431807949207723, + "objective/train/docs_used": 117554, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7858117818832397, + "objective/train/original_loss": 1.7858116626739502, + "objective/train/theoretical_loss": 4.167273443300447, + "objective/train/tokens_used": 325202400, + "objective/train/value_avg": -0.0095367431640625, + "objective/train/value_loss": 0.00023923047410789877, + "objective/train/value_max": -0.0003447532653808594, + "objective/train/value_min": -0.414794921875, + "objective/train/value_reward_corr": 0.6826350512891199, + "objective/train/value_std": 0.01375579833984375, + "objective/train/weight_avg": 1.0006574392318726, + "objective/train/weighted_lm_loss": 1.7867764234542847, + "objective/train/weights_max": 1.3765175342559814, + "objective/train/weights_min": 0.3716852366924286, + "theoretical_loss": 4.167273443300447, + "tokens_seen": 304742400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009167870325790402, + "loss": 0.0982, + "theoretical_loss": 4.16706516738862, + "tokens_seen": 304873472 + }, + { + "epoch": 0.09, + "learning_rate": 0.000916706788637458, + "loss": 0.0941, + "theoretical_loss": 4.166648959201449, + "tokens_seen": 305135616 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009166265446958755, + "loss": 0.0978, + "theoretical_loss": 4.166233208448794, + "tokens_seen": 305397760 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009165463007542931, + "loss": 0.0968, + "theoretical_loss": 4.165817914235908, + "tokens_seen": 305659904 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009164660568127107, + "loss": 0.0952, + "theoretical_loss": 4.165403075670562, + "tokens_seen": 305922048 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009163858128711282, + "loss": 0.0942, + "theoretical_loss": 4.164988691863032, + "tokens_seen": 306184192 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009163055689295459, + "loss": 0.0944, + "theoretical_loss": 4.164574761926092, + "tokens_seen": 306446336 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009162253249879634, + "loss": 0.0949, + "theoretical_loss": 4.164161284975005, + "tokens_seen": 306708480 + }, + { + "epoch": 0.09, + "learning_rate": 0.000916145081046381, + "loss": 0.0941, + "theoretical_loss": 4.1637482601275115, + "tokens_seen": 306970624 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009160648371047986, + "loss": 0.0983, + "theoretical_loss": 4.163335686503822, + "tokens_seen": 307232768 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009159845931632162, + "loss": 0.0961, + "theoretical_loss": 4.162923563226607, + "tokens_seen": 307494912 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009159043492216338, + "loss": 0.0935, + "theoretical_loss": 4.1625118894209905, + "tokens_seen": 307757056 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0024794533383101225, + "objective/train/docs_used": 118729, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.930225133895874, + "objective/train/original_loss": 1.930225133895874, + "objective/train/theoretical_loss": 4.16210066421454, + "objective/train/tokens_used": 328479200, + "objective/train/value_avg": -0.01091766357421875, + "objective/train/value_loss": 0.0003562370839063078, + "objective/train/value_max": -0.0003101825714111328, + "objective/train/value_min": -0.7587890625, + "objective/train/value_reward_corr": 0.711995979515493, + "objective/train/value_std": 0.02008056640625, + "objective/train/weight_avg": 1.002644658088684, + "objective/train/weighted_lm_loss": 1.9346448183059692, + "objective/train/weights_max": 1.8252228498458862, + "objective/train/weights_min": 0.36886805295944214, + "theoretical_loss": 4.16210066421454, + "tokens_seen": 308019200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009158241052800514, + "loss": 0.0936, + "theoretical_loss": 4.16210066421454, + "tokens_seen": 308019200 + }, + { + "epoch": 0.09, + "learning_rate": 0.000915743861338469, + "loss": 0.0949, + "theoretical_loss": 4.161689886737255, + "tokens_seen": 308281344 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009156636173968865, + "loss": 0.0969, + "theoretical_loss": 4.161279556121562, + "tokens_seen": 308543488 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009155833734553042, + "loss": 0.0955, + "theoretical_loss": 4.160869671502302, + "tokens_seen": 308805632 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009155031295137217, + "loss": 0.0963, + "theoretical_loss": 4.160460232016725, + "tokens_seen": 309067776 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009154228855721394, + "loss": 0.0969, + "theoretical_loss": 4.16005123680448, + "tokens_seen": 309329920 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009153426416305569, + "loss": 0.0937, + "theoretical_loss": 4.159642685007606, + "tokens_seen": 309592064 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009152623976889744, + "loss": 0.0927, + "theoretical_loss": 4.1592345757705225, + "tokens_seen": 309854208 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009151821537473922, + "loss": 0.0921, + "theoretical_loss": 4.158826908240022, + "tokens_seen": 310116352 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009151019098058097, + "loss": 0.0931, + "theoretical_loss": 4.158419681565265, + "tokens_seen": 310378496 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009150216658642273, + "loss": 0.0949, + "theoretical_loss": 4.1580128948977615, + "tokens_seen": 310640640 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009149414219226449, + "loss": 0.094, + "theoretical_loss": 4.157606547391374, + "tokens_seen": 310902784 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009148611779810625, + "loss": 0.0976, + "theoretical_loss": 4.157200638202301, + "tokens_seen": 311164928 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.002907957648858428, + "objective/train/docs_used": 120076, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0078868865966797, + "objective/train/original_loss": 2.0078868865966797, + "objective/train/theoretical_loss": 4.156997847713709, + "objective/train/tokens_used": 331756000, + "objective/train/value_avg": -0.010467529296875, + "objective/train/value_loss": 0.0005483797285705805, + "objective/train/value_max": -0.0007014274597167969, + "objective/train/value_min": -0.6416015625, + "objective/train/value_reward_corr": 0.563955012185076, + "objective/train/value_std": 0.0168914794921875, + "objective/train/weight_avg": 1.0031453371047974, + "objective/train/weighted_lm_loss": 2.013608694076538, + "objective/train/weights_max": 1.2697027921676636, + "objective/train/weights_min": 0.37099963426589966, + "theoretical_loss": 4.156997847713709, + "tokens_seen": 311296000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00091478093403948, + "loss": 0.0907, + "theoretical_loss": 4.156795166489074, + "tokens_seen": 311427072 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009147006900978976, + "loss": 0.0952, + "theoretical_loss": 4.156390131412543, + "tokens_seen": 311689216 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009146204461563152, + "loss": 0.0937, + "theoretical_loss": 4.155985532135875, + "tokens_seen": 311951360 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009145402022147327, + "loss": 0.097, + "theoretical_loss": 4.1555813678245395, + "tokens_seen": 312213504 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009144599582731505, + "loss": 0.0941, + "theoretical_loss": 4.155177637646306, + "tokens_seen": 312475648 + }, + { + "epoch": 0.09, + "learning_rate": 0.000914379714331568, + "loss": 0.0916, + "theoretical_loss": 4.154774340771228, + "tokens_seen": 312737792 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009142994703899856, + "loss": 0.0953, + "theoretical_loss": 4.154371476371646, + "tokens_seen": 312999936 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009142192264484032, + "loss": 0.0935, + "theoretical_loss": 4.153969043622169, + "tokens_seen": 313262080 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009141389825068207, + "loss": 0.0967, + "theoretical_loss": 4.15356704169967, + "tokens_seen": 313524224 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009140587385652384, + "loss": 0.0973, + "theoretical_loss": 4.153165469783279, + "tokens_seen": 313786368 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009139784946236559, + "loss": 0.0972, + "theoretical_loss": 4.152764327054376, + "tokens_seen": 314048512 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009138982506820735, + "loss": 0.095, + "theoretical_loss": 4.152363612696579, + "tokens_seen": 314310656 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0016880643088370562, + "objective/train/docs_used": 121169, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8933781385421753, + "objective/train/original_loss": 1.8933782577514648, + "objective/train/theoretical_loss": 4.151963325895737, + "objective/train/tokens_used": 335032800, + "objective/train/value_avg": -0.00879669189453125, + "objective/train/value_loss": 0.0004417987947817892, + "objective/train/value_max": -0.0004973411560058594, + "objective/train/value_min": -0.265380859375, + "objective/train/value_reward_corr": 0.4943290748479078, + "objective/train/value_std": 0.01016998291015625, + "objective/train/weight_avg": 1.0018750429153442, + "objective/train/weighted_lm_loss": 1.8967622518539429, + "objective/train/weights_max": 1.139946699142456, + "objective/train/weights_min": 0.36912843585014343, + "theoretical_loss": 4.151963325895737, + "tokens_seen": 314572800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009138180067404911, + "loss": 0.0935, + "theoretical_loss": 4.151963325895737, + "tokens_seen": 314572800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009137377627989088, + "loss": 0.0948, + "theoretical_loss": 4.151563465839927, + "tokens_seen": 314834944 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009136575188573263, + "loss": 0.0952, + "theoretical_loss": 4.151164031719437, + "tokens_seen": 315097088 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009135772749157439, + "loss": 0.0959, + "theoretical_loss": 4.15076502272677, + "tokens_seen": 315359232 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009134970309741615, + "loss": 0.0937, + "theoretical_loss": 4.150366438056622, + "tokens_seen": 315621376 + }, + { + "epoch": 0.1, + "learning_rate": 0.000913416787032579, + "loss": 0.0926, + "theoretical_loss": 4.149968276905888, + "tokens_seen": 315883520 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009133365430909967, + "loss": 0.099, + "theoretical_loss": 4.149570538473644, + "tokens_seen": 316145664 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009132562991494142, + "loss": 0.0913, + "theoretical_loss": 4.149173221961146, + "tokens_seen": 316407808 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009131760552078318, + "loss": 0.094, + "theoretical_loss": 4.1487763265718165, + "tokens_seen": 316669952 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009130958112662494, + "loss": 0.0944, + "theoretical_loss": 4.148379851511241, + "tokens_seen": 316932096 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009130155673246669, + "loss": 0.0942, + "theoretical_loss": 4.147983795987161, + "tokens_seen": 317194240 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009129353233830846, + "loss": 0.0937, + "theoretical_loss": 4.14758815920946, + "tokens_seen": 317456384 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009128550794415022, + "loss": 0.0916, + "theoretical_loss": 4.147192940390165, + "tokens_seen": 317718528 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.00349609088152647, + "objective/train/docs_used": 122440, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.709967017173767, + "objective/train/original_loss": 1.7099668979644775, + "objective/train/theoretical_loss": 4.146995487469262, + "objective/train/tokens_used": 338309600, + "objective/train/value_avg": -0.00899505615234375, + "objective/train/value_loss": 0.00012839387636631727, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.255615234375, + "objective/train/value_reward_corr": 0.6779735039057588, + "objective/train/value_std": 0.0102081298828125, + "objective/train/weight_avg": 1.0035591125488281, + "objective/train/weighted_lm_loss": 1.7162126302719116, + "objective/train/weights_max": 1.1629977226257324, + "objective/train/weights_min": 0.6108145713806152, + "theoretical_loss": 4.146995487469262, + "tokens_seen": 317849600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009127748354999198, + "loss": 0.0971, + "theoretical_loss": 4.146798138743433, + "tokens_seen": 317980672 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009126945915583374, + "loss": 0.0904, + "theoretical_loss": 4.146403753485544, + "tokens_seen": 318242816 + }, + { + "epoch": 0.1, + "learning_rate": 0.000912614347616755, + "loss": 0.093, + "theoretical_loss": 4.146009783834892, + "tokens_seen": 318504960 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009125341036751725, + "loss": 0.0953, + "theoretical_loss": 4.145616229011987, + "tokens_seen": 318767104 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009124538597335902, + "loss": 0.0957, + "theoretical_loss": 4.145223088239432, + "tokens_seen": 319029248 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009123736157920077, + "loss": 0.0923, + "theoretical_loss": 4.14483036074193, + "tokens_seen": 319291392 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009122933718504252, + "loss": 0.0962, + "theoretical_loss": 4.14443804574627, + "tokens_seen": 319553536 + }, + { + "epoch": 0.1, + "learning_rate": 0.000912213127908843, + "loss": 0.0927, + "theoretical_loss": 4.144046142481317, + "tokens_seen": 319815680 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009121328839672605, + "loss": 0.0968, + "theoretical_loss": 4.143654650178012, + "tokens_seen": 320077824 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009120526400256781, + "loss": 0.097, + "theoretical_loss": 4.143263568069358, + "tokens_seen": 320339968 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009119723960840957, + "loss": 0.0928, + "theoretical_loss": 4.142872895390417, + "tokens_seen": 320602112 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009118921521425133, + "loss": 0.0939, + "theoretical_loss": 4.142482631378303, + "tokens_seen": 320864256 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0006320319953374565, + "objective/train/docs_used": 123582, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8734241724014282, + "objective/train/original_loss": 1.8734240531921387, + "objective/train/theoretical_loss": 4.142092775272169, + "objective/train/tokens_used": 341586400, + "objective/train/value_avg": -0.01175689697265625, + "objective/train/value_loss": 0.00034092742134816945, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.2489013671875, + "objective/train/value_reward_corr": 0.649642412628804, + "objective/train/value_std": 0.01458740234375, + "objective/train/weight_avg": 1.0007905960083008, + "objective/train/weighted_lm_loss": 1.8751580715179443, + "objective/train/weights_max": 1.2165815830230713, + "objective/train/weights_min": 0.37836602330207825, + "theoretical_loss": 4.142092775272169, + "tokens_seen": 321126400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009118119082009308, + "loss": 0.0918, + "theoretical_loss": 4.142092775272169, + "tokens_seen": 321126400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009117316642593484, + "loss": 0.0898, + "theoretical_loss": 4.141703326313209, + "tokens_seen": 321388544 + }, + { + "epoch": 0.1, + "learning_rate": 0.000911651420317766, + "loss": 0.0933, + "theoretical_loss": 4.141314283744643, + "tokens_seen": 321650688 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009115711763761835, + "loss": 0.093, + "theoretical_loss": 4.140925646811714, + "tokens_seen": 321912832 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009114909324346013, + "loss": 0.0948, + "theoretical_loss": 4.1405374147616785, + "tokens_seen": 322174976 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009114106884930188, + "loss": 0.0929, + "theoretical_loss": 4.140149586843803, + "tokens_seen": 322437120 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009113304445514365, + "loss": 0.094, + "theoretical_loss": 4.139762162309355, + "tokens_seen": 322699264 + }, + { + "epoch": 0.1, + "learning_rate": 0.000911250200609854, + "loss": 0.0934, + "theoretical_loss": 4.139375140411592, + "tokens_seen": 322961408 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009111699566682715, + "loss": 0.0913, + "theoretical_loss": 4.138988520405764, + "tokens_seen": 323223552 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009110897127266892, + "loss": 0.0922, + "theoretical_loss": 4.138602301549097, + "tokens_seen": 323485696 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009110094687851067, + "loss": 0.0922, + "theoretical_loss": 4.138216483100795, + "tokens_seen": 323747840 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009109292248435243, + "loss": 0.0901, + "theoretical_loss": 4.137831064322021, + "tokens_seen": 324009984 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009108489809019419, + "loss": 0.0906, + "theoretical_loss": 4.1374460444759045, + "tokens_seen": 324272128 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0036898814141750336, + "objective/train/docs_used": 124782, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.784644365310669, + "objective/train/original_loss": 1.784644603729248, + "objective/train/theoretical_loss": 4.137253683922854, + "objective/train/tokens_used": 344863200, + "objective/train/value_avg": -0.00811767578125, + "objective/train/value_loss": 0.0003189310082234442, + "objective/train/value_max": -0.0005593299865722656, + "objective/train/value_min": -0.67138671875, + "objective/train/value_reward_corr": 0.6118297905242213, + "objective/train/value_std": 0.0111846923828125, + "objective/train/weight_avg": 1.0038195848464966, + "objective/train/weighted_lm_loss": 1.790905475616455, + "objective/train/weights_max": 1.1770083904266357, + "objective/train/weights_min": 0.23969075083732605, + "theoretical_loss": 4.137253683922854, + "tokens_seen": 324403200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009107687369603596, + "loss": 0.0923, + "theoretical_loss": 4.137061422827525, + "tokens_seen": 324534272 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009106884930187771, + "loss": 0.0939, + "theoretical_loss": 4.136677198643908, + "tokens_seen": 324796416 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009106082490771947, + "loss": 0.0912, + "theoretical_loss": 4.13629337119402, + "tokens_seen": 325058560 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009105280051356123, + "loss": 0.0923, + "theoretical_loss": 4.135909939748757, + "tokens_seen": 325320704 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009104477611940298, + "loss": 0.0918, + "theoretical_loss": 4.135526903580946, + "tokens_seen": 325582848 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009103675172524475, + "loss": 0.0938, + "theoretical_loss": 4.135144261965327, + "tokens_seen": 325844992 + }, + { + "epoch": 0.1, + "learning_rate": 0.000910287273310865, + "loss": 0.0923, + "theoretical_loss": 4.134762014178559, + "tokens_seen": 326107136 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009102070293692827, + "loss": 0.0925, + "theoretical_loss": 4.134380159499204, + "tokens_seen": 326369280 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009101267854277002, + "loss": 0.0931, + "theoretical_loss": 4.1339986972077245, + "tokens_seen": 326631424 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009100465414861177, + "loss": 0.0885, + "theoretical_loss": 4.133617626586475, + "tokens_seen": 326893568 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009099662975445355, + "loss": 0.0916, + "theoretical_loss": 4.133236946919698, + "tokens_seen": 327155712 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909886053602953, + "loss": 0.0972, + "theoretical_loss": 4.132856657493516, + "tokens_seen": 327417856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7246169962973871, + "debugging/entropy-1-grams": 5.13190351820591, + "debugging/length": 472.8, + "debugging/num_segments": 10, + "debugging/raw_token_scores_avg": 0.008663635700941086, + "debugging/raw_token_scores_std": 0.023182114586234093, + "debugging/score": 0.0033104506818385365, + "debugging/score_std": 0.0017105448100298692, + "epoch": 0.1, + "objective/train/advantage_avg": 0.0008121287100948393, + "objective/train/docs_used": 126013, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9080229997634888, + "objective/train/original_loss": 1.9080228805541992, + "objective/train/theoretical_loss": 4.132476757595925, + "objective/train/tokens_used": 348140000, + "objective/train/value_avg": -0.0094757080078125, + "objective/train/value_loss": 0.0002959860139526427, + "objective/train/value_max": -0.0003554821014404297, + "objective/train/value_min": -0.2890625, + "objective/train/value_reward_corr": 0.6876529624313351, + "objective/train/value_std": 0.01248931884765625, + "objective/train/weight_avg": 1.000952959060669, + "objective/train/weighted_lm_loss": 1.9104235172271729, + "objective/train/weights_max": 1.1095370054244995, + "objective/train/weights_min": 0.376959890127182, + "theoretical_loss": 4.132476757595925, + "tokens_seen": 327680000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009098058096613706, + "loss": 0.0954, + "theoretical_loss": 4.132476757595925, + "tokens_seen": 327680000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009097255657197882, + "loss": 0.0948, + "theoretical_loss": 4.132097246516788, + "tokens_seen": 327942144 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009096453217782058, + "loss": 0.0945, + "theoretical_loss": 4.131718123547829, + "tokens_seen": 328204288 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009095650778366233, + "loss": 0.0931, + "theoretical_loss": 4.131339387982628, + "tokens_seen": 328466432 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909484833895041, + "loss": 0.0913, + "theoretical_loss": 4.1309610391166105, + "tokens_seen": 328728576 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009094045899534585, + "loss": 0.0902, + "theoretical_loss": 4.1305830762470475, + "tokens_seen": 328990720 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909324346011876, + "loss": 0.093, + "theoretical_loss": 4.1302054986730425, + "tokens_seen": 329252864 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009092441020702938, + "loss": 0.0922, + "theoretical_loss": 4.129828305695531, + "tokens_seen": 329515008 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009091638581287113, + "loss": 0.0919, + "theoretical_loss": 4.129451496617269, + "tokens_seen": 329777152 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009090836141871289, + "loss": 0.0938, + "theoretical_loss": 4.129075070742831, + "tokens_seen": 330039296 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009090033702455465, + "loss": 0.093, + "theoretical_loss": 4.128699027378604, + "tokens_seen": 330301440 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009089231263039641, + "loss": 0.0926, + "theoretical_loss": 4.128323365832777, + "tokens_seen": 330563584 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009088428823623817, + "loss": 0.0938, + "theoretical_loss": 4.127948085415338, + "tokens_seen": 330825728 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.003476484213024378, + "objective/train/docs_used": 127130, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9096784591674805, + "objective/train/original_loss": 1.90967857837677, + "objective/train/theoretical_loss": 4.127760587914626, + "objective/train/tokens_used": 351416800, + "objective/train/value_avg": -0.009552001953125, + "objective/train/value_loss": 0.00040583781083114445, + "objective/train/value_max": -0.0003006458282470703, + "objective/train/value_min": -0.32568359375, + "objective/train/value_reward_corr": 0.46149172405686373, + "objective/train/value_std": 0.01270294189453125, + "objective/train/weight_avg": 1.0036547183990479, + "objective/train/weighted_lm_loss": 1.9170717000961304, + "objective/train/weights_max": 1.1853569746017456, + "objective/train/weights_min": 0.36846548318862915, + "theoretical_loss": 4.127760587914626, + "tokens_seen": 330956800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009087626384207992, + "loss": 0.0939, + "theoretical_loss": 4.127573185438068, + "tokens_seen": 331087872 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009086823944792168, + "loss": 0.0963, + "theoretical_loss": 4.127198665214536, + "tokens_seen": 331350016 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009086021505376344, + "loss": 0.0901, + "theoretical_loss": 4.126824524060088, + "tokens_seen": 331612160 + }, + { + "epoch": 0.1, + "learning_rate": 0.000908521906596052, + "loss": 0.093, + "theoretical_loss": 4.126450761291847, + "tokens_seen": 331874304 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009084416626544696, + "loss": 0.0952, + "theoretical_loss": 4.126077376228702, + "tokens_seen": 332136448 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009083614187128873, + "loss": 0.0878, + "theoretical_loss": 4.1257043681913075, + "tokens_seen": 332398592 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009082811747713048, + "loss": 0.092, + "theoretical_loss": 4.125331736502073, + "tokens_seen": 332660736 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009082009308297223, + "loss": 0.0937, + "theoretical_loss": 4.124959480485156, + "tokens_seen": 332922880 + }, + { + "epoch": 0.1, + "learning_rate": 0.00090812068688814, + "loss": 0.0899, + "theoretical_loss": 4.124587599466462, + "tokens_seen": 333185024 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009080404429465575, + "loss": 0.0936, + "theoretical_loss": 4.124216092773635, + "tokens_seen": 333447168 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009079601990049751, + "loss": 0.0898, + "theoretical_loss": 4.123844959736049, + "tokens_seen": 333709312 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009078799550633927, + "loss": 0.091, + "theoretical_loss": 4.123474199684807, + "tokens_seen": 333971456 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0011445061536505818, + "objective/train/docs_used": 128373, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0021450519561768, + "objective/train/original_loss": 2.0021448135375977, + "objective/train/theoretical_loss": 4.123103811952736, + "objective/train/tokens_used": 354693600, + "objective/train/value_avg": -0.010498046875, + "objective/train/value_loss": 0.0002171178930439055, + "objective/train/value_max": -0.0004494190216064453, + "objective/train/value_min": -0.269287109375, + "objective/train/value_reward_corr": 0.6008675056499843, + "objective/train/value_std": 0.011932373046875, + "objective/train/weight_avg": 1.0012482404708862, + "objective/train/weighted_lm_loss": 2.003540515899658, + "objective/train/weights_max": 1.1421287059783936, + "objective/train/weights_min": 0.3697309195995331, + "theoretical_loss": 4.123103811952736, + "tokens_seen": 334233600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009077997111218103, + "loss": 0.0955, + "theoretical_loss": 4.123103811952736, + "tokens_seen": 334233600 + }, + { + "epoch": 0.1, + "learning_rate": 0.000907719467180228, + "loss": 0.0893, + "theoretical_loss": 4.122733795874372, + "tokens_seen": 334495744 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009076392232386455, + "loss": 0.0945, + "theoretical_loss": 4.122364150785966, + "tokens_seen": 334757888 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009075589792970631, + "loss": 0.0945, + "theoretical_loss": 4.1219948760254725, + "tokens_seen": 335020032 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009074787353554807, + "loss": 0.0932, + "theoretical_loss": 4.121625970932542, + "tokens_seen": 335282176 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009073984914138983, + "loss": 0.093, + "theoretical_loss": 4.121257434848519, + "tokens_seen": 335544320 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009073182474723158, + "loss": 0.0925, + "theoretical_loss": 4.120889267116435, + "tokens_seen": 335806464 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009072380035307335, + "loss": 0.0924, + "theoretical_loss": 4.1205214670810015, + "tokens_seen": 336068608 + }, + { + "epoch": 0.1, + "learning_rate": 0.000907157759589151, + "loss": 0.0926, + "theoretical_loss": 4.120154034088609, + "tokens_seen": 336330752 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009070775156475685, + "loss": 0.0924, + "theoretical_loss": 4.119786967487314, + "tokens_seen": 336592896 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009069972717059863, + "loss": 0.0936, + "theoretical_loss": 4.11942026662684, + "tokens_seen": 336855040 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009069170277644038, + "loss": 0.0898, + "theoretical_loss": 4.11905393085857, + "tokens_seen": 337117184 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009068367838228214, + "loss": 0.093, + "theoretical_loss": 4.118687959535539, + "tokens_seen": 337379328 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0014116659294813871, + "objective/train/docs_used": 129675, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7438528537750244, + "objective/train/original_loss": 1.7438528537750244, + "objective/train/theoretical_loss": 4.118505110339262, + "objective/train/tokens_used": 357970400, + "objective/train/value_avg": -0.0096435546875, + "objective/train/value_loss": 0.00027853285428136587, + "objective/train/value_max": -0.00034737586975097656, + "objective/train/value_min": -0.552734375, + "objective/train/value_reward_corr": 0.6218103415544912, + "objective/train/value_std": 0.0139312744140625, + "objective/train/weight_avg": 1.001541256904602, + "objective/train/weighted_lm_loss": 1.745504379272461, + "objective/train/weights_max": 1.441696286201477, + "objective/train/weights_min": 0.36845633387565613, + "theoretical_loss": 4.118505110339262, + "tokens_seen": 337510400 + }, + { + "epoch": 0.1, + "learning_rate": 0.000906756539881239, + "loss": 0.091, + "theoretical_loss": 4.118322352012429, + "tokens_seen": 337641472 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009066762959396566, + "loss": 0.0905, + "theoretical_loss": 4.117957107645569, + "tokens_seen": 337903616 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009065960519980741, + "loss": 0.0931, + "theoretical_loss": 4.1175922257929205, + "tokens_seen": 338165760 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009065158080564918, + "loss": 0.0947, + "theoretical_loss": 4.117227705814078, + "tokens_seen": 338427904 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009064355641149093, + "loss": 0.0977, + "theoretical_loss": 4.116863547070264, + "tokens_seen": 338690048 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009063553201733269, + "loss": 0.0932, + "theoretical_loss": 4.116499748924319, + "tokens_seen": 338952192 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009062750762317446, + "loss": 0.0935, + "theoretical_loss": 4.116136310740702, + "tokens_seen": 339214336 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009061948322901621, + "loss": 0.0875, + "theoretical_loss": 4.115773231885479, + "tokens_seen": 339476480 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009061145883485798, + "loss": 0.0908, + "theoretical_loss": 4.115410511726323, + "tokens_seen": 339738624 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009060343444069973, + "loss": 0.0922, + "theoretical_loss": 4.115048149632507, + "tokens_seen": 340000768 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009059541004654149, + "loss": 0.0936, + "theoretical_loss": 4.114686144974897, + "tokens_seen": 340262912 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009058738565238325, + "loss": 0.0925, + "theoretical_loss": 4.114324497125947, + "tokens_seen": 340525056 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0013680076226592064, + "objective/train/docs_used": 130755, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8868423700332642, + "objective/train/original_loss": 1.8868422508239746, + "objective/train/theoretical_loss": 4.113963205459697, + "objective/train/tokens_used": 361247200, + "objective/train/value_avg": -0.007740020751953125, + "objective/train/value_loss": 0.00031318547553382814, + "objective/train/value_max": -0.0003654956817626953, + "objective/train/value_min": -0.4619140625, + "objective/train/value_reward_corr": 0.5194909400023118, + "objective/train/value_std": 0.0107574462890625, + "objective/train/weight_avg": 1.0015056133270264, + "objective/train/weighted_lm_loss": 1.8895421028137207, + "objective/train/weights_max": 1.373528242111206, + "objective/train/weights_min": 0.38126373291015625, + "theoretical_loss": 4.113963205459697, + "tokens_seen": 340787200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00090579361258225, + "loss": 0.0913, + "theoretical_loss": 4.113963205459697, + "tokens_seen": 340787200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009057133686406676, + "loss": 0.0915, + "theoretical_loss": 4.113602269351765, + "tokens_seen": 341049344 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009056331246990852, + "loss": 0.0906, + "theoretical_loss": 4.113241688179341, + "tokens_seen": 341311488 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009055528807575029, + "loss": 0.0934, + "theoretical_loss": 4.1128814613211855, + "tokens_seen": 341573632 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009054726368159204, + "loss": 0.0902, + "theoretical_loss": 4.1125215881576205, + "tokens_seen": 341835776 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009053923928743381, + "loss": 0.0901, + "theoretical_loss": 4.112162068070525, + "tokens_seen": 342097920 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009053121489327556, + "loss": 0.0916, + "theoretical_loss": 4.111802900443333, + "tokens_seen": 342360064 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009052319049911731, + "loss": 0.0895, + "theoretical_loss": 4.111444084661026, + "tokens_seen": 342622208 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009051516610495908, + "loss": 0.0891, + "theoretical_loss": 4.111085620110127, + "tokens_seen": 342884352 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009050714171080083, + "loss": 0.0918, + "theoretical_loss": 4.110727506178697, + "tokens_seen": 343146496 + }, + { + "epoch": 0.1, + "learning_rate": 0.000904991173166426, + "loss": 0.0935, + "theoretical_loss": 4.110369742256329, + "tokens_seen": 343408640 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009049109292248435, + "loss": 0.0902, + "theoretical_loss": 4.110012327734145, + "tokens_seen": 343670784 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009048306852832611, + "loss": 0.0916, + "theoretical_loss": 4.1096552620047895, + "tokens_seen": 343932928 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.00047141360118985176, + "objective/train/docs_used": 131936, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.86632239818573, + "objective/train/original_loss": 1.8663225173950195, + "objective/train/theoretical_loss": 4.109476859748038, + "objective/train/tokens_used": 364524000, + "objective/train/value_avg": -0.00713348388671875, + "objective/train/value_loss": 0.00014549301704391837, + "objective/train/value_max": -0.0002472400665283203, + "objective/train/value_min": -0.466064453125, + "objective/train/value_reward_corr": 0.7043933872865291, + "objective/train/value_std": 0.01116180419921875, + "objective/train/weight_avg": 1.0005429983139038, + "objective/train/weighted_lm_loss": 1.8668056726455688, + "objective/train/weights_max": 1.1419428586959839, + "objective/train/weights_min": 0.7390621304512024, + "theoretical_loss": 4.109476859748038, + "tokens_seen": 344064000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009047504413416788, + "loss": 0.0924, + "theoretical_loss": 4.109298544462423, + "tokens_seen": 344195072 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009046701974000963, + "loss": 0.087, + "theoretical_loss": 4.108942174502721, + "tokens_seen": 344457216 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009045899534585139, + "loss": 0.0912, + "theoretical_loss": 4.108586151522863, + "tokens_seen": 344719360 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009045097095169315, + "loss": 0.0936, + "theoretical_loss": 4.1082304749215375, + "tokens_seen": 344981504 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009044294655753491, + "loss": 0.0912, + "theoretical_loss": 4.107875144098925, + "tokens_seen": 345243648 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009043492216337666, + "loss": 0.0954, + "theoretical_loss": 4.107520158456703, + "tokens_seen": 345505792 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009042689776921843, + "loss": 0.0906, + "theoretical_loss": 4.107165517398034, + "tokens_seen": 345767936 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009041887337506018, + "loss": 0.093, + "theoretical_loss": 4.106811220327568, + "tokens_seen": 346030080 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009041084898090193, + "loss": 0.0911, + "theoretical_loss": 4.10645726665143, + "tokens_seen": 346292224 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009040282458674371, + "loss": 0.0936, + "theoretical_loss": 4.10610365577722, + "tokens_seen": 346554368 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009039480019258546, + "loss": 0.0919, + "theoretical_loss": 4.105750387114009, + "tokens_seen": 346816512 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009038677579842723, + "loss": 0.0909, + "theoretical_loss": 4.105397460072329, + "tokens_seen": 347078656 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0021935408003628254, + "objective/train/docs_used": 133174, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.799157738685608, + "objective/train/original_loss": 1.7991578578948975, + "objective/train/theoretical_loss": 4.105044874064174, + "objective/train/tokens_used": 367800800, + "objective/train/value_avg": -0.00794219970703125, + "objective/train/value_loss": 0.00020322235650382936, + "objective/train/value_max": -0.0003459453582763672, + "objective/train/value_min": -0.2293701171875, + "objective/train/value_reward_corr": 0.5725621260668602, + "objective/train/value_std": 0.0089569091796875, + "objective/train/weight_avg": 1.0022897720336914, + "objective/train/weighted_lm_loss": 1.801876425743103, + "objective/train/weights_max": 1.1363343000411987, + "objective/train/weights_min": 0.3731314241886139, + "theoretical_loss": 4.105044874064174, + "tokens_seen": 347340800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009037875140426898, + "loss": 0.0874, + "theoretical_loss": 4.105044874064174, + "tokens_seen": 347340800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009037072701011074, + "loss": 0.0912, + "theoretical_loss": 4.104692628502993, + "tokens_seen": 347602944 + }, + { + "epoch": 0.11, + "learning_rate": 0.000903627026159525, + "loss": 0.0899, + "theoretical_loss": 4.104340722803683, + "tokens_seen": 347865088 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009035467822179425, + "loss": 0.0907, + "theoretical_loss": 4.103989156382589, + "tokens_seen": 348127232 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009034665382763601, + "loss": 0.0876, + "theoretical_loss": 4.103637928657495, + "tokens_seen": 348389376 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009033862943347777, + "loss": 0.0955, + "theoretical_loss": 4.103287039047622, + "tokens_seen": 348651520 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009033060503931954, + "loss": 0.0904, + "theoretical_loss": 4.102936486973624, + "tokens_seen": 348913664 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009032258064516129, + "loss": 0.09, + "theoretical_loss": 4.102586271857579, + "tokens_seen": 349175808 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009031455625100306, + "loss": 0.0886, + "theoretical_loss": 4.102236393122989, + "tokens_seen": 349437952 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009030653185684481, + "loss": 0.092, + "theoretical_loss": 4.101886850194775, + "tokens_seen": 349700096 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009029850746268657, + "loss": 0.0886, + "theoretical_loss": 4.10153764249927, + "tokens_seen": 349962240 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009029048306852833, + "loss": 0.0889, + "theoretical_loss": 4.1011887694642155, + "tokens_seen": 350224384 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009028245867437008, + "loss": 0.09, + "theoretical_loss": 4.100840230518759, + "tokens_seen": 350486528 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.0007634600042365491, + "objective/train/docs_used": 134382, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8593584299087524, + "objective/train/original_loss": 1.859358549118042, + "objective/train/theoretical_loss": 4.10066608615162, + "objective/train/tokens_used": 371077600, + "objective/train/value_avg": -0.00992584228515625, + "objective/train/value_loss": 0.00040058817830868065, + "objective/train/value_max": -0.000415802001953125, + "objective/train/value_min": -0.1810302734375, + "objective/train/value_reward_corr": 0.653844216492223, + "objective/train/value_std": 0.01154327392578125, + "objective/train/weight_avg": 0.9994103908538818, + "objective/train/weighted_lm_loss": 1.8588926792144775, + "objective/train/weights_max": 1.1080909967422485, + "objective/train/weights_min": 0.22362525761127472, + "theoretical_loss": 4.10066608615162, + "tokens_seen": 350617600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009027443428021184, + "loss": 0.0923, + "theoretical_loss": 4.100492025093445, + "tokens_seen": 350748672 + }, + { + "epoch": 0.11, + "learning_rate": 0.000902664098860536, + "loss": 0.0924, + "theoretical_loss": 4.100144152620215, + "tokens_seen": 351010816 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009025838549189537, + "loss": 0.0908, + "theoretical_loss": 4.099796612532403, + "tokens_seen": 351272960 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009025036109773713, + "loss": 0.09, + "theoretical_loss": 4.0994494042647265, + "tokens_seen": 351535104 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009024233670357889, + "loss": 0.0908, + "theoretical_loss": 4.099102527253285, + "tokens_seen": 351797248 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009023431230942064, + "loss": 0.0926, + "theoretical_loss": 4.098755980935557, + "tokens_seen": 352059392 + }, + { + "epoch": 0.11, + "learning_rate": 0.000902262879152624, + "loss": 0.0895, + "theoretical_loss": 4.0984097647503965, + "tokens_seen": 352321536 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009021826352110416, + "loss": 0.0906, + "theoretical_loss": 4.09806387813802, + "tokens_seen": 352583680 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009021023912694591, + "loss": 0.0878, + "theoretical_loss": 4.0977183205400145, + "tokens_seen": 352845824 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009020221473278768, + "loss": 0.0901, + "theoretical_loss": 4.097373091399324, + "tokens_seen": 353107968 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009019419033862943, + "loss": 0.0898, + "theoretical_loss": 4.097028190160249, + "tokens_seen": 353370112 + }, + { + "epoch": 0.11, + "learning_rate": 0.000901861659444712, + "loss": 0.0912, + "theoretical_loss": 4.096683616268442, + "tokens_seen": 353632256 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0033219335600733757, + "objective/train/docs_used": 135614, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5801607370376587, + "objective/train/original_loss": 1.5801606178283691, + "objective/train/theoretical_loss": 4.096339369170902, + "objective/train/tokens_used": 374354400, + "objective/train/value_avg": -0.01206207275390625, + "objective/train/value_loss": 0.00040383703890256584, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.5283203125, + "objective/train/value_reward_corr": 0.6677181799151066, + "objective/train/value_std": 0.0159149169921875, + "objective/train/weight_avg": 1.003501296043396, + "objective/train/weighted_lm_loss": 1.5844190120697021, + "objective/train/weights_max": 1.4516695737838745, + "objective/train/weights_min": 0.37713250517845154, + "theoretical_loss": 4.096339369170902, + "tokens_seen": 353894400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009017814155031296, + "loss": 0.0904, + "theoretical_loss": 4.096339369170902, + "tokens_seen": 353894400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009017011715615471, + "loss": 0.0911, + "theoretical_loss": 4.095995448315972, + "tokens_seen": 354156544 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009016209276199647, + "loss": 0.0903, + "theoretical_loss": 4.095651853153331, + "tokens_seen": 354418688 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009015406836783823, + "loss": 0.0916, + "theoretical_loss": 4.095308583133997, + "tokens_seen": 354680832 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009014604397367999, + "loss": 0.0917, + "theoretical_loss": 4.094965637710314, + "tokens_seen": 354942976 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009013801957952175, + "loss": 0.0907, + "theoretical_loss": 4.094623016335954, + "tokens_seen": 355205120 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009012999518536351, + "loss": 0.0904, + "theoretical_loss": 4.094280718465911, + "tokens_seen": 355467264 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009012197079120526, + "loss": 0.0904, + "theoretical_loss": 4.093938743556496, + "tokens_seen": 355729408 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009011394639704702, + "loss": 0.0891, + "theoretical_loss": 4.093597091065333, + "tokens_seen": 355991552 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009010592200288879, + "loss": 0.0892, + "theoretical_loss": 4.093255760451357, + "tokens_seen": 356253696 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009009789760873054, + "loss": 0.0887, + "theoretical_loss": 4.092914751174808, + "tokens_seen": 356515840 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009008987321457231, + "loss": 0.0903, + "theoretical_loss": 4.092574062697225, + "tokens_seen": 356777984 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009008184882041406, + "loss": 0.0885, + "theoretical_loss": 4.092233694481447, + "tokens_seen": 357040128 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0004918540944345295, + "objective/train/docs_used": 136331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9003441333770752, + "objective/train/original_loss": 1.9003441333770752, + "objective/train/theoretical_loss": 4.092063630304224, + "objective/train/tokens_used": 377631200, + "objective/train/value_avg": -0.007537841796875, + "objective/train/value_loss": 0.00020429695723578334, + "objective/train/value_max": -0.0003597736358642578, + "objective/train/value_min": -0.2430419921875, + "objective/train/value_reward_corr": 0.5656587517444805, + "objective/train/value_std": 0.00870513916015625, + "objective/train/weight_avg": 1.0005881786346436, + "objective/train/weighted_lm_loss": 1.9004318714141846, + "objective/train/weights_max": 1.1491543054580688, + "objective/train/weights_min": 0.3702276945114136, + "theoretical_loss": 4.092063630304224, + "tokens_seen": 357171200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009007382442625582, + "loss": 0.0933, + "theoretical_loss": 4.091893645991604, + "tokens_seen": 357302272 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009006580003209758, + "loss": 0.0918, + "theoretical_loss": 4.091553916693115, + "tokens_seen": 357564416 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009005777563793933, + "loss": 0.0863, + "theoretical_loss": 4.091214506052687, + "tokens_seen": 357826560 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009004975124378109, + "loss": 0.0846, + "theoretical_loss": 4.090875413538302, + "tokens_seen": 358088704 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009004172684962285, + "loss": 0.0925, + "theoretical_loss": 4.090536638619224, + "tokens_seen": 358350848 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009003370245546462, + "loss": 0.0879, + "theoretical_loss": 4.090198180765989, + "tokens_seen": 358612992 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009002567806130637, + "loss": 0.0895, + "theoretical_loss": 4.0898600394504, + "tokens_seen": 358875136 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009001765366714814, + "loss": 0.0891, + "theoretical_loss": 4.089522214145525, + "tokens_seen": 359137280 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009000962927298989, + "loss": 0.0917, + "theoretical_loss": 4.089184704325695, + "tokens_seen": 359399424 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009000160487883166, + "loss": 0.0891, + "theoretical_loss": 4.088847509466497, + "tokens_seen": 359661568 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008999358048467341, + "loss": 0.0889, + "theoretical_loss": 4.088510629044771, + "tokens_seen": 359923712 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008998555609051516, + "loss": 0.0889, + "theoretical_loss": 4.088174062538605, + "tokens_seen": 360185856 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0018875771202147007, + "objective/train/docs_used": 137426, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9378498792648315, + "objective/train/original_loss": 1.937849998474121, + "objective/train/theoretical_loss": 4.087837809427334, + "objective/train/tokens_used": 380908000, + "objective/train/value_avg": -0.01007843017578125, + "objective/train/value_loss": 0.00019329431233927608, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.277099609375, + "objective/train/value_reward_corr": 0.6944034668612225, + "objective/train/value_std": 0.01515960693359375, + "objective/train/weight_avg": 1.0019793510437012, + "objective/train/weighted_lm_loss": 1.9406569004058838, + "objective/train/weights_max": 1.115104079246521, + "objective/train/weights_min": 0.36884480714797974, + "theoretical_loss": 4.087837809427334, + "tokens_seen": 360448000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008997753169635693, + "loss": 0.0936, + "theoretical_loss": 4.087837809427334, + "tokens_seen": 360448000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008996950730219868, + "loss": 0.0886, + "theoretical_loss": 4.087501869191536, + "tokens_seen": 360710144 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008996148290804045, + "loss": 0.0882, + "theoretical_loss": 4.087166241313023, + "tokens_seen": 360972288 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008995345851388221, + "loss": 0.093, + "theoretical_loss": 4.086830925274842, + "tokens_seen": 361234432 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008994543411972397, + "loss": 0.0892, + "theoretical_loss": 4.08649592056127, + "tokens_seen": 361496576 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008993740972556572, + "loss": 0.093, + "theoretical_loss": 4.086161226657811, + "tokens_seen": 361758720 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008992938533140748, + "loss": 0.0863, + "theoretical_loss": 4.085826843051191, + "tokens_seen": 362020864 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008992136093724924, + "loss": 0.092, + "theoretical_loss": 4.0854927692293534, + "tokens_seen": 362283008 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008991333654309099, + "loss": 0.0896, + "theoretical_loss": 4.085159004681458, + "tokens_seen": 362545152 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008990531214893276, + "loss": 0.0908, + "theoretical_loss": 4.084825548897873, + "tokens_seen": 362807296 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008989728775477451, + "loss": 0.0933, + "theoretical_loss": 4.084492401370177, + "tokens_seen": 363069440 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008988926336061629, + "loss": 0.0915, + "theoretical_loss": 4.08415956159115, + "tokens_seen": 363331584 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008988123896645804, + "loss": 0.0896, + "theoretical_loss": 4.083827029054773, + "tokens_seen": 363593728 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.000944749335758388, + "objective/train/docs_used": 138686, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8428866863250732, + "objective/train/original_loss": 1.8428866863250732, + "objective/train/theoretical_loss": 4.083660877844774, + "objective/train/tokens_used": 384184800, + "objective/train/value_avg": -0.007354736328125, + "objective/train/value_loss": 0.0001762463798513636, + "objective/train/value_max": -0.00036406517028808594, + "objective/train/value_min": -0.16845703125, + "objective/train/value_reward_corr": 0.5633609780519697, + "objective/train/value_std": 0.008056640625, + "objective/train/weight_avg": 1.0010242462158203, + "objective/train/weighted_lm_loss": 1.8447622060775757, + "objective/train/weights_max": 1.0969452857971191, + "objective/train/weights_min": 0.3732965886592865, + "theoretical_loss": 4.083660877844774, + "tokens_seen": 363724800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008987321457229979, + "loss": 0.0914, + "theoretical_loss": 4.0834948032562215, + "tokens_seen": 363855872 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008986519017814156, + "loss": 0.0914, + "theoretical_loss": 4.083162883691864, + "tokens_seen": 364118016 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008985716578398331, + "loss": 0.0915, + "theoretical_loss": 4.082831269859261, + "tokens_seen": 364380160 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008984914138982507, + "loss": 0.0904, + "theoretical_loss": 4.0824999612571515, + "tokens_seen": 364642304 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008984111699566683, + "loss": 0.0894, + "theoretical_loss": 4.082168957385462, + "tokens_seen": 364904448 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008983309260150859, + "loss": 0.092, + "theoretical_loss": 4.081838257745293, + "tokens_seen": 365166592 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008982506820735034, + "loss": 0.0877, + "theoretical_loss": 4.081507861838922, + "tokens_seen": 365428736 + }, + { + "epoch": 0.11, + "learning_rate": 0.000898170438131921, + "loss": 0.0904, + "theoretical_loss": 4.081177769169795, + "tokens_seen": 365690880 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008980901941903387, + "loss": 0.0874, + "theoretical_loss": 4.080847979242526, + "tokens_seen": 365953024 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008980099502487562, + "loss": 0.0904, + "theoretical_loss": 4.0805184915628905, + "tokens_seen": 366215168 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008979297063071739, + "loss": 0.0896, + "theoretical_loss": 4.080189305637827, + "tokens_seen": 366477312 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008978494623655914, + "loss": 0.0898, + "theoretical_loss": 4.079860420975429, + "tokens_seen": 366739456 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0033440319821238518, + "objective/train/docs_used": 139849, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8659143447875977, + "objective/train/original_loss": 1.8659144639968872, + "objective/train/theoretical_loss": 4.07953183708494, + "objective/train/tokens_used": 387461600, + "objective/train/value_avg": -0.00817108154296875, + "objective/train/value_loss": 0.0002236411819467321, + "objective/train/value_max": -0.00025916099548339844, + "objective/train/value_min": -0.228515625, + "objective/train/value_reward_corr": 0.38717246449564546, + "objective/train/value_std": 0.00867462158203125, + "objective/train/weight_avg": 1.0034469366073608, + "objective/train/weighted_lm_loss": 1.8702045679092407, + "objective/train/weights_max": 1.2567331790924072, + "objective/train/weights_min": 0.3701472282409668, + "theoretical_loss": 4.07953183708494, + "tokens_seen": 367001600 + }, + { + "epoch": 0.11, + "learning_rate": 0.000897769218424009, + "loss": 0.0949, + "theoretical_loss": 4.07953183708494, + "tokens_seen": 367001600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008976889744824266, + "loss": 0.0914, + "theoretical_loss": 4.079203553476759, + "tokens_seen": 367263744 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008976087305408441, + "loss": 0.0919, + "theoretical_loss": 4.078875569662424, + "tokens_seen": 367525888 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008975284865992618, + "loss": 0.0886, + "theoretical_loss": 4.07854788515462, + "tokens_seen": 367788032 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008974482426576793, + "loss": 0.0896, + "theoretical_loss": 4.078220499467168, + "tokens_seen": 368050176 + }, + { + "epoch": 0.11, + "learning_rate": 0.000897367998716097, + "loss": 0.09, + "theoretical_loss": 4.077893412115025, + "tokens_seen": 368312320 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008972877547745146, + "loss": 0.0878, + "theoretical_loss": 4.077566622614281, + "tokens_seen": 368574464 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008972075108329322, + "loss": 0.0894, + "theoretical_loss": 4.077240130482153, + "tokens_seen": 368836608 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008971272668913497, + "loss": 0.0914, + "theoretical_loss": 4.076913935236982, + "tokens_seen": 369098752 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008970470229497674, + "loss": 0.0894, + "theoretical_loss": 4.076588036398235, + "tokens_seen": 369360896 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008969667790081849, + "loss": 0.0891, + "theoretical_loss": 4.076262433486491, + "tokens_seen": 369623040 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008968865350666024, + "loss": 0.0913, + "theoretical_loss": 4.075937126023448, + "tokens_seen": 369885184 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008968062911250201, + "loss": 0.0888, + "theoretical_loss": 4.075612113531915, + "tokens_seen": 370147328 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0013215008657425642, + "objective/train/docs_used": 141078, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.754763126373291, + "objective/train/original_loss": 1.754763126373291, + "objective/train/theoretical_loss": 4.0754497177516456, + "objective/train/tokens_used": 390738400, + "objective/train/value_avg": -0.0116424560546875, + "objective/train/value_loss": 0.00028973660664632916, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.5981599021798076, + "objective/train/value_std": 0.013458251953125, + "objective/train/weight_avg": 1.001459002494812, + "objective/train/weighted_lm_loss": 1.7561613321304321, + "objective/train/weights_max": 1.1360948085784912, + "objective/train/weights_min": 0.38701966404914856, + "theoretical_loss": 4.0754497177516456, + "tokens_seen": 370278400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008967260471834376, + "loss": 0.0888, + "theoretical_loss": 4.075287395535807, + "tokens_seen": 370409472 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008966458032418553, + "loss": 0.0897, + "theoretical_loss": 4.074962971560145, + "tokens_seen": 370671616 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008965655593002729, + "loss": 0.0885, + "theoretical_loss": 4.074638841131052, + "tokens_seen": 370933760 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008964853153586905, + "loss": 0.0905, + "theoretical_loss": 4.074315003775746, + "tokens_seen": 371195904 + }, + { + "epoch": 0.11, + "learning_rate": 0.000896405071417108, + "loss": 0.0917, + "theoretical_loss": 4.073991459022544, + "tokens_seen": 371458048 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008963248274755256, + "loss": 0.0871, + "theoretical_loss": 4.073668206400851, + "tokens_seen": 371720192 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008962445835339432, + "loss": 0.0954, + "theoretical_loss": 4.0733452454411605, + "tokens_seen": 371982336 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008961643395923608, + "loss": 0.0904, + "theoretical_loss": 4.0730225756750515, + "tokens_seen": 372244480 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008960840956507784, + "loss": 0.0879, + "theoretical_loss": 4.072700196635185, + "tokens_seen": 372506624 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008960038517091959, + "loss": 0.0906, + "theoretical_loss": 4.072378107855299, + "tokens_seen": 372768768 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008959236077676137, + "loss": 0.0866, + "theoretical_loss": 4.0720563088702075, + "tokens_seen": 373030912 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008958433638260312, + "loss": 0.089, + "theoretical_loss": 4.071734799215796, + "tokens_seen": 373293056 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0027087300550192595, + "objective/train/docs_used": 142333, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8659402132034302, + "objective/train/original_loss": 1.8659402132034302, + "objective/train/theoretical_loss": 4.071413578429017, + "objective/train/tokens_used": 394015200, + "objective/train/value_avg": -0.01194000244140625, + "objective/train/value_loss": 0.0003572187852114439, + "objective/train/value_max": -0.0005660057067871094, + "objective/train/value_min": -0.54833984375, + "objective/train/value_reward_corr": 0.6665229811822021, + "objective/train/value_std": 0.0186767578125, + "objective/train/weight_avg": 1.002875804901123, + "objective/train/weighted_lm_loss": 1.8703423738479614, + "objective/train/weights_max": 1.7303780317306519, + "objective/train/weights_min": 0.39453253149986267, + "theoretical_loss": 4.071413578429017, + "tokens_seen": 373555200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008957631198844487, + "loss": 0.088, + "theoretical_loss": 4.071413578429017, + "tokens_seen": 373555200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008956828759428664, + "loss": 0.0903, + "theoretical_loss": 4.071092646047892, + "tokens_seen": 373817344 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008956026320012839, + "loss": 0.0885, + "theoretical_loss": 4.0707720016115, + "tokens_seen": 374079488 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008955223880597015, + "loss": 0.0874, + "theoretical_loss": 4.0704516446599825, + "tokens_seen": 374341632 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008954421441181191, + "loss": 0.092, + "theoretical_loss": 4.070131574734536, + "tokens_seen": 374603776 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008953619001765367, + "loss": 0.0897, + "theoretical_loss": 4.069811791377409, + "tokens_seen": 374865920 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008952816562349542, + "loss": 0.0887, + "theoretical_loss": 4.0694922941319, + "tokens_seen": 375128064 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008952014122933718, + "loss": 0.0914, + "theoretical_loss": 4.069173082542351, + "tokens_seen": 375390208 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008951211683517895, + "loss": 0.0939, + "theoretical_loss": 4.068854156154154, + "tokens_seen": 375652352 + }, + { + "epoch": 0.11, + "learning_rate": 0.000895040924410207, + "loss": 0.0905, + "theoretical_loss": 4.068535514513734, + "tokens_seen": 375914496 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008949606804686247, + "loss": 0.0918, + "theoretical_loss": 4.068217157168556, + "tokens_seen": 376176640 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008948804365270422, + "loss": 0.0897, + "theoretical_loss": 4.06789908366712, + "tokens_seen": 376438784 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008948001925854599, + "loss": 0.0918, + "theoretical_loss": 4.067581293558955, + "tokens_seen": 376700928 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0015242223162204027, + "objective/train/docs_used": 143470, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8477563858032227, + "objective/train/original_loss": 1.8477565050125122, + "objective/train/theoretical_loss": 4.067422504636857, + "objective/train/tokens_used": 397292000, + "objective/train/value_avg": -0.00897216796875, + "objective/train/value_loss": 0.00023800358758307993, + "objective/train/value_max": -0.00040459632873535156, + "objective/train/value_min": -0.291748046875, + "objective/train/value_reward_corr": 0.5821468077761229, + "objective/train/value_std": 0.0121917724609375, + "objective/train/weight_avg": 1.0016366243362427, + "objective/train/weighted_lm_loss": 1.8498717546463013, + "objective/train/weights_max": 1.1463937759399414, + "objective/train/weights_min": 0.3839375674724579, + "theoretical_loss": 4.067422504636857, + "tokens_seen": 376832000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008947199486438774, + "loss": 0.0918, + "theoretical_loss": 4.0672637863946175, + "tokens_seen": 376963072 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008946397047022949, + "loss": 0.0885, + "theoretical_loss": 4.0669465617256915, + "tokens_seen": 377225216 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008945594607607126, + "loss": 0.0878, + "theoretical_loss": 4.06662961910478, + "tokens_seen": 377487360 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008944792168191301, + "loss": 0.0873, + "theoretical_loss": 4.066312958085503, + "tokens_seen": 377749504 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008943989728775478, + "loss": 0.088, + "theoretical_loss": 4.065996578222502, + "tokens_seen": 378011648 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008943187289359654, + "loss": 0.0904, + "theoretical_loss": 4.065680479071426, + "tokens_seen": 378273792 + }, + { + "epoch": 0.11, + "learning_rate": 0.000894238484994383, + "loss": 0.0875, + "theoretical_loss": 4.0653646601889335, + "tokens_seen": 378535936 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008941582410528005, + "loss": 0.0891, + "theoretical_loss": 4.065049121132693, + "tokens_seen": 378798080 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008940779971112181, + "loss": 0.0881, + "theoretical_loss": 4.0647338614613755, + "tokens_seen": 379060224 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008939977531696357, + "loss": 0.0862, + "theoretical_loss": 4.0644188807346495, + "tokens_seen": 379322368 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008939175092280532, + "loss": 0.0906, + "theoretical_loss": 4.064104178513186, + "tokens_seen": 379584512 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008938372652864709, + "loss": 0.089, + "theoretical_loss": 4.0637897543586465, + "tokens_seen": 379846656 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.002633684780448675, + "objective/train/docs_used": 144698, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.705493688583374, + "objective/train/original_loss": 1.705493688583374, + "objective/train/theoretical_loss": 4.063475607833687, + "objective/train/tokens_used": 400568800, + "objective/train/value_avg": -0.00835418701171875, + "objective/train/value_loss": 0.0002564003807492554, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.32568359375, + "objective/train/value_reward_corr": 0.6303237316396728, + "objective/train/value_std": 0.01141357421875, + "objective/train/weight_avg": 1.002751350402832, + "objective/train/weighted_lm_loss": 1.7094148397445679, + "objective/train/weights_max": 1.104806661605835, + "objective/train/weights_min": 0.3750951290130615, + "theoretical_loss": 4.063475607833687, + "tokens_seen": 380108800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008937570213448884, + "loss": 0.087, + "theoretical_loss": 4.063475607833687, + "tokens_seen": 380108800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008936767774033062, + "loss": 0.0893, + "theoretical_loss": 4.063161738501951, + "tokens_seen": 380370944 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008935965334617237, + "loss": 0.0892, + "theoretical_loss": 4.0628481459280685, + "tokens_seen": 380633088 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008935162895201413, + "loss": 0.0872, + "theoretical_loss": 4.062534829677653, + "tokens_seen": 380895232 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008934360455785589, + "loss": 0.0894, + "theoretical_loss": 4.062221789317297, + "tokens_seen": 381157376 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008933558016369764, + "loss": 0.0874, + "theoretical_loss": 4.061909024414572, + "tokens_seen": 381419520 + }, + { + "epoch": 0.12, + "learning_rate": 0.000893275557695394, + "loss": 0.088, + "theoretical_loss": 4.061596534538021, + "tokens_seen": 381681664 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008931953137538116, + "loss": 0.0913, + "theoretical_loss": 4.061284319257162, + "tokens_seen": 381943808 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008931150698122292, + "loss": 0.0907, + "theoretical_loss": 4.060972378142479, + "tokens_seen": 382205952 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008930348258706467, + "loss": 0.0902, + "theoretical_loss": 4.060660710765423, + "tokens_seen": 382468096 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008929545819290645, + "loss": 0.0914, + "theoretical_loss": 4.060349316698408, + "tokens_seen": 382730240 + }, + { + "epoch": 0.12, + "learning_rate": 0.000892874337987482, + "loss": 0.0879, + "theoretical_loss": 4.060038195514808, + "tokens_seen": 382992384 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008927940940458995, + "loss": 0.0868, + "theoretical_loss": 4.059727346788955, + "tokens_seen": 383254528 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0013565245317295194, + "objective/train/docs_used": 145827, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7567694187164307, + "objective/train/original_loss": 1.7567695379257202, + "objective/train/theoretical_loss": 4.059572024464923, + "objective/train/tokens_used": 403845600, + "objective/train/value_avg": -0.0077972412109375, + "objective/train/value_loss": 0.00035242707235738635, + "objective/train/value_max": -0.00033283233642578125, + "objective/train/value_min": -0.4189453125, + "objective/train/value_reward_corr": 0.5299108861611858, + "objective/train/value_std": 0.01129150390625, + "objective/train/weight_avg": 1.0015063285827637, + "objective/train/weighted_lm_loss": 1.7594711780548096, + "objective/train/weights_max": 1.2279499769210815, + "objective/train/weights_min": 0.36844298243522644, + "theoretical_loss": 4.059572024464923, + "tokens_seen": 383385600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008927138501043172, + "loss": 0.0874, + "theoretical_loss": 4.059416770096134, + "tokens_seen": 383516672 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008926336061627347, + "loss": 0.0888, + "theoretical_loss": 4.059106465012583, + "tokens_seen": 383778816 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008925533622211523, + "loss": 0.0881, + "theoretical_loss": 4.058796431115489, + "tokens_seen": 384040960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008924731182795699, + "loss": 0.0897, + "theoretical_loss": 4.058486667982986, + "tokens_seen": 384303104 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008923928743379875, + "loss": 0.0877, + "theoretical_loss": 4.058177175194148, + "tokens_seen": 384565248 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008923126303964051, + "loss": 0.0886, + "theoretical_loss": 4.057867952328994, + "tokens_seen": 384827392 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008922323864548226, + "loss": 0.0901, + "theoretical_loss": 4.057558998968479, + "tokens_seen": 385089536 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008921521425132403, + "loss": 0.0907, + "theoretical_loss": 4.0572503146944925, + "tokens_seen": 385351680 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008920718985716579, + "loss": 0.0887, + "theoretical_loss": 4.056941899089858, + "tokens_seen": 385613824 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008919916546300755, + "loss": 0.0885, + "theoretical_loss": 4.056633751738328, + "tokens_seen": 385875968 + }, + { + "epoch": 0.12, + "learning_rate": 0.000891911410688493, + "loss": 0.0879, + "theoretical_loss": 4.0563258722245825, + "tokens_seen": 386138112 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008918311667469107, + "loss": 0.0875, + "theoretical_loss": 4.056018260134226, + "tokens_seen": 386400256 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0018099743174389005, + "objective/train/docs_used": 146981, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.903164267539978, + "objective/train/original_loss": 1.9031643867492676, + "objective/train/theoretical_loss": 4.055710915053783, + "objective/train/tokens_used": 407122400, + "objective/train/value_avg": -0.0086517333984375, + "objective/train/value_loss": 0.0001866398670244962, + "objective/train/value_max": -0.0004425048828125, + "objective/train/value_min": -0.2122802734375, + "objective/train/value_reward_corr": 0.5988158937852228, + "objective/train/value_std": 0.01146697998046875, + "objective/train/weight_avg": 1.0019017457962036, + "objective/train/weighted_lm_loss": 1.9061700105667114, + "objective/train/weights_max": 1.1358146667480469, + "objective/train/weights_min": 0.6089951395988464, + "theoretical_loss": 4.055710915053783, + "tokens_seen": 386662400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008917509228053282, + "loss": 0.0884, + "theoretical_loss": 4.055710915053783, + "tokens_seen": 386662400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008916706788637457, + "loss": 0.0871, + "theoretical_loss": 4.055403836570701, + "tokens_seen": 386924544 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008915904349221634, + "loss": 0.0915, + "theoretical_loss": 4.05509702427334, + "tokens_seen": 387186688 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008915101909805809, + "loss": 0.0866, + "theoretical_loss": 4.054790477750974, + "tokens_seen": 387448832 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008914299470389986, + "loss": 0.0881, + "theoretical_loss": 4.054484196593791, + "tokens_seen": 387710976 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008913497030974162, + "loss": 0.0891, + "theoretical_loss": 4.054178180392885, + "tokens_seen": 387973120 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008912694591558338, + "loss": 0.0835, + "theoretical_loss": 4.053872428740256, + "tokens_seen": 388235264 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008911892152142514, + "loss": 0.0894, + "theoretical_loss": 4.053566941228809, + "tokens_seen": 388497408 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008911089712726689, + "loss": 0.0895, + "theoretical_loss": 4.053261717452346, + "tokens_seen": 388759552 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008910287273310865, + "loss": 0.0865, + "theoretical_loss": 4.052956757005573, + "tokens_seen": 389021696 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008909484833895041, + "loss": 0.0864, + "theoretical_loss": 4.0526520594840845, + "tokens_seen": 389283840 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008908682394479217, + "loss": 0.0906, + "theoretical_loss": 4.052347624484373, + "tokens_seen": 389545984 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008907879955063392, + "loss": 0.0874, + "theoretical_loss": 4.052043451603818, + "tokens_seen": 389808128 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.002918938407674432, + "objective/train/docs_used": 148229, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8769011497497559, + "objective/train/original_loss": 1.8769011497497559, + "objective/train/theoretical_loss": 4.051891463332648, + "objective/train/tokens_used": 410399200, + "objective/train/value_avg": -0.00745391845703125, + "objective/train/value_loss": 0.00021266264957375824, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.27587890625, + "objective/train/value_reward_corr": 0.5017984739541499, + "objective/train/value_std": 0.0098876953125, + "objective/train/weight_avg": 1.003010869026184, + "objective/train/weighted_lm_loss": 1.8814412355422974, + "objective/train/weights_max": 1.1254287958145142, + "objective/train/weights_min": 0.380566269159317, + "theoretical_loss": 4.051891463332648, + "tokens_seen": 389939200 + }, + { + "epoch": 0.12, + "learning_rate": 0.000890707751564757, + "loss": 0.0897, + "theoretical_loss": 4.051739540440688, + "tokens_seen": 390070272 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008906275076231745, + "loss": 0.0865, + "theoretical_loss": 4.0514358905941386, + "tokens_seen": 390332416 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008905472636815921, + "loss": 0.0881, + "theoretical_loss": 4.051132501664204, + "tokens_seen": 390594560 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008904670197400097, + "loss": 0.0898, + "theoretical_loss": 4.050829373251803, + "tokens_seen": 390856704 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008903867757984272, + "loss": 0.0905, + "theoretical_loss": 4.050526504958727, + "tokens_seen": 391118848 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008903065318568448, + "loss": 0.0853, + "theoretical_loss": 4.050223896387647, + "tokens_seen": 391380992 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008902262879152624, + "loss": 0.0879, + "theoretical_loss": 4.0499215471421035, + "tokens_seen": 391643136 + }, + { + "epoch": 0.12, + "learning_rate": 0.00089014604397368, + "loss": 0.0888, + "theoretical_loss": 4.049619456826508, + "tokens_seen": 391905280 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008900658000320975, + "loss": 0.0875, + "theoretical_loss": 4.0493176250461405, + "tokens_seen": 392167424 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008899855560905153, + "loss": 0.0879, + "theoretical_loss": 4.049016051407145, + "tokens_seen": 392429568 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008899053121489328, + "loss": 0.087, + "theoretical_loss": 4.048714735516527, + "tokens_seen": 392691712 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008898250682073504, + "loss": 0.0909, + "theoretical_loss": 4.048413676982155, + "tokens_seen": 392953856 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0008797993650659919, + "objective/train/docs_used": 149327, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8922388553619385, + "objective/train/original_loss": 1.8922390937805176, + "objective/train/theoretical_loss": 4.048112875412752, + "objective/train/tokens_used": 413676000, + "objective/train/value_avg": -0.0099639892578125, + "objective/train/value_loss": 0.00043581624049693346, + "objective/train/value_max": -0.0004076957702636719, + "objective/train/value_min": -0.87744140625, + "objective/train/value_reward_corr": 0.6595999360248977, + "objective/train/value_std": 0.01849365234375, + "objective/train/weight_avg": 1.001081109046936, + "objective/train/weighted_lm_loss": 1.8936957120895386, + "objective/train/weights_max": 1.6510648727416992, + "objective/train/weights_min": 0.36863452196121216, + "theoretical_loss": 4.048112875412752, + "tokens_seen": 393216000 + }, + { + "epoch": 0.12, + "learning_rate": 0.000889744824265768, + "loss": 0.0877, + "theoretical_loss": 4.048112875412752, + "tokens_seen": 393216000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008896645803241855, + "loss": 0.0895, + "theoretical_loss": 4.0478123304179, + "tokens_seen": 393478144 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008895843363826032, + "loss": 0.0908, + "theoretical_loss": 4.047512041608029, + "tokens_seen": 393740288 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008895040924410207, + "loss": 0.0891, + "theoretical_loss": 4.047212008594424, + "tokens_seen": 394002432 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008894238484994383, + "loss": 0.0877, + "theoretical_loss": 4.046912230989214, + "tokens_seen": 394264576 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008893436045578559, + "loss": 0.0898, + "theoretical_loss": 4.0466127084053785, + "tokens_seen": 394526720 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008892633606162734, + "loss": 0.0882, + "theoretical_loss": 4.046313440456733, + "tokens_seen": 394788864 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008891831166746911, + "loss": 0.0893, + "theoretical_loss": 4.0460144267579405, + "tokens_seen": 395051008 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008891028727331087, + "loss": 0.0901, + "theoretical_loss": 4.045715666924499, + "tokens_seen": 395313152 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008890226287915263, + "loss": 0.0885, + "theoretical_loss": 4.045417160572743, + "tokens_seen": 395575296 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008889423848499438, + "loss": 0.0904, + "theoretical_loss": 4.045118907319839, + "tokens_seen": 395837440 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008888621409083615, + "loss": 0.0868, + "theoretical_loss": 4.04482090678379, + "tokens_seen": 396099584 + }, + { + "epoch": 0.12, + "learning_rate": 0.000888781896966779, + "loss": 0.0911, + "theoretical_loss": 4.044523158583421, + "tokens_seen": 396361728 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0021504259202629328, + "objective/train/docs_used": 150450, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7327624559402466, + "objective/train/original_loss": 1.732762336730957, + "objective/train/theoretical_loss": 4.044374378990227, + "objective/train/tokens_used": 416952800, + "objective/train/value_avg": -0.00965118408203125, + "objective/train/value_loss": 0.00036191148683428764, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.8271484375, + "objective/train/value_reward_corr": 0.6954772035577824, + "objective/train/value_std": 0.01971435546875, + "objective/train/weight_avg": 1.0023201704025269, + "objective/train/weighted_lm_loss": 1.7352375984191895, + "objective/train/weights_max": 1.7499204874038696, + "objective/train/weights_min": 0.37855079770088196, + "theoretical_loss": 4.044374378990227, + "tokens_seen": 396492800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008887016530251965, + "loss": 0.0896, + "theoretical_loss": 4.044225662338388, + "tokens_seen": 396623872 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008886214090836142, + "loss": 0.0886, + "theoretical_loss": 4.04392841766917, + "tokens_seen": 396886016 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008885411651420317, + "loss": 0.0865, + "theoretical_loss": 4.043631424197067, + "tokens_seen": 397148160 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008884609212004495, + "loss": 0.09, + "theoretical_loss": 4.0433346815442, + "tokens_seen": 397410304 + }, + { + "epoch": 0.12, + "learning_rate": 0.000888380677258867, + "loss": 0.0919, + "theoretical_loss": 4.043038189333508, + "tokens_seen": 397672448 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008883004333172846, + "loss": 0.0924, + "theoretical_loss": 4.042741947188741, + "tokens_seen": 397934592 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008882201893757022, + "loss": 0.0876, + "theoretical_loss": 4.042445954734466, + "tokens_seen": 398196736 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008881399454341197, + "loss": 0.0926, + "theoretical_loss": 4.0421502115960575, + "tokens_seen": 398458880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008880597014925373, + "loss": 0.0855, + "theoretical_loss": 4.0418547173997, + "tokens_seen": 398721024 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008879794575509549, + "loss": 0.0885, + "theoretical_loss": 4.041559471772382, + "tokens_seen": 398983168 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008878992136093725, + "loss": 0.0909, + "theoretical_loss": 4.041264474341896, + "tokens_seen": 399245312 + }, + { + "epoch": 0.12, + "learning_rate": 0.00088781896966779, + "loss": 0.0891, + "theoretical_loss": 4.040969724736838, + "tokens_seen": 399507456 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.001554798916913569, + "objective/train/docs_used": 151634, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7531509399414062, + "objective/train/original_loss": 1.7531508207321167, + "objective/train/theoretical_loss": 4.040675222586599, + "objective/train/tokens_used": 420229600, + "objective/train/value_avg": -0.009552001953125, + "objective/train/value_loss": 0.00028178023057989776, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.87255859375, + "objective/train/value_reward_corr": 0.7821445899209426, + "objective/train/value_std": 0.0219268798828125, + "objective/train/weight_avg": 1.0016918182373047, + "objective/train/weighted_lm_loss": 1.755144476890564, + "objective/train/weights_max": 1.6336557865142822, + "objective/train/weights_min": 0.541962206363678, + "theoretical_loss": 4.040675222586599, + "tokens_seen": 399769600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008877387257262078, + "loss": 0.0863, + "theoretical_loss": 4.040675222586599, + "tokens_seen": 399769600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008876584817846253, + "loss": 0.0854, + "theoretical_loss": 4.04038096752137, + "tokens_seen": 400031744 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008875782378430429, + "loss": 0.0886, + "theoretical_loss": 4.040086959172136, + "tokens_seen": 400293888 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008874979939014605, + "loss": 0.085, + "theoretical_loss": 4.039793197170672, + "tokens_seen": 400556032 + }, + { + "epoch": 0.12, + "learning_rate": 0.000887417749959878, + "loss": 0.0898, + "theoretical_loss": 4.039499681149547, + "tokens_seen": 400818176 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008873375060182957, + "loss": 0.0889, + "theoretical_loss": 4.039206410742114, + "tokens_seen": 401080320 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008872572620767132, + "loss": 0.0868, + "theoretical_loss": 4.038913385582515, + "tokens_seen": 401342464 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008871770181351308, + "loss": 0.0871, + "theoretical_loss": 4.038620605305673, + "tokens_seen": 401604608 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008870967741935484, + "loss": 0.0847, + "theoretical_loss": 4.038328069547293, + "tokens_seen": 401866752 + }, + { + "epoch": 0.12, + "learning_rate": 0.000887016530251966, + "loss": 0.0879, + "theoretical_loss": 4.03803577794386, + "tokens_seen": 402128896 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008869362863103836, + "loss": 0.0887, + "theoretical_loss": 4.037743730132635, + "tokens_seen": 402391040 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008868560423688012, + "loss": 0.0855, + "theoretical_loss": 4.037451925751654, + "tokens_seen": 402653184 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008867757984272188, + "loss": 0.088, + "theoretical_loss": 4.0371603644397265, + "tokens_seen": 402915328 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0015503467293456197, + "objective/train/docs_used": 152845, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6842749118804932, + "objective/train/original_loss": 1.6842749118804932, + "objective/train/theoretical_loss": 4.037014674821996, + "objective/train/tokens_used": 423506400, + "objective/train/value_avg": -0.013092041015625, + "objective/train/value_loss": 0.0004618663515429944, + "objective/train/value_max": -0.00034737586975097656, + "objective/train/value_min": -0.5283203125, + "objective/train/value_reward_corr": 0.5689597227558094, + "objective/train/value_std": 0.016571044921875, + "objective/train/weight_avg": 1.0017578601837158, + "objective/train/weighted_lm_loss": 1.6876652240753174, + "objective/train/weights_max": 1.291870355606079, + "objective/train/weights_min": 0.36923545598983765, + "theoretical_loss": 4.037014674821996, + "tokens_seen": 403046400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008866955544856363, + "loss": 0.0901, + "theoretical_loss": 4.03686904583643, + "tokens_seen": 403177472 + }, + { + "epoch": 0.12, + "learning_rate": 0.000886615310544054, + "loss": 0.0872, + "theoretical_loss": 4.036577969582114, + "tokens_seen": 403439616 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008865350666024715, + "loss": 0.0862, + "theoretical_loss": 4.03628713531789, + "tokens_seen": 403701760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008864548226608891, + "loss": 0.0896, + "theoretical_loss": 4.035996542685638, + "tokens_seen": 403963904 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008863745787193067, + "loss": 0.0864, + "theoretical_loss": 4.0357061913279955, + "tokens_seen": 404226048 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008862943347777242, + "loss": 0.0873, + "theoretical_loss": 4.035416080888364, + "tokens_seen": 404488192 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008862140908361419, + "loss": 0.0896, + "theoretical_loss": 4.035126211010899, + "tokens_seen": 404750336 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008861338468945595, + "loss": 0.0856, + "theoretical_loss": 4.034836581340515, + "tokens_seen": 405012480 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008860536029529771, + "loss": 0.0872, + "theoretical_loss": 4.034547191522877, + "tokens_seen": 405274624 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008859733590113947, + "loss": 0.0932, + "theoretical_loss": 4.034258041204404, + "tokens_seen": 405536768 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008858931150698123, + "loss": 0.0875, + "theoretical_loss": 4.033969130032263, + "tokens_seen": 405798912 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008858128711282298, + "loss": 0.0861, + "theoretical_loss": 4.033680457654368, + "tokens_seen": 406061056 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0022317736875265837, + "objective/train/docs_used": 154148, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6351722478866577, + "objective/train/original_loss": 1.6351724863052368, + "objective/train/theoretical_loss": 4.0333920237193785, + "objective/train/tokens_used": 426783200, + "objective/train/value_avg": -0.00635528564453125, + "objective/train/value_loss": 0.00012135026918258518, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.303955078125, + "objective/train/value_reward_corr": 0.5124807247227108, + "objective/train/value_std": 0.0070343017578125, + "objective/train/weight_avg": 1.00228750705719, + "objective/train/weighted_lm_loss": 1.6392003297805786, + "objective/train/weights_max": 1.1095505952835083, + "objective/train/weights_min": 0.37000179290771484, + "theoretical_loss": 4.0333920237193785, + "tokens_seen": 406323200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008857326271866474, + "loss": 0.0859, + "theoretical_loss": 4.0333920237193785, + "tokens_seen": 406323200 + }, + { + "epoch": 0.12, + "learning_rate": 0.000885652383245065, + "loss": 0.0823, + "theoretical_loss": 4.0331038278766975, + "tokens_seen": 406585344 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008855721393034825, + "loss": 0.0895, + "theoretical_loss": 4.032815869776471, + "tokens_seen": 406847488 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008854918953619003, + "loss": 0.089, + "theoretical_loss": 4.032528149069579, + "tokens_seen": 407109632 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008854116514203178, + "loss": 0.0891, + "theoretical_loss": 4.0322406654076435, + "tokens_seen": 407371776 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008853314074787354, + "loss": 0.0893, + "theoretical_loss": 4.0319534184430195, + "tokens_seen": 407633920 + }, + { + "epoch": 0.12, + "learning_rate": 0.000885251163537153, + "loss": 0.0867, + "theoretical_loss": 4.031666407828795, + "tokens_seen": 407896064 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008851709195955705, + "loss": 0.0867, + "theoretical_loss": 4.03137963321879, + "tokens_seen": 408158208 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008850906756539881, + "loss": 0.089, + "theoretical_loss": 4.0310930942675505, + "tokens_seen": 408420352 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008850104317124057, + "loss": 0.0878, + "theoretical_loss": 4.030806790630353, + "tokens_seen": 408682496 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008849301877708233, + "loss": 0.0856, + "theoretical_loss": 4.030520721963199, + "tokens_seen": 408944640 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008848499438292408, + "loss": 0.0855, + "theoretical_loss": 4.030234887922808, + "tokens_seen": 409206784 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008847696998876586, + "loss": 0.0877, + "theoretical_loss": 4.029949288166627, + "tokens_seen": 409468928 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.0004230485938023776, + "objective/train/docs_used": 155224, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8668181896209717, + "objective/train/original_loss": 1.8668180704116821, + "objective/train/theoretical_loss": 4.029806576038263, + "objective/train/tokens_used": 430060000, + "objective/train/value_avg": -0.00667572021484375, + "objective/train/value_loss": 0.00026823318330571055, + "objective/train/value_max": -0.0002512931823730469, + "objective/train/value_min": -0.2183837890625, + "objective/train/value_reward_corr": 0.5231429303787112, + "objective/train/value_std": 0.0097808837890625, + "objective/train/weight_avg": 0.9997006058692932, + "objective/train/weighted_lm_loss": 1.864938735961914, + "objective/train/weights_max": 1.1423146724700928, + "objective/train/weights_min": 0.36977463960647583, + "theoretical_loss": 4.029806576038263, + "tokens_seen": 409600000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008846894559460761, + "loss": 0.0827, + "theoretical_loss": 4.0296639223528175, + "tokens_seen": 409731072 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008846092120044937, + "loss": 0.0863, + "theoretical_loss": 4.029378790140261, + "tokens_seen": 409993216 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008845289680629113, + "loss": 0.0867, + "theoretical_loss": 4.029093891188552, + "tokens_seen": 410255360 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008844487241213288, + "loss": 0.0881, + "theoretical_loss": 4.028809225158, + "tokens_seen": 410517504 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008843684801797465, + "loss": 0.0862, + "theoretical_loss": 4.028524791709621, + "tokens_seen": 410779648 + }, + { + "epoch": 0.12, + "learning_rate": 0.000884288236238164, + "loss": 0.0867, + "theoretical_loss": 4.028240590505148, + "tokens_seen": 411041792 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008842079922965816, + "loss": 0.0864, + "theoretical_loss": 4.027956621207015, + "tokens_seen": 411303936 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008841277483549992, + "loss": 0.0861, + "theoretical_loss": 4.027672883478364, + "tokens_seen": 411566080 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008840475044134169, + "loss": 0.0897, + "theoretical_loss": 4.027389376983041, + "tokens_seen": 411828224 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008839672604718344, + "loss": 0.0898, + "theoretical_loss": 4.02710610138559, + "tokens_seen": 412090368 + }, + { + "epoch": 0.12, + "learning_rate": 0.000883887016530252, + "loss": 0.0868, + "theoretical_loss": 4.02682305635126, + "tokens_seen": 412352512 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008838067725886696, + "loss": 0.0879, + "theoretical_loss": 4.026540241545994, + "tokens_seen": 412614656 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0008909243042580783, + "objective/train/docs_used": 156351, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.63358473777771, + "objective/train/original_loss": 1.633584976196289, + "objective/train/theoretical_loss": 4.026257656636431, + "objective/train/tokens_used": 433336800, + "objective/train/value_avg": -0.00994110107421875, + "objective/train/value_loss": 0.0007588982116430998, + "objective/train/value_max": -0.00039196014404296875, + "objective/train/value_min": -0.9423828125, + "objective/train/value_reward_corr": 0.6866913112731529, + "objective/train/value_std": 0.0245361328125, + "objective/train/weight_avg": 1.0012253522872925, + "objective/train/weighted_lm_loss": 1.6334694623947144, + "objective/train/weights_max": 2.1991829872131348, + "objective/train/weights_min": 0.3683927655220032, + "theoretical_loss": 4.026257656636431, + "tokens_seen": 412876800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008837265286470871, + "loss": 0.086, + "theoretical_loss": 4.026257656636431, + "tokens_seen": 412876800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008836462847055048, + "loss": 0.0873, + "theoretical_loss": 4.025975301289906, + "tokens_seen": 413138944 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008835660407639223, + "loss": 0.0868, + "theoretical_loss": 4.025693175174443, + "tokens_seen": 413401088 + }, + { + "epoch": 0.13, + "learning_rate": 0.00088348579682234, + "loss": 0.084, + "theoretical_loss": 4.02541127795876, + "tokens_seen": 413663232 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008834055528807575, + "loss": 0.0864, + "theoretical_loss": 4.02512960931226, + "tokens_seen": 413925376 + }, + { + "epoch": 0.13, + "learning_rate": 0.000883325308939175, + "loss": 0.0834, + "theoretical_loss": 4.024848168905035, + "tokens_seen": 414187520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008832450649975928, + "loss": 0.0868, + "theoretical_loss": 4.02456695640786, + "tokens_seen": 414449664 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008831648210560103, + "loss": 0.0859, + "theoretical_loss": 4.0242859714921915, + "tokens_seen": 414711808 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008830845771144279, + "loss": 0.0889, + "theoretical_loss": 4.024005213830171, + "tokens_seen": 414973952 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008830043331728455, + "loss": 0.0838, + "theoretical_loss": 4.023724683094615, + "tokens_seen": 415236096 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008829240892312631, + "loss": 0.0836, + "theoretical_loss": 4.023444378959019, + "tokens_seen": 415498240 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008828438452896806, + "loss": 0.0853, + "theoretical_loss": 4.023164301097555, + "tokens_seen": 415760384 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008827636013480982, + "loss": 0.0857, + "theoretical_loss": 4.0228844491850655, + "tokens_seen": 416022528 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.002180003095418215, + "objective/train/docs_used": 157543, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5781835317611694, + "objective/train/original_loss": 1.5781837701797485, + "objective/train/theoretical_loss": 4.022744607858259, + "objective/train/tokens_used": 436613600, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00037475841236300766, + "objective/train/value_max": -0.0002397298812866211, + "objective/train/value_min": -0.37060546875, + "objective/train/value_reward_corr": 0.5816138183738614, + "objective/train/value_std": 0.0136260986328125, + "objective/train/weight_avg": 1.0023459196090698, + "objective/train/weighted_lm_loss": 1.5822744369506836, + "objective/train/weights_max": 1.4486113786697388, + "objective/train/weights_min": 0.37067708373069763, + "theoretical_loss": 4.022744607858259, + "tokens_seen": 416153600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008826833574065158, + "loss": 0.0879, + "theoretical_loss": 4.022604822897068, + "tokens_seen": 416284672 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008826031134649333, + "loss": 0.0876, + "theoretical_loss": 4.0223254219097475, + "tokens_seen": 416546816 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008825228695233511, + "loss": 0.0878, + "theoretical_loss": 4.022046245899958, + "tokens_seen": 416808960 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008824426255817686, + "loss": 0.0861, + "theoretical_loss": 4.021767294545221, + "tokens_seen": 417071104 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008823623816401862, + "loss": 0.0877, + "theoretical_loss": 4.021488567523721, + "tokens_seen": 417333248 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008822821376986038, + "loss": 0.0875, + "theoretical_loss": 4.021210064514305, + "tokens_seen": 417595392 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008822018937570213, + "loss": 0.0835, + "theoretical_loss": 4.020931785196484, + "tokens_seen": 417857536 + }, + { + "epoch": 0.13, + "learning_rate": 0.000882121649815439, + "loss": 0.0852, + "theoretical_loss": 4.020653729250424, + "tokens_seen": 418119680 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008820414058738565, + "loss": 0.0851, + "theoretical_loss": 4.020375896356951, + "tokens_seen": 418381824 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008819611619322741, + "loss": 0.0909, + "theoretical_loss": 4.0200982861975465, + "tokens_seen": 418643968 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008818809179906917, + "loss": 0.0883, + "theoretical_loss": 4.019820898454345, + "tokens_seen": 418906112 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008818006740491094, + "loss": 0.0843, + "theoretical_loss": 4.019543732810134, + "tokens_seen": 419168256 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0019043140346184373, + "objective/train/docs_used": 158751, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.881985068321228, + "objective/train/original_loss": 1.881984829902649, + "objective/train/theoretical_loss": 4.019266788948352, + "objective/train/tokens_used": 439890400, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00036503959563560784, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.80029296875, + "objective/train/value_reward_corr": 0.6310464755810599, + "objective/train/value_std": 0.01422882080078125, + "objective/train/weight_avg": 1.0020649433135986, + "objective/train/weighted_lm_loss": 1.8862380981445312, + "objective/train/weights_max": 1.4735807180404663, + "objective/train/weights_min": 0.3895317018032074, + "theoretical_loss": 4.019266788948352, + "tokens_seen": 419430400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008817204301075269, + "loss": 0.0876, + "theoretical_loss": 4.019266788948352, + "tokens_seen": 419430400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008816401861659445, + "loss": 0.0901, + "theoretical_loss": 4.0189900665530836, + "tokens_seen": 419692544 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008815599422243621, + "loss": 0.0881, + "theoretical_loss": 4.0187135653090635, + "tokens_seen": 419954688 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008814796982827796, + "loss": 0.0832, + "theoretical_loss": 4.018437284901671, + "tokens_seen": 420216832 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008813994543411973, + "loss": 0.09, + "theoretical_loss": 4.018161225016926, + "tokens_seen": 420478976 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008813192103996148, + "loss": 0.088, + "theoretical_loss": 4.0178853853414935, + "tokens_seen": 420741120 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008812389664580324, + "loss": 0.0876, + "theoretical_loss": 4.017609765562678, + "tokens_seen": 421003264 + }, + { + "epoch": 0.13, + "learning_rate": 0.00088115872251645, + "loss": 0.0874, + "theoretical_loss": 4.017334365368422, + "tokens_seen": 421265408 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008810784785748677, + "loss": 0.0892, + "theoretical_loss": 4.017059184447303, + "tokens_seen": 421527552 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008809982346332853, + "loss": 0.088, + "theoretical_loss": 4.0167842224885355, + "tokens_seen": 421789696 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008809179906917028, + "loss": 0.0832, + "theoretical_loss": 4.016509479181968, + "tokens_seen": 422051840 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008808377467501204, + "loss": 0.0867, + "theoretical_loss": 4.016234954218078, + "tokens_seen": 422313984 + }, + { + "epoch": 0.13, + "learning_rate": 0.000880757502808538, + "loss": 0.086, + "theoretical_loss": 4.015960647287975, + "tokens_seen": 422576128 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.000816687592305243, + "objective/train/docs_used": 159824, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6725859642028809, + "objective/train/original_loss": 1.6725860834121704, + "objective/train/theoretical_loss": 4.015823575489237, + "objective/train/tokens_used": 443167200, + "objective/train/value_avg": -0.00794219970703125, + "objective/train/value_loss": 0.0004758470749948174, + "objective/train/value_max": -0.0003101825714111328, + "objective/train/value_min": -0.78759765625, + "objective/train/value_reward_corr": 0.706160026017736, + "objective/train/value_std": 0.0199432373046875, + "objective/train/weight_avg": 1.0010310411453247, + "objective/train/weighted_lm_loss": 1.6731016635894775, + "objective/train/weights_max": 1.929433822631836, + "objective/train/weights_min": 0.39345037937164307, + "theoretical_loss": 4.015823575489237, + "tokens_seen": 422707200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008806772588669556, + "loss": 0.0878, + "theoretical_loss": 4.015686558083396, + "tokens_seen": 422838272 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008805970149253731, + "loss": 0.0882, + "theoretical_loss": 4.015412686296704, + "tokens_seen": 423100416 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008805167709837908, + "loss": 0.0873, + "theoretical_loss": 4.0151390316208895, + "tokens_seen": 423362560 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008804365270422083, + "loss": 0.0878, + "theoretical_loss": 4.014865593749563, + "tokens_seen": 423624704 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008803562831006258, + "loss": 0.0855, + "theoretical_loss": 4.014592372376958, + "tokens_seen": 423886848 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008802760391590436, + "loss": 0.0876, + "theoretical_loss": 4.014319367197928, + "tokens_seen": 424148992 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008801957952174611, + "loss": 0.0907, + "theoretical_loss": 4.014046577907946, + "tokens_seen": 424411136 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008801155512758787, + "loss": 0.089, + "theoretical_loss": 4.013774004203099, + "tokens_seen": 424673280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008800353073342963, + "loss": 0.0877, + "theoretical_loss": 4.013501645780092, + "tokens_seen": 424935424 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008799550633927139, + "loss": 0.0839, + "theoretical_loss": 4.013229502336242, + "tokens_seen": 425197568 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008798748194511314, + "loss": 0.0868, + "theoretical_loss": 4.0129575735694765, + "tokens_seen": 425459712 + }, + { + "epoch": 0.13, + "learning_rate": 0.000879794575509549, + "loss": 0.0894, + "theoretical_loss": 4.012685859178337, + "tokens_seen": 425721856 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0018310642335563898, + "objective/train/docs_used": 161088, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5738030672073364, + "objective/train/original_loss": 1.5738029479980469, + "objective/train/theoretical_loss": 4.012414358861969, + "objective/train/tokens_used": 446444000, + "objective/train/value_avg": -0.006816864013671875, + "objective/train/value_loss": 0.00017067580483853817, + "objective/train/value_max": -0.0003542900085449219, + "objective/train/value_min": -0.53857421875, + "objective/train/value_reward_corr": 0.5243874673729859, + "objective/train/value_std": 0.009002685546875, + "objective/train/weight_avg": 1.0019116401672363, + "objective/train/weighted_lm_loss": 1.5763643980026245, + "objective/train/weights_max": 1.7135618925094604, + "objective/train/weights_min": 0.3707987070083618, + "theoretical_loss": 4.012414358861969, + "tokens_seen": 425984000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008797143315679666, + "loss": 0.0845, + "theoretical_loss": 4.012414358861969, + "tokens_seen": 425984000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008796340876263842, + "loss": 0.0872, + "theoretical_loss": 4.01214307232013, + "tokens_seen": 426246144 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008795538436848019, + "loss": 0.0868, + "theoretical_loss": 4.011871999253178, + "tokens_seen": 426508288 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008794735997432194, + "loss": 0.0889, + "theoretical_loss": 4.011601139362078, + "tokens_seen": 426770432 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008793933558016371, + "loss": 0.0858, + "theoretical_loss": 4.011330492348397, + "tokens_seen": 427032576 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008793131118600546, + "loss": 0.0894, + "theoretical_loss": 4.0110600579143, + "tokens_seen": 427294720 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008792328679184721, + "loss": 0.0869, + "theoretical_loss": 4.010789835762555, + "tokens_seen": 427556864 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008791526239768898, + "loss": 0.0891, + "theoretical_loss": 4.010519825596525, + "tokens_seen": 427819008 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008790723800353073, + "loss": 0.0865, + "theoretical_loss": 4.010250027120169, + "tokens_seen": 428081152 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008789921360937249, + "loss": 0.0857, + "theoretical_loss": 4.009980440038043, + "tokens_seen": 428343296 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008789118921521425, + "loss": 0.0893, + "theoretical_loss": 4.009711064055291, + "tokens_seen": 428605440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008788316482105602, + "loss": 0.0856, + "theoretical_loss": 4.009441898877652, + "tokens_seen": 428867584 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008787514042689777, + "loss": 0.0858, + "theoretical_loss": 4.009172944211455, + "tokens_seen": 429129728 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0020093463826924562, + "objective/train/docs_used": 162190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7857974767684937, + "objective/train/original_loss": 1.7857975959777832, + "objective/train/theoretical_loss": 4.009038545728536, + "objective/train/tokens_used": 449720800, + "objective/train/value_avg": -0.00783538818359375, + "objective/train/value_loss": 0.0001668190088821575, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.62890625, + "objective/train/value_reward_corr": 0.671483245399306, + "objective/train/value_std": 0.01235198974609375, + "objective/train/weight_avg": 1.0020873546600342, + "objective/train/weighted_lm_loss": 1.7891877889633179, + "objective/train/weights_max": 1.2835239171981812, + "objective/train/weights_min": 0.3886055648326874, + "theoretical_loss": 4.009038545728536, + "tokens_seen": 429260800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008786711603273953, + "loss": 0.0874, + "theoretical_loss": 4.008904199763615, + "tokens_seen": 429391872 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008785909163858129, + "loss": 0.0856, + "theoretical_loss": 4.008635665241635, + "tokens_seen": 429654016 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008785106724442304, + "loss": 0.0889, + "theoretical_loss": 4.008367340353602, + "tokens_seen": 429916160 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008784304285026481, + "loss": 0.0869, + "theoretical_loss": 4.008099224808188, + "tokens_seen": 430178304 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008783501845610656, + "loss": 0.0842, + "theoretical_loss": 4.007831318314645, + "tokens_seen": 430440448 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008782699406194833, + "loss": 0.0848, + "theoretical_loss": 4.00756362058281, + "tokens_seen": 430702592 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008781896966779008, + "loss": 0.0841, + "theoretical_loss": 4.007296131323094, + "tokens_seen": 430964736 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008781094527363185, + "loss": 0.087, + "theoretical_loss": 4.007028850246487, + "tokens_seen": 431226880 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008780292087947361, + "loss": 0.0865, + "theoretical_loss": 4.006761777064557, + "tokens_seen": 431489024 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008779489648531536, + "loss": 0.0862, + "theoretical_loss": 4.006494911489444, + "tokens_seen": 431751168 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008778687209115712, + "loss": 0.0868, + "theoretical_loss": 4.006228253233864, + "tokens_seen": 432013312 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008777884769699888, + "loss": 0.0884, + "theoretical_loss": 4.0059618020111, + "tokens_seen": 432275456 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0010576657950878143, + "objective/train/docs_used": 163281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.687941551208496, + "objective/train/original_loss": 1.687941312789917, + "objective/train/theoretical_loss": 4.00569555753501, + "objective/train/tokens_used": 452997600, + "objective/train/value_avg": -0.006397247314453125, + "objective/train/value_loss": 0.0002257569576613605, + "objective/train/value_max": -0.0002758502960205078, + "objective/train/value_min": -0.3525390625, + "objective/train/value_reward_corr": 0.5840597410895295, + "objective/train/value_std": 0.01157379150390625, + "objective/train/weight_avg": 1.0011610984802246, + "objective/train/weighted_lm_loss": 1.6893194913864136, + "objective/train/weights_max": 1.2730658054351807, + "objective/train/weights_min": 0.3710364103317261, + "theoretical_loss": 4.00569555753501, + "tokens_seen": 432537600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008777082330284064, + "loss": 0.0873, + "theoretical_loss": 4.00569555753501, + "tokens_seen": 432537600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008776279890868239, + "loss": 0.0862, + "theoretical_loss": 4.0054295195200185, + "tokens_seen": 432799744 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008775477451452416, + "loss": 0.0855, + "theoretical_loss": 4.005163687681116, + "tokens_seen": 433061888 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008774675012036591, + "loss": 0.0855, + "theoretical_loss": 4.00489806173386, + "tokens_seen": 433324032 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008773872572620766, + "loss": 0.0895, + "theoretical_loss": 4.004632641394372, + "tokens_seen": 433586176 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008773070133204944, + "loss": 0.0874, + "theoretical_loss": 4.0043674263793365, + "tokens_seen": 433848320 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008772267693789119, + "loss": 0.0848, + "theoretical_loss": 4.004102416405998, + "tokens_seen": 434110464 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008771465254373296, + "loss": 0.0857, + "theoretical_loss": 4.0038376111921625, + "tokens_seen": 434372608 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008770662814957471, + "loss": 0.0877, + "theoretical_loss": 4.0035730104561935, + "tokens_seen": 434634752 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008769860375541647, + "loss": 0.0893, + "theoretical_loss": 4.003308613917012, + "tokens_seen": 434896896 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008769057936125823, + "loss": 0.0879, + "theoretical_loss": 4.003044421294094, + "tokens_seen": 435159040 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008768255496709998, + "loss": 0.0856, + "theoretical_loss": 4.002780432307468, + "tokens_seen": 435421184 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008767453057294174, + "loss": 0.0858, + "theoretical_loss": 4.0025166466777184, + "tokens_seen": 435683328 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0019057170720770955, + "objective/train/docs_used": 164592, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.62677001953125, + "objective/train/original_loss": 1.6267703771591187, + "objective/train/theoretical_loss": 4.002384830034506, + "objective/train/tokens_used": 456274400, + "objective/train/value_avg": -0.00865936279296875, + "objective/train/value_loss": 0.00031009086524136364, + "objective/train/value_max": -0.00029587745666503906, + "objective/train/value_min": -0.68408203125, + "objective/train/value_reward_corr": 0.6592497390378055, + "objective/train/value_std": 0.01568603515625, + "objective/train/weight_avg": 1.0020478963851929, + "objective/train/weighted_lm_loss": 1.629705548286438, + "objective/train/weights_max": 1.3052014112472534, + "objective/train/weights_min": 0.3907938003540039, + "theoretical_loss": 4.002384830034506, + "tokens_seen": 435814400 + }, + { + "epoch": 0.13, + "learning_rate": 0.000876665061787835, + "loss": 0.0874, + "theoretical_loss": 4.00225306412598, + "tokens_seen": 435945472 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008765848178462527, + "loss": 0.0876, + "theoretical_loss": 4.001989684373934, + "tokens_seen": 436207616 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008765045739046702, + "loss": 0.0865, + "theoretical_loss": 4.0017265071438155, + "tokens_seen": 436469760 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008764243299630879, + "loss": 0.0866, + "theoretical_loss": 4.001463532158402, + "tokens_seen": 436731904 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008763440860215054, + "loss": 0.0868, + "theoretical_loss": 4.001200759141019, + "tokens_seen": 436994048 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008762638420799229, + "loss": 0.0861, + "theoretical_loss": 4.000938187815535, + "tokens_seen": 437256192 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008761835981383406, + "loss": 0.0855, + "theoretical_loss": 4.000675817906362, + "tokens_seen": 437518336 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008761033541967581, + "loss": 0.0857, + "theoretical_loss": 4.000413649138453, + "tokens_seen": 437780480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008760231102551757, + "loss": 0.0873, + "theoretical_loss": 4.000151681237301, + "tokens_seen": 438042624 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008759428663135933, + "loss": 0.0856, + "theoretical_loss": 3.9998899139289392, + "tokens_seen": 438304768 + }, + { + "epoch": 0.13, + "learning_rate": 0.000875862622372011, + "loss": 0.0858, + "theoretical_loss": 3.999628346939934, + "tokens_seen": 438566912 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008757823784304286, + "loss": 0.0886, + "theoretical_loss": 3.9993669799973928, + "tokens_seen": 438829056 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0023695742711424828, + "objective/train/docs_used": 165818, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.712162733078003, + "objective/train/original_loss": 1.7121628522872925, + "objective/train/theoretical_loss": 3.9991058128289536, + "objective/train/tokens_used": 459551200, + "objective/train/value_avg": -0.0120391845703125, + "objective/train/value_loss": 0.0006223080563358963, + "objective/train/value_max": -0.0002002716064453125, + "objective/train/value_min": -0.81982421875, + "objective/train/value_reward_corr": 0.7511700502995355, + "objective/train/value_std": 0.0264739990234375, + "objective/train/weight_avg": 1.0026506185531616, + "objective/train/weighted_lm_loss": 1.716010570526123, + "objective/train/weights_max": 1.3089244365692139, + "objective/train/weights_min": 0.41716745495796204, + "theoretical_loss": 3.9991058128289536, + "tokens_seen": 439091200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008757021344888461, + "loss": 0.0856, + "theoretical_loss": 3.9991058128289536, + "tokens_seen": 439091200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008756218905472637, + "loss": 0.0895, + "theoretical_loss": 3.998844845162789, + "tokens_seen": 439353344 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008755416466056813, + "loss": 0.0873, + "theoretical_loss": 3.998584076727604, + "tokens_seen": 439615488 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008754614026640989, + "loss": 0.0886, + "theoretical_loss": 3.998323507252633, + "tokens_seen": 439877632 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008753811587225164, + "loss": 0.0853, + "theoretical_loss": 3.998063136467639, + "tokens_seen": 440139776 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008753009147809341, + "loss": 0.0881, + "theoretical_loss": 3.9978029641029154, + "tokens_seen": 440401920 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008752206708393516, + "loss": 0.0861, + "theoretical_loss": 3.9975429898892783, + "tokens_seen": 440664064 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008751404268977691, + "loss": 0.0871, + "theoretical_loss": 3.9972832135580707, + "tokens_seen": 440926208 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008750601829561869, + "loss": 0.087, + "theoretical_loss": 3.9970236348411605, + "tokens_seen": 441188352 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008749799390146044, + "loss": 0.0868, + "theoretical_loss": 3.996764253470935, + "tokens_seen": 441450496 + }, + { + "epoch": 0.13, + "learning_rate": 0.000874899695073022, + "loss": 0.0825, + "theoretical_loss": 3.996505069180306, + "tokens_seen": 441712640 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008748194511314396, + "loss": 0.0872, + "theoretical_loss": 3.9962460817027017, + "tokens_seen": 441974784 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008747392071898572, + "loss": 0.0865, + "theoretical_loss": 3.995987290772071, + "tokens_seen": 442236928 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0025604458060115576, + "objective/train/docs_used": 166951, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6768556833267212, + "objective/train/original_loss": 1.6768556833267212, + "objective/train/theoretical_loss": 3.9958579689288705, + "objective/train/tokens_used": 462828000, + "objective/train/value_avg": -0.0081634521484375, + "objective/train/value_loss": 0.00017307909729424864, + "objective/train/value_max": -0.0002472400665283203, + "objective/train/value_min": -0.494140625, + "objective/train/value_reward_corr": 0.5969880927520907, + "objective/train/value_std": 0.0106353759765625, + "objective/train/weight_avg": 1.0026414394378662, + "objective/train/weighted_lm_loss": 1.6814841032028198, + "objective/train/weights_max": 1.2068192958831787, + "objective/train/weights_min": 0.37579405307769775, + "theoretical_loss": 3.9958579689288705, + "tokens_seen": 442368000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008746589632482748, + "loss": 0.0859, + "theoretical_loss": 3.9957286961228786, + "tokens_seen": 442499072 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008745787193066924, + "loss": 0.0838, + "theoretical_loss": 3.995470297490106, + "tokens_seen": 442761216 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008744984753651099, + "loss": 0.0841, + "theoretical_loss": 3.995212094609249, + "tokens_seen": 443023360 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008744182314235275, + "loss": 0.0857, + "theoretical_loss": 3.994954087216315, + "tokens_seen": 443285504 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008743379874819452, + "loss": 0.0886, + "theoretical_loss": 3.994696275047825, + "tokens_seen": 443547648 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008742577435403627, + "loss": 0.0862, + "theoretical_loss": 3.9944386578408113, + "tokens_seen": 443809792 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008741774995987804, + "loss": 0.0845, + "theoretical_loss": 3.9941812353328126, + "tokens_seen": 444071936 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008740972556571979, + "loss": 0.0852, + "theoretical_loss": 3.993924007261878, + "tokens_seen": 444334080 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008740170117156155, + "loss": 0.0881, + "theoretical_loss": 3.9936669733665617, + "tokens_seen": 444596224 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008739367677740331, + "loss": 0.0859, + "theoretical_loss": 3.9934101333859253, + "tokens_seen": 444858368 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008738565238324506, + "loss": 0.0882, + "theoretical_loss": 3.9931534870595327, + "tokens_seen": 445120512 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008737762798908682, + "loss": 0.0857, + "theoretical_loss": 3.9928970341274517, + "tokens_seen": 445382656 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -4.0266091673402116e-05, + "objective/train/docs_used": 168097, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8889011144638062, + "objective/train/original_loss": 1.8889009952545166, + "objective/train/theoretical_loss": 3.992640774330251, + "objective/train/tokens_used": 466104800, + "objective/train/value_avg": -0.007015228271484375, + "objective/train/value_loss": 0.00026242577587254345, + "objective/train/value_max": -0.0002415180206298828, + "objective/train/value_min": -0.1566162109375, + "objective/train/value_reward_corr": 0.5724330601050478, + "objective/train/value_std": 0.0086822509765625, + "objective/train/weight_avg": 1.0000807046890259, + "objective/train/weighted_lm_loss": 1.889707326889038, + "objective/train/weights_max": 1.112181305885315, + "objective/train/weights_min": 0.38205578923225403, + "theoretical_loss": 3.992640774330251, + "tokens_seen": 445644800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008736960359492858, + "loss": 0.0861, + "theoretical_loss": 3.992640774330251, + "tokens_seen": 445644800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008736157920077035, + "loss": 0.084, + "theoretical_loss": 3.9923847074090015, + "tokens_seen": 445906944 + }, + { + "epoch": 0.14, + "learning_rate": 0.000873535548066121, + "loss": 0.0835, + "theoretical_loss": 3.9921288331052702, + "tokens_seen": 446169088 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008734553041245387, + "loss": 0.0856, + "theoretical_loss": 3.991873151161124, + "tokens_seen": 446431232 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008733750601829562, + "loss": 0.0886, + "theoretical_loss": 3.9916176613191263, + "tokens_seen": 446693376 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008732948162413738, + "loss": 0.0862, + "theoretical_loss": 3.9913623633223354, + "tokens_seen": 446955520 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008732145722997914, + "loss": 0.0831, + "theoretical_loss": 3.9911072569143036, + "tokens_seen": 447217664 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008731343283582089, + "loss": 0.0842, + "theoretical_loss": 3.9908523418390764, + "tokens_seen": 447479808 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008730540844166266, + "loss": 0.0841, + "theoretical_loss": 3.990597617841191, + "tokens_seen": 447741952 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008729738404750441, + "loss": 0.0841, + "theoretical_loss": 3.9903430846656742, + "tokens_seen": 448004096 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008728935965334618, + "loss": 0.0871, + "theoretical_loss": 3.990088742058043, + "tokens_seen": 448266240 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008728133525918794, + "loss": 0.0835, + "theoretical_loss": 3.9898345897643024, + "tokens_seen": 448528384 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008727331086502969, + "loss": 0.0835, + "theoretical_loss": 3.989580627530943, + "tokens_seen": 448790528 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0008596550906077027, + "objective/train/docs_used": 169309, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.786888599395752, + "objective/train/original_loss": 1.786888599395752, + "objective/train/theoretical_loss": 3.9894537176078178, + "objective/train/tokens_used": 469381600, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00042329219286330044, + "objective/train/value_max": -0.0002359151840209961, + "objective/train/value_min": -0.681640625, + "objective/train/value_reward_corr": 0.5803828830417395, + "objective/train/value_std": 0.014617919921875, + "objective/train/weight_avg": 1.0010493993759155, + "objective/train/weighted_lm_loss": 1.7893576622009277, + "objective/train/weights_max": 1.8888424634933472, + "objective/train/weights_min": 0.3687688410282135, + "theoretical_loss": 3.9894537176078178, + "tokens_seen": 448921600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008726528647087145, + "loss": 0.0836, + "theoretical_loss": 3.9893268551049417, + "tokens_seen": 449052672 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008725726207671321, + "loss": 0.0848, + "theoretical_loss": 3.9890732722337594, + "tokens_seen": 449314816 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008724923768255497, + "loss": 0.0882, + "theoretical_loss": 3.988819878665341, + "tokens_seen": 449576960 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008724121328839672, + "loss": 0.085, + "theoretical_loss": 3.988566674148111, + "tokens_seen": 449839104 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008723318889423849, + "loss": 0.0854, + "theoretical_loss": 3.988313658430978, + "tokens_seen": 450101248 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008722516450008024, + "loss": 0.0872, + "theoretical_loss": 3.9880608312633274, + "tokens_seen": 450363392 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008721714010592199, + "loss": 0.0875, + "theoretical_loss": 3.9878081923950237, + "tokens_seen": 450625536 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008720911571176377, + "loss": 0.0854, + "theoretical_loss": 3.9875557415764087, + "tokens_seen": 450887680 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008720109131760552, + "loss": 0.0834, + "theoretical_loss": 3.9873034785582995, + "tokens_seen": 451149824 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008719306692344729, + "loss": 0.0829, + "theoretical_loss": 3.9870514030919884, + "tokens_seen": 451411968 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008718504252928904, + "loss": 0.0848, + "theoretical_loss": 3.986799514929242, + "tokens_seen": 451674112 + }, + { + "epoch": 0.14, + "learning_rate": 0.000871770181351308, + "loss": 0.0876, + "theoretical_loss": 3.9865478138222974, + "tokens_seen": 451936256 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0022668084129691124, + "objective/train/docs_used": 170598, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.797487735748291, + "objective/train/original_loss": 1.797487735748291, + "objective/train/theoretical_loss": 3.9862962995238647, + "objective/train/tokens_used": 472658400, + "objective/train/value_avg": -0.0083465576171875, + "objective/train/value_loss": 0.0004899487248621881, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.68115234375, + "objective/train/value_reward_corr": 0.532976155116499, + "objective/train/value_std": 0.0135040283203125, + "objective/train/weight_avg": 1.0024774074554443, + "objective/train/weighted_lm_loss": 1.8017926216125488, + "objective/train/weights_max": 1.5135427713394165, + "objective/train/weights_min": 0.3686288893222809, + "theoretical_loss": 3.9862962995238647, + "tokens_seen": 452198400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008716899374097256, + "loss": 0.086, + "theoretical_loss": 3.9862962995238647, + "tokens_seen": 452198400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008716096934681432, + "loss": 0.0884, + "theoretical_loss": 3.9860449717871234, + "tokens_seen": 452460544 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008715294495265607, + "loss": 0.0851, + "theoretical_loss": 3.9857938303657217, + "tokens_seen": 452722688 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008714492055849783, + "loss": 0.0885, + "theoretical_loss": 3.9855428750137754, + "tokens_seen": 452984832 + }, + { + "epoch": 0.14, + "learning_rate": 0.000871368961643396, + "loss": 0.0889, + "theoretical_loss": 3.9852921054858665, + "tokens_seen": 453246976 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008712887177018135, + "loss": 0.0843, + "theoretical_loss": 3.9850415215370445, + "tokens_seen": 453509120 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008712084737602312, + "loss": 0.0819, + "theoretical_loss": 3.984791122922821, + "tokens_seen": 453771264 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008711282298186487, + "loss": 0.0859, + "theoretical_loss": 3.98454090939917, + "tokens_seen": 454033408 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008710479858770663, + "loss": 0.0861, + "theoretical_loss": 3.984290880722531, + "tokens_seen": 454295552 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008709677419354839, + "loss": 0.0834, + "theoretical_loss": 3.9840410366498, + "tokens_seen": 454557696 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008708874979939014, + "loss": 0.0869, + "theoretical_loss": 3.983791376938336, + "tokens_seen": 454819840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008708072540523191, + "loss": 0.083, + "theoretical_loss": 3.983541901345955, + "tokens_seen": 455081984 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008707270101107366, + "loss": 0.0871, + "theoretical_loss": 3.983292609630931, + "tokens_seen": 455344128 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 6.911411037435755e-05, + "objective/train/docs_used": 171735, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7402760982513428, + "objective/train/original_loss": 1.7402762174606323, + "objective/train/theoretical_loss": 3.983168032652013, + "objective/train/tokens_used": 475935200, + "objective/train/value_avg": -0.01013946533203125, + "objective/train/value_loss": 0.0009821663843467832, + "objective/train/value_max": -0.00021147727966308594, + "objective/train/value_min": -0.916015625, + "objective/train/value_reward_corr": 0.7078605683518372, + "objective/train/value_std": 0.0244293212890625, + "objective/train/weight_avg": 1.000481367111206, + "objective/train/weighted_lm_loss": 1.7407102584838867, + "objective/train/weights_max": 2.12113881111145, + "objective/train/weights_min": 0.36906227469444275, + "theoretical_loss": 3.983168032652013, + "tokens_seen": 455475200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008706467661691543, + "loss": 0.0875, + "theoretical_loss": 3.9830435015519936, + "tokens_seen": 455606272 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008705665222275719, + "loss": 0.086, + "theoretical_loss": 3.982794576868328, + "tokens_seen": 455868416 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008704862782859895, + "loss": 0.0847, + "theoretical_loss": 3.982545835339573, + "tokens_seen": 456130560 + }, + { + "epoch": 0.14, + "learning_rate": 0.000870406034344407, + "loss": 0.0849, + "theoretical_loss": 3.982297276725822, + "tokens_seen": 456392704 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008703257904028246, + "loss": 0.0838, + "theoretical_loss": 3.9820489007876176, + "tokens_seen": 456654848 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008702455464612422, + "loss": 0.0813, + "theoretical_loss": 3.981800707285955, + "tokens_seen": 456916992 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008701653025196597, + "loss": 0.0853, + "theoretical_loss": 3.9815526959822787, + "tokens_seen": 457179136 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008700850585780774, + "loss": 0.0835, + "theoretical_loss": 3.981304866638481, + "tokens_seen": 457441280 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008700048146364949, + "loss": 0.0874, + "theoretical_loss": 3.9810572190169027, + "tokens_seen": 457703424 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008699245706949126, + "loss": 0.0864, + "theoretical_loss": 3.9808097528803295, + "tokens_seen": 457965568 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008698443267533302, + "loss": 0.0861, + "theoretical_loss": 3.9805624679919935, + "tokens_seen": 458227712 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008697640828117477, + "loss": 0.0878, + "theoretical_loss": 3.98031536411557, + "tokens_seen": 458489856 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0018690774450078607, + "objective/train/docs_used": 173009, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6653363704681396, + "objective/train/original_loss": 1.6653363704681396, + "objective/train/theoretical_loss": 3.9800684410151783, + "objective/train/tokens_used": 479212000, + "objective/train/value_avg": -0.007537841796875, + "objective/train/value_loss": 0.00013798951113130897, + "objective/train/value_max": -0.0002846717834472656, + "objective/train/value_min": -0.277099609375, + "objective/train/value_reward_corr": 0.48532692544123834, + "objective/train/value_std": 0.00695037841796875, + "objective/train/weight_avg": 1.0019335746765137, + "objective/train/weighted_lm_loss": 1.6689884662628174, + "objective/train/weights_max": 1.0873464345932007, + "objective/train/weights_min": 0.3710477352142334, + "theoretical_loss": 3.9800684410151783, + "tokens_seen": 458752000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008696838388701653, + "loss": 0.0846, + "theoretical_loss": 3.9800684410151783, + "tokens_seen": 458752000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008696035949285829, + "loss": 0.0826, + "theoretical_loss": 3.979821698455379, + "tokens_seen": 459014144 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008695233509870005, + "loss": 0.0861, + "theoretical_loss": 3.9795751362011735, + "tokens_seen": 459276288 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008694431070454181, + "loss": 0.0848, + "theoretical_loss": 3.979328754018004, + "tokens_seen": 459538432 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008693628631038357, + "loss": 0.085, + "theoretical_loss": 3.979082551671749, + "tokens_seen": 459800576 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008692826191622532, + "loss": 0.0876, + "theoretical_loss": 3.9788365289287286, + "tokens_seen": 460062720 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008692023752206708, + "loss": 0.0879, + "theoretical_loss": 3.9785906855556945, + "tokens_seen": 460324864 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008691221312790885, + "loss": 0.0846, + "theoretical_loss": 3.9783450213198384, + "tokens_seen": 460587008 + }, + { + "epoch": 0.14, + "learning_rate": 0.000869041887337506, + "loss": 0.0837, + "theoretical_loss": 3.9780995359887843, + "tokens_seen": 460849152 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008689616433959237, + "loss": 0.0868, + "theoretical_loss": 3.9778542293305894, + "tokens_seen": 461111296 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008688813994543412, + "loss": 0.0865, + "theoretical_loss": 3.977609101113744, + "tokens_seen": 461373440 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008688011555127588, + "loss": 0.0809, + "theoretical_loss": 3.97736415110717, + "tokens_seen": 461635584 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008687209115711764, + "loss": 0.0903, + "theoretical_loss": 3.977119379080218, + "tokens_seen": 461897728 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0003697322681546211, + "objective/train/docs_used": 174193, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8311926126480103, + "objective/train/original_loss": 1.8311924934387207, + "objective/train/theoretical_loss": 3.9769970597371405, + "objective/train/tokens_used": 482488800, + "objective/train/value_avg": -0.0074005126953125, + "objective/train/value_loss": 0.00031192455207929015, + "objective/train/value_max": -0.00023233890533447266, + "objective/train/value_min": -0.40869140625, + "objective/train/value_reward_corr": 0.5429328033135243, + "objective/train/value_std": 0.011566162109375, + "objective/train/weight_avg": 1.0005161762237549, + "objective/train/weighted_lm_loss": 1.8320584297180176, + "objective/train/weights_max": 1.3934298753738403, + "objective/train/weights_min": 0.6126090288162231, + "theoretical_loss": 3.9769970597371405, + "tokens_seen": 462028800 + }, + { + "epoch": 0.14, + "learning_rate": 0.000868640667629594, + "loss": 0.0886, + "theoretical_loss": 3.9768747848026695, + "tokens_seen": 462159872 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008685604236880115, + "loss": 0.0836, + "theoretical_loss": 3.9766303680447335, + "tokens_seen": 462422016 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008684801797464291, + "loss": 0.0834, + "theoretical_loss": 3.9763861285770457, + "tokens_seen": 462684160 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008683999358048468, + "loss": 0.0862, + "theoretical_loss": 3.9761420661706683, + "tokens_seen": 462946304 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008683196918632643, + "loss": 0.084, + "theoretical_loss": 3.975898180597089, + "tokens_seen": 463208448 + }, + { + "epoch": 0.14, + "learning_rate": 0.000868239447921682, + "loss": 0.0857, + "theoretical_loss": 3.9756544716282187, + "tokens_seen": 463470592 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008681592039800995, + "loss": 0.0837, + "theoretical_loss": 3.975410939036392, + "tokens_seen": 463732736 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008680789600385172, + "loss": 0.0852, + "theoretical_loss": 3.9751675825943638, + "tokens_seen": 463994880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008679987160969347, + "loss": 0.0895, + "theoretical_loss": 3.974924402075313, + "tokens_seen": 464257024 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008679184721553522, + "loss": 0.0863, + "theoretical_loss": 3.9746813972528354, + "tokens_seen": 464519168 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008678382282137699, + "loss": 0.0875, + "theoretical_loss": 3.9744385679009486, + "tokens_seen": 464781312 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008677579842721874, + "loss": 0.0833, + "theoretical_loss": 3.9741959137940848, + "tokens_seen": 465043456 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.002355672651901841, + "objective/train/docs_used": 175471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4537473917007446, + "objective/train/original_loss": 1.4537473917007446, + "objective/train/theoretical_loss": 3.973953434707096, + "objective/train/tokens_used": 485765600, + "objective/train/value_avg": -0.0084075927734375, + "objective/train/value_loss": 0.00017681960889603943, + "objective/train/value_max": -0.00033664703369140625, + "objective/train/value_min": -0.244384765625, + "objective/train/value_reward_corr": 0.6067470079442566, + "objective/train/value_std": 0.011962890625, + "objective/train/weight_avg": 1.0024397373199463, + "objective/train/weighted_lm_loss": 1.4569942951202393, + "objective/train/weights_max": 1.135114312171936, + "objective/train/weights_min": 0.36958563327789307, + "theoretical_loss": 3.973953434707096, + "tokens_seen": 465305600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008676777403306051, + "loss": 0.0827, + "theoretical_loss": 3.973953434707096, + "tokens_seen": 465305600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008675974963890227, + "loss": 0.084, + "theoretical_loss": 3.973711130415248, + "tokens_seen": 465567744 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008675172524474403, + "loss": 0.0855, + "theoretical_loss": 3.973469000694223, + "tokens_seen": 465829888 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008674370085058578, + "loss": 0.084, + "theoretical_loss": 3.973227045320117, + "tokens_seen": 466092032 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008673567645642754, + "loss": 0.0852, + "theoretical_loss": 3.9729852640694383, + "tokens_seen": 466354176 + }, + { + "epoch": 0.14, + "learning_rate": 0.000867276520622693, + "loss": 0.0855, + "theoretical_loss": 3.972743656719107, + "tokens_seen": 466616320 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008671962766811105, + "loss": 0.0819, + "theoretical_loss": 3.9725022230464537, + "tokens_seen": 466878464 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008671160327395282, + "loss": 0.0849, + "theoretical_loss": 3.9722609628292207, + "tokens_seen": 467140608 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008670357887979457, + "loss": 0.084, + "theoretical_loss": 3.9720198758455574, + "tokens_seen": 467402752 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008669555448563635, + "loss": 0.0854, + "theoretical_loss": 3.9717789618740227, + "tokens_seen": 467664896 + }, + { + "epoch": 0.14, + "learning_rate": 0.000866875300914781, + "loss": 0.0864, + "theoretical_loss": 3.9715382206935814, + "tokens_seen": 467927040 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008667950569731985, + "loss": 0.082, + "theoretical_loss": 3.9712976520836043, + "tokens_seen": 468189184 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008667148130316162, + "loss": 0.0839, + "theoretical_loss": 3.971057255823868, + "tokens_seen": 468451328 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0019131185254082084, + "objective/train/docs_used": 176758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8133913278579712, + "objective/train/original_loss": 1.8133914470672607, + "objective/train/theoretical_loss": 3.9709371222566308, + "objective/train/tokens_used": 489042400, + "objective/train/value_avg": -0.00600433349609375, + "objective/train/value_loss": 0.00013036759628448635, + "objective/train/value_max": -0.00028252601623535156, + "objective/train/value_min": -0.2418212890625, + "objective/train/value_reward_corr": 0.4864233011946609, + "objective/train/value_std": 0.006320953369140625, + "objective/train/weight_avg": 1.0019735097885132, + "objective/train/weighted_lm_loss": 1.817116618156433, + "objective/train/weights_max": 1.2735666036605835, + "objective/train/weights_min": 0.3718809485435486, + "theoretical_loss": 3.9709371222566308, + "tokens_seen": 468582400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008666345690900337, + "loss": 0.0859, + "theoretical_loss": 3.9708170316945526, + "tokens_seen": 468713472 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008665543251484513, + "loss": 0.0853, + "theoretical_loss": 3.9705769794762418, + "tokens_seen": 468975616 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008664740812068689, + "loss": 0.0866, + "theoretical_loss": 3.97033709894992, + "tokens_seen": 469237760 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008663938372652865, + "loss": 0.085, + "theoretical_loss": 3.970097389896975, + "tokens_seen": 469499904 + }, + { + "epoch": 0.14, + "learning_rate": 0.000866313593323704, + "loss": 0.0853, + "theoretical_loss": 3.9698578520991936, + "tokens_seen": 469762048 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008662333493821216, + "loss": 0.0873, + "theoretical_loss": 3.9696184853387617, + "tokens_seen": 470024192 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008661531054405393, + "loss": 0.0834, + "theoretical_loss": 3.9693792893982636, + "tokens_seen": 470286336 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008660728614989568, + "loss": 0.0861, + "theoretical_loss": 3.969140264060681, + "tokens_seen": 470548480 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008659926175573745, + "loss": 0.0844, + "theoretical_loss": 3.9689014091093933, + "tokens_seen": 470810624 + }, + { + "epoch": 0.14, + "learning_rate": 0.000865912373615792, + "loss": 0.0843, + "theoretical_loss": 3.9686627243281727, + "tokens_seen": 471072768 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008658321296742096, + "loss": 0.088, + "theoretical_loss": 3.9684242095011886, + "tokens_seen": 471334912 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008657518857326272, + "loss": 0.0841, + "theoretical_loss": 3.9681858644130017, + "tokens_seen": 471597056 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0015276180347427726, + "objective/train/docs_used": 177948, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7598378658294678, + "objective/train/original_loss": 1.7598377466201782, + "objective/train/theoretical_loss": 3.967947688848568, + "objective/train/tokens_used": 492319200, + "objective/train/value_avg": -0.01081085205078125, + "objective/train/value_loss": 0.00034501656773500144, + "objective/train/value_max": -0.00039505958557128906, + "objective/train/value_min": -0.76025390625, + "objective/train/value_reward_corr": 0.7302345418778485, + "objective/train/value_std": 0.0194549560546875, + "objective/train/weight_avg": 1.0016895532608032, + "objective/train/weighted_lm_loss": 1.7620900869369507, + "objective/train/weights_max": 1.419206142425537, + "objective/train/weights_min": 0.39239516854286194, + "theoretical_loss": 3.967947688848568, + "tokens_seen": 471859200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008656716417910447, + "loss": 0.0869, + "theoretical_loss": 3.967947688848568, + "tokens_seen": 471859200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008655913978494624, + "loss": 0.0807, + "theoretical_loss": 3.9677096825932328, + "tokens_seen": 472121344 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008655111539078799, + "loss": 0.0841, + "theoretical_loss": 3.9674718454327325, + "tokens_seen": 472383488 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008654309099662976, + "loss": 0.0864, + "theoretical_loss": 3.9672341771531956, + "tokens_seen": 472645632 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008653506660247152, + "loss": 0.0862, + "theoretical_loss": 3.9669966775411365, + "tokens_seen": 472907776 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008652704220831328, + "loss": 0.0841, + "theoretical_loss": 3.96675934638346, + "tokens_seen": 473169920 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008651901781415503, + "loss": 0.0864, + "theoretical_loss": 3.9665221834674558, + "tokens_seen": 473432064 + }, + { + "epoch": 0.14, + "learning_rate": 0.000865109934199968, + "loss": 0.0841, + "theoretical_loss": 3.9662851885808026, + "tokens_seen": 473694208 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008650296902583855, + "loss": 0.0848, + "theoretical_loss": 3.966048361511562, + "tokens_seen": 473956352 + }, + { + "epoch": 0.14, + "learning_rate": 0.000864949446316803, + "loss": 0.0829, + "theoretical_loss": 3.96581170204818, + "tokens_seen": 474218496 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008648692023752207, + "loss": 0.087, + "theoretical_loss": 3.9655752099794874, + "tokens_seen": 474480640 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008647889584336382, + "loss": 0.0888, + "theoretical_loss": 3.9653388850946976, + "tokens_seen": 474742784 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008647087144920559, + "loss": 0.084, + "theoretical_loss": 3.965102727183404, + "tokens_seen": 475004928 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0018348618177697062, + "objective/train/docs_used": 179087, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5616334676742554, + "objective/train/original_loss": 1.5616333484649658, + "objective/train/theoretical_loss": 3.9649847107771707, + "objective/train/tokens_used": 495596000, + "objective/train/value_avg": -0.0064697265625, + "objective/train/value_loss": 0.00020544622384477407, + "objective/train/value_max": -0.0003077983856201172, + "objective/train/value_min": -0.6181640625, + "objective/train/value_reward_corr": 0.5330140721340589, + "objective/train/value_std": 0.00867462158203125, + "objective/train/weight_avg": 1.0019270181655884, + "objective/train/weighted_lm_loss": 1.564939260482788, + "objective/train/weights_max": 1.3273746967315674, + "objective/train/weights_min": 0.36936861276626587, + "theoretical_loss": 3.9649847107771707, + "tokens_seen": 475136000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008646284705504735, + "loss": 0.0832, + "theoretical_loss": 3.9648667360355816, + "tokens_seen": 475267072 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008645482266088911, + "loss": 0.0869, + "theoretical_loss": 3.9646309114415863, + "tokens_seen": 475529216 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008644679826673087, + "loss": 0.0838, + "theoretical_loss": 3.9643952531921505, + "tokens_seen": 475791360 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008643877387257262, + "loss": 0.0817, + "theoretical_loss": 3.9641597610783874, + "tokens_seen": 476053504 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008643074947841438, + "loss": 0.0847, + "theoretical_loss": 3.9639244348917853, + "tokens_seen": 476315648 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008642272508425614, + "loss": 0.0826, + "theoretical_loss": 3.96368927442421, + "tokens_seen": 476577792 + }, + { + "epoch": 0.14, + "learning_rate": 0.000864147006900979, + "loss": 0.0855, + "theoretical_loss": 3.9634542794679013, + "tokens_seen": 476839936 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008640667629593965, + "loss": 0.0832, + "theoretical_loss": 3.963219449815475, + "tokens_seen": 477102080 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008639865190178143, + "loss": 0.0809, + "theoretical_loss": 3.9629847852599207, + "tokens_seen": 477364224 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008639062750762318, + "loss": 0.0871, + "theoretical_loss": 3.9627502855945984, + "tokens_seen": 477626368 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008638260311346493, + "loss": 0.0839, + "theoretical_loss": 3.962515950613242, + "tokens_seen": 477888512 + }, + { + "epoch": 0.14, + "learning_rate": 0.000863745787193067, + "loss": 0.0862, + "theoretical_loss": 3.962281780109957, + "tokens_seen": 478150656 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.0006487751961685717, + "objective/train/docs_used": 180329, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7242850065231323, + "objective/train/original_loss": 1.7242848873138428, + "objective/train/theoretical_loss": 3.9620477738792164, + "objective/train/tokens_used": 498872800, + "objective/train/value_avg": -0.005603790283203125, + "objective/train/value_loss": 0.00016961862274911255, + "objective/train/value_max": -0.00028252601623535156, + "objective/train/value_min": -0.17919921875, + "objective/train/value_reward_corr": 0.5824427047069172, + "objective/train/value_std": 0.0064849853515625, + "objective/train/weight_avg": 0.999433696269989, + "objective/train/weighted_lm_loss": 1.724251389503479, + "objective/train/weights_max": 1.0798728466033936, + "objective/train/weights_min": 0.6110895276069641, + "theoretical_loss": 3.9620477738792164, + "tokens_seen": 478412800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008636655432514845, + "loss": 0.0843, + "theoretical_loss": 3.9620477738792164, + "tokens_seen": 478412800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008635852993099021, + "loss": 0.0854, + "theoretical_loss": 3.9618139317158647, + "tokens_seen": 478674944 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008635050553683197, + "loss": 0.0847, + "theoretical_loss": 3.961580253415114, + "tokens_seen": 478937088 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008634248114267373, + "loss": 0.0845, + "theoretical_loss": 3.9613467387725434, + "tokens_seen": 479199232 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008633445674851548, + "loss": 0.0807, + "theoretical_loss": 3.9611133875841, + "tokens_seen": 479461376 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008632643235435724, + "loss": 0.0854, + "theoretical_loss": 3.960880199646096, + "tokens_seen": 479723520 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008631840796019901, + "loss": 0.0858, + "theoretical_loss": 3.9606471747552083, + "tokens_seen": 479985664 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008631038356604077, + "loss": 0.0826, + "theoretical_loss": 3.9604143127084774, + "tokens_seen": 480247808 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008630235917188253, + "loss": 0.0847, + "theoretical_loss": 3.960181613303309, + "tokens_seen": 480509952 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008629433477772428, + "loss": 0.0875, + "theoretical_loss": 3.9599490763374687, + "tokens_seen": 480772096 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008628631038356605, + "loss": 0.0857, + "theoretical_loss": 3.9597167016090866, + "tokens_seen": 481034240 + }, + { + "epoch": 0.15, + "learning_rate": 0.000862782859894078, + "loss": 0.0827, + "theoretical_loss": 3.95948448891665, + "tokens_seen": 481296384 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008627026159524955, + "loss": 0.0838, + "theoretical_loss": 3.959252438059009, + "tokens_seen": 481558528 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0011554267257452011, + "objective/train/docs_used": 181505, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.673024296760559, + "objective/train/original_loss": 1.6730244159698486, + "objective/train/theoretical_loss": 3.959136473255475, + "objective/train/tokens_used": 502149600, + "objective/train/value_avg": -0.00830078125, + "objective/train/value_loss": 0.0005257127340883017, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.478515625, + "objective/train/value_reward_corr": 0.5014164299225942, + "objective/train/value_std": 0.0123291015625, + "objective/train/weight_avg": 1.0013680458068848, + "objective/train/weighted_lm_loss": 1.674941062927246, + "objective/train/weights_max": 1.4412035942077637, + "objective/train/weights_min": 0.1574522852897644, + "theoretical_loss": 3.959136473255475, + "tokens_seen": 481689600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008626223720109132, + "loss": 0.0841, + "theoretical_loss": 3.9590205488353707, + "tokens_seen": 481820672 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008625421280693307, + "loss": 0.0867, + "theoretical_loss": 3.958788821045302, + "tokens_seen": 482082816 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008624618841277484, + "loss": 0.0848, + "theoretical_loss": 3.958557254488727, + "tokens_seen": 482344960 + }, + { + "epoch": 0.15, + "learning_rate": 0.000862381640186166, + "loss": 0.084, + "theoretical_loss": 3.958325848965925, + "tokens_seen": 482607104 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008623013962445836, + "loss": 0.0831, + "theoretical_loss": 3.958094604277532, + "tokens_seen": 482869248 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008622211523030011, + "loss": 0.0863, + "theoretical_loss": 3.9578635202245387, + "tokens_seen": 483131392 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008621409083614188, + "loss": 0.0807, + "theoretical_loss": 3.95763259660829, + "tokens_seen": 483393536 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008620606644198363, + "loss": 0.0829, + "theoretical_loss": 3.957401833230484, + "tokens_seen": 483655680 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008619804204782538, + "loss": 0.0833, + "theoretical_loss": 3.957171229893171, + "tokens_seen": 483917824 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008619001765366715, + "loss": 0.0847, + "theoretical_loss": 3.956940786398753, + "tokens_seen": 484179968 + }, + { + "epoch": 0.15, + "learning_rate": 0.000861819932595089, + "loss": 0.0856, + "theoretical_loss": 3.9567105025499827, + "tokens_seen": 484442112 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008617396886535068, + "loss": 0.0839, + "theoretical_loss": 3.9564803781499633, + "tokens_seen": 484704256 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0023670962546020746, + "objective/train/docs_used": 182723, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4888525009155273, + "objective/train/original_loss": 1.4888522624969482, + "objective/train/theoretical_loss": 3.956250413002146, + "objective/train/tokens_used": 505426400, + "objective/train/value_avg": -0.01146697998046875, + "objective/train/value_loss": 0.0007001186022534966, + "objective/train/value_max": -0.0003249645233154297, + "objective/train/value_min": -0.8916015625, + "objective/train/value_reward_corr": 0.688608831300723, + "objective/train/value_std": 0.02484130859375, + "objective/train/weight_avg": 1.0026851892471313, + "objective/train/weighted_lm_loss": 1.4927496910095215, + "objective/train/weights_max": 2.0180838108062744, + "objective/train/weights_min": 0.3854755163192749, + "theoretical_loss": 3.956250413002146, + "tokens_seen": 484966400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008616594447119243, + "loss": 0.0845, + "theoretical_loss": 3.956250413002146, + "tokens_seen": 484966400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008615792007703419, + "loss": 0.0851, + "theoretical_loss": 3.9560206069103314, + "tokens_seen": 485228544 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008614989568287595, + "loss": 0.086, + "theoretical_loss": 3.9557909596786676, + "tokens_seen": 485490688 + }, + { + "epoch": 0.15, + "learning_rate": 0.000861418712887177, + "loss": 0.0842, + "theoretical_loss": 3.9555614711116487, + "tokens_seen": 485752832 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008613384689455946, + "loss": 0.0841, + "theoretical_loss": 3.9553321410141162, + "tokens_seen": 486014976 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008612582250040122, + "loss": 0.0831, + "theoretical_loss": 3.9551029691912545, + "tokens_seen": 486277120 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008611779810624298, + "loss": 0.0843, + "theoretical_loss": 3.954873955448594, + "tokens_seen": 486539264 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008610977371208473, + "loss": 0.0846, + "theoretical_loss": 3.9546450995920086, + "tokens_seen": 486801408 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008610174931792651, + "loss": 0.0827, + "theoretical_loss": 3.954416401427715, + "tokens_seen": 487063552 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008609372492376826, + "loss": 0.0827, + "theoretical_loss": 3.9541878607622705, + "tokens_seen": 487325696 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008608570052961001, + "loss": 0.0805, + "theoretical_loss": 3.9539594774025755, + "tokens_seen": 487587840 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008607767613545178, + "loss": 0.084, + "theoretical_loss": 3.9537312511558698, + "tokens_seen": 487849984 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008606965174129353, + "loss": 0.0825, + "theoretical_loss": 3.953503181829732, + "tokens_seen": 488112128 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0009867006447166204, + "objective/train/docs_used": 183865, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7441798448562622, + "objective/train/original_loss": 1.7441800832748413, + "objective/train/theoretical_loss": 3.9533892059518383, + "objective/train/tokens_used": 508703200, + "objective/train/value_avg": -0.01270294189453125, + "objective/train/value_loss": 0.0009546683286316693, + "objective/train/value_max": -0.00030303001403808594, + "objective/train/value_min": -0.64697265625, + "objective/train/value_reward_corr": 0.6314438238999378, + "objective/train/value_std": 0.02288818359375, + "objective/train/weight_avg": 1.0013914108276367, + "objective/train/weighted_lm_loss": 1.7459317445755005, + "objective/train/weights_max": 1.6658852100372314, + "objective/train/weights_min": 0.3877171576023102, + "theoretical_loss": 3.9533892059518383, + "tokens_seen": 488243200 + }, + { + "epoch": 0.15, + "learning_rate": 0.000860616273471353, + "loss": 0.0843, + "theoretical_loss": 3.9532752692320816, + "tokens_seen": 488374272 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008605360295297705, + "loss": 0.0811, + "theoretical_loss": 3.9530475131711746, + "tokens_seen": 488636416 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008604557855881881, + "loss": 0.0826, + "theoretical_loss": 3.9528199134556044, + "tokens_seen": 488898560 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008603755416466057, + "loss": 0.0843, + "theoretical_loss": 3.9525924698943022, + "tokens_seen": 489160704 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008602952977050232, + "loss": 0.0833, + "theoretical_loss": 3.952365182296533, + "tokens_seen": 489422848 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008602150537634409, + "loss": 0.0848, + "theoretical_loss": 3.9521380504718975, + "tokens_seen": 489684992 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008601348098218585, + "loss": 0.0852, + "theoretical_loss": 3.9519110742303325, + "tokens_seen": 489947136 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008600545658802761, + "loss": 0.0831, + "theoretical_loss": 3.951684253382105, + "tokens_seen": 490209280 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008599743219386936, + "loss": 0.0832, + "theoretical_loss": 3.951457587737817, + "tokens_seen": 490471424 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008598940779971113, + "loss": 0.0834, + "theoretical_loss": 3.9512310771084014, + "tokens_seen": 490733568 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008598138340555288, + "loss": 0.0848, + "theoretical_loss": 3.951004721305123, + "tokens_seen": 490995712 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008597335901139463, + "loss": 0.0833, + "theoretical_loss": 3.950778520139576, + "tokens_seen": 491257856 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": -0.0002496841480024159, + "objective/train/docs_used": 185162, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6399861574172974, + "objective/train/original_loss": 1.639986276626587, + "objective/train/theoretical_loss": 3.9505524734236848, + "objective/train/tokens_used": 511980000, + "objective/train/value_avg": -0.00838470458984375, + "objective/train/value_loss": 0.00021605678193736821, + "objective/train/value_max": -0.0002148151397705078, + "objective/train/value_min": -0.469970703125, + "objective/train/value_reward_corr": 0.7257053344268602, + "objective/train/value_std": 0.01364898681640625, + "objective/train/weight_avg": 0.9998546242713928, + "objective/train/weighted_lm_loss": 1.6394292116165161, + "objective/train/weights_max": 1.1676921844482422, + "objective/train/weights_min": 0.5068990588188171, + "theoretical_loss": 3.9505524734236848, + "tokens_seen": 491520000 + }, + { + "epoch": 0.15, + "learning_rate": 0.000859653346172364, + "loss": 0.0843, + "theoretical_loss": 3.9505524734236848, + "tokens_seen": 491520000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008595731022307815, + "loss": 0.0814, + "theoretical_loss": 3.950326580969703, + "tokens_seen": 491782144 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008594928582891992, + "loss": 0.0857, + "theoretical_loss": 3.950100842590212, + "tokens_seen": 492044288 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008594126143476168, + "loss": 0.085, + "theoretical_loss": 3.949875258098121, + "tokens_seen": 492306432 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008593323704060344, + "loss": 0.0855, + "theoretical_loss": 3.949649827306665, + "tokens_seen": 492568576 + }, + { + "epoch": 0.15, + "learning_rate": 0.000859252126464452, + "loss": 0.0843, + "theoretical_loss": 3.9494245500294047, + "tokens_seen": 492830720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008591718825228696, + "loss": 0.084, + "theoretical_loss": 3.949199426080228, + "tokens_seen": 493092864 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008590916385812871, + "loss": 0.0851, + "theoretical_loss": 3.9489744552733455, + "tokens_seen": 493355008 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008590113946397047, + "loss": 0.0891, + "theoretical_loss": 3.9487496374232913, + "tokens_seen": 493617152 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008589311506981223, + "loss": 0.0832, + "theoretical_loss": 3.9485249723449236, + "tokens_seen": 493879296 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008588509067565398, + "loss": 0.0826, + "theoretical_loss": 3.9483004598534217, + "tokens_seen": 494141440 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008587706628149576, + "loss": 0.0859, + "theoretical_loss": 3.948076099764288, + "tokens_seen": 494403584 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008586904188733751, + "loss": 0.0891, + "theoretical_loss": 3.947851891893343, + "tokens_seen": 494665728 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0016816434217616916, + "objective/train/docs_used": 186433, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7250248193740845, + "objective/train/original_loss": 1.725024700164795, + "objective/train/theoretical_loss": 3.947739844982224, + "objective/train/tokens_used": 515256800, + "objective/train/value_avg": -0.00727081298828125, + "objective/train/value_loss": 0.00025146797997877, + "objective/train/value_max": -0.0002148151397705078, + "objective/train/value_min": -0.45849609375, + "objective/train/value_reward_corr": 0.5433043884355142, + "objective/train/value_std": 0.01275634765625, + "objective/train/weight_avg": 1.0017974376678467, + "objective/train/weighted_lm_loss": 1.7270184755325317, + "objective/train/weights_max": 1.2517598867416382, + "objective/train/weights_min": 0.3689996004104614, + "theoretical_loss": 3.947739844982224, + "tokens_seen": 494796800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008586101749317927, + "loss": 0.0807, + "theoretical_loss": 3.9476278360567303, + "tokens_seen": 494927872 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008585299309902103, + "loss": 0.0842, + "theoretical_loss": 3.9474039320709107, + "tokens_seen": 495190016 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008584496870486278, + "loss": 0.0869, + "theoretical_loss": 3.9471801797526633, + "tokens_seen": 495452160 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008583694431070454, + "loss": 0.0824, + "theoretical_loss": 3.946956578919088, + "tokens_seen": 495714304 + }, + { + "epoch": 0.15, + "learning_rate": 0.000858289199165463, + "loss": 0.0844, + "theoretical_loss": 3.9467331293875976, + "tokens_seen": 495976448 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008582089552238806, + "loss": 0.0856, + "theoretical_loss": 3.9465098309759252, + "tokens_seen": 496238592 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008581287112822982, + "loss": 0.0844, + "theoretical_loss": 3.9462866835021178, + "tokens_seen": 496500736 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008580484673407159, + "loss": 0.0841, + "theoretical_loss": 3.9460636867845365, + "tokens_seen": 496762880 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008579682233991334, + "loss": 0.0852, + "theoretical_loss": 3.9458408406418584, + "tokens_seen": 497025024 + }, + { + "epoch": 0.15, + "learning_rate": 0.000857887979457551, + "loss": 0.0824, + "theoretical_loss": 3.945618144893073, + "tokens_seen": 497287168 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008578077355159686, + "loss": 0.0869, + "theoretical_loss": 3.9453955993574845, + "tokens_seen": 497549312 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008577274915743861, + "loss": 0.0834, + "theoretical_loss": 3.945173203854707, + "tokens_seen": 497811456 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 6.547255179611966e-05, + "objective/train/docs_used": 187487, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.78741455078125, + "objective/train/original_loss": 1.78741455078125, + "objective/train/theoretical_loss": 3.9449509582046662, + "objective/train/tokens_used": 518533600, + "objective/train/value_avg": -0.00795745849609375, + "objective/train/value_loss": 0.0003167542163282633, + "objective/train/value_max": -0.00017404556274414062, + "objective/train/value_min": -0.7861328125, + "objective/train/value_reward_corr": 0.6039141734249502, + "objective/train/value_std": 0.0124359130859375, + "objective/train/weight_avg": 1.0002011060714722, + "objective/train/weighted_lm_loss": 1.7879151105880737, + "objective/train/weights_max": 1.272313117980957, + "objective/train/weights_min": 0.3683635890483856, + "theoretical_loss": 3.9449509582046662, + "tokens_seen": 498073600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008576472476328038, + "loss": 0.0857, + "theoretical_loss": 3.9449509582046662, + "tokens_seen": 498073600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008575670036912213, + "loss": 0.0836, + "theoretical_loss": 3.944728862227601, + "tokens_seen": 498335744 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008574867597496389, + "loss": 0.0814, + "theoretical_loss": 3.9445069157440575, + "tokens_seen": 498597888 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008574065158080565, + "loss": 0.0849, + "theoretical_loss": 3.944285118574893, + "tokens_seen": 498860032 + }, + { + "epoch": 0.15, + "learning_rate": 0.000857326271866474, + "loss": 0.0845, + "theoretical_loss": 3.9440634705412725, + "tokens_seen": 499122176 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008572460279248917, + "loss": 0.0855, + "theoretical_loss": 3.9438419714646695, + "tokens_seen": 499384320 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008571657839833093, + "loss": 0.0871, + "theoretical_loss": 3.9436206211668647, + "tokens_seen": 499646464 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008570855400417269, + "loss": 0.0809, + "theoretical_loss": 3.9433994194699453, + "tokens_seen": 499908608 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008570052961001444, + "loss": 0.0838, + "theoretical_loss": 3.943178366196304, + "tokens_seen": 500170752 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008569250521585621, + "loss": 0.0851, + "theoretical_loss": 3.942957461168639, + "tokens_seen": 500432896 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008568448082169796, + "loss": 0.0872, + "theoretical_loss": 3.9427367042099544, + "tokens_seen": 500695040 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008567645642753972, + "loss": 0.0811, + "theoretical_loss": 3.942516095143555, + "tokens_seen": 500957184 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008566843203338148, + "loss": 0.0849, + "theoretical_loss": 3.9422956337930524, + "tokens_seen": 501219328 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0007019721670076251, + "objective/train/docs_used": 188727, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5103391408920288, + "objective/train/original_loss": 1.5103389024734497, + "objective/train/theoretical_loss": 3.9421854584562226, + "objective/train/tokens_used": 521810400, + "objective/train/value_avg": -0.0062103271484375, + "objective/train/value_loss": 0.00019669736502692103, + "objective/train/value_max": -0.00018966197967529297, + "objective/train/value_min": -0.333251953125, + "objective/train/value_reward_corr": 0.5123174818364106, + "objective/train/value_std": 0.00872802734375, + "objective/train/weight_avg": 1.0007885694503784, + "objective/train/weighted_lm_loss": 1.5108174085617065, + "objective/train/weights_max": 1.1460858583450317, + "objective/train/weights_min": 0.3740321099758148, + "theoretical_loss": 3.9421854584562226, + "tokens_seen": 501350400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008566040763922323, + "loss": 0.0823, + "theoretical_loss": 3.942075319982358, + "tokens_seen": 501481472 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008565238324506501, + "loss": 0.083, + "theoretical_loss": 3.941855153535686, + "tokens_seen": 501743616 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008564435885090676, + "loss": 0.0841, + "theoretical_loss": 3.9416351342775524, + "tokens_seen": 502005760 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008563633445674852, + "loss": 0.0828, + "theoretical_loss": 3.9414152620327716, + "tokens_seen": 502267904 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008562831006259028, + "loss": 0.0801, + "theoretical_loss": 3.941195536626461, + "tokens_seen": 502530048 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008562028566843203, + "loss": 0.084, + "theoretical_loss": 3.940975957884034, + "tokens_seen": 502792192 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008561226127427379, + "loss": 0.0842, + "theoretical_loss": 3.9407565256312047, + "tokens_seen": 503054336 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008560423688011555, + "loss": 0.0861, + "theoretical_loss": 3.940537239693983, + "tokens_seen": 503316480 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008559621248595731, + "loss": 0.0849, + "theoretical_loss": 3.9403180998986778, + "tokens_seen": 503578624 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008558818809179906, + "loss": 0.081, + "theoretical_loss": 3.9400991060718935, + "tokens_seen": 503840768 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008558016369764084, + "loss": 0.087, + "theoretical_loss": 3.93988025804053, + "tokens_seen": 504102912 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008557213930348259, + "loss": 0.0848, + "theoretical_loss": 3.9396615556317838, + "tokens_seen": 504365056 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0015238908817991614, + "objective/train/docs_used": 189911, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5270758867263794, + "objective/train/original_loss": 1.527076005935669, + "objective/train/theoretical_loss": 3.939442998673144, + "objective/train/tokens_used": 525087200, + "objective/train/value_avg": -0.0095672607421875, + "objective/train/value_loss": 0.0004635912482626736, + "objective/train/value_max": -0.00027370452880859375, + "objective/train/value_min": -0.77197265625, + "objective/train/value_reward_corr": 0.581789120363388, + "objective/train/value_std": 0.0157928466796875, + "objective/train/weight_avg": 1.0017266273498535, + "objective/train/weighted_lm_loss": 1.5284429788589478, + "objective/train/weights_max": 1.6315034627914429, + "objective/train/weights_min": 0.3693629503250122, + "theoretical_loss": 3.939442998673144, + "tokens_seen": 504627200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008556411490932436, + "loss": 0.0826, + "theoretical_loss": 3.939442998673144, + "tokens_seen": 504627200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008555609051516611, + "loss": 0.0827, + "theoretical_loss": 3.9392245869923954, + "tokens_seen": 504889344 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008554806612100786, + "loss": 0.087, + "theoretical_loss": 3.939006320417614, + "tokens_seen": 505151488 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008554004172684963, + "loss": 0.0838, + "theoretical_loss": 3.9387881987771705, + "tokens_seen": 505413632 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008553201733269138, + "loss": 0.0852, + "theoretical_loss": 3.9385702218997247, + "tokens_seen": 505675776 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008552399293853314, + "loss": 0.0852, + "theoretical_loss": 3.9383523896142316, + "tokens_seen": 505937920 + }, + { + "epoch": 0.15, + "learning_rate": 0.000855159685443749, + "loss": 0.0851, + "theoretical_loss": 3.9381347017499326, + "tokens_seen": 506200064 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008550794415021667, + "loss": 0.0852, + "theoretical_loss": 3.9379171581363623, + "tokens_seen": 506462208 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008549991975605842, + "loss": 0.0849, + "theoretical_loss": 3.937699758603342, + "tokens_seen": 506724352 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008549189536190018, + "loss": 0.085, + "theoretical_loss": 3.937482502980985, + "tokens_seen": 506986496 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008548387096774194, + "loss": 0.0878, + "theoretical_loss": 3.937265391099688, + "tokens_seen": 507248640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008547584657358369, + "loss": 0.0814, + "theoretical_loss": 3.9370484227901397, + "tokens_seen": 507510784 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008546782217942546, + "loss": 0.0822, + "theoretical_loss": 3.9368315978833124, + "tokens_seen": 507772928 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0013619712553918362, + "objective/train/docs_used": 191028, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.592982530593872, + "objective/train/original_loss": 1.592982530593872, + "objective/train/theoretical_loss": 3.936723239153177, + "objective/train/tokens_used": 528364000, + "objective/train/value_avg": -0.00579071044921875, + "objective/train/value_loss": 0.00020233231771271676, + "objective/train/value_max": -0.00026535987854003906, + "objective/train/value_min": -0.587890625, + "objective/train/value_reward_corr": 0.7309359003301154, + "objective/train/value_std": 0.0113677978515625, + "objective/train/weight_avg": 1.00145423412323, + "objective/train/weighted_lm_loss": 1.5958586931228638, + "objective/train/weights_max": 1.2670164108276367, + "objective/train/weights_min": 0.38817885518074036, + "theoretical_loss": 3.936723239153177, + "tokens_seen": 507904000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008545979778526721, + "loss": 0.0828, + "theoretical_loss": 3.936614916210466, + "tokens_seen": 508035072 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008545177339110897, + "loss": 0.0816, + "theoretical_loss": 3.9363983776031457, + "tokens_seen": 508297216 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008544374899695073, + "loss": 0.085, + "theoretical_loss": 3.936181981893182, + "tokens_seen": 508559360 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008543572460279248, + "loss": 0.0871, + "theoretical_loss": 3.9359657289126875, + "tokens_seen": 508821504 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008542770020863426, + "loss": 0.0858, + "theoretical_loss": 3.935749618494061, + "tokens_seen": 509083648 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008541967581447601, + "loss": 0.0846, + "theoretical_loss": 3.935533650469983, + "tokens_seen": 509345792 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008541165142031777, + "loss": 0.0831, + "theoretical_loss": 3.935317824673417, + "tokens_seen": 509607936 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008540362702615953, + "loss": 0.0852, + "theoretical_loss": 3.935102140937608, + "tokens_seen": 509870080 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008539560263200129, + "loss": 0.0837, + "theoretical_loss": 3.934886599096081, + "tokens_seen": 510132224 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008538757823784304, + "loss": 0.0828, + "theoretical_loss": 3.9346711989826426, + "tokens_seen": 510394368 + }, + { + "epoch": 0.15, + "learning_rate": 0.000853795538436848, + "loss": 0.0842, + "theoretical_loss": 3.93445594043138, + "tokens_seen": 510656512 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008537152944952656, + "loss": 0.0831, + "theoretical_loss": 3.9342408232766584, + "tokens_seen": 510918656 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0022731111384928226, + "objective/train/docs_used": 192198, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7514947652816772, + "objective/train/original_loss": 1.7514947652816772, + "objective/train/theoretical_loss": 3.934025847353122, + "objective/train/tokens_used": 531640800, + "objective/train/value_avg": -0.006824493408203125, + "objective/train/value_loss": 0.00011324948718538508, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.3486328125, + "objective/train/value_reward_corr": 0.5780862010598011, + "objective/train/value_std": 0.007709503173828125, + "objective/train/weight_avg": 1.002328634262085, + "objective/train/weighted_lm_loss": 1.7554271221160889, + "objective/train/weights_max": 1.185212254524231, + "objective/train/weights_min": 0.7214187383651733, + "theoretical_loss": 3.934025847353122, + "tokens_seen": 511180800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008536350505536831, + "loss": 0.0848, + "theoretical_loss": 3.934025847353122, + "tokens_seen": 511180800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008535548066121009, + "loss": 0.0827, + "theoretical_loss": 3.9338110124956924, + "tokens_seen": 511442944 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008534745626705184, + "loss": 0.0846, + "theoretical_loss": 3.9335963185395713, + "tokens_seen": 511705088 + }, + { + "epoch": 0.16, + "learning_rate": 0.000853394318728936, + "loss": 0.0818, + "theoretical_loss": 3.933381765320233, + "tokens_seen": 511967232 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008533140747873536, + "loss": 0.0821, + "theoretical_loss": 3.933167352673432, + "tokens_seen": 512229376 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008532338308457711, + "loss": 0.0843, + "theoretical_loss": 3.9329530804351958, + "tokens_seen": 512491520 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008531535869041887, + "loss": 0.0862, + "theoretical_loss": 3.9327389484418287, + "tokens_seen": 512753664 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008530733429626063, + "loss": 0.0808, + "theoretical_loss": 3.9325249565299076, + "tokens_seen": 513015808 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008529930990210239, + "loss": 0.085, + "theoretical_loss": 3.932311104536285, + "tokens_seen": 513277952 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008529128550794415, + "loss": 0.0829, + "theoretical_loss": 3.9320973922980844, + "tokens_seen": 513540096 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008528326111378592, + "loss": 0.0849, + "theoretical_loss": 3.931883819652705, + "tokens_seen": 513802240 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008527523671962767, + "loss": 0.0826, + "theoretical_loss": 3.9316703864378155, + "tokens_seen": 514064384 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008526721232546944, + "loss": 0.0825, + "theoretical_loss": 3.9314570924913568, + "tokens_seen": 514326528 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0023866831324994564, + "objective/train/docs_used": 193343, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5726442337036133, + "objective/train/original_loss": 1.5726442337036133, + "objective/train/theoretical_loss": 3.931350497693219, + "objective/train/tokens_used": 534917600, + "objective/train/value_avg": -0.00785064697265625, + "objective/train/value_loss": 0.00021415037917904556, + "objective/train/value_max": -0.0003528594970703125, + "objective/train/value_min": -0.262451171875, + "objective/train/value_reward_corr": 0.5768801997308433, + "objective/train/value_std": 0.01004791259765625, + "objective/train/weight_avg": 1.002478003501892, + "objective/train/weighted_lm_loss": 1.5768412351608276, + "objective/train/weights_max": 1.115512490272522, + "objective/train/weights_min": 0.36873859167099, + "theoretical_loss": 3.931350497693219, + "tokens_seen": 514457600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008525918793131119, + "loss": 0.0841, + "theoretical_loss": 3.9312439376515407, + "tokens_seen": 514588672 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008525116353715294, + "loss": 0.0836, + "theoretical_loss": 3.9310309217568493, + "tokens_seen": 514850816 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008524313914299471, + "loss": 0.0851, + "theoretical_loss": 3.9308180446460343, + "tokens_seen": 515112960 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008523511474883646, + "loss": 0.0845, + "theoretical_loss": 3.9306053061581165, + "tokens_seen": 515375104 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008522709035467822, + "loss": 0.0838, + "theoretical_loss": 3.930392706132385, + "tokens_seen": 515637248 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008521906596051998, + "loss": 0.085, + "theoretical_loss": 3.9301802444083966, + "tokens_seen": 515899392 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008521104156636175, + "loss": 0.0851, + "theoretical_loss": 3.929967920825977, + "tokens_seen": 516161536 + }, + { + "epoch": 0.16, + "learning_rate": 0.000852030171722035, + "loss": 0.0809, + "theoretical_loss": 3.929755735225216, + "tokens_seen": 516423680 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008519499277804526, + "loss": 0.0836, + "theoretical_loss": 3.9295436874464715, + "tokens_seen": 516685824 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008518696838388702, + "loss": 0.0839, + "theoretical_loss": 3.929331777330366, + "tokens_seen": 516947968 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008517894398972877, + "loss": 0.0802, + "theoretical_loss": 3.9291200047177886, + "tokens_seen": 517210112 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008517091959557054, + "loss": 0.0821, + "theoretical_loss": 3.9289083694498905, + "tokens_seen": 517472256 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.00018064206233248115, + "objective/train/docs_used": 194581, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6930550336837769, + "objective/train/original_loss": 1.6930551528930664, + "objective/train/theoretical_loss": 3.9286968713680883, + "objective/train/tokens_used": 538194400, + "objective/train/value_avg": -0.00974273681640625, + "objective/train/value_loss": 0.0005410881130956113, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.72412109375, + "objective/train/value_reward_corr": 0.7755958663238911, + "objective/train/value_std": 0.0196533203125, + "objective/train/weight_avg": 1.0004180669784546, + "objective/train/weighted_lm_loss": 1.694211721420288, + "objective/train/weights_max": 1.331702709197998, + "objective/train/weights_min": 0.3937867283821106, + "theoretical_loss": 3.9286968713680883, + "tokens_seen": 517734400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008516289520141229, + "loss": 0.0811, + "theoretical_loss": 3.9286968713680883, + "tokens_seen": 517734400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008515487080725406, + "loss": 0.0829, + "theoretical_loss": 3.9284855103140615, + "tokens_seen": 517996544 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008514684641309581, + "loss": 0.0827, + "theoretical_loss": 3.9282742861297524, + "tokens_seen": 518258688 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008513882201893756, + "loss": 0.0793, + "theoretical_loss": 3.928063198657365, + "tokens_seen": 518520832 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008513079762477934, + "loss": 0.0814, + "theoretical_loss": 3.9278522477393656, + "tokens_seen": 518782976 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008512277323062109, + "loss": 0.0829, + "theoretical_loss": 3.9276414332184815, + "tokens_seen": 519045120 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008511474883646285, + "loss": 0.0823, + "theoretical_loss": 3.927430754937699, + "tokens_seen": 519307264 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008510672444230461, + "loss": 0.08, + "theoretical_loss": 3.927220212740267, + "tokens_seen": 519569408 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008509870004814637, + "loss": 0.0799, + "theoretical_loss": 3.9270098064696906, + "tokens_seen": 519831552 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008509067565398812, + "loss": 0.0818, + "theoretical_loss": 3.9267995359697356, + "tokens_seen": 520093696 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008508265125982988, + "loss": 0.0823, + "theoretical_loss": 3.926589401084426, + "tokens_seen": 520355840 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008507462686567164, + "loss": 0.0807, + "theoretical_loss": 3.9263794016580427, + "tokens_seen": 520617984 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008506660247151339, + "loss": 0.0803, + "theoretical_loss": 3.9261695375351238, + "tokens_seen": 520880128 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0009029977954924107, + "objective/train/docs_used": 195804, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7540147304534912, + "objective/train/original_loss": 1.7540148496627808, + "objective/train/theoretical_loss": 3.926064656163952, + "objective/train/tokens_used": 541471200, + "objective/train/value_avg": -0.01312255859375, + "objective/train/value_loss": 0.0006540497415699065, + "objective/train/value_max": -0.00019562244415283203, + "objective/train/value_min": -0.7529296875, + "objective/train/value_reward_corr": 0.6588186474670367, + "objective/train/value_std": 0.021728515625, + "objective/train/weight_avg": 1.0011897087097168, + "objective/train/weighted_lm_loss": 1.7548198699951172, + "objective/train/weights_max": 1.322047233581543, + "objective/train/weights_min": 0.3694334030151367, + "theoretical_loss": 3.926064656163952, + "tokens_seen": 521011200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008505857807735517, + "loss": 0.0844, + "theoretical_loss": 3.9259598085604646, + "tokens_seen": 521142272 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008505055368319692, + "loss": 0.0813, + "theoretical_loss": 3.925750214579116, + "tokens_seen": 521404416 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008504252928903869, + "loss": 0.0848, + "theoretical_loss": 3.9255407554363835, + "tokens_seen": 521666560 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008503450489488044, + "loss": 0.0816, + "theoretical_loss": 3.92533143097783, + "tokens_seen": 521928704 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008502648050072219, + "loss": 0.083, + "theoretical_loss": 3.9251222410492694, + "tokens_seen": 522190848 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008501845610656396, + "loss": 0.0834, + "theoretical_loss": 3.924913185496772, + "tokens_seen": 522452992 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008501043171240571, + "loss": 0.0817, + "theoretical_loss": 3.924704264166659, + "tokens_seen": 522715136 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008500240731824747, + "loss": 0.082, + "theoretical_loss": 3.9244954769055074, + "tokens_seen": 522977280 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008499438292408923, + "loss": 0.0844, + "theoretical_loss": 3.924286823560144, + "tokens_seen": 523239424 + }, + { + "epoch": 0.16, + "learning_rate": 0.00084986358529931, + "loss": 0.0809, + "theoretical_loss": 3.9240783039776472, + "tokens_seen": 523501568 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008497833413577275, + "loss": 0.0828, + "theoretical_loss": 3.9238699180053485, + "tokens_seen": 523763712 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008497030974161451, + "loss": 0.0837, + "theoretical_loss": 3.923661665490828, + "tokens_seen": 524025856 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0017735909204930067, + "objective/train/docs_used": 196964, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5325592756271362, + "objective/train/original_loss": 1.5325592756271362, + "objective/train/theoretical_loss": 3.9234535462819156, + "objective/train/tokens_used": 544748000, + "objective/train/value_avg": -0.00875091552734375, + "objective/train/value_loss": 0.00024432019563391805, + "objective/train/value_max": -0.0002378225326538086, + "objective/train/value_min": -0.404052734375, + "objective/train/value_reward_corr": 0.6240298333146145, + "objective/train/value_std": 0.0148773193359375, + "objective/train/weight_avg": 1.0018869638442993, + "objective/train/weighted_lm_loss": 1.5350106954574585, + "objective/train/weights_max": 1.2263628244400024, + "objective/train/weights_min": 0.37659770250320435, + "theoretical_loss": 3.9234535462819156, + "tokens_seen": 524288000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008496228534745627, + "loss": 0.0818, + "theoretical_loss": 3.9234535462819156, + "tokens_seen": 524288000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008495426095329802, + "loss": 0.0825, + "theoretical_loss": 3.923245560226693, + "tokens_seen": 524550144 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008494623655913979, + "loss": 0.0794, + "theoretical_loss": 3.9230377071734885, + "tokens_seen": 524812288 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008493821216498154, + "loss": 0.0822, + "theoretical_loss": 3.9228299869708794, + "tokens_seen": 525074432 + }, + { + "epoch": 0.16, + "learning_rate": 0.000849301877708233, + "loss": 0.0828, + "theoretical_loss": 3.9226223994676923, + "tokens_seen": 525336576 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008492216337666506, + "loss": 0.0855, + "theoretical_loss": 3.9224149445129983, + "tokens_seen": 525598720 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008491413898250683, + "loss": 0.0816, + "theoretical_loss": 3.922207621956119, + "tokens_seen": 525860864 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008490611458834859, + "loss": 0.0808, + "theoretical_loss": 3.9220004316466186, + "tokens_seen": 526123008 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008489809019419034, + "loss": 0.0842, + "theoretical_loss": 3.9217933734343093, + "tokens_seen": 526385152 + }, + { + "epoch": 0.16, + "learning_rate": 0.000848900658000321, + "loss": 0.0824, + "theoretical_loss": 3.9215864471692488, + "tokens_seen": 526647296 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008488204140587386, + "loss": 0.0819, + "theoretical_loss": 3.921379652701738, + "tokens_seen": 526909440 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008487401701171562, + "loss": 0.0812, + "theoretical_loss": 3.9211729898823235, + "tokens_seen": 527171584 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008486599261755737, + "loss": 0.0821, + "theoretical_loss": 3.920966458561794, + "tokens_seen": 527433728 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.00034622373641468585, + "objective/train/docs_used": 198271, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5029301643371582, + "objective/train/original_loss": 1.5029301643371582, + "objective/train/theoretical_loss": 3.9208632421670497, + "objective/train/tokens_used": 548024800, + "objective/train/value_avg": -0.00766754150390625, + "objective/train/value_loss": 0.0003754664212465286, + "objective/train/value_max": -0.0003077983856201172, + "objective/train/value_min": -0.64697265625, + "objective/train/value_reward_corr": 0.5987369406329422, + "objective/train/value_std": 0.0127410888671875, + "objective/train/weight_avg": 1.0005102157592773, + "objective/train/weighted_lm_loss": 1.5032236576080322, + "objective/train/weights_max": 1.5309104919433594, + "objective/train/weights_min": 0.37003564834594727, + "theoretical_loss": 3.9208632421670497, + "tokens_seen": 527564800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008485796822339914, + "loss": 0.0817, + "theoretical_loss": 3.920760058591182, + "tokens_seen": 527695872 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008484994382924089, + "loss": 0.0827, + "theoretical_loss": 3.9205537898217644, + "tokens_seen": 527958016 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008484191943508264, + "loss": 0.0819, + "theoretical_loss": 3.920347652105058, + "tokens_seen": 528220160 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008483389504092442, + "loss": 0.0831, + "theoretical_loss": 3.920141645292821, + "tokens_seen": 528482304 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008482587064676617, + "loss": 0.0815, + "theoretical_loss": 3.919935769237055, + "tokens_seen": 528744448 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008481784625260793, + "loss": 0.0842, + "theoretical_loss": 3.91973002379, + "tokens_seen": 529006592 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008480982185844969, + "loss": 0.082, + "theoretical_loss": 3.919524408804137, + "tokens_seen": 529268736 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008480179746429145, + "loss": 0.0829, + "theoretical_loss": 3.9193189241321873, + "tokens_seen": 529530880 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008479377307013321, + "loss": 0.0823, + "theoretical_loss": 3.9191135696271098, + "tokens_seen": 529793024 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008478574867597496, + "loss": 0.0852, + "theoretical_loss": 3.9189083451421025, + "tokens_seen": 530055168 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008477772428181672, + "loss": 0.0784, + "theoretical_loss": 3.9187032505306023, + "tokens_seen": 530317312 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008476969988765848, + "loss": 0.0813, + "theoretical_loss": 3.918498285646282, + "tokens_seen": 530579456 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.000702679913956672, + "objective/train/docs_used": 199409, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6489094495773315, + "objective/train/original_loss": 1.648909330368042, + "objective/train/theoretical_loss": 3.9182934503430538, + "objective/train/tokens_used": 551301600, + "objective/train/value_avg": -0.00879669189453125, + "objective/train/value_loss": 0.0003273165493737906, + "objective/train/value_max": -0.00041413307189941406, + "objective/train/value_min": -0.771484375, + "objective/train/value_reward_corr": 0.6633614602841378, + "objective/train/value_std": 0.0150604248046875, + "objective/train/weight_avg": 1.0008490085601807, + "objective/train/weighted_lm_loss": 1.6499123573303223, + "objective/train/weights_max": 1.3013832569122314, + "objective/train/weights_min": 0.38973385095596313, + "theoretical_loss": 3.9182934503430538, + "tokens_seen": 530841600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008476167549350025, + "loss": 0.0819, + "theoretical_loss": 3.9182934503430538, + "tokens_seen": 530841600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00084753651099342, + "loss": 0.0795, + "theoretical_loss": 3.918088744475064, + "tokens_seen": 531103744 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008474562670518377, + "loss": 0.0844, + "theoretical_loss": 3.9178841678966956, + "tokens_seen": 531365888 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008473760231102552, + "loss": 0.0846, + "theoretical_loss": 3.9176797204625693, + "tokens_seen": 531628032 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008472957791686727, + "loss": 0.0833, + "theoretical_loss": 3.917475402027537, + "tokens_seen": 531890176 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008472155352270904, + "loss": 0.0799, + "theoretical_loss": 3.917271212446689, + "tokens_seen": 532152320 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008471352912855079, + "loss": 0.0822, + "theoretical_loss": 3.917067151575348, + "tokens_seen": 532414464 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008470550473439255, + "loss": 0.085, + "theoretical_loss": 3.916863219269069, + "tokens_seen": 532676608 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008469748034023431, + "loss": 0.0816, + "theoretical_loss": 3.9166594153836427, + "tokens_seen": 532938752 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008468945594607608, + "loss": 0.0823, + "theoretical_loss": 3.9164557397750897, + "tokens_seen": 533200896 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008468143155191783, + "loss": 0.0817, + "theoretical_loss": 3.916252192299665, + "tokens_seen": 533463040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008467340715775959, + "loss": 0.0818, + "theoretical_loss": 3.9160487728138538, + "tokens_seen": 533725184 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008466538276360135, + "loss": 0.0826, + "theoretical_loss": 3.9158454811743733, + "tokens_seen": 533987328 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0011991261271759868, + "objective/train/docs_used": 200516, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.67826509475708, + "objective/train/original_loss": 1.6782649755477905, + "objective/train/theoretical_loss": 3.9157438832522944, + "objective/train/tokens_used": 554578400, + "objective/train/value_avg": -0.007232666015625, + "objective/train/value_loss": 0.00027039533597417176, + "objective/train/value_max": -0.00033545494079589844, + "objective/train/value_min": -0.525390625, + "objective/train/value_reward_corr": 0.46649640516327573, + "objective/train/value_std": 0.00963592529296875, + "objective/train/weight_avg": 1.0013190507888794, + "objective/train/weighted_lm_loss": 1.6796201467514038, + "objective/train/weights_max": 1.2117398977279663, + "objective/train/weights_min": 0.3694460988044739, + "theoretical_loss": 3.9157438832522944, + "tokens_seen": 534118400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008465735836944311, + "loss": 0.0848, + "theoretical_loss": 3.915642317238171, + "tokens_seen": 534249472 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008464933397528487, + "loss": 0.084, + "theoretical_loss": 3.915439280862423, + "tokens_seen": 534511616 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008464130958112662, + "loss": 0.0814, + "theoretical_loss": 3.915236371904539, + "tokens_seen": 534773760 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008463328518696839, + "loss": 0.0826, + "theoretical_loss": 3.915033590222153, + "tokens_seen": 535035904 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008462526079281014, + "loss": 0.0826, + "theoretical_loss": 3.914830935673132, + "tokens_seen": 535298048 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008461723639865191, + "loss": 0.0795, + "theoretical_loss": 3.914628408115569, + "tokens_seen": 535560192 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008460921200449367, + "loss": 0.0816, + "theoretical_loss": 3.9144260074077843, + "tokens_seen": 535822336 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008460118761033542, + "loss": 0.0843, + "theoretical_loss": 3.9142237334083276, + "tokens_seen": 536084480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008459316321617718, + "loss": 0.0818, + "theoretical_loss": 3.914021585975973, + "tokens_seen": 536346624 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008458513882201894, + "loss": 0.0808, + "theoretical_loss": 3.9138195649697227, + "tokens_seen": 536608768 + }, + { + "epoch": 0.16, + "learning_rate": 0.000845771144278607, + "loss": 0.0827, + "theoretical_loss": 3.9136176702488044, + "tokens_seen": 536870912 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008456909003370245, + "loss": 0.0794, + "theoretical_loss": 3.91341590167267, + "tokens_seen": 537133056 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0008067527669481933, + "objective/train/docs_used": 201641, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5511680841445923, + "objective/train/original_loss": 1.5511682033538818, + "objective/train/theoretical_loss": 3.9132142591009975, + "objective/train/tokens_used": 557855200, + "objective/train/value_avg": -0.00760650634765625, + "objective/train/value_loss": 0.00028524218942038715, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.60986328125, + "objective/train/value_reward_corr": 0.643433561791619, + "objective/train/value_std": 0.0146026611328125, + "objective/train/weight_avg": 1.0009351968765259, + "objective/train/weighted_lm_loss": 1.551741361618042, + "objective/train/weights_max": 1.4019429683685303, + "objective/train/weights_min": 0.3742033541202545, + "theoretical_loss": 3.9132142591009975, + "tokens_seen": 537395200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008456106563954422, + "loss": 0.0807, + "theoretical_loss": 3.9132142591009975, + "tokens_seen": 537395200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008455304124538597, + "loss": 0.0813, + "theoretical_loss": 3.9130127423936907, + "tokens_seen": 537657344 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008454501685122772, + "loss": 0.0839, + "theoretical_loss": 3.9128113514108733, + "tokens_seen": 537919488 + }, + { + "epoch": 0.16, + "learning_rate": 0.000845369924570695, + "loss": 0.0821, + "theoretical_loss": 3.9126100860128963, + "tokens_seen": 538181632 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008452896806291125, + "loss": 0.0836, + "theoretical_loss": 3.9124089460603324, + "tokens_seen": 538443776 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008452094366875302, + "loss": 0.0828, + "theoretical_loss": 3.9122079314139766, + "tokens_seen": 538705920 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008451291927459477, + "loss": 0.0828, + "theoretical_loss": 3.9120070419348463, + "tokens_seen": 538968064 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008450489488043653, + "loss": 0.0799, + "theoretical_loss": 3.9118062774841804, + "tokens_seen": 539230208 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008449687048627829, + "loss": 0.084, + "theoretical_loss": 3.91160563792344, + "tokens_seen": 539492352 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008448884609212004, + "loss": 0.0829, + "theoretical_loss": 3.911405123114305, + "tokens_seen": 539754496 + }, + { + "epoch": 0.16, + "learning_rate": 0.000844808216979618, + "loss": 0.0847, + "theoretical_loss": 3.9112047329186783, + "tokens_seen": 540016640 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008447279730380356, + "loss": 0.0833, + "theoretical_loss": 3.911004467198679, + "tokens_seen": 540278784 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008446477290964533, + "loss": 0.0842, + "theoretical_loss": 3.9108043258166485, + "tokens_seen": 540540928 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0016820263117551804, + "objective/train/docs_used": 202818, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5808069705963135, + "objective/train/original_loss": 1.5808069705963135, + "objective/train/theoretical_loss": 3.910704301709413, + "objective/train/tokens_used": 561132000, + "objective/train/value_avg": -0.00955963134765625, + "objective/train/value_loss": 0.0003617853799369186, + "objective/train/value_max": -0.0003407001495361328, + "objective/train/value_min": -0.6962890625, + "objective/train/value_reward_corr": 0.5872441373349445, + "objective/train/value_std": 0.016387939453125, + "objective/train/weight_avg": 1.0018519163131714, + "objective/train/weighted_lm_loss": 1.5838106870651245, + "objective/train/weights_max": 1.8153696060180664, + "objective/train/weights_min": 0.3881314992904663, + "theoretical_loss": 3.910704301709413, + "tokens_seen": 540672000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008445674851548708, + "loss": 0.0816, + "theoretical_loss": 3.910604308635146, + "tokens_seen": 540803072 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008444872412132885, + "loss": 0.0799, + "theoretical_loss": 3.9104044155169495, + "tokens_seen": 541065216 + }, + { + "epoch": 0.16, + "learning_rate": 0.000844406997271706, + "loss": 0.0854, + "theoretical_loss": 3.910204646325055, + "tokens_seen": 541327360 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008443267533301235, + "loss": 0.0843, + "theoretical_loss": 3.9100050009226752, + "tokens_seen": 541589504 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008442465093885412, + "loss": 0.0833, + "theoretical_loss": 3.9098054791732406, + "tokens_seen": 541851648 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008441662654469587, + "loss": 0.0837, + "theoretical_loss": 3.909606080940399, + "tokens_seen": 542113792 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008440860215053764, + "loss": 0.0824, + "theoretical_loss": 3.909406806088013, + "tokens_seen": 542375936 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008440057775637939, + "loss": 0.0841, + "theoretical_loss": 3.909207654480162, + "tokens_seen": 542638080 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008439255336222116, + "loss": 0.082, + "theoretical_loss": 3.9090086259811403, + "tokens_seen": 542900224 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008438452896806292, + "loss": 0.0825, + "theoretical_loss": 3.908809720455457, + "tokens_seen": 543162368 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008437650457390467, + "loss": 0.0824, + "theoretical_loss": 3.908610937767836, + "tokens_seen": 543424512 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008436848017974643, + "loss": 0.083, + "theoretical_loss": 3.9084122777832144, + "tokens_seen": 543686656 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": -0.0008586941985413432, + "objective/train/docs_used": 204061, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6755656003952026, + "objective/train/original_loss": 1.675565481185913, + "objective/train/theoretical_loss": 3.908213740366744, + "objective/train/tokens_used": 564408800, + "objective/train/value_avg": -0.00843048095703125, + "objective/train/value_loss": 0.0002858492953237146, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.22607421875, + "objective/train/value_reward_corr": 0.6344704655141316, + "objective/train/value_std": 0.01132965087890625, + "objective/train/weight_avg": 0.9992753863334656, + "objective/train/weighted_lm_loss": 1.6743135452270508, + "objective/train/weights_max": 1.2291204929351807, + "objective/train/weights_min": 0.37150946259498596, + "theoretical_loss": 3.908213740366744, + "tokens_seen": 543948800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008436045578558819, + "loss": 0.0836, + "theoretical_loss": 3.908213740366744, + "tokens_seen": 543948800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008435243139142995, + "loss": 0.083, + "theoretical_loss": 3.908015325383788, + "tokens_seen": 544210944 + }, + { + "epoch": 0.16, + "learning_rate": 0.000843444069972717, + "loss": 0.0823, + "theoretical_loss": 3.907817032699924, + "tokens_seen": 544473088 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008433638260311347, + "loss": 0.0813, + "theoretical_loss": 3.9076188621809416, + "tokens_seen": 544735232 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008432835820895522, + "loss": 0.0842, + "theoretical_loss": 3.9074208136928408, + "tokens_seen": 544997376 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008432033381479699, + "loss": 0.0833, + "theoretical_loss": 3.907222887101834, + "tokens_seen": 545259520 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008431230942063875, + "loss": 0.0858, + "theoretical_loss": 3.9070250822743446, + "tokens_seen": 545521664 + }, + { + "epoch": 0.17, + "learning_rate": 0.000843042850264805, + "loss": 0.0846, + "theoretical_loss": 3.906827399077006, + "tokens_seen": 545783808 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008429626063232226, + "loss": 0.083, + "theoretical_loss": 3.9066298373766615, + "tokens_seen": 546045952 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008428823623816402, + "loss": 0.0835, + "theoretical_loss": 3.9064323970403656, + "tokens_seen": 546308096 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008428021184400578, + "loss": 0.0835, + "theoretical_loss": 3.9062350779353787, + "tokens_seen": 546570240 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008427218744984754, + "loss": 0.083, + "theoretical_loss": 3.906037879929174, + "tokens_seen": 546832384 + }, + { + "epoch": 0.17, + "learning_rate": 0.000842641630556893, + "loss": 0.0816, + "theoretical_loss": 3.90584080288943, + "tokens_seen": 547094528 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.00023532530758529902, + "objective/train/docs_used": 204823, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5397506952285767, + "objective/train/original_loss": 1.5397508144378662, + "objective/train/theoretical_loss": 3.9057423096906874, + "objective/train/tokens_used": 567685600, + "objective/train/value_avg": -0.00724029541015625, + "objective/train/value_loss": 0.0003734648635145277, + "objective/train/value_max": -0.00015354156494140625, + "objective/train/value_min": -0.76318359375, + "objective/train/value_reward_corr": 0.8074329039979607, + "objective/train/value_std": 0.0235595703125, + "objective/train/weight_avg": 1.0004050731658936, + "objective/train/weighted_lm_loss": 1.539077639579773, + "objective/train/weights_max": 1.4787062406539917, + "objective/train/weights_min": 0.3683115839958191, + "theoretical_loss": 3.9057423096906874, + "tokens_seen": 547225600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008425613866153105, + "loss": 0.0832, + "theoretical_loss": 3.905643846684034, + "tokens_seen": 547356672 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008424811426737282, + "loss": 0.0801, + "theoretical_loss": 3.9054470111810815, + "tokens_seen": 547618816 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008424008987321458, + "loss": 0.0815, + "theoretical_loss": 3.9052502962488735, + "tokens_seen": 547880960 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008423206547905633, + "loss": 0.083, + "theoretical_loss": 3.9050537017559197, + "tokens_seen": 548143104 + }, + { + "epoch": 0.17, + "learning_rate": 0.000842240410848981, + "loss": 0.0831, + "theoretical_loss": 3.904857227570934, + "tokens_seen": 548405248 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008421601669073985, + "loss": 0.0789, + "theoretical_loss": 3.904660873562837, + "tokens_seen": 548667392 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008420799229658161, + "loss": 0.0817, + "theoretical_loss": 3.9044646396007545, + "tokens_seen": 548929536 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008419996790242337, + "loss": 0.0823, + "theoretical_loss": 3.9042685255540177, + "tokens_seen": 549191680 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008419194350826512, + "loss": 0.0831, + "theoretical_loss": 3.9040725312921616, + "tokens_seen": 549453824 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008418391911410688, + "loss": 0.0827, + "theoretical_loss": 3.9038766566849263, + "tokens_seen": 549715968 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008417589471994864, + "loss": 0.0825, + "theoretical_loss": 3.903680901602254, + "tokens_seen": 549978112 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008416787032579041, + "loss": 0.0814, + "theoretical_loss": 3.9034852659142913, + "tokens_seen": 550240256 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0012200219789519906, + "objective/train/docs_used": 206347, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7369581460952759, + "objective/train/original_loss": 1.7369580268859863, + "objective/train/theoretical_loss": 3.9032897494913876, + "objective/train/tokens_used": 570962400, + "objective/train/value_avg": -0.00876617431640625, + "objective/train/value_loss": 0.0002591983356978744, + "objective/train/value_max": -0.0002359151840209961, + "objective/train/value_min": -0.483154296875, + "objective/train/value_reward_corr": 0.6927815042926995, + "objective/train/value_std": 0.0133209228515625, + "objective/train/weight_avg": 1.0013376474380493, + "objective/train/weighted_lm_loss": 1.73981511592865, + "objective/train/weights_max": 1.3604191541671753, + "objective/train/weights_min": 0.38016000390052795, + "theoretical_loss": 3.9032897494913876, + "tokens_seen": 550502400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008415984593163216, + "loss": 0.0813, + "theoretical_loss": 3.9032897494913876, + "tokens_seen": 550502400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008415182153747393, + "loss": 0.0812, + "theoretical_loss": 3.9030943522040946, + "tokens_seen": 550764544 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008414379714331568, + "loss": 0.0848, + "theoretical_loss": 3.902899073923166, + "tokens_seen": 551026688 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008413577274915744, + "loss": 0.0828, + "theoretical_loss": 3.902703914519557, + "tokens_seen": 551288832 + }, + { + "epoch": 0.17, + "learning_rate": 0.000841277483549992, + "loss": 0.083, + "theoretical_loss": 3.9025088738644236, + "tokens_seen": 551550976 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008411972396084095, + "loss": 0.0801, + "theoretical_loss": 3.9023139518291243, + "tokens_seen": 551813120 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008411169956668272, + "loss": 0.0809, + "theoretical_loss": 3.902119148285216, + "tokens_seen": 552075264 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008410367517252447, + "loss": 0.0828, + "theoretical_loss": 3.9019244631044563, + "tokens_seen": 552337408 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008409565077836624, + "loss": 0.0801, + "theoretical_loss": 3.9017298961588027, + "tokens_seen": 552599552 + }, + { + "epoch": 0.17, + "learning_rate": 0.00084087626384208, + "loss": 0.0819, + "theoretical_loss": 3.901535447320412, + "tokens_seen": 552861696 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008407960199004975, + "loss": 0.0812, + "theoretical_loss": 3.901341116461639, + "tokens_seen": 553123840 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008407157759589151, + "loss": 0.0807, + "theoretical_loss": 3.9011469034550372, + "tokens_seen": 553385984 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008406355320173327, + "loss": 0.0819, + "theoretical_loss": 3.900952808173358, + "tokens_seen": 553648128 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.00015300983795896173, + "objective/train/docs_used": 207589, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8608996868133545, + "objective/train/original_loss": 1.8608994483947754, + "objective/train/theoretical_loss": 3.9008558046396535, + "objective/train/tokens_used": 574239200, + "objective/train/value_avg": -0.0084075927734375, + "objective/train/value_loss": 0.0002866761351469904, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.2509765625, + "objective/train/value_reward_corr": 0.6109745044269907, + "objective/train/value_std": 0.01207733154296875, + "objective/train/weight_avg": 0.999976634979248, + "objective/train/weighted_lm_loss": 1.8602499961853027, + "objective/train/weights_max": 1.1194891929626465, + "objective/train/weights_min": 0.36891940236091614, + "theoretical_loss": 3.9008558046396535, + "tokens_seen": 553779200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008405552880757503, + "loss": 0.0839, + "theoretical_loss": 3.900758830489551, + "tokens_seen": 553910272 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008404750441341678, + "loss": 0.083, + "theoretical_loss": 3.900564970276762, + "tokens_seen": 554172416 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008403948001925855, + "loss": 0.0812, + "theoretical_loss": 3.9003712274083346, + "tokens_seen": 554434560 + }, + { + "epoch": 0.17, + "learning_rate": 0.000840314556251003, + "loss": 0.0826, + "theoretical_loss": 3.9001776017578074, + "tokens_seen": 554696704 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008402343123094207, + "loss": 0.0817, + "theoretical_loss": 3.899984093198916, + "tokens_seen": 554958848 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008401540683678383, + "loss": 0.0802, + "theoretical_loss": 3.899790701605592, + "tokens_seen": 555220992 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008400738244262558, + "loss": 0.081, + "theoretical_loss": 3.899597426851961, + "tokens_seen": 555483136 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008399935804846735, + "loss": 0.0781, + "theoretical_loss": 3.899404268812343, + "tokens_seen": 555745280 + }, + { + "epoch": 0.17, + "learning_rate": 0.000839913336543091, + "loss": 0.0806, + "theoretical_loss": 3.8992112273612545, + "tokens_seen": 556007424 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008398330926015086, + "loss": 0.0824, + "theoretical_loss": 3.8990183023734044, + "tokens_seen": 556269568 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008397528486599262, + "loss": 0.0818, + "theoretical_loss": 3.8988254937236952, + "tokens_seen": 556531712 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008396726047183438, + "loss": 0.0819, + "theoretical_loss": 3.8986328012872233, + "tokens_seen": 556793856 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0018912331433966756, + "objective/train/docs_used": 208740, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6763869524002075, + "objective/train/original_loss": 1.6763869524002075, + "objective/train/theoretical_loss": 3.8984402249392778, + "objective/train/tokens_used": 577516000, + "objective/train/value_avg": -0.00763702392578125, + "objective/train/value_loss": 0.00018121585890185088, + "objective/train/value_max": -0.00034880638122558594, + "objective/train/value_min": -0.187255859375, + "objective/train/value_reward_corr": 0.5036202731375501, + "objective/train/value_std": 0.00873565673828125, + "objective/train/weight_avg": 1.0019742250442505, + "objective/train/weighted_lm_loss": 1.6807695627212524, + "objective/train/weights_max": 1.0873464345932007, + "objective/train/weights_min": 0.368623286485672, + "theoretical_loss": 3.8984402249392778, + "tokens_seen": 557056000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008395923607767613, + "loss": 0.0794, + "theoretical_loss": 3.8984402249392778, + "tokens_seen": 557056000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000839512116835179, + "loss": 0.0784, + "theoretical_loss": 3.8982477645553395, + "tokens_seen": 557318144 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008394318728935966, + "loss": 0.0799, + "theoretical_loss": 3.898055420011082, + "tokens_seen": 557580288 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008393516289520141, + "loss": 0.0782, + "theoretical_loss": 3.8978631911823705, + "tokens_seen": 557842432 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008392713850104318, + "loss": 0.0802, + "theoretical_loss": 3.8976710779452612, + "tokens_seen": 558104576 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008391911410688493, + "loss": 0.0815, + "theoretical_loss": 3.8974790801760015, + "tokens_seen": 558366720 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008391108971272669, + "loss": 0.0841, + "theoretical_loss": 3.897287197751029, + "tokens_seen": 558628864 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008390306531856845, + "loss": 0.0831, + "theoretical_loss": 3.897095430546971, + "tokens_seen": 558891008 + }, + { + "epoch": 0.17, + "learning_rate": 0.000838950409244102, + "loss": 0.0834, + "theoretical_loss": 3.896903778440646, + "tokens_seen": 559153152 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008388701653025197, + "loss": 0.0823, + "theoretical_loss": 3.896712241309061, + "tokens_seen": 559415296 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008387899213609372, + "loss": 0.0833, + "theoretical_loss": 3.896520819029411, + "tokens_seen": 559677440 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008387096774193549, + "loss": 0.082, + "theoretical_loss": 3.896329511479082, + "tokens_seen": 559939584 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008386294334777725, + "loss": 0.0792, + "theoretical_loss": 3.8961383185356455, + "tokens_seen": 560201728 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0007566718268208206, + "objective/train/docs_used": 209977, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.573256254196167, + "objective/train/original_loss": 1.573256015777588, + "objective/train/theoretical_loss": 3.8960427650033047, + "objective/train/tokens_used": 580792800, + "objective/train/value_avg": -0.00795745849609375, + "objective/train/value_loss": 0.00029120329418219626, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.6328125, + "objective/train/value_reward_corr": 0.6374020027425222, + "objective/train/value_std": 0.012054443359375, + "objective/train/weight_avg": 1.0008906126022339, + "objective/train/weighted_lm_loss": 1.5738105773925781, + "objective/train/weights_max": 1.2740330696105957, + "objective/train/weights_min": 0.3893653154373169, + "theoretical_loss": 3.8960427650033047, + "tokens_seen": 560332800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008385491895361901, + "loss": 0.0794, + "theoretical_loss": 3.895947240076862, + "tokens_seen": 560463872 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008384689455946076, + "loss": 0.0784, + "theoretical_loss": 3.895756275980681, + "tokens_seen": 560726016 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008383887016530252, + "loss": 0.0817, + "theoretical_loss": 3.895565426125237, + "tokens_seen": 560988160 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008383084577114428, + "loss": 0.0812, + "theoretical_loss": 3.8953746903888513, + "tokens_seen": 561250304 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008382282137698603, + "loss": 0.0787, + "theoretical_loss": 3.895184068650033, + "tokens_seen": 561512448 + }, + { + "epoch": 0.17, + "learning_rate": 0.000838147969828278, + "loss": 0.0817, + "theoretical_loss": 3.8949935607874764, + "tokens_seen": 561774592 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008380677258866955, + "loss": 0.0806, + "theoretical_loss": 3.8948031666800613, + "tokens_seen": 562036736 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008379874819451132, + "loss": 0.0838, + "theoretical_loss": 3.8946128862068528, + "tokens_seen": 562298880 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008379072380035308, + "loss": 0.0804, + "theoretical_loss": 3.8944227192471006, + "tokens_seen": 562561024 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008378269940619483, + "loss": 0.082, + "theoretical_loss": 3.8942326656802395, + "tokens_seen": 562823168 + }, + { + "epoch": 0.17, + "learning_rate": 0.000837746750120366, + "loss": 0.0829, + "theoretical_loss": 3.894042725385888, + "tokens_seen": 563085312 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008376665061787835, + "loss": 0.0837, + "theoretical_loss": 3.893852898243849, + "tokens_seen": 563347456 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0011895080097019672, + "objective/train/docs_used": 211161, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.739345908164978, + "objective/train/original_loss": 1.7393460273742676, + "objective/train/theoretical_loss": 3.8936631841341076, + "objective/train/tokens_used": 584069600, + "objective/train/value_avg": -0.009521484375, + "objective/train/value_loss": 0.000342073617503047, + "objective/train/value_max": -0.00018525123596191406, + "objective/train/value_min": -0.6494140625, + "objective/train/value_reward_corr": 0.6713985854388784, + "objective/train/value_std": 0.0169830322265625, + "objective/train/weight_avg": 1.0013439655303955, + "objective/train/weighted_lm_loss": 1.7415797710418701, + "objective/train/weights_max": 1.3866688013076782, + "objective/train/weights_min": 0.3763507008552551, + "theoretical_loss": 3.8936631841341076, + "tokens_seen": 563609600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008375862622372011, + "loss": 0.0801, + "theoretical_loss": 3.8936631841341076, + "tokens_seen": 563609600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008375060182956187, + "loss": 0.0814, + "theoretical_loss": 3.893473582936833, + "tokens_seen": 563871744 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008374257743540363, + "loss": 0.083, + "theoretical_loss": 3.8932840945323774, + "tokens_seen": 564133888 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008373455304124538, + "loss": 0.0809, + "theoretical_loss": 3.8930947188012737, + "tokens_seen": 564396032 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008372652864708715, + "loss": 0.0784, + "theoretical_loss": 3.8929054556242377, + "tokens_seen": 564658176 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008371850425292891, + "loss": 0.0789, + "theoretical_loss": 3.892716304882167, + "tokens_seen": 564920320 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008371047985877066, + "loss": 0.0815, + "theoretical_loss": 3.892527266456141, + "tokens_seen": 565182464 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008370245546461243, + "loss": 0.0829, + "theoretical_loss": 3.8923383402274174, + "tokens_seen": 565444608 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008369443107045418, + "loss": 0.0822, + "theoretical_loss": 3.8921495260774375, + "tokens_seen": 565706752 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008368640667629594, + "loss": 0.0811, + "theoretical_loss": 3.8919608238878216, + "tokens_seen": 565968896 + }, + { + "epoch": 0.17, + "learning_rate": 0.000836783822821377, + "loss": 0.0774, + "theoretical_loss": 3.891772233540369, + "tokens_seen": 566231040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008367035788797946, + "loss": 0.08, + "theoretical_loss": 3.8915837549170584, + "tokens_seen": 566493184 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008366233349382121, + "loss": 0.0794, + "theoretical_loss": 3.89139538790005, + "tokens_seen": 566755328 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0005871827597729862, + "objective/train/docs_used": 212374, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6017706394195557, + "objective/train/original_loss": 1.6017706394195557, + "objective/train/theoretical_loss": 3.8913012462071315, + "objective/train/tokens_used": 587346400, + "objective/train/value_avg": -0.00801849365234375, + "objective/train/value_loss": 0.00017772662977222353, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.368408203125, + "objective/train/value_reward_corr": 0.6835943969490854, + "objective/train/value_std": 0.0125732421875, + "objective/train/weight_avg": 1.0006738901138306, + "objective/train/weighted_lm_loss": 1.6030828952789307, + "objective/train/weights_max": 1.1462258100509644, + "objective/train/weights_min": 0.7185884118080139, + "theoretical_loss": 3.8913012462071315, + "tokens_seen": 566886400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008365430909966297, + "loss": 0.0827, + "theoretical_loss": 3.8912071323716795, + "tokens_seen": 567017472 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008364628470550474, + "loss": 0.0814, + "theoretical_loss": 3.8910189882144626, + "tokens_seen": 567279616 + }, + { + "epoch": 0.17, + "learning_rate": 0.000836382603113465, + "loss": 0.0837, + "theoretical_loss": 3.8908309553110936, + "tokens_seen": 567541760 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008363023591718826, + "loss": 0.0806, + "theoretical_loss": 3.8906430335444426, + "tokens_seen": 567803904 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008362221152303001, + "loss": 0.0813, + "theoretical_loss": 3.8904552227975593, + "tokens_seen": 568066048 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008361418712887178, + "loss": 0.082, + "theoretical_loss": 3.8902675229536685, + "tokens_seen": 568328192 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008360616273471353, + "loss": 0.0825, + "theoretical_loss": 3.8900799338961725, + "tokens_seen": 568590336 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008359813834055528, + "loss": 0.0832, + "theoretical_loss": 3.8898924555086496, + "tokens_seen": 568852480 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008359011394639705, + "loss": 0.0811, + "theoretical_loss": 3.8897050876748542, + "tokens_seen": 569114624 + }, + { + "epoch": 0.17, + "learning_rate": 0.000835820895522388, + "loss": 0.0825, + "theoretical_loss": 3.8895178302787166, + "tokens_seen": 569376768 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008357406515808057, + "loss": 0.0817, + "theoretical_loss": 3.8893306832043404, + "tokens_seen": 569638912 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008356604076392233, + "loss": 0.0835, + "theoretical_loss": 3.8891436463360076, + "tokens_seen": 569901056 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.0002838138898368925, + "objective/train/docs_used": 213119, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5427865982055664, + "objective/train/original_loss": 1.5427868366241455, + "objective/train/theoretical_loss": 3.8889567195581716, + "objective/train/tokens_used": 590623200, + "objective/train/value_avg": -0.007099151611328125, + "objective/train/value_loss": 0.0001902274671010673, + "objective/train/value_max": -0.00019109249114990234, + "objective/train/value_min": -0.2220458984375, + "objective/train/value_reward_corr": 0.6440366057374965, + "objective/train/value_std": 0.00963592529296875, + "objective/train/weight_avg": 0.9998065829277039, + "objective/train/weighted_lm_loss": 1.543387532234192, + "objective/train/weights_max": 1.1371427774429321, + "objective/train/weights_min": 0.6790033578872681, + "theoretical_loss": 3.8889567195581716, + "tokens_seen": 570163200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008355801636976409, + "loss": 0.082, + "theoretical_loss": 3.8889567195581716, + "tokens_seen": 570163200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008354999197560584, + "loss": 0.0789, + "theoretical_loss": 3.8887699027554614, + "tokens_seen": 570425344 + }, + { + "epoch": 0.17, + "learning_rate": 0.000835419675814476, + "loss": 0.0805, + "theoretical_loss": 3.8885831958126786, + "tokens_seen": 570687488 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008353394318728936, + "loss": 0.0826, + "theoretical_loss": 3.8883965986148015, + "tokens_seen": 570949632 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008352591879313111, + "loss": 0.0792, + "theoretical_loss": 3.888210111046978, + "tokens_seen": 571211776 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008351789439897288, + "loss": 0.0824, + "theoretical_loss": 3.8880237329945295, + "tokens_seen": 571473920 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008350987000481463, + "loss": 0.0839, + "theoretical_loss": 3.887837464342952, + "tokens_seen": 571736064 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008350184561065641, + "loss": 0.0831, + "theoretical_loss": 3.8876513049779113, + "tokens_seen": 571998208 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008349382121649816, + "loss": 0.083, + "theoretical_loss": 3.887465254785246, + "tokens_seen": 572260352 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008348579682233991, + "loss": 0.0838, + "theoretical_loss": 3.887279313650967, + "tokens_seen": 572522496 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008347777242818168, + "loss": 0.0834, + "theoretical_loss": 3.8870934814612546, + "tokens_seen": 572784640 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008346974803402343, + "loss": 0.0786, + "theoretical_loss": 3.886907758102461, + "tokens_seen": 573046784 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008346172363986519, + "loss": 0.0831, + "theoretical_loss": 3.8867221434611094, + "tokens_seen": 573308928 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.00023172213695943356, + "objective/train/docs_used": 214228, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.640936255455017, + "objective/train/original_loss": 1.640936255455017, + "objective/train/theoretical_loss": 3.8866293768740587, + "objective/train/tokens_used": 593900000, + "objective/train/value_avg": -0.00962066650390625, + "objective/train/value_loss": 0.00022791478841099888, + "objective/train/value_max": -0.0002434253692626953, + "objective/train/value_min": -0.2286376953125, + "objective/train/value_reward_corr": 0.7870548857924071, + "objective/train/value_std": 0.0165863037109375, + "objective/train/weight_avg": 1.0003434419631958, + "objective/train/weighted_lm_loss": 1.641239881515503, + "objective/train/weights_max": 1.2273142337799072, + "objective/train/weights_min": 0.6072003245353699, + "theoretical_loss": 3.8866293768740587, + "tokens_seen": 573440000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008345369924570695, + "loss": 0.0818, + "theoretical_loss": 3.8865366374238914, + "tokens_seen": 573571072 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008344567485154871, + "loss": 0.0821, + "theoretical_loss": 3.88635123987767, + "tokens_seen": 573833216 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008343765045739046, + "loss": 0.0825, + "theoretical_loss": 3.8861659507094766, + "tokens_seen": 574095360 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008342962606323223, + "loss": 0.0816, + "theoretical_loss": 3.885980769806513, + "tokens_seen": 574357504 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008342160166907399, + "loss": 0.0846, + "theoretical_loss": 3.8857956970561487, + "tokens_seen": 574619648 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008341357727491574, + "loss": 0.0788, + "theoretical_loss": 3.8856107323459215, + "tokens_seen": 574881792 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008340555288075751, + "loss": 0.0822, + "theoretical_loss": 3.8854258755635387, + "tokens_seen": 575143936 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008339752848659926, + "loss": 0.0831, + "theoretical_loss": 3.885241126596874, + "tokens_seen": 575406080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008338950409244103, + "loss": 0.0803, + "theoretical_loss": 3.885056485333969, + "tokens_seen": 575668224 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008338147969828278, + "loss": 0.083, + "theoretical_loss": 3.884871951663034, + "tokens_seen": 575930368 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008337345530412454, + "loss": 0.078, + "theoretical_loss": 3.8846875254724442, + "tokens_seen": 576192512 + }, + { + "epoch": 0.17, + "learning_rate": 0.000833654309099663, + "loss": 0.08, + "theoretical_loss": 3.8845032066507414, + "tokens_seen": 576454656 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0014360037166625261, + "objective/train/docs_used": 215374, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6849347352981567, + "objective/train/original_loss": 1.6849347352981567, + "objective/train/theoretical_loss": 3.884318995086635, + "objective/train/tokens_used": 597176800, + "objective/train/value_avg": -0.00965118408203125, + "objective/train/value_loss": 0.00028802509768866, + "objective/train/value_max": -0.0003101825714111328, + "objective/train/value_min": -0.41748046875, + "objective/train/value_reward_corr": 0.6993208086391764, + "objective/train/value_std": 0.015655517578125, + "objective/train/weight_avg": 1.0015720129013062, + "objective/train/weighted_lm_loss": 1.6866198778152466, + "objective/train/weights_max": 1.1339787244796753, + "objective/train/weights_min": 0.41069987416267395, + "theoretical_loss": 3.884318995086635, + "tokens_seen": 576716800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008335740651580805, + "loss": 0.0794, + "theoretical_loss": 3.884318995086635, + "tokens_seen": 576716800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008334938212164982, + "loss": 0.0816, + "theoretical_loss": 3.8841348906689985, + "tokens_seen": 576978944 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008334135772749158, + "loss": 0.0831, + "theoretical_loss": 3.8839508932868725, + "tokens_seen": 577241088 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008333333333333334, + "loss": 0.0806, + "theoretical_loss": 3.8837670028294626, + "tokens_seen": 577503232 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008332530893917509, + "loss": 0.0822, + "theoretical_loss": 3.883583219186138, + "tokens_seen": 577765376 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008331728454501686, + "loss": 0.0824, + "theoretical_loss": 3.8833995422464342, + "tokens_seen": 578027520 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008330926015085861, + "loss": 0.0821, + "theoretical_loss": 3.88321597190005, + "tokens_seen": 578289664 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008330123575670036, + "loss": 0.0824, + "theoretical_loss": 3.883032508036848, + "tokens_seen": 578551808 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008329321136254213, + "loss": 0.0797, + "theoretical_loss": 3.882849150546856, + "tokens_seen": 578813952 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008328518696838388, + "loss": 0.0833, + "theoretical_loss": 3.8826658993202625, + "tokens_seen": 579076096 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008327716257422565, + "loss": 0.0817, + "theoretical_loss": 3.8824827542474214, + "tokens_seen": 579338240 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008326913818006741, + "loss": 0.0822, + "theoretical_loss": 3.882299715218848, + "tokens_seen": 579600384 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008326111378590917, + "loss": 0.0817, + "theoretical_loss": 3.8821167821252196, + "tokens_seen": 579862528 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0010556569322943687, + "objective/train/docs_used": 216668, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7441470623016357, + "objective/train/original_loss": 1.7441470623016357, + "objective/train/theoretical_loss": 3.8820253552698913, + "objective/train/tokens_used": 600453600, + "objective/train/value_avg": -0.01125335693359375, + "objective/train/value_loss": 0.0008476045331917703, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.91845703125, + "objective/train/value_reward_corr": 0.5798144182694726, + "objective/train/value_std": 0.020416259765625, + "objective/train/weight_avg": 1.0014194250106812, + "objective/train/weighted_lm_loss": 1.7468537092208862, + "objective/train/weights_max": 1.7236318588256836, + "objective/train/weights_min": 0.3710477352142334, + "theoretical_loss": 3.8820253552698913, + "tokens_seen": 579993600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008325308939175093, + "loss": 0.083, + "theoretical_loss": 3.8819339548573772, + "tokens_seen": 580124672 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008324506499759268, + "loss": 0.0819, + "theoretical_loss": 3.881751233306322, + "tokens_seen": 580386816 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008323704060343444, + "loss": 0.0818, + "theoretical_loss": 3.881568617363218, + "tokens_seen": 580648960 + }, + { + "epoch": 0.18, + "learning_rate": 0.000832290162092762, + "loss": 0.0827, + "theoretical_loss": 3.881386106919389, + "tokens_seen": 580911104 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008322099181511796, + "loss": 0.083, + "theoretical_loss": 3.88120370186632, + "tokens_seen": 581173248 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008321296742095971, + "loss": 0.0806, + "theoretical_loss": 3.881021402095657, + "tokens_seen": 581435392 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008320494302680149, + "loss": 0.081, + "theoretical_loss": 3.880839207499205, + "tokens_seen": 581697536 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008319691863264324, + "loss": 0.0823, + "theoretical_loss": 3.880657117968931, + "tokens_seen": 581959680 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008318889423848499, + "loss": 0.0846, + "theoretical_loss": 3.880475133396959, + "tokens_seen": 582221824 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008318086984432676, + "loss": 0.0817, + "theoretical_loss": 3.8802932536755748, + "tokens_seen": 582483968 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008317284545016851, + "loss": 0.0835, + "theoretical_loss": 3.880111478697221, + "tokens_seen": 582746112 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008316482105601027, + "loss": 0.0851, + "theoretical_loss": 3.8799298083545004, + "tokens_seen": 583008256 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0006918111466802657, + "objective/train/docs_used": 217803, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7163397073745728, + "objective/train/original_loss": 1.7163399457931519, + "objective/train/theoretical_loss": 3.879748242540173, + "objective/train/tokens_used": 603730400, + "objective/train/value_avg": -0.0118408203125, + "objective/train/value_loss": 0.00036437122616916895, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.70654296875, + "objective/train/value_reward_corr": 0.6686834210191256, + "objective/train/value_std": 0.0202178955078125, + "objective/train/weight_avg": 1.0008691549301147, + "objective/train/weighted_lm_loss": 1.7178725004196167, + "objective/train/weights_max": 1.8649041652679443, + "objective/train/weights_min": 0.3742033541202545, + "theoretical_loss": 3.879748242540173, + "tokens_seen": 583270400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008315679666185203, + "loss": 0.0817, + "theoretical_loss": 3.879748242540173, + "tokens_seen": 583270400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008314877226769379, + "loss": 0.084, + "theoretical_loss": 3.8795667811471573, + "tokens_seen": 583532544 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008314074787353555, + "loss": 0.0824, + "theoretical_loss": 3.8793854240685306, + "tokens_seen": 583794688 + }, + { + "epoch": 0.18, + "learning_rate": 0.000831327234793773, + "loss": 0.0821, + "theoretical_loss": 3.879204171197525, + "tokens_seen": 584056832 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008312469908521907, + "loss": 0.0813, + "theoretical_loss": 3.879023022427533, + "tokens_seen": 584318976 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008311667469106083, + "loss": 0.0818, + "theoretical_loss": 3.878841977652101, + "tokens_seen": 584581120 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008310865029690259, + "loss": 0.0819, + "theoretical_loss": 3.8786610367649343, + "tokens_seen": 584843264 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008310062590274434, + "loss": 0.0829, + "theoretical_loss": 3.8784801996598928, + "tokens_seen": 585105408 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008309260150858611, + "loss": 0.0824, + "theoretical_loss": 3.878299466230992, + "tokens_seen": 585367552 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008308457711442786, + "loss": 0.0813, + "theoretical_loss": 3.8781188363724057, + "tokens_seen": 585629696 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008307655272026961, + "loss": 0.0813, + "theoretical_loss": 3.87793830997846, + "tokens_seen": 585891840 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008306852832611138, + "loss": 0.0807, + "theoretical_loss": 3.8777578869436384, + "tokens_seen": 586153984 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008306050393195313, + "loss": 0.0822, + "theoretical_loss": 3.8775775671625765, + "tokens_seen": 586416128 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0013700941344723105, + "objective/train/docs_used": 218927, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.789874792098999, + "objective/train/original_loss": 1.789874792098999, + "objective/train/theoretical_loss": 3.8774874459593223, + "objective/train/tokens_used": 607007200, + "objective/train/value_avg": -0.00925445556640625, + "objective/train/value_loss": 0.0004925947869196534, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.70556640625, + "objective/train/value_reward_corr": 0.5972247492646405, + "objective/train/value_std": 0.0181732177734375, + "objective/train/weight_avg": 1.0015876293182373, + "objective/train/weighted_lm_loss": 1.7908934354782104, + "objective/train/weights_max": 1.9563560485839844, + "objective/train/weights_min": 0.3702559173107147, + "theoretical_loss": 3.8774874459593223, + "tokens_seen": 586547200 + }, + { + "epoch": 0.18, + "learning_rate": 0.000830524795377949, + "loss": 0.0824, + "theoretical_loss": 3.8773973505300674, + "tokens_seen": 586678272 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008304445514363666, + "loss": 0.0803, + "theoretical_loss": 3.877217236941055, + "tokens_seen": 586940416 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008303643074947842, + "loss": 0.0836, + "theoretical_loss": 3.877037226290641, + "tokens_seen": 587202560 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008302840635532017, + "loss": 0.0801, + "theoretical_loss": 3.8768573184740767, + "tokens_seen": 587464704 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008302038196116194, + "loss": 0.0806, + "theoretical_loss": 3.87667751338677, + "tokens_seen": 587726848 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008301235756700369, + "loss": 0.0803, + "theoretical_loss": 3.8764978109242794, + "tokens_seen": 587988992 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008300433317284545, + "loss": 0.0822, + "theoretical_loss": 3.8763182109823173, + "tokens_seen": 588251136 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008299630877868721, + "loss": 0.0763, + "theoretical_loss": 3.8761387134567475, + "tokens_seen": 588513280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008298828438452896, + "loss": 0.0813, + "theoretical_loss": 3.8759593182435874, + "tokens_seen": 588775424 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008298025999037074, + "loss": 0.0849, + "theoretical_loss": 3.875780025239005, + "tokens_seen": 589037568 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008297223559621249, + "loss": 0.0797, + "theoretical_loss": 3.8756008343393202, + "tokens_seen": 589299712 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008296421120205425, + "loss": 0.0835, + "theoretical_loss": 3.8754217454410043, + "tokens_seen": 589561856 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -6.183000368764624e-05, + "objective/train/docs_used": 220165, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6896483898162842, + "objective/train/original_loss": 1.6896482706069946, + "objective/train/theoretical_loss": 3.875242758440679, + "objective/train/tokens_used": 610284000, + "objective/train/value_avg": -0.0089569091796875, + "objective/train/value_loss": 0.00046085796202532947, + "objective/train/value_max": -0.0002415180206298828, + "objective/train/value_min": -0.673828125, + "objective/train/value_reward_corr": 0.5928429396910613, + "objective/train/value_std": 0.015838623046875, + "objective/train/weight_avg": 1.0001388788223267, + "objective/train/weighted_lm_loss": 1.6889548301696777, + "objective/train/weights_max": 1.2902297973632812, + "objective/train/weights_min": 0.36886030435562134, + "theoretical_loss": 3.875242758440679, + "tokens_seen": 589824000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008295618680789601, + "loss": 0.0821, + "theoretical_loss": 3.875242758440679, + "tokens_seen": 589824000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008294816241373776, + "loss": 0.0809, + "theoretical_loss": 3.875063873235117, + "tokens_seen": 590086144 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008294013801957952, + "loss": 0.0854, + "theoretical_loss": 3.874885089721242, + "tokens_seen": 590348288 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008293211362542128, + "loss": 0.0782, + "theoretical_loss": 3.8747064077961264, + "tokens_seen": 590610432 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008292408923126304, + "loss": 0.079, + "theoretical_loss": 3.874527827356994, + "tokens_seen": 590872576 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008291606483710479, + "loss": 0.0832, + "theoretical_loss": 3.8743493483012172, + "tokens_seen": 591134720 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008290804044294657, + "loss": 0.0806, + "theoretical_loss": 3.874170970526317, + "tokens_seen": 591396864 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008290001604878832, + "loss": 0.0826, + "theoretical_loss": 3.873992693929965, + "tokens_seen": 591659008 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008289199165463007, + "loss": 0.0782, + "theoretical_loss": 3.8738145184099797, + "tokens_seen": 591921152 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008288396726047184, + "loss": 0.0798, + "theoretical_loss": 3.8736364438643296, + "tokens_seen": 592183296 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008287594286631359, + "loss": 0.0833, + "theoretical_loss": 3.87345847019113, + "tokens_seen": 592445440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008286791847215536, + "loss": 0.079, + "theoretical_loss": 3.8732805972886446, + "tokens_seen": 592707584 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008285989407799711, + "loss": 0.0794, + "theoretical_loss": 3.873102825055285, + "tokens_seen": 592969728 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0022287839092314243, + "objective/train/docs_used": 221319, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6529507637023926, + "objective/train/original_loss": 1.6529508829116821, + "objective/train/theoretical_loss": 3.873013976657821, + "objective/train/tokens_used": 613560800, + "objective/train/value_avg": -0.01081085205078125, + "objective/train/value_loss": 0.0005324392695911229, + "objective/train/value_max": -0.00033926963806152344, + "objective/train/value_min": -0.43798828125, + "objective/train/value_reward_corr": 0.6031346357258855, + "objective/train/value_std": 0.0146331787109375, + "objective/train/weight_avg": 1.0024420022964478, + "objective/train/weighted_lm_loss": 1.6559507846832275, + "objective/train/weights_max": 1.158463716506958, + "objective/train/weights_min": 0.2581728398799896, + "theoretical_loss": 3.873013976657821, + "tokens_seen": 593100800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008285186968383887, + "loss": 0.0797, + "theoretical_loss": 3.87292515338961, + "tokens_seen": 593231872 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008284384528968063, + "loss": 0.0804, + "theoretical_loss": 3.872747582190324, + "tokens_seen": 593494016 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008283582089552239, + "loss": 0.0818, + "theoretical_loss": 3.8725701113562794, + "tokens_seen": 593756160 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008282779650136415, + "loss": 0.0793, + "theoretical_loss": 3.8723927407864758, + "tokens_seen": 594018304 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008281977210720591, + "loss": 0.0818, + "theoretical_loss": 3.8722154703800573, + "tokens_seen": 594280448 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008281174771304767, + "loss": 0.0825, + "theoretical_loss": 3.8720383000363148, + "tokens_seen": 594542592 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008280372331888942, + "loss": 0.081, + "theoretical_loss": 3.871861229654684, + "tokens_seen": 594804736 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008279569892473119, + "loss": 0.0779, + "theoretical_loss": 3.8716842591347476, + "tokens_seen": 595066880 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008278767453057294, + "loss": 0.0822, + "theoretical_loss": 3.871507388376231, + "tokens_seen": 595329024 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008277965013641469, + "loss": 0.0796, + "theoretical_loss": 3.871330617279006, + "tokens_seen": 595591168 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008277162574225646, + "loss": 0.0815, + "theoretical_loss": 3.8711539457430897, + "tokens_seen": 595853312 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008276360134809821, + "loss": 0.0797, + "theoretical_loss": 3.87097737366864, + "tokens_seen": 596115456 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.00167937483638525, + "objective/train/docs_used": 222574, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.542468547821045, + "objective/train/original_loss": 1.5424683094024658, + "objective/train/theoretical_loss": 3.870800900955963, + "objective/train/tokens_used": 616837600, + "objective/train/value_avg": -0.0085296630859375, + "objective/train/value_loss": 0.0005001907702535391, + "objective/train/value_max": -0.00029587745666503906, + "objective/train/value_min": -0.86962890625, + "objective/train/value_reward_corr": 0.6588531620632475, + "objective/train/value_std": 0.0174713134765625, + "objective/train/weight_avg": 1.0018889904022217, + "objective/train/weighted_lm_loss": 1.5452805757522583, + "objective/train/weights_max": 1.3651901483535767, + "objective/train/weights_min": 0.37030255794525146, + "theoretical_loss": 3.870800900955963, + "tokens_seen": 596377600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008275557695393999, + "loss": 0.0794, + "theoretical_loss": 3.870800900955963, + "tokens_seen": 596377600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008274755255978174, + "loss": 0.0816, + "theoretical_loss": 3.8706245275055062, + "tokens_seen": 596639744 + }, + { + "epoch": 0.18, + "learning_rate": 0.000827395281656235, + "loss": 0.08, + "theoretical_loss": 3.8704482532178606, + "tokens_seen": 596901888 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008273150377146526, + "loss": 0.0839, + "theoretical_loss": 3.8702720779937607, + "tokens_seen": 597164032 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008272347937730702, + "loss": 0.0793, + "theoretical_loss": 3.8700960017340833, + "tokens_seen": 597426176 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008271545498314877, + "loss": 0.08, + "theoretical_loss": 3.8699200243398493, + "tokens_seen": 597688320 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008270743058899053, + "loss": 0.0848, + "theoretical_loss": 3.8697441457122204, + "tokens_seen": 597950464 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008269940619483229, + "loss": 0.0823, + "theoretical_loss": 3.8695683657525013, + "tokens_seen": 598212608 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008269138180067404, + "loss": 0.0826, + "theoretical_loss": 3.8693926843621376, + "tokens_seen": 598474752 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008268335740651582, + "loss": 0.0805, + "theoretical_loss": 3.8692171014427177, + "tokens_seen": 598736896 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008267533301235757, + "loss": 0.0796, + "theoretical_loss": 3.86904161689597, + "tokens_seen": 598999040 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008266730861819933, + "loss": 0.0832, + "theoretical_loss": 3.868866230623766, + "tokens_seen": 599261184 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008265928422404109, + "loss": 0.0809, + "theoretical_loss": 3.8686909425281146, + "tokens_seen": 599523328 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0010342653840780258, + "objective/train/docs_used": 223680, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6975395679473877, + "objective/train/original_loss": 1.6975395679473877, + "objective/train/theoretical_loss": 3.868603335265913, + "objective/train/tokens_used": 620114400, + "objective/train/value_avg": -0.010101318359375, + "objective/train/value_loss": 0.00020030724408570677, + "objective/train/value_max": -0.00017404556274414062, + "objective/train/value_min": -0.347900390625, + "objective/train/value_reward_corr": 0.8020624486643968, + "objective/train/value_std": 0.0178985595703125, + "objective/train/weight_avg": 1.00112783908844, + "objective/train/weighted_lm_loss": 1.6995559930801392, + "objective/train/weights_max": 1.3582996129989624, + "objective/train/weights_min": 0.39774826169013977, + "theoretical_loss": 3.868603335265913, + "tokens_seen": 599654400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008265125982988284, + "loss": 0.0814, + "theoretical_loss": 3.8685157525111684, + "tokens_seen": 599785472 + }, + { + "epoch": 0.18, + "learning_rate": 0.000826432354357246, + "loss": 0.0816, + "theoretical_loss": 3.8683406604752184, + "tokens_seen": 600047616 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008263521104156636, + "loss": 0.0829, + "theoretical_loss": 3.868165666322696, + "tokens_seen": 600309760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008262718664740812, + "loss": 0.0828, + "theoretical_loss": 3.8679907699561733, + "tokens_seen": 600571904 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008261916225324988, + "loss": 0.0822, + "theoretical_loss": 3.86781597127836, + "tokens_seen": 600834048 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008261113785909165, + "loss": 0.0826, + "theoretical_loss": 3.867641270192107, + "tokens_seen": 601096192 + }, + { + "epoch": 0.18, + "learning_rate": 0.000826031134649334, + "loss": 0.0834, + "theoretical_loss": 3.867466666600402, + "tokens_seen": 601358336 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008259508907077516, + "loss": 0.0819, + "theoretical_loss": 3.867292160406373, + "tokens_seen": 601620480 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008258706467661692, + "loss": 0.0798, + "theoretical_loss": 3.8671177515132857, + "tokens_seen": 601882624 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008257904028245867, + "loss": 0.0824, + "theoretical_loss": 3.866943439824545, + "tokens_seen": 602144768 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008257101588830044, + "loss": 0.0801, + "theoretical_loss": 3.8667692252436914, + "tokens_seen": 602406912 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008256299149414219, + "loss": 0.0822, + "theoretical_loss": 3.8665951076744056, + "tokens_seen": 602669056 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -8.796821930445731e-05, + "objective/train/docs_used": 224736, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5687521696090698, + "objective/train/original_loss": 1.5687522888183594, + "objective/train/theoretical_loss": 3.866421087020504, + "objective/train/tokens_used": 623391200, + "objective/train/value_avg": -0.00591278076171875, + "objective/train/value_loss": 0.00038589219911955297, + "objective/train/value_max": -0.00026535987854003906, + "objective/train/value_min": -0.270263671875, + "objective/train/value_reward_corr": 0.5731155725865233, + "objective/train/value_std": 0.00867462158203125, + "objective/train/weight_avg": 1.0000841617584229, + "objective/train/weighted_lm_loss": 1.569719672203064, + "objective/train/weights_max": 1.1083186864852905, + "objective/train/weights_min": 0.36869922280311584, + "theoretical_loss": 3.866421087020504, + "tokens_seen": 602931200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008255496709998395, + "loss": 0.0794, + "theoretical_loss": 3.866421087020504, + "tokens_seen": 602931200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008254694270582571, + "loss": 0.0768, + "theoretical_loss": 3.8662471631859407, + "tokens_seen": 603193344 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008253891831166747, + "loss": 0.0821, + "theoretical_loss": 3.866073336074807, + "tokens_seen": 603455488 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008253089391750923, + "loss": 0.0787, + "theoretical_loss": 3.8658996055913297, + "tokens_seen": 603717632 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008252286952335099, + "loss": 0.0838, + "theoretical_loss": 3.8657259716398737, + "tokens_seen": 603979776 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008251484512919275, + "loss": 0.0801, + "theoretical_loss": 3.8655524341249388, + "tokens_seen": 604241920 + }, + { + "epoch": 0.18, + "learning_rate": 0.000825068207350345, + "loss": 0.0827, + "theoretical_loss": 3.865378992951161, + "tokens_seen": 604504064 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008249879634087627, + "loss": 0.0812, + "theoretical_loss": 3.865205648023311, + "tokens_seen": 604766208 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008249077194671802, + "loss": 0.0821, + "theoretical_loss": 3.8650323992462963, + "tokens_seen": 605028352 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008248274755255978, + "loss": 0.0807, + "theoretical_loss": 3.8648592465251586, + "tokens_seen": 605290496 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008247472315840154, + "loss": 0.0817, + "theoretical_loss": 3.864686189765075, + "tokens_seen": 605552640 + }, + { + "epoch": 0.18, + "learning_rate": 0.000824666987642433, + "loss": 0.082, + "theoretical_loss": 3.864513228871357, + "tokens_seen": 605814784 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008245867437008507, + "loss": 0.0809, + "theoretical_loss": 3.8643403637494504, + "tokens_seen": 606076928 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0005140507710166276, + "objective/train/docs_used": 225863, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.601678729057312, + "objective/train/original_loss": 1.6016786098480225, + "objective/train/theoretical_loss": 3.8642539670734144, + "objective/train/tokens_used": 626668000, + "objective/train/value_avg": -0.006038665771484375, + "objective/train/value_loss": 9.395569941261783e-05, + "objective/train/value_max": -0.00026535987854003906, + "objective/train/value_min": -0.2125244140625, + "objective/train/value_reward_corr": 0.690265895238366, + "objective/train/value_std": 0.0079193115234375, + "objective/train/weight_avg": 1.0005604028701782, + "objective/train/weighted_lm_loss": 1.6026065349578857, + "objective/train/weights_max": 1.0788187980651855, + "objective/train/weights_min": 0.8205270171165466, + "theoretical_loss": 3.8642539670734144, + "tokens_seen": 606208000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008245064997592682, + "loss": 0.0797, + "theoretical_loss": 3.8641675943049343, + "tokens_seen": 606339072 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008244262558176858, + "loss": 0.0831, + "theoretical_loss": 3.863994920443523, + "tokens_seen": 606601216 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008243460118761034, + "loss": 0.0815, + "theoretical_loss": 3.8638223420710647, + "tokens_seen": 606863360 + }, + { + "epoch": 0.18, + "learning_rate": 0.000824265767934521, + "loss": 0.079, + "theoretical_loss": 3.863649859093538, + "tokens_seen": 607125504 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008241855239929385, + "loss": 0.0795, + "theoretical_loss": 3.863477471417059, + "tokens_seen": 607387648 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008241052800513561, + "loss": 0.0821, + "theoretical_loss": 3.8633051789478734, + "tokens_seen": 607649792 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008240250361097737, + "loss": 0.0841, + "theoretical_loss": 3.8631329815923605, + "tokens_seen": 607911936 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008239447921681912, + "loss": 0.079, + "theoretical_loss": 3.862960879257032, + "tokens_seen": 608174080 + }, + { + "epoch": 0.18, + "learning_rate": 0.000823864548226609, + "loss": 0.0832, + "theoretical_loss": 3.8627888718485313, + "tokens_seen": 608436224 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008237843042850265, + "loss": 0.0798, + "theoretical_loss": 3.862616959273635, + "tokens_seen": 608698368 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008237040603434442, + "loss": 0.0803, + "theoretical_loss": 3.8624451414392498, + "tokens_seen": 608960512 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008236238164018617, + "loss": 0.0785, + "theoretical_loss": 3.8622734182524154, + "tokens_seen": 609222656 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.00217598513700068, + "objective/train/docs_used": 227108, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5498684644699097, + "objective/train/original_loss": 1.5498683452606201, + "objective/train/theoretical_loss": 3.8621017896203007, + "objective/train/tokens_used": 629944800, + "objective/train/value_avg": -0.00588226318359375, + "objective/train/value_loss": 8.349979179911315e-05, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.5373123562529432, + "objective/train/value_std": 0.00652313232421875, + "objective/train/weight_avg": 1.0022169351577759, + "objective/train/weighted_lm_loss": 1.5533596277236938, + "objective/train/weights_max": 1.130799412727356, + "objective/train/weights_min": 0.6347857117652893, + "theoretical_loss": 3.8621017896203007, + "tokens_seen": 609484800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008235435724602792, + "loss": 0.0803, + "theoretical_loss": 3.8621017896203007, + "tokens_seen": 609484800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008234633285186969, + "loss": 0.0793, + "theoretical_loss": 3.8619302554502077, + "tokens_seen": 609746944 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008233830845771144, + "loss": 0.0787, + "theoretical_loss": 3.8617588156495666, + "tokens_seen": 610009088 + }, + { + "epoch": 0.18, + "learning_rate": 0.000823302840635532, + "loss": 0.0829, + "theoretical_loss": 3.861587470125941, + "tokens_seen": 610271232 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008232225966939496, + "loss": 0.081, + "theoretical_loss": 3.8614162187870216, + "tokens_seen": 610533376 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008231423527523673, + "loss": 0.0804, + "theoretical_loss": 3.861245061540631, + "tokens_seen": 610795520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008230621088107848, + "loss": 0.0806, + "theoretical_loss": 3.8610739982947218, + "tokens_seen": 611057664 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008229818648692024, + "loss": 0.0773, + "theoretical_loss": 3.8609030289573747, + "tokens_seen": 611319808 + }, + { + "epoch": 0.19, + "learning_rate": 0.00082290162092762, + "loss": 0.0826, + "theoretical_loss": 3.8607321534368007, + "tokens_seen": 611581952 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008228213769860375, + "loss": 0.0772, + "theoretical_loss": 3.8605613716413396, + "tokens_seen": 611844096 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008227411330444552, + "loss": 0.0822, + "theoretical_loss": 3.860390683479459, + "tokens_seen": 612106240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008226608891028727, + "loss": 0.0778, + "theoretical_loss": 3.860220088859757, + "tokens_seen": 612368384 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008225806451612903, + "loss": 0.0774, + "theoretical_loss": 3.860049587690958, + "tokens_seen": 612630528 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0016880445182323456, + "objective/train/docs_used": 228263, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.764600396156311, + "objective/train/original_loss": 1.7646005153656006, + "objective/train/theoretical_loss": 3.8599643721221586, + "objective/train/tokens_used": 633221600, + "objective/train/value_avg": -0.006122589111328125, + "objective/train/value_loss": 0.00013401157048065215, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.330322265625, + "objective/train/value_reward_corr": 0.6502286444986164, + "objective/train/value_std": 0.01097869873046875, + "objective/train/weight_avg": 1.0017540454864502, + "objective/train/weighted_lm_loss": 1.767932415008545, + "objective/train/weights_max": 1.193005919456482, + "objective/train/weights_min": 0.8255929350852966, + "theoretical_loss": 3.8599643721221586, + "tokens_seen": 612761600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008225004012197079, + "loss": 0.0799, + "theoretical_loss": 3.8598791798819154, + "tokens_seen": 612892672 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008224201572781254, + "loss": 0.0771, + "theoretical_loss": 3.859708865341611, + "tokens_seen": 613154816 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008223399133365432, + "loss": 0.0819, + "theoretical_loss": 3.8595386439791532, + "tokens_seen": 613416960 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008222596693949607, + "loss": 0.0794, + "theoretical_loss": 3.859368515703778, + "tokens_seen": 613679104 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008221794254533783, + "loss": 0.0779, + "theoretical_loss": 3.859198480424849, + "tokens_seen": 613941248 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008220991815117959, + "loss": 0.0787, + "theoretical_loss": 3.859028538051856, + "tokens_seen": 614203392 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008220189375702135, + "loss": 0.0799, + "theoretical_loss": 3.858858688494416, + "tokens_seen": 614465536 + }, + { + "epoch": 0.19, + "learning_rate": 0.000821938693628631, + "loss": 0.0789, + "theoretical_loss": 3.8586889316622726, + "tokens_seen": 614727680 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008218584496870486, + "loss": 0.076, + "theoretical_loss": 3.8585192674652955, + "tokens_seen": 614989824 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008217782057454662, + "loss": 0.0788, + "theoretical_loss": 3.8583496958134793, + "tokens_seen": 615251968 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008216979618038837, + "loss": 0.0768, + "theoretical_loss": 3.8581802166169457, + "tokens_seen": 615514112 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008216177178623015, + "loss": 0.0801, + "theoretical_loss": 3.8580108297859415, + "tokens_seen": 615776256 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.001968114171177149, + "objective/train/docs_used": 229528, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.539759874343872, + "objective/train/original_loss": 1.5397597551345825, + "objective/train/theoretical_loss": 3.857841535230839, + "objective/train/tokens_used": 636498400, + "objective/train/value_avg": -0.0091400146484375, + "objective/train/value_loss": 0.00024838329409249127, + "objective/train/value_max": -0.00023412704467773438, + "objective/train/value_min": -0.63916015625, + "objective/train/value_reward_corr": 0.6929620425353863, + "objective/train/value_std": 0.01442718505859375, + "objective/train/weight_avg": 1.0020780563354492, + "objective/train/weighted_lm_loss": 1.543168306350708, + "objective/train/weights_max": 1.582837700843811, + "objective/train/weights_min": 0.23275621235370636, + "theoretical_loss": 3.857841535230839, + "tokens_seen": 616038400 + }, + { + "epoch": 0.19, + "learning_rate": 0.000821537473920719, + "loss": 0.0777, + "theoretical_loss": 3.857841535230839, + "tokens_seen": 616038400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008214572299791366, + "loss": 0.0793, + "theoretical_loss": 3.8576723328621347, + "tokens_seen": 616300544 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008213769860375542, + "loss": 0.0794, + "theoretical_loss": 3.8575032225904513, + "tokens_seen": 616562688 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008212967420959717, + "loss": 0.0775, + "theoretical_loss": 3.8573342043265346, + "tokens_seen": 616824832 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008212164981543894, + "loss": 0.0832, + "theoretical_loss": 3.857165277981256, + "tokens_seen": 617086976 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008211362542128069, + "loss": 0.0811, + "theoretical_loss": 3.8569964434656105, + "tokens_seen": 617349120 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008210560102712245, + "loss": 0.0811, + "theoretical_loss": 3.856827700690718, + "tokens_seen": 617611264 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008209757663296422, + "loss": 0.0795, + "theoretical_loss": 3.8566590495678192, + "tokens_seen": 617873408 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008208955223880598, + "loss": 0.0817, + "theoretical_loss": 3.856490490008282, + "tokens_seen": 618135552 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008208152784464773, + "loss": 0.0809, + "theoretical_loss": 3.856322021923595, + "tokens_seen": 618397696 + }, + { + "epoch": 0.19, + "learning_rate": 0.000820735034504895, + "loss": 0.08, + "theoretical_loss": 3.8561536452253713, + "tokens_seen": 618659840 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008206547905633125, + "loss": 0.0773, + "theoretical_loss": 3.855985359825346, + "tokens_seen": 618921984 + }, + { + "epoch": 0.19, + "learning_rate": 0.00082057454662173, + "loss": 0.0827, + "theoretical_loss": 3.855817165635377, + "tokens_seen": 619184128 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0019921851344406605, + "objective/train/docs_used": 230719, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.756550669670105, + "objective/train/original_loss": 1.756550669670105, + "objective/train/theoretical_loss": 3.855733102716653, + "objective/train/tokens_used": 639775200, + "objective/train/value_avg": -0.006992340087890625, + "objective/train/value_loss": 0.0001579941454110667, + "objective/train/value_max": -0.0001926422119140625, + "objective/train/value_min": -0.21337890625, + "objective/train/value_reward_corr": 0.45218047642197834, + "objective/train/value_std": 0.00893402099609375, + "objective/train/weight_avg": 1.002066969871521, + "objective/train/weighted_lm_loss": 1.7593868970870972, + "objective/train/weights_max": 1.1907228231430054, + "objective/train/weights_min": 0.36894404888153076, + "theoretical_loss": 3.855733102716653, + "tokens_seen": 619315200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008204943026801477, + "loss": 0.082, + "theoretical_loss": 3.8556490625674447, + "tokens_seen": 619446272 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008204140587385652, + "loss": 0.0815, + "theoretical_loss": 3.855481050533651, + "tokens_seen": 619708416 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008203338147969828, + "loss": 0.0814, + "theoretical_loss": 3.8553131294462206, + "tokens_seen": 619970560 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008202535708554004, + "loss": 0.0814, + "theoretical_loss": 3.8551452992175, + "tokens_seen": 620232704 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008201733269138181, + "loss": 0.0813, + "theoretical_loss": 3.8549775597599556, + "tokens_seen": 620494848 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008200930829722356, + "loss": 0.0788, + "theoretical_loss": 3.8548099109861775, + "tokens_seen": 620756992 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008200128390306532, + "loss": 0.0811, + "theoretical_loss": 3.8546423528088747, + "tokens_seen": 621019136 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008199325950890708, + "loss": 0.0804, + "theoretical_loss": 3.8544748851408777, + "tokens_seen": 621281280 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008198523511474884, + "loss": 0.0784, + "theoretical_loss": 3.8543075078951388, + "tokens_seen": 621543424 + }, + { + "epoch": 0.19, + "learning_rate": 0.000819772107205906, + "loss": 0.0784, + "theoretical_loss": 3.8541402209847284, + "tokens_seen": 621805568 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008196918632643235, + "loss": 0.0819, + "theoretical_loss": 3.8539730243228387, + "tokens_seen": 622067712 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008196116193227412, + "loss": 0.0778, + "theoretical_loss": 3.8538059178227817, + "tokens_seen": 622329856 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0007625448633916676, + "objective/train/docs_used": 231894, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6909435987472534, + "objective/train/original_loss": 1.690943717956543, + "objective/train/theoretical_loss": 3.8536389013979893, + "objective/train/tokens_used": 643052000, + "objective/train/value_avg": -0.0119781494140625, + "objective/train/value_loss": 0.00041994385537691414, + "objective/train/value_max": -0.00027370452880859375, + "objective/train/value_min": -0.638671875, + "objective/train/value_reward_corr": 0.6989086451961908, + "objective/train/value_std": 0.0189056396484375, + "objective/train/weight_avg": 1.0009546279907227, + "objective/train/weighted_lm_loss": 1.6919503211975098, + "objective/train/weights_max": 1.6490602493286133, + "objective/train/weights_min": 0.37530699372291565, + "theoretical_loss": 3.8536389013979893, + "tokens_seen": 622592000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008195313753811587, + "loss": 0.0836, + "theoretical_loss": 3.8536389013979893, + "tokens_seen": 622592000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008194511314395762, + "loss": 0.0795, + "theoretical_loss": 3.8534719749620114, + "tokens_seen": 622854144 + }, + { + "epoch": 0.19, + "learning_rate": 0.000819370887497994, + "loss": 0.0788, + "theoretical_loss": 3.8533051384285195, + "tokens_seen": 623116288 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008192906435564115, + "loss": 0.0814, + "theoretical_loss": 3.853138391711303, + "tokens_seen": 623378432 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008192103996148291, + "loss": 0.0795, + "theoretical_loss": 3.852971734724269, + "tokens_seen": 623640576 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008191301556732467, + "loss": 0.0787, + "theoretical_loss": 3.8528051673814456, + "tokens_seen": 623902720 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008190499117316643, + "loss": 0.081, + "theoretical_loss": 3.8526386895969775, + "tokens_seen": 624164864 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008189696677900818, + "loss": 0.0826, + "theoretical_loss": 3.8524723012851294, + "tokens_seen": 624427008 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008188894238484994, + "loss": 0.081, + "theoretical_loss": 3.852306002360282, + "tokens_seen": 624689152 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818809179906917, + "loss": 0.0816, + "theoretical_loss": 3.852139792736936, + "tokens_seen": 624951296 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008187289359653345, + "loss": 0.0787, + "theoretical_loss": 3.8519736723297067, + "tokens_seen": 625213440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008186486920237523, + "loss": 0.0791, + "theoretical_loss": 3.8518076410533304, + "tokens_seen": 625475584 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008185684480821698, + "loss": 0.0789, + "theoretical_loss": 3.8516416988226574, + "tokens_seen": 625737728 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0014086907031014562, + "objective/train/docs_used": 233149, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4563045501708984, + "objective/train/original_loss": 1.4563043117523193, + "objective/train/theoretical_loss": 3.8515587610728836, + "objective/train/tokens_used": 646328800, + "objective/train/value_avg": -0.00971221923828125, + "objective/train/value_loss": 0.0002561133005656302, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.36669921875, + "objective/train/value_reward_corr": 0.7151985996360277, + "objective/train/value_std": 0.01548004150390625, + "objective/train/weight_avg": 1.0015289783477783, + "objective/train/weighted_lm_loss": 1.457317590713501, + "objective/train/weights_max": 1.146925687789917, + "objective/train/weights_min": 0.3961734175682068, + "theoretical_loss": 3.8515587610728836, + "tokens_seen": 625868800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008184882041405875, + "loss": 0.0795, + "theoretical_loss": 3.851475845552658, + "tokens_seen": 625999872 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818407960199005, + "loss": 0.0821, + "theoretical_loss": 3.8513100811584158, + "tokens_seen": 626262016 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008183277162574225, + "loss": 0.0818, + "theoretical_loss": 3.851144405555134, + "tokens_seen": 626524160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008182474723158402, + "loss": 0.0788, + "theoretical_loss": 3.85097881865813, + "tokens_seen": 626786304 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008181672283742577, + "loss": 0.0809, + "theoretical_loss": 3.850813320382839, + "tokens_seen": 627048448 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008180869844326753, + "loss": 0.0812, + "theoretical_loss": 3.8506479106448115, + "tokens_seen": 627310592 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818006740491093, + "loss": 0.0824, + "theoretical_loss": 3.8504825893597134, + "tokens_seen": 627572736 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008179264965495106, + "loss": 0.0803, + "theoretical_loss": 3.850317356443326, + "tokens_seen": 627834880 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008178462526079281, + "loss": 0.079, + "theoretical_loss": 3.8501522118115465, + "tokens_seen": 628097024 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008177660086663458, + "loss": 0.0827, + "theoretical_loss": 3.8499871553803873, + "tokens_seen": 628359168 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008176857647247633, + "loss": 0.0806, + "theoretical_loss": 3.8498221870659743, + "tokens_seen": 628621312 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008176055207831808, + "loss": 0.0763, + "theoretical_loss": 3.8496573067845503, + "tokens_seen": 628883456 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0006253430037759244, + "objective/train/docs_used": 234355, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4915151596069336, + "objective/train/original_loss": 1.4915152788162231, + "objective/train/theoretical_loss": 3.8494925144524705, + "objective/train/tokens_used": 649605600, + "objective/train/value_avg": -0.00966644287109375, + "objective/train/value_loss": 0.0007618270465172827, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.759765625, + "objective/train/value_reward_corr": 0.5889753777352726, + "objective/train/value_std": 0.0186614990234375, + "objective/train/weight_avg": 1.0009468793869019, + "objective/train/weighted_lm_loss": 1.4915693998336792, + "objective/train/weights_max": 1.9064472913742065, + "objective/train/weights_min": 0.3697463870048523, + "theoretical_loss": 3.8494925144524705, + "tokens_seen": 629145600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008175252768415985, + "loss": 0.0813, + "theoretical_loss": 3.8494925144524705, + "tokens_seen": 629145600 + }, + { + "epoch": 0.19, + "learning_rate": 0.000817445032900016, + "loss": 0.0806, + "theoretical_loss": 3.849327809986206, + "tokens_seen": 629407744 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008173647889584337, + "loss": 0.0772, + "theoretical_loss": 3.8491631933023407, + "tokens_seen": 629669888 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008172845450168512, + "loss": 0.0817, + "theoretical_loss": 3.8489986643175733, + "tokens_seen": 629932032 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008172043010752689, + "loss": 0.0805, + "theoretical_loss": 3.8488342229487156, + "tokens_seen": 630194176 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008171240571336865, + "loss": 0.0822, + "theoretical_loss": 3.8486698691126935, + "tokens_seen": 630456320 + }, + { + "epoch": 0.19, + "learning_rate": 0.000817043813192104, + "loss": 0.081, + "theoretical_loss": 3.8485056027265454, + "tokens_seen": 630718464 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008169635692505216, + "loss": 0.0802, + "theoretical_loss": 3.848341423707423, + "tokens_seen": 630980608 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008168833253089392, + "loss": 0.0791, + "theoretical_loss": 3.8481773319725914, + "tokens_seen": 631242752 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008168030813673568, + "loss": 0.0834, + "theoretical_loss": 3.8480133274394275, + "tokens_seen": 631504896 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008167228374257743, + "loss": 0.0764, + "theoretical_loss": 3.8478494100254217, + "tokens_seen": 631767040 + }, + { + "epoch": 0.19, + "learning_rate": 0.000816642593484192, + "loss": 0.0828, + "theoretical_loss": 3.847685579648176, + "tokens_seen": 632029184 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008165623495426095, + "loss": 0.0826, + "theoretical_loss": 3.847521836225404, + "tokens_seen": 632291328 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0012705049011856318, + "objective/train/docs_used": 235545, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8972015380859375, + "objective/train/original_loss": 1.8972015380859375, + "objective/train/theoretical_loss": 3.847439997096262, + "objective/train/tokens_used": 652882400, + "objective/train/value_avg": -0.0090484619140625, + "objective/train/value_loss": 0.0002362952072871849, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.40380859375, + "objective/train/value_reward_corr": 0.6654254308226828, + "objective/train/value_std": 0.0135650634765625, + "objective/train/weight_avg": 1.0013830661773682, + "objective/train/weighted_lm_loss": 1.8993679285049438, + "objective/train/weights_max": 1.2981646060943604, + "objective/train/weights_min": 0.36874493956565857, + "theoretical_loss": 3.847439997096262, + "tokens_seen": 632422400 + }, + { + "epoch": 0.19, + "learning_rate": 0.000816482105601027, + "loss": 0.0829, + "theoretical_loss": 3.8473581796749317, + "tokens_seen": 632553472 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008164018616594448, + "loss": 0.0774, + "theoretical_loss": 3.8471946099146983, + "tokens_seen": 632815616 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008163216177178623, + "loss": 0.0793, + "theoretical_loss": 3.847031126862751, + "tokens_seen": 633077760 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008162413737762799, + "loss": 0.0793, + "theoretical_loss": 3.8468677304372507, + "tokens_seen": 633339904 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008161611298346975, + "loss": 0.0781, + "theoretical_loss": 3.8467044205564704, + "tokens_seen": 633602048 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008160808858931151, + "loss": 0.0804, + "theoretical_loss": 3.846541197138791, + "tokens_seen": 633864192 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008160006419515327, + "loss": 0.0839, + "theoretical_loss": 3.8463780601027056, + "tokens_seen": 634126336 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008159203980099502, + "loss": 0.0786, + "theoretical_loss": 3.846215009366819, + "tokens_seen": 634388480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008158401540683678, + "loss": 0.0812, + "theoretical_loss": 3.846052044849843, + "tokens_seen": 634650624 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008157599101267855, + "loss": 0.0796, + "theoretical_loss": 3.845889166470604, + "tokens_seen": 634912768 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008156796661852031, + "loss": 0.0804, + "theoretical_loss": 3.8457263741480343, + "tokens_seen": 635174912 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008155994222436206, + "loss": 0.0791, + "theoretical_loss": 3.845563667801178, + "tokens_seen": 635437056 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0004186597652733326, + "objective/train/docs_used": 236526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6329374313354492, + "objective/train/original_loss": 1.6329374313354492, + "objective/train/theoretical_loss": 3.8454010473491884, + "objective/train/tokens_used": 656159200, + "objective/train/value_avg": -0.007476806640625, + "objective/train/value_loss": 0.00015600294864270836, + "objective/train/value_max": -0.0002512931823730469, + "objective/train/value_min": -0.230712890625, + "objective/train/value_reward_corr": 0.6553133134938699, + "objective/train/value_std": 0.01104736328125, + "objective/train/weight_avg": 1.0004956722259521, + "objective/train/weighted_lm_loss": 1.6333415508270264, + "objective/train/weights_max": 1.2097692489624023, + "objective/train/weights_min": 0.7830726504325867, + "theoretical_loss": 3.8454010473491884, + "tokens_seen": 635699200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008155191783020383, + "loss": 0.081, + "theoretical_loss": 3.8454010473491884, + "tokens_seen": 635699200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008154389343604558, + "loss": 0.0795, + "theoretical_loss": 3.845238512711327, + "tokens_seen": 635961344 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008153586904188733, + "loss": 0.0817, + "theoretical_loss": 3.845076063806966, + "tokens_seen": 636223488 + }, + { + "epoch": 0.19, + "learning_rate": 0.000815278446477291, + "loss": 0.0833, + "theoretical_loss": 3.844913700555586, + "tokens_seen": 636485632 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008151982025357085, + "loss": 0.0782, + "theoretical_loss": 3.8447514228767763, + "tokens_seen": 636747776 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008151179585941261, + "loss": 0.0831, + "theoretical_loss": 3.844589230690234, + "tokens_seen": 637009920 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008150377146525437, + "loss": 0.0797, + "theoretical_loss": 3.844427123915766, + "tokens_seen": 637272064 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008149574707109614, + "loss": 0.0781, + "theoretical_loss": 3.8442651024732863, + "tokens_seen": 637534208 + }, + { + "epoch": 0.19, + "learning_rate": 0.000814877226769379, + "loss": 0.0784, + "theoretical_loss": 3.8441031662828173, + "tokens_seen": 637796352 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008147969828277966, + "loss": 0.0799, + "theoretical_loss": 3.843941315264489, + "tokens_seen": 638058496 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008147167388862141, + "loss": 0.0788, + "theoretical_loss": 3.8437795493385387, + "tokens_seen": 638320640 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008146364949446317, + "loss": 0.0813, + "theoretical_loss": 3.8436178684253126, + "tokens_seen": 638582784 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008145562510030493, + "loss": 0.0819, + "theoretical_loss": 3.843456272445262, + "tokens_seen": 638844928 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0014796998584643006, + "objective/train/docs_used": 237858, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4043687582015991, + "objective/train/original_loss": 1.4043687582015991, + "objective/train/theoretical_loss": 3.8433755062803483, + "objective/train/tokens_used": 659436000, + "objective/train/value_avg": -0.0091400146484375, + "objective/train/value_loss": 0.0002844578993972391, + "objective/train/value_max": -0.0001881122589111328, + "objective/train/value_min": -0.2335205078125, + "objective/train/value_reward_corr": 0.6038268655262016, + "objective/train/value_std": 0.0128326416015625, + "objective/train/weight_avg": 1.0016050338745117, + "objective/train/weighted_lm_loss": 1.4062108993530273, + "objective/train/weights_max": 1.1530903577804565, + "objective/train/weights_min": 0.4062868654727936, + "theoretical_loss": 3.8433755062803483, + "tokens_seen": 638976000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008144760070614668, + "loss": 0.078, + "theoretical_loss": 3.8432947613189468, + "tokens_seen": 639107072 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008143957631198845, + "loss": 0.0825, + "theoretical_loss": 3.8431333349670336, + "tokens_seen": 639369216 + }, + { + "epoch": 0.19, + "learning_rate": 0.000814315519178302, + "loss": 0.0788, + "theoretical_loss": 3.842971993310294, + "tokens_seen": 639631360 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008142352752367197, + "loss": 0.0793, + "theoretical_loss": 3.8428107362696085, + "tokens_seen": 639893504 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008141550312951373, + "loss": 0.0782, + "theoretical_loss": 3.842649563765962, + "tokens_seen": 640155648 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008140747873535548, + "loss": 0.0818, + "theoretical_loss": 3.8424884757204474, + "tokens_seen": 640417792 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008139945434119724, + "loss": 0.08, + "theoretical_loss": 3.8423274720542606, + "tokens_seen": 640679936 + }, + { + "epoch": 0.19, + "learning_rate": 0.00081391429947039, + "loss": 0.0786, + "theoretical_loss": 3.842166552688706, + "tokens_seen": 640942080 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008138340555288076, + "loss": 0.0806, + "theoretical_loss": 3.8420057175451934, + "tokens_seen": 641204224 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008137538115872251, + "loss": 0.0812, + "theoretical_loss": 3.841844966545236, + "tokens_seen": 641466368 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008136735676456428, + "loss": 0.0802, + "theoretical_loss": 3.841684299610453, + "tokens_seen": 641728512 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008135933237040603, + "loss": 0.079, + "theoretical_loss": 3.8415237166625698, + "tokens_seen": 641990656 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": -2.3364991648122668e-05, + "objective/train/docs_used": 239059, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5975748300552368, + "objective/train/original_loss": 1.5975749492645264, + "objective/train/theoretical_loss": 3.8413632176234147, + "objective/train/tokens_used": 662712800, + "objective/train/value_avg": -0.006862640380859375, + "objective/train/value_loss": 0.0001523960818303749, + "objective/train/value_max": -0.0002033710479736328, + "objective/train/value_min": -0.264892578125, + "objective/train/value_reward_corr": 0.6269222691924341, + "objective/train/value_std": 0.00958251953125, + "objective/train/weight_avg": 1.0000513792037964, + "objective/train/weighted_lm_loss": 1.597486138343811, + "objective/train/weights_max": 1.1600202322006226, + "objective/train/weights_min": 0.6130765676498413, + "theoretical_loss": 3.8413632176234147, + "tokens_seen": 642252800 + }, + { + "epoch": 0.19, + "learning_rate": 0.000813513079762478, + "loss": 0.0793, + "theoretical_loss": 3.8413632176234147, + "tokens_seen": 642252800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008134328358208956, + "loss": 0.0834, + "theoretical_loss": 3.8412028024149225, + "tokens_seen": 642514944 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008133525918793131, + "loss": 0.079, + "theoretical_loss": 3.841042470959131, + "tokens_seen": 642777088 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008132723479377308, + "loss": 0.0791, + "theoretical_loss": 3.840882223178183, + "tokens_seen": 643039232 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008131921039961483, + "loss": 0.0811, + "theoretical_loss": 3.8407220589943254, + "tokens_seen": 643301376 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008131118600545659, + "loss": 0.0778, + "theoretical_loss": 3.8405619783299083, + "tokens_seen": 643563520 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008130316161129835, + "loss": 0.0784, + "theoretical_loss": 3.8404019811073864, + "tokens_seen": 643825664 + }, + { + "epoch": 0.2, + "learning_rate": 0.000812951372171401, + "loss": 0.0787, + "theoretical_loss": 3.840242067249317, + "tokens_seen": 644087808 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008128711282298186, + "loss": 0.0783, + "theoretical_loss": 3.840082236678362, + "tokens_seen": 644349952 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008127908842882363, + "loss": 0.08, + "theoretical_loss": 3.8399224893172854, + "tokens_seen": 644612096 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008127106403466539, + "loss": 0.0794, + "theoretical_loss": 3.839762825088955, + "tokens_seen": 644874240 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008126303964050714, + "loss": 0.0833, + "theoretical_loss": 3.8396032439163394, + "tokens_seen": 645136384 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008125501524634891, + "loss": 0.0806, + "theoretical_loss": 3.8394437457225132, + "tokens_seen": 645398528 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -3.4955566661665216e-05, + "objective/train/docs_used": 240316, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6006709337234497, + "objective/train/original_loss": 1.6006708145141602, + "objective/train/theoretical_loss": 3.8393640277186343, + "objective/train/tokens_used": 665989600, + "objective/train/value_avg": -0.01154327392578125, + "objective/train/value_loss": 0.00029328398522920907, + "objective/train/value_max": -0.00016224384307861328, + "objective/train/value_min": -0.419189453125, + "objective/train/value_reward_corr": 0.8165028368002758, + "objective/train/value_std": 0.0216522216796875, + "objective/train/weight_avg": 1.0001044273376465, + "objective/train/weighted_lm_loss": 1.5991564989089966, + "objective/train/weights_max": 1.134090781211853, + "objective/train/weights_min": 0.3825574517250061, + "theoretical_loss": 3.8393640277186343, + "tokens_seen": 645529600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008124699085219066, + "loss": 0.0832, + "theoretical_loss": 3.839284330430651, + "tokens_seen": 645660672 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008123896645803241, + "loss": 0.081, + "theoretical_loss": 3.8391249979640305, + "tokens_seen": 645922816 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008123094206387418, + "loss": 0.0802, + "theoretical_loss": 3.8389657482460313, + "tokens_seen": 646184960 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008122291766971593, + "loss": 0.0774, + "theoretical_loss": 3.838806581200134, + "tokens_seen": 646447104 + }, + { + "epoch": 0.2, + "learning_rate": 0.000812148932755577, + "loss": 0.0808, + "theoretical_loss": 3.838647496749924, + "tokens_seen": 646709248 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008120686888139945, + "loss": 0.0818, + "theoretical_loss": 3.8384884948190847, + "tokens_seen": 646971392 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008119884448724122, + "loss": 0.0809, + "theoretical_loss": 3.838329575331403, + "tokens_seen": 647233536 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008119082009308298, + "loss": 0.0798, + "theoretical_loss": 3.8381707382107657, + "tokens_seen": 647495680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008118279569892473, + "loss": 0.0836, + "theoretical_loss": 3.838011983381162, + "tokens_seen": 647757824 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008117477130476649, + "loss": 0.0792, + "theoretical_loss": 3.8378533107666817, + "tokens_seen": 648019968 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008116674691060825, + "loss": 0.0833, + "theoretical_loss": 3.8376947202915144, + "tokens_seen": 648282112 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008115872251645001, + "loss": 0.0801, + "theoretical_loss": 3.8375362118799505, + "tokens_seen": 648544256 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.002151393797248602, + "objective/train/docs_used": 241544, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5193239450454712, + "objective/train/original_loss": 1.5193241834640503, + "objective/train/theoretical_loss": 3.8373777854563813, + "objective/train/tokens_used": 669266400, + "objective/train/value_avg": -0.007175445556640625, + "objective/train/value_loss": 0.0002748131228145212, + "objective/train/value_max": -0.00010722875595092773, + "objective/train/value_min": -0.6533203125, + "objective/train/value_reward_corr": 0.5608935796152028, + "objective/train/value_std": 0.01296234130859375, + "objective/train/weight_avg": 1.002267599105835, + "objective/train/weighted_lm_loss": 1.5228184461593628, + "objective/train/weights_max": 1.1310755014419556, + "objective/train/weights_min": 0.36963358521461487, + "theoretical_loss": 3.8373777854563813, + "tokens_seen": 648806400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008115069812229176, + "loss": 0.0798, + "theoretical_loss": 3.8373777854563813, + "tokens_seen": 648806400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008114267372813353, + "loss": 0.0816, + "theoretical_loss": 3.837219440945298, + "tokens_seen": 649068544 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008113464933397528, + "loss": 0.08, + "theoretical_loss": 3.8370611782712922, + "tokens_seen": 649330688 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008112662493981705, + "loss": 0.0823, + "theoretical_loss": 3.8369029973590543, + "tokens_seen": 649592832 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008111860054565881, + "loss": 0.0796, + "theoretical_loss": 3.836744898133376, + "tokens_seen": 649854976 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008111057615150056, + "loss": 0.079, + "theoretical_loss": 3.8365868805191456, + "tokens_seen": 650117120 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008110255175734233, + "loss": 0.0801, + "theoretical_loss": 3.836428944441354, + "tokens_seen": 650379264 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008109452736318408, + "loss": 0.0804, + "theoretical_loss": 3.8362710898250896, + "tokens_seen": 650641408 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008108650296902584, + "loss": 0.0819, + "theoretical_loss": 3.83611331659554, + "tokens_seen": 650903552 + }, + { + "epoch": 0.2, + "learning_rate": 0.000810784785748676, + "loss": 0.0765, + "theoretical_loss": 3.8359556246779913, + "tokens_seen": 651165696 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008107045418070936, + "loss": 0.0828, + "theoretical_loss": 3.8357980139978283, + "tokens_seen": 651427840 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008106242978655111, + "loss": 0.0827, + "theoretical_loss": 3.8356404844805354, + "tokens_seen": 651689984 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008105440539239288, + "loss": 0.0817, + "theoretical_loss": 3.835483036051694, + "tokens_seen": 651952128 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0021593719720840454, + "objective/train/docs_used": 242832, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6270049810409546, + "objective/train/original_loss": 1.6270049810409546, + "objective/train/theoretical_loss": 3.8354043422222133, + "objective/train/tokens_used": 672543200, + "objective/train/value_avg": -0.00695037841796875, + "objective/train/value_loss": 0.0001394545251969248, + "objective/train/value_max": -0.00016736984252929688, + "objective/train/value_min": -0.36376953125, + "objective/train/value_reward_corr": 0.674807627723227, + "objective/train/value_std": 0.0125274658203125, + "objective/train/weight_avg": 1.0022249221801758, + "objective/train/weighted_lm_loss": 1.6309560537338257, + "objective/train/weights_max": 1.1779428720474243, + "objective/train/weights_min": 0.3801831901073456, + "theoretical_loss": 3.8354043422222133, + "tokens_seen": 652083200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008104638099823464, + "loss": 0.0802, + "theoretical_loss": 3.835325668636983, + "tokens_seen": 652214272 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008103835660407639, + "loss": 0.0816, + "theoretical_loss": 3.8351683821621814, + "tokens_seen": 652476416 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008103033220991816, + "loss": 0.0819, + "theoretical_loss": 3.8350111765531647, + "tokens_seen": 652738560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008102230781575991, + "loss": 0.0797, + "theoretical_loss": 3.834854051735906, + "tokens_seen": 653000704 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008101428342160167, + "loss": 0.0824, + "theoretical_loss": 3.8346970076364757, + "tokens_seen": 653262848 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008100625902744343, + "loss": 0.0798, + "theoretical_loss": 3.8345400441810424, + "tokens_seen": 653524992 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008099823463328518, + "loss": 0.0818, + "theoretical_loss": 3.8343831612958703, + "tokens_seen": 653787136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008099021023912694, + "loss": 0.0817, + "theoretical_loss": 3.834226358907322, + "tokens_seen": 654049280 + }, + { + "epoch": 0.2, + "learning_rate": 0.000809821858449687, + "loss": 0.0817, + "theoretical_loss": 3.8340696369418565, + "tokens_seen": 654311424 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008097416145081047, + "loss": 0.0785, + "theoretical_loss": 3.833912995326029, + "tokens_seen": 654573568 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008096613705665223, + "loss": 0.0791, + "theoretical_loss": 3.8337564339864914, + "tokens_seen": 654835712 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008095811266249399, + "loss": 0.0801, + "theoretical_loss": 3.8335999528499913, + "tokens_seen": 655097856 + }, + { + "debugging/Compilability": 0.9230769230769231, + "debugging/distinct-1-grams": 0.7560193640250044, + "debugging/entropy-1-grams": 5.2534808493234895, + "debugging/length": 422.2307692307692, + "debugging/num_segments": 13, + "debugging/raw_token_scores_avg": 0.00857908371835947, + "debugging/raw_token_scores_std": 0.02154596894979477, + "debugging/score": 0.01638830111902192, + "debugging/score_std": 0.019821935735086043, + "epoch": 0.2, + "objective/train/advantage_avg": 0.002418318996205926, + "objective/train/docs_used": 243976, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5416268110275269, + "objective/train/original_loss": 1.5416266918182373, + "objective/train/theoretical_loss": 3.833443551843374, + "objective/train/tokens_used": 675820000, + "objective/train/value_avg": -0.01099395751953125, + "objective/train/value_loss": 0.00022288458421826363, + "objective/train/value_max": -0.00031256675720214844, + "objective/train/value_min": -0.326904296875, + "objective/train/value_reward_corr": 0.7334904683634422, + "objective/train/value_std": 0.0173797607421875, + "objective/train/weight_avg": 1.0025254487991333, + "objective/train/weighted_lm_loss": 1.5436171293258667, + "objective/train/weights_max": 1.2229843139648438, + "objective/train/weights_min": 0.37925034761428833, + "theoretical_loss": 3.833443551843374, + "tokens_seen": 655360000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008095008826833574, + "loss": 0.0824, + "theoretical_loss": 3.833443551843374, + "tokens_seen": 655360000 + }, + { + "epoch": 0.2, + "learning_rate": 0.000809420638741775, + "loss": 0.0802, + "theoretical_loss": 3.8332872308935793, + "tokens_seen": 655622144 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008093403948001926, + "loss": 0.081, + "theoretical_loss": 3.833130989927643, + "tokens_seen": 655884288 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008092601508586101, + "loss": 0.0797, + "theoretical_loss": 3.8329748288726972, + "tokens_seen": 656146432 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008091799069170278, + "loss": 0.0838, + "theoretical_loss": 3.8328187476559687, + "tokens_seen": 656408576 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008090996629754453, + "loss": 0.082, + "theoretical_loss": 3.83266274620478, + "tokens_seen": 656670720 + }, + { + "epoch": 0.2, + "learning_rate": 0.000809019419033863, + "loss": 0.0783, + "theoretical_loss": 3.832506824446549, + "tokens_seen": 656932864 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008089391750922806, + "loss": 0.0795, + "theoretical_loss": 3.832350982308788, + "tokens_seen": 657195008 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008088589311506981, + "loss": 0.0784, + "theoretical_loss": 3.8321952197191043, + "tokens_seen": 657457152 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008087786872091157, + "loss": 0.0816, + "theoretical_loss": 3.8320395366052, + "tokens_seen": 657719296 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008086984432675333, + "loss": 0.0795, + "theoretical_loss": 3.8318839328948715, + "tokens_seen": 657981440 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008086181993259509, + "loss": 0.0807, + "theoretical_loss": 3.8317284085160095, + "tokens_seen": 658243584 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008085379553843684, + "loss": 0.0805, + "theoretical_loss": 3.8315729633965994, + "tokens_seen": 658505728 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.001669611083343625, + "objective/train/docs_used": 245288, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5030797719955444, + "objective/train/original_loss": 1.503079891204834, + "objective/train/theoretical_loss": 3.83149527053671, + "objective/train/tokens_used": 679096800, + "objective/train/value_avg": -0.00933837890625, + "objective/train/value_loss": 0.0003506782522890717, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.263427734375, + "objective/train/value_reward_corr": 0.5930356277614546, + "objective/train/value_std": 0.01061248779296875, + "objective/train/weight_avg": 0.9984805583953857, + "objective/train/weighted_lm_loss": 1.5003520250320435, + "objective/train/weights_max": 1.164748191833496, + "objective/train/weights_min": 0.22747893631458282, + "theoretical_loss": 3.83149527053671, + "tokens_seen": 658636800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008084577114427861, + "loss": 0.0786, + "theoretical_loss": 3.8314175974647195, + "tokens_seen": 658767872 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008083774675012036, + "loss": 0.0808, + "theoretical_loss": 3.831262310648544, + "tokens_seen": 659030016 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008082972235596214, + "loss": 0.0816, + "theoretical_loss": 3.831107102876338, + "tokens_seen": 659292160 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008082169796180389, + "loss": 0.0829, + "theoretical_loss": 3.830951974076463, + "tokens_seen": 659554304 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008081367356764564, + "loss": 0.0825, + "theoretical_loss": 3.830796924177371, + "tokens_seen": 659816448 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008080564917348741, + "loss": 0.0792, + "theoretical_loss": 3.830641953107609, + "tokens_seen": 660078592 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008079762477932916, + "loss": 0.0806, + "theoretical_loss": 3.8304870607958175, + "tokens_seen": 660340736 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008078960038517092, + "loss": 0.0807, + "theoretical_loss": 3.8303322471707286, + "tokens_seen": 660602880 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008078157599101268, + "loss": 0.0784, + "theoretical_loss": 3.830177512161167, + "tokens_seen": 660865024 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008077355159685444, + "loss": 0.0818, + "theoretical_loss": 3.8300228556960523, + "tokens_seen": 661127168 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008076552720269619, + "loss": 0.0778, + "theoretical_loss": 3.829868277704393, + "tokens_seen": 661389312 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008075750280853796, + "loss": 0.0808, + "theoretical_loss": 3.829713778115293, + "tokens_seen": 661651456 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0014472852926701307, + "objective/train/docs_used": 246530, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6413122415542603, + "objective/train/original_loss": 1.6413123607635498, + "objective/train/theoretical_loss": 3.8295593568579465, + "objective/train/tokens_used": 682373600, + "objective/train/value_avg": -0.0119781494140625, + "objective/train/value_loss": 0.000297568243695423, + "objective/train/value_max": -0.00022172927856445312, + "objective/train/value_min": -0.2919921875, + "objective/train/value_reward_corr": 0.7385408733331538, + "objective/train/value_std": 0.019012451171875, + "objective/train/weight_avg": 1.0015898942947388, + "objective/train/weighted_lm_loss": 1.6430256366729736, + "objective/train/weights_max": 1.2300209999084473, + "objective/train/weights_min": 0.36995241045951843, + "theoretical_loss": 3.8295593568579465, + "tokens_seen": 661913600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008074947841437972, + "loss": 0.0816, + "theoretical_loss": 3.8295593568579465, + "tokens_seen": 661913600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008074145402022147, + "loss": 0.0813, + "theoretical_loss": 3.8294050138616402, + "tokens_seen": 662175744 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008073342962606324, + "loss": 0.081, + "theoretical_loss": 3.8292507490557526, + "tokens_seen": 662437888 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008072540523190499, + "loss": 0.0768, + "theoretical_loss": 3.8290965623697537, + "tokens_seen": 662700032 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008071738083774676, + "loss": 0.0783, + "theoretical_loss": 3.8289424537332053, + "tokens_seen": 662962176 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008070935644358851, + "loss": 0.0828, + "theoretical_loss": 3.82878842307576, + "tokens_seen": 663224320 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008070133204943026, + "loss": 0.0797, + "theoretical_loss": 3.828634470327162, + "tokens_seen": 663486464 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008069330765527203, + "loss": 0.0808, + "theoretical_loss": 3.8284805954172474, + "tokens_seen": 663748608 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008068528326111379, + "loss": 0.0812, + "theoretical_loss": 3.828326798275941, + "tokens_seen": 664010752 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008067725886695555, + "loss": 0.0797, + "theoretical_loss": 3.82817307883326, + "tokens_seen": 664272896 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008066923447279731, + "loss": 0.0797, + "theoretical_loss": 3.8280194370193112, + "tokens_seen": 664535040 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008066121007863907, + "loss": 0.0799, + "theoretical_loss": 3.827865872764293, + "tokens_seen": 664797184 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008065318568448082, + "loss": 0.0802, + "theoretical_loss": 3.8277123859984936, + "tokens_seen": 665059328 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.00026472590980120003, + "objective/train/docs_used": 247583, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7137538194656372, + "objective/train/original_loss": 1.713753581047058, + "objective/train/theoretical_loss": 3.82763567165229, + "objective/train/tokens_used": 685650400, + "objective/train/value_avg": -0.00846099853515625, + "objective/train/value_loss": 0.00032397088943980634, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.64111328125, + "objective/train/value_reward_corr": 0.6828980449967452, + "objective/train/value_std": 0.01425933837890625, + "objective/train/weight_avg": 1.00041663646698, + "objective/train/weighted_lm_loss": 1.7142980098724365, + "objective/train/weights_max": 1.409537672996521, + "objective/train/weights_min": 0.5323238372802734, + "theoretical_loss": 3.82763567165229, + "tokens_seen": 665190400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008064516129032258, + "loss": 0.0815, + "theoretical_loss": 3.8275589766522895, + "tokens_seen": 665321472 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008063713689616434, + "loss": 0.0795, + "theoretical_loss": 3.8274056446561504, + "tokens_seen": 665583616 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008062911250200609, + "loss": 0.077, + "theoretical_loss": 3.827252389940633, + "tokens_seen": 665845760 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008062108810784786, + "loss": 0.079, + "theoretical_loss": 3.827099212436386, + "tokens_seen": 666107904 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008061306371368961, + "loss": 0.0802, + "theoretical_loss": 3.826946112074145, + "tokens_seen": 666370048 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008060503931953138, + "loss": 0.0791, + "theoretical_loss": 3.826793088784737, + "tokens_seen": 666632192 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008059701492537314, + "loss": 0.0817, + "theoretical_loss": 3.826640142499077, + "tokens_seen": 666894336 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008058899053121489, + "loss": 0.0786, + "theoretical_loss": 3.8264872731481705, + "tokens_seen": 667156480 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008058096613705666, + "loss": 0.0786, + "theoretical_loss": 3.8263344806631103, + "tokens_seen": 667418624 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008057294174289841, + "loss": 0.0804, + "theoretical_loss": 3.8261817649750784, + "tokens_seen": 667680768 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008056491734874017, + "loss": 0.0781, + "theoretical_loss": 3.8260291260153463, + "tokens_seen": 667942912 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008055689295458193, + "loss": 0.0798, + "theoretical_loss": 3.8258765637152727, + "tokens_seen": 668205056 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.00018435930542182177, + "objective/train/docs_used": 248744, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.638197660446167, + "objective/train/original_loss": 1.638197660446167, + "objective/train/theoretical_loss": 3.8257240780063055, + "objective/train/tokens_used": 688927200, + "objective/train/value_avg": -0.006977081298828125, + "objective/train/value_loss": 0.0003241746162530035, + "objective/train/value_max": -0.00019109249114990234, + "objective/train/value_min": -0.227783203125, + "objective/train/value_reward_corr": 0.7174840285003992, + "objective/train/value_std": 0.0107421875, + "objective/train/weight_avg": 0.9999673962593079, + "objective/train/weighted_lm_loss": 1.6396377086639404, + "objective/train/weights_max": 1.1206990480422974, + "objective/train/weights_min": 0.39359450340270996, + "theoretical_loss": 3.8257240780063055, + "tokens_seen": 668467200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008054886856042369, + "loss": 0.0806, + "theoretical_loss": 3.8257240780063055, + "tokens_seen": 668467200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008054084416626544, + "loss": 0.078, + "theoretical_loss": 3.8255716688199803, + "tokens_seen": 668729344 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008053281977210722, + "loss": 0.079, + "theoretical_loss": 3.825419336087921, + "tokens_seen": 668991488 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008052479537794897, + "loss": 0.0788, + "theoretical_loss": 3.825267079741839, + "tokens_seen": 669253632 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008051677098379072, + "loss": 0.0796, + "theoretical_loss": 3.825114899713533, + "tokens_seen": 669515776 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008050874658963249, + "loss": 0.0808, + "theoretical_loss": 3.8249627959348915, + "tokens_seen": 669777920 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008050072219547424, + "loss": 0.0791, + "theoretical_loss": 3.824810768337887, + "tokens_seen": 670040064 + }, + { + "epoch": 0.2, + "learning_rate": 0.00080492697801316, + "loss": 0.0801, + "theoretical_loss": 3.8246588168545816, + "tokens_seen": 670302208 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008048467340715776, + "loss": 0.0747, + "theoretical_loss": 3.824506941417125, + "tokens_seen": 670564352 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008047664901299952, + "loss": 0.0809, + "theoretical_loss": 3.824355141957752, + "tokens_seen": 670826496 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008046862461884128, + "loss": 0.0801, + "theoretical_loss": 3.8242034184087847, + "tokens_seen": 671088640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008046060022468304, + "loss": 0.0774, + "theoretical_loss": 3.824051770702633, + "tokens_seen": 671350784 + }, + { + "epoch": 0.2, + "learning_rate": 0.000804525758305248, + "loss": 0.0821, + "theoretical_loss": 3.823900198771792, + "tokens_seen": 671612928 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0005832649767398834, + "objective/train/docs_used": 249880, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4792834520339966, + "objective/train/original_loss": 1.4792835712432861, + "objective/train/theoretical_loss": 3.823824441201042, + "objective/train/tokens_used": 692204000, + "objective/train/value_avg": -0.00798797607421875, + "objective/train/value_loss": 0.0001945794647326693, + "objective/train/value_max": -0.00016224384307861328, + "objective/train/value_min": -0.30322265625, + "objective/train/value_reward_corr": 0.7662795001045949, + "objective/train/value_std": 0.0170745849609375, + "objective/train/weight_avg": 1.0006768703460693, + "objective/train/weighted_lm_loss": 1.479491949081421, + "objective/train/weights_max": 1.267970323562622, + "objective/train/weights_min": 0.5421772599220276, + "theoretical_loss": 3.823824441201042, + "tokens_seen": 671744000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008044455143636656, + "loss": 0.0767, + "theoretical_loss": 3.823748702548845, + "tokens_seen": 671875072 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008043652704220832, + "loss": 0.0775, + "theoretical_loss": 3.823597281966459, + "tokens_seen": 672137216 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008042850264805007, + "loss": 0.0787, + "theoretical_loss": 3.8234459369573894, + "tokens_seen": 672399360 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008042047825389184, + "loss": 0.0793, + "theoretical_loss": 3.8232946674544763, + "tokens_seen": 672661504 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008041245385973359, + "loss": 0.0805, + "theoretical_loss": 3.8231434733906458, + "tokens_seen": 672923648 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008040442946557534, + "loss": 0.0803, + "theoretical_loss": 3.822992354698911, + "tokens_seen": 673185792 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008039640507141711, + "loss": 0.0784, + "theoretical_loss": 3.822841311312368, + "tokens_seen": 673447936 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008038838067725887, + "loss": 0.0793, + "theoretical_loss": 3.8226903431642008, + "tokens_seen": 673710080 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008038035628310063, + "loss": 0.0789, + "theoretical_loss": 3.8225394501876764, + "tokens_seen": 673972224 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008037233188894239, + "loss": 0.0802, + "theoretical_loss": 3.822388632316149, + "tokens_seen": 674234368 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008036430749478415, + "loss": 0.0779, + "theoretical_loss": 3.822237889483057, + "tokens_seen": 674496512 + }, + { + "epoch": 0.2, + "learning_rate": 0.000803562831006259, + "loss": 0.0794, + "theoretical_loss": 3.822087221621923, + "tokens_seen": 674758656 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.00032604276202619076, + "objective/train/docs_used": 251058, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5687280893325806, + "objective/train/original_loss": 1.5687280893325806, + "objective/train/theoretical_loss": 3.821936628666355, + "objective/train/tokens_used": 695480800, + "objective/train/value_avg": -0.006984710693359375, + "objective/train/value_loss": 0.00032798174652270973, + "objective/train/value_max": -0.0001926422119140625, + "objective/train/value_min": -0.90283203125, + "objective/train/value_reward_corr": 0.6570254406817982, + "objective/train/value_std": 0.01174163818359375, + "objective/train/weight_avg": 0.9998152256011963, + "objective/train/weighted_lm_loss": 1.568171501159668, + "objective/train/weights_max": 1.1402249336242676, + "objective/train/weights_min": 0.2320682257413864, + "theoretical_loss": 3.821936628666355, + "tokens_seen": 675020800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008034825870646766, + "loss": 0.0753, + "theoretical_loss": 3.821936628666355, + "tokens_seen": 675020800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008034023431230942, + "loss": 0.0777, + "theoretical_loss": 3.821786110550045, + "tokens_seen": 675282944 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008033220991815118, + "loss": 0.0768, + "theoretical_loss": 3.8216356672067704, + "tokens_seen": 675545088 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008032418552399294, + "loss": 0.0794, + "theoretical_loss": 3.821485298570391, + "tokens_seen": 675807232 + }, + { + "epoch": 0.2, + "learning_rate": 0.000803161611298347, + "loss": 0.0774, + "theoretical_loss": 3.8213350045748533, + "tokens_seen": 676069376 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008030813673567647, + "loss": 0.0787, + "theoretical_loss": 3.821184785154186, + "tokens_seen": 676331520 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008030011234151822, + "loss": 0.0792, + "theoretical_loss": 3.8210346402425013, + "tokens_seen": 676593664 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008029208794735997, + "loss": 0.0791, + "theoretical_loss": 3.820884569773997, + "tokens_seen": 676855808 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008028406355320174, + "loss": 0.0771, + "theoretical_loss": 3.8207345736829526, + "tokens_seen": 677117952 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008027603915904349, + "loss": 0.078, + "theoretical_loss": 3.820584651903732, + "tokens_seen": 677380096 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008026801476488525, + "loss": 0.0784, + "theoretical_loss": 3.820434804370782, + "tokens_seen": 677642240 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008025999037072701, + "loss": 0.0781, + "theoretical_loss": 3.820285031018633, + "tokens_seen": 677904384 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008025196597656877, + "loss": 0.0788, + "theoretical_loss": 3.8201353317818985, + "tokens_seen": 678166528 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0017591392388567328, + "objective/train/docs_used": 252282, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.499388575553894, + "objective/train/original_loss": 1.4993884563446045, + "objective/train/theoretical_loss": 3.8200605099364005, + "objective/train/tokens_used": 698757600, + "objective/train/value_avg": -0.0076446533203125, + "objective/train/value_loss": 0.00020787572429981083, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.267333984375, + "objective/train/value_reward_corr": 0.6231772208041639, + "objective/train/value_std": 0.0118560791015625, + "objective/train/weight_avg": 1.001856803894043, + "objective/train/weighted_lm_loss": 1.5024080276489258, + "objective/train/weights_max": 1.1745542287826538, + "objective/train/weights_min": 0.3710364103317261, + "theoretical_loss": 3.8200605099364005, + "tokens_seen": 678297600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008024394158241052, + "loss": 0.0767, + "theoretical_loss": 3.819985706595274, + "tokens_seen": 678428672 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008023591718825229, + "loss": 0.0816, + "theoretical_loss": 3.8198361553935385, + "tokens_seen": 678690816 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008022789279409405, + "loss": 0.0814, + "theoretical_loss": 3.8196866781115526, + "tokens_seen": 678952960 + }, + { + "epoch": 0.21, + "learning_rate": 0.000802198683999358, + "loss": 0.0831, + "theoretical_loss": 3.8195372746842615, + "tokens_seen": 679215104 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008021184400577757, + "loss": 0.0767, + "theoretical_loss": 3.8193879450466905, + "tokens_seen": 679477248 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008020381961161932, + "loss": 0.0798, + "theoretical_loss": 3.819238689133948, + "tokens_seen": 679739392 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008019579521746109, + "loss": 0.0795, + "theoretical_loss": 3.819089506881225, + "tokens_seen": 680001536 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008018777082330284, + "loss": 0.079, + "theoretical_loss": 3.8189403982237935, + "tokens_seen": 680263680 + }, + { + "epoch": 0.21, + "learning_rate": 0.000801797464291446, + "loss": 0.0773, + "theoretical_loss": 3.818791363097008, + "tokens_seen": 680525824 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008017172203498636, + "loss": 0.0789, + "theoretical_loss": 3.818642401436304, + "tokens_seen": 680787968 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008016369764082812, + "loss": 0.0793, + "theoretical_loss": 3.8184935131771987, + "tokens_seen": 681050112 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008015567324666988, + "loss": 0.0787, + "theoretical_loss": 3.8183446982552915, + "tokens_seen": 681312256 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0013181634712964296, + "objective/train/docs_used": 253498, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5834970474243164, + "objective/train/original_loss": 1.5834970474243164, + "objective/train/theoretical_loss": 3.818195956606262, + "objective/train/tokens_used": 702034400, + "objective/train/value_avg": -0.00580596923828125, + "objective/train/value_loss": 0.00019775994587689638, + "objective/train/value_max": -0.000263214111328125, + "objective/train/value_min": -0.3359375, + "objective/train/value_reward_corr": 0.4194674562674259, + "objective/train/value_std": 0.007694244384765625, + "objective/train/weight_avg": 1.0014029741287231, + "objective/train/weighted_lm_loss": 1.5860118865966797, + "objective/train/weights_max": 1.3310092687606812, + "objective/train/weights_min": 0.3714952766895294, + "theoretical_loss": 3.818195956606262, + "tokens_seen": 681574400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008014764885251164, + "loss": 0.0807, + "theoretical_loss": 3.818195956606262, + "tokens_seen": 681574400 + }, + { + "epoch": 0.21, + "learning_rate": 0.000801396244583534, + "loss": 0.0802, + "theoretical_loss": 3.8180472881658707, + "tokens_seen": 681836544 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008013160006419515, + "loss": 0.0807, + "theoretical_loss": 3.817898692869961, + "tokens_seen": 682098688 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008012357567003692, + "loss": 0.0812, + "theoretical_loss": 3.817750170654455, + "tokens_seen": 682360832 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008011555127587867, + "loss": 0.0797, + "theoretical_loss": 3.8176017214553575, + "tokens_seen": 682622976 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008010752688172042, + "loss": 0.0803, + "theoretical_loss": 3.8174533452087513, + "tokens_seen": 682885120 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008009950248756219, + "loss": 0.0774, + "theoretical_loss": 3.817305041850802, + "tokens_seen": 683147264 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008009147809340394, + "loss": 0.0806, + "theoretical_loss": 3.817156811317754, + "tokens_seen": 683409408 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008008345369924572, + "loss": 0.0802, + "theoretical_loss": 3.8170086535459333, + "tokens_seen": 683671552 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008007542930508747, + "loss": 0.0799, + "theoretical_loss": 3.8168605684717454, + "tokens_seen": 683933696 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008006740491092923, + "loss": 0.08, + "theoretical_loss": 3.816712556031675, + "tokens_seen": 684195840 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008005938051677099, + "loss": 0.079, + "theoretical_loss": 3.816564616162287, + "tokens_seen": 684457984 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008005135612261274, + "loss": 0.078, + "theoretical_loss": 3.8164167488002265, + "tokens_seen": 684720128 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.001422036555595696, + "objective/train/docs_used": 254674, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.559423565864563, + "objective/train/original_loss": 1.5594233274459839, + "objective/train/theoretical_loss": 3.816342842289668, + "objective/train/tokens_used": 705311200, + "objective/train/value_avg": -0.006954193115234375, + "objective/train/value_loss": 0.00011249267845414579, + "objective/train/value_max": -0.0001823902130126953, + "objective/train/value_min": -0.30419921875, + "objective/train/value_reward_corr": 0.6799434767604788, + "objective/train/value_std": 0.01035308837890625, + "objective/train/weight_avg": 1.0014777183532715, + "objective/train/weighted_lm_loss": 1.5613473653793335, + "objective/train/weights_max": 1.134773850440979, + "objective/train/weights_min": 0.7942905426025391, + "theoretical_loss": 3.816342842289668, + "tokens_seen": 684851200 + }, + { + "epoch": 0.21, + "learning_rate": 0.000800433317284545, + "loss": 0.0798, + "theoretical_loss": 3.816268953882218, + "tokens_seen": 684982272 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008003530733429626, + "loss": 0.0777, + "theoretical_loss": 3.8161212313450648, + "tokens_seen": 685244416 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008002728294013802, + "loss": 0.0793, + "theoretical_loss": 3.81597358112565, + "tokens_seen": 685506560 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008001925854597977, + "loss": 0.0798, + "theoretical_loss": 3.815826003160935, + "tokens_seen": 685768704 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008001123415182155, + "loss": 0.0811, + "theoretical_loss": 3.815678497387962, + "tokens_seen": 686030848 + }, + { + "epoch": 0.21, + "learning_rate": 0.000800032097576633, + "loss": 0.0788, + "theoretical_loss": 3.8155310637438506, + "tokens_seen": 686292992 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007999518536350505, + "loss": 0.079, + "theoretical_loss": 3.8153837021657995, + "tokens_seen": 686555136 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007998716096934682, + "loss": 0.0776, + "theoretical_loss": 3.8152364125910863, + "tokens_seen": 686817280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007997913657518857, + "loss": 0.0781, + "theoretical_loss": 3.8150891949570664, + "tokens_seen": 687079424 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007997111218103033, + "loss": 0.0782, + "theoretical_loss": 3.814942049201175, + "tokens_seen": 687341568 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007996308778687209, + "loss": 0.0817, + "theoretical_loss": 3.8147949752609236, + "tokens_seen": 687603712 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007995506339271385, + "loss": 0.0789, + "theoretical_loss": 3.814647973073903, + "tokens_seen": 687865856 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0019079339690506458, + "objective/train/docs_used": 255827, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5390352010726929, + "objective/train/original_loss": 1.5390353202819824, + "objective/train/theoretical_loss": 3.8145010425777826, + "objective/train/tokens_used": 708588000, + "objective/train/value_avg": -0.0116119384765625, + "objective/train/value_loss": 0.0002800696238409728, + "objective/train/value_max": -0.00017261505126953125, + "objective/train/value_min": -0.5703125, + "objective/train/value_reward_corr": 0.7645409376422845, + "objective/train/value_std": 0.020599365234375, + "objective/train/weight_avg": 1.0020438432693481, + "objective/train/weighted_lm_loss": 1.5430036783218384, + "objective/train/weights_max": 1.2020081281661987, + "objective/train/weights_min": 0.627696692943573, + "theoretical_loss": 3.8145010425777826, + "tokens_seen": 688128000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007994703899855562, + "loss": 0.0788, + "theoretical_loss": 3.8145010425777826, + "tokens_seen": 688128000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007993901460439737, + "loss": 0.0796, + "theoretical_loss": 3.814354183710308, + "tokens_seen": 688390144 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007993099021023913, + "loss": 0.0796, + "theoretical_loss": 3.8142073964093046, + "tokens_seen": 688652288 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007992296581608089, + "loss": 0.0782, + "theoretical_loss": 3.8140606806126733, + "tokens_seen": 688914432 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007991494142192265, + "loss": 0.0796, + "theoretical_loss": 3.813914036258393, + "tokens_seen": 689176576 + }, + { + "epoch": 0.21, + "learning_rate": 0.000799069170277644, + "loss": 0.0789, + "theoretical_loss": 3.813767463284522, + "tokens_seen": 689438720 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007989889263360617, + "loss": 0.0797, + "theoretical_loss": 3.8136209616291934, + "tokens_seen": 689700864 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007989086823944792, + "loss": 0.0796, + "theoretical_loss": 3.813474531230618, + "tokens_seen": 689963008 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007988284384528968, + "loss": 0.0785, + "theoretical_loss": 3.8133281720270835, + "tokens_seen": 690225152 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007987481945113144, + "loss": 0.0806, + "theoretical_loss": 3.8131818839569562, + "tokens_seen": 690487296 + }, + { + "epoch": 0.21, + "learning_rate": 0.000798667950569732, + "loss": 0.0806, + "theoretical_loss": 3.8130356669586765, + "tokens_seen": 690749440 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007985877066281496, + "loss": 0.0768, + "theoretical_loss": 3.8128895209707627, + "tokens_seen": 691011584 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007985074626865672, + "loss": 0.0819, + "theoretical_loss": 3.81274344593181, + "tokens_seen": 691273728 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.001648097182624042, + "objective/train/docs_used": 257040, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.549683690071106, + "objective/train/original_loss": 1.5496838092803955, + "objective/train/theoretical_loss": 3.812670434999026, + "objective/train/tokens_used": 711864800, + "objective/train/value_avg": -0.0057830810546875, + "objective/train/value_loss": 0.00016456494631711394, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.3056640625, + "objective/train/value_reward_corr": 0.6402803699022559, + "objective/train/value_std": 0.00890350341796875, + "objective/train/weight_avg": 1.0017253160476685, + "objective/train/weighted_lm_loss": 1.5529325008392334, + "objective/train/weights_max": 1.138139009475708, + "objective/train/weights_min": 0.368623286485672, + "theoretical_loss": 3.812670434999026, + "tokens_seen": 691404800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007984272187449848, + "loss": 0.0774, + "theoretical_loss": 3.8125974417804893, + "tokens_seen": 691535872 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007983469748034023, + "loss": 0.0767, + "theoretical_loss": 3.8124515084555477, + "tokens_seen": 691798016 + }, + { + "epoch": 0.21, + "learning_rate": 0.00079826673086182, + "loss": 0.0786, + "theoretical_loss": 3.8123056458958087, + "tokens_seen": 692060160 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007981864869202375, + "loss": 0.078, + "theoretical_loss": 3.812159854040172, + "tokens_seen": 692322304 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007981062429786551, + "loss": 0.0796, + "theoretical_loss": 3.8120141328276125, + "tokens_seen": 692584448 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007980259990370727, + "loss": 0.0813, + "theoretical_loss": 3.8118684821971813, + "tokens_seen": 692846592 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007979457550954902, + "loss": 0.0807, + "theoretical_loss": 3.8117229020880057, + "tokens_seen": 693108736 + }, + { + "epoch": 0.21, + "learning_rate": 0.000797865511153908, + "loss": 0.0804, + "theoretical_loss": 3.811577392439287, + "tokens_seen": 693370880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007977852672123255, + "loss": 0.0801, + "theoretical_loss": 3.8114319531903025, + "tokens_seen": 693633024 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007977050232707431, + "loss": 0.0807, + "theoretical_loss": 3.811286584280406, + "tokens_seen": 693895168 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007976247793291607, + "loss": 0.0799, + "theoretical_loss": 3.8111412856490245, + "tokens_seen": 694157312 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007975445353875782, + "loss": 0.08, + "theoretical_loss": 3.810996057235661, + "tokens_seen": 694419456 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0017769966507330537, + "objective/train/docs_used": 258341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6005815267562866, + "objective/train/original_loss": 1.6005815267562866, + "objective/train/theoretical_loss": 3.810850898979894, + "objective/train/tokens_used": 715141600, + "objective/train/value_avg": -0.00868988037109375, + "objective/train/value_loss": 0.00037193644675426185, + "objective/train/value_max": -0.00015234947204589844, + "objective/train/value_min": -0.72021484375, + "objective/train/value_reward_corr": 0.5291458465205756, + "objective/train/value_std": 0.0122528076171875, + "objective/train/weight_avg": 1.001926302909851, + "objective/train/weighted_lm_loss": 1.60252046585083, + "objective/train/weights_max": 1.3593815565109253, + "objective/train/weights_min": 0.23074059188365936, + "theoretical_loss": 3.810850898979894, + "tokens_seen": 694681600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007974642914459958, + "loss": 0.08, + "theoretical_loss": 3.810850898979894, + "tokens_seen": 694681600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007973840475044134, + "loss": 0.0808, + "theoretical_loss": 3.810705810821375, + "tokens_seen": 694943744 + }, + { + "epoch": 0.21, + "learning_rate": 0.000797303803562831, + "loss": 0.0795, + "theoretical_loss": 3.8105607926998326, + "tokens_seen": 695205888 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007972235596212485, + "loss": 0.0785, + "theoretical_loss": 3.810415844555067, + "tokens_seen": 695468032 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007971433156796663, + "loss": 0.0778, + "theoretical_loss": 3.8102709663269554, + "tokens_seen": 695730176 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007970630717380838, + "loss": 0.0791, + "theoretical_loss": 3.810126157955448, + "tokens_seen": 695992320 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007969828277965014, + "loss": 0.0779, + "theoretical_loss": 3.809981419380569, + "tokens_seen": 696254464 + }, + { + "epoch": 0.21, + "learning_rate": 0.000796902583854919, + "loss": 0.0797, + "theoretical_loss": 3.809836750542418, + "tokens_seen": 696516608 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007968223399133365, + "loss": 0.0803, + "theoretical_loss": 3.8096921513811663, + "tokens_seen": 696778752 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007967420959717542, + "loss": 0.0804, + "theoretical_loss": 3.809547621837061, + "tokens_seen": 697040896 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007966618520301717, + "loss": 0.0765, + "theoretical_loss": 3.809403161850423, + "tokens_seen": 697303040 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007965816080885893, + "loss": 0.0823, + "theoretical_loss": 3.8092587713616446, + "tokens_seen": 697565184 + }, + { + "epoch": 0.21, + "learning_rate": 0.000796501364147007, + "loss": 0.081, + "theoretical_loss": 3.809114450311193, + "tokens_seen": 697827328 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.0008030504104681313, + "objective/train/docs_used": 259086, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6132134199142456, + "objective/train/original_loss": 1.613213300704956, + "objective/train/theoretical_loss": 3.8090423158067566, + "objective/train/tokens_used": 718418400, + "objective/train/value_avg": -0.00884246826171875, + "objective/train/value_loss": 0.00026143278228119016, + "objective/train/value_max": -0.0002033710479736328, + "objective/train/value_min": -0.25, + "objective/train/value_reward_corr": 0.8812842327841226, + "objective/train/value_std": 0.0213165283203125, + "objective/train/weight_avg": 0.9993177652359009, + "objective/train/weighted_lm_loss": 1.6114174127578735, + "objective/train/weights_max": 1.1213833093643188, + "objective/train/weights_min": 0.37042829394340515, + "theoretical_loss": 3.8090423158067566, + "tokens_seen": 697958400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007964211202054245, + "loss": 0.08, + "theoretical_loss": 3.808970198639609, + "tokens_seen": 698089472 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007963408762638421, + "loss": 0.0781, + "theoretical_loss": 3.808826016287507, + "tokens_seen": 698351616 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007962606323222597, + "loss": 0.0804, + "theoretical_loss": 3.8086819031955725, + "tokens_seen": 698613760 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007961803883806773, + "loss": 0.0787, + "theoretical_loss": 3.8085378593045665, + "tokens_seen": 698875904 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007961001444390948, + "loss": 0.0817, + "theoretical_loss": 3.8083938845553202, + "tokens_seen": 699138048 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007960199004975125, + "loss": 0.0783, + "theoretical_loss": 3.80824997888874, + "tokens_seen": 699400192 + }, + { + "epoch": 0.21, + "learning_rate": 0.00079593965655593, + "loss": 0.0807, + "theoretical_loss": 3.8081061422458036, + "tokens_seen": 699662336 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007958594126143476, + "loss": 0.0785, + "theoretical_loss": 3.8079623745675613, + "tokens_seen": 699924480 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007957791686727652, + "loss": 0.0755, + "theoretical_loss": 3.8078186757951364, + "tokens_seen": 700186624 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007956989247311828, + "loss": 0.0807, + "theoretical_loss": 3.8076750458697237, + "tokens_seen": 700448768 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007956186807896005, + "loss": 0.0789, + "theoretical_loss": 3.807531484732591, + "tokens_seen": 700710912 + }, + { + "epoch": 0.21, + "learning_rate": 0.000795538436848018, + "loss": 0.0793, + "theoretical_loss": 3.8073879923250775, + "tokens_seen": 700973056 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0006472544046118855, + "objective/train/docs_used": 260260, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.584854006767273, + "objective/train/original_loss": 1.5848541259765625, + "objective/train/theoretical_loss": 3.8072445685885947, + "objective/train/tokens_used": 721695200, + "objective/train/value_avg": -0.007328033447265625, + "objective/train/value_loss": 0.0002117718249792233, + "objective/train/value_max": -7.784366607666016e-05, + "objective/train/value_min": -0.60693359375, + "objective/train/value_reward_corr": 0.616764286984292, + "objective/train/value_std": 0.011688232421875, + "objective/train/weight_avg": 1.000746250152588, + "objective/train/weighted_lm_loss": 1.5859571695327759, + "objective/train/weights_max": 1.590544581413269, + "objective/train/weights_min": 0.38592278957366943, + "theoretical_loss": 3.8072445685885947, + "tokens_seen": 701235200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007954581929064356, + "loss": 0.0775, + "theoretical_loss": 3.8072445685885947, + "tokens_seen": 701235200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007953779489648532, + "loss": 0.0785, + "theoretical_loss": 3.807101213464625, + "tokens_seen": 701497344 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007952977050232708, + "loss": 0.0803, + "theoretical_loss": 3.8069579268947242, + "tokens_seen": 701759488 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007952174610816883, + "loss": 0.0812, + "theoretical_loss": 3.806814708820519, + "tokens_seen": 702021632 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007951372171401059, + "loss": 0.0759, + "theoretical_loss": 3.806671559183706, + "tokens_seen": 702283776 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007950569731985235, + "loss": 0.078, + "theoretical_loss": 3.806528477926056, + "tokens_seen": 702545920 + }, + { + "epoch": 0.21, + "learning_rate": 0.000794976729256941, + "loss": 0.0811, + "theoretical_loss": 3.806385464989409, + "tokens_seen": 702808064 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007948964853153588, + "loss": 0.0808, + "theoretical_loss": 3.806242520315676, + "tokens_seen": 703070208 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007948162413737763, + "loss": 0.0788, + "theoretical_loss": 3.806099643846841, + "tokens_seen": 703332352 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007947359974321939, + "loss": 0.0792, + "theoretical_loss": 3.8059568355249564, + "tokens_seen": 703594496 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007946557534906115, + "loss": 0.0789, + "theoretical_loss": 3.8058140952921478, + "tokens_seen": 703856640 + }, + { + "epoch": 0.21, + "learning_rate": 0.000794575509549029, + "loss": 0.0796, + "theoretical_loss": 3.805671423090609, + "tokens_seen": 704118784 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007944952656074467, + "loss": 0.0781, + "theoretical_loss": 3.805528818862607, + "tokens_seen": 704380928 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.00023795383458491415, + "objective/train/docs_used": 261464, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4358924627304077, + "objective/train/original_loss": 1.4358923435211182, + "objective/train/theoretical_loss": 3.8054575422206596, + "objective/train/tokens_used": 724972000, + "objective/train/value_avg": -0.00519561767578125, + "objective/train/value_loss": 0.00011082483979407698, + "objective/train/value_max": -0.00012433528900146484, + "objective/train/value_min": -0.2100830078125, + "objective/train/value_reward_corr": 0.568821173994195, + "objective/train/value_std": 0.00617218017578125, + "objective/train/weight_avg": 1.000292420387268, + "objective/train/weighted_lm_loss": 1.4362776279449463, + "objective/train/weights_max": 1.1238564252853394, + "objective/train/weights_min": 0.7808533906936646, + "theoretical_loss": 3.8054575422206596, + "tokens_seen": 704512000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007944150216658642, + "loss": 0.078, + "theoretical_loss": 3.8053862825504776, + "tokens_seen": 704643072 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007943347777242818, + "loss": 0.0806, + "theoretical_loss": 3.8052438140966265, + "tokens_seen": 704905216 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007942545337826995, + "loss": 0.0804, + "theoretical_loss": 3.8051014134435315, + "tokens_seen": 705167360 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007941742898411171, + "loss": 0.0751, + "theoretical_loss": 3.804959080533739, + "tokens_seen": 705429504 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007940940458995346, + "loss": 0.0791, + "theoretical_loss": 3.8048168153098656, + "tokens_seen": 705691648 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007940138019579522, + "loss": 0.0787, + "theoretical_loss": 3.8046746177145985, + "tokens_seen": 705953792 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007939335580163698, + "loss": 0.0799, + "theoretical_loss": 3.804532487690694, + "tokens_seen": 706215936 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007938533140747873, + "loss": 0.0782, + "theoretical_loss": 3.8043904251809786, + "tokens_seen": 706478080 + }, + { + "epoch": 0.21, + "learning_rate": 0.000793773070133205, + "loss": 0.0796, + "theoretical_loss": 3.8042484301283475, + "tokens_seen": 706740224 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007936928261916225, + "loss": 0.0805, + "theoretical_loss": 3.8041065024757668, + "tokens_seen": 707002368 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007936125822500401, + "loss": 0.0791, + "theoretical_loss": 3.8039646421662705, + "tokens_seen": 707264512 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007935323383084578, + "loss": 0.0783, + "theoretical_loss": 3.8038228491429624, + "tokens_seen": 707526656 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.00028257613303139806, + "objective/train/docs_used": 262675, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7718579769134521, + "objective/train/original_loss": 1.7718580961227417, + "objective/train/theoretical_loss": 3.8036811233490164, + "objective/train/tokens_used": 728248800, + "objective/train/value_avg": -0.01105499267578125, + "objective/train/value_loss": 0.00044336056453175843, + "objective/train/value_max": -0.0001442432403564453, + "objective/train/value_min": -0.88037109375, + "objective/train/value_reward_corr": 0.5845583896713396, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.0004814863204956, + "objective/train/weighted_lm_loss": 1.772655725479126, + "objective/train/weights_max": 1.4637386798858643, + "objective/train/weights_min": 0.3728155791759491, + "theoretical_loss": 3.8036811233490164, + "tokens_seen": 707788800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007934520943668753, + "loss": 0.0805, + "theoretical_loss": 3.8036811233490164, + "tokens_seen": 707788800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007933718504252929, + "loss": 0.0776, + "theoretical_loss": 3.803539464727673, + "tokens_seen": 708050944 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007932916064837105, + "loss": 0.0815, + "theoretical_loss": 3.803397873222244, + "tokens_seen": 708313088 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007932113625421281, + "loss": 0.0802, + "theoretical_loss": 3.8032563487761095, + "tokens_seen": 708575232 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007931311186005457, + "loss": 0.0775, + "theoretical_loss": 3.8031148913327177, + "tokens_seen": 708837376 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007930508746589633, + "loss": 0.0807, + "theoretical_loss": 3.8029735008355843, + "tokens_seen": 709099520 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007929706307173808, + "loss": 0.0802, + "theoretical_loss": 3.8028321772282965, + "tokens_seen": 709361664 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007928903867757984, + "loss": 0.079, + "theoretical_loss": 3.8026909204545065, + "tokens_seen": 709623808 + }, + { + "epoch": 0.22, + "learning_rate": 0.000792810142834216, + "loss": 0.0813, + "theoretical_loss": 3.8025497304579376, + "tokens_seen": 709885952 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007927298988926336, + "loss": 0.0786, + "theoretical_loss": 3.80240860718238, + "tokens_seen": 710148096 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007926496549510513, + "loss": 0.0829, + "theoretical_loss": 3.802267550571691, + "tokens_seen": 710410240 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007925694110094688, + "loss": 0.082, + "theoretical_loss": 3.802126560569798, + "tokens_seen": 710672384 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007924891670678864, + "loss": 0.0815, + "theoretical_loss": 3.801985637120694, + "tokens_seen": 710934528 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0011543107684701681, + "objective/train/docs_used": 263840, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6443710327148438, + "objective/train/original_loss": 1.6443710327148438, + "objective/train/theoretical_loss": 3.801915200335954, + "objective/train/tokens_used": 731525600, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.00040373342926613986, + "objective/train/value_max": -0.0002199411392211914, + "objective/train/value_min": -0.466552734375, + "objective/train/value_reward_corr": 0.6620460795564023, + "objective/train/value_std": 0.01470947265625, + "objective/train/weight_avg": 1.0013347864151, + "objective/train/weighted_lm_loss": 1.6481596231460571, + "objective/train/weights_max": 1.1715375185012817, + "objective/train/weights_min": 0.37442609667778015, + "theoretical_loss": 3.801915200335954, + "tokens_seen": 711065600 + }, + { + "epoch": 0.22, + "learning_rate": 0.000792408923126304, + "loss": 0.0788, + "theoretical_loss": 3.801844780168441, + "tokens_seen": 711196672 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007923286791847216, + "loss": 0.0816, + "theoretical_loss": 3.8017039896571685, + "tokens_seen": 711458816 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007922484352431391, + "loss": 0.0787, + "theoretical_loss": 3.8015632655310734, + "tokens_seen": 711720960 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007921681913015567, + "loss": 0.0801, + "theoretical_loss": 3.8014226077344198, + "tokens_seen": 711983104 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007920879473599743, + "loss": 0.0779, + "theoretical_loss": 3.8012820162115393, + "tokens_seen": 712245248 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007920077034183918, + "loss": 0.0772, + "theoretical_loss": 3.801141490906831, + "tokens_seen": 712507392 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007919274594768096, + "loss": 0.078, + "theoretical_loss": 3.80100103176476, + "tokens_seen": 712769536 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007918472155352271, + "loss": 0.0816, + "theoretical_loss": 3.8008606387298594, + "tokens_seen": 713031680 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007917669715936448, + "loss": 0.0767, + "theoretical_loss": 3.80072031174673, + "tokens_seen": 713293824 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007916867276520623, + "loss": 0.0804, + "theoretical_loss": 3.800580050760036, + "tokens_seen": 713555968 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007916064837104798, + "loss": 0.0764, + "theoretical_loss": 3.800439855714512, + "tokens_seen": 713818112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007915262397688975, + "loss": 0.08, + "theoretical_loss": 3.8002997265549574, + "tokens_seen": 714080256 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.001398808672092855, + "objective/train/docs_used": 265036, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4737719297409058, + "objective/train/original_loss": 1.4737719297409058, + "objective/train/theoretical_loss": 3.8001596632262387, + "objective/train/tokens_used": 734802400, + "objective/train/value_avg": -0.009124755859375, + "objective/train/value_loss": 0.00014532258501276374, + "objective/train/value_max": -0.00020992755889892578, + "objective/train/value_min": -0.1900634765625, + "objective/train/value_reward_corr": 0.6496980006355382, + "objective/train/value_std": 0.0111541748046875, + "objective/train/weight_avg": 1.0014667510986328, + "objective/train/weighted_lm_loss": 1.4759221076965332, + "objective/train/weights_max": 1.1377222537994385, + "objective/train/weights_min": 0.3690439462661743, + "theoretical_loss": 3.8001596632262387, + "tokens_seen": 714342400 + }, + { + "epoch": 0.22, + "learning_rate": 0.000791445995827315, + "loss": 0.0801, + "theoretical_loss": 3.8001596632262387, + "tokens_seen": 714342400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007913657518857326, + "loss": 0.0779, + "theoretical_loss": 3.8000196656732874, + "tokens_seen": 714604544 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007912855079441503, + "loss": 0.078, + "theoretical_loss": 3.7998797338411032, + "tokens_seen": 714866688 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007912052640025679, + "loss": 0.0797, + "theoretical_loss": 3.7997398676747496, + "tokens_seen": 715128832 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007911250200609854, + "loss": 0.0797, + "theoretical_loss": 3.7996000671193593, + "tokens_seen": 715390976 + }, + { + "epoch": 0.22, + "learning_rate": 0.000791044776119403, + "loss": 0.0777, + "theoretical_loss": 3.7994603321201277, + "tokens_seen": 715653120 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007909645321778206, + "loss": 0.0787, + "theoretical_loss": 3.7993206626223177, + "tokens_seen": 715915264 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007908842882362381, + "loss": 0.0789, + "theoretical_loss": 3.799181058571258, + "tokens_seen": 716177408 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007908040442946558, + "loss": 0.0757, + "theoretical_loss": 3.7990415199123424, + "tokens_seen": 716439552 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007907238003530733, + "loss": 0.0809, + "theoretical_loss": 3.79890204659103, + "tokens_seen": 716701696 + }, + { + "epoch": 0.22, + "learning_rate": 0.000790643556411491, + "loss": 0.0783, + "theoretical_loss": 3.7987626385528466, + "tokens_seen": 716963840 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007905633124699085, + "loss": 0.0818, + "theoretical_loss": 3.798623295743382, + "tokens_seen": 717225984 + }, + { + "epoch": 0.22, + "learning_rate": 0.000790483068528326, + "loss": 0.0818, + "theoretical_loss": 3.798484018108291, + "tokens_seen": 717488128 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0011304336367174983, + "objective/train/docs_used": 266305, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5863089561462402, + "objective/train/original_loss": 1.5863087177276611, + "objective/train/theoretical_loss": 3.7984144037141716, + "objective/train/tokens_used": 738079200, + "objective/train/value_avg": -0.0092620849609375, + "objective/train/value_loss": 0.0005252858973108232, + "objective/train/value_max": -0.0002434253692626953, + "objective/train/value_min": -0.962890625, + "objective/train/value_reward_corr": 0.61228862282039, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.0013542175292969, + "objective/train/weighted_lm_loss": 1.5876915454864502, + "objective/train/weights_max": 1.5222142934799194, + "objective/train/weights_min": 0.23608994483947754, + "theoretical_loss": 3.7984144037141716, + "tokens_seen": 717619200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007904028245867438, + "loss": 0.0783, + "theoretical_loss": 3.7983448055932953, + "tokens_seen": 717750272 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007903225806451613, + "loss": 0.0822, + "theoretical_loss": 3.79820565814418, + "tokens_seen": 718012416 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007902423367035789, + "loss": 0.0786, + "theoretical_loss": 3.798066575706795, + "tokens_seen": 718274560 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007901620927619965, + "loss": 0.0779, + "theoretical_loss": 3.797927558227056, + "tokens_seen": 718536704 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007900818488204141, + "loss": 0.0771, + "theoretical_loss": 3.7977886056509433, + "tokens_seen": 718798848 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007900016048788316, + "loss": 0.0781, + "theoretical_loss": 3.797649717924502, + "tokens_seen": 719060992 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007899213609372492, + "loss": 0.0791, + "theoretical_loss": 3.797510894993839, + "tokens_seen": 719323136 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007898411169956668, + "loss": 0.0783, + "theoretical_loss": 3.79737213680513, + "tokens_seen": 719585280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007897608730540844, + "loss": 0.0791, + "theoretical_loss": 3.797233443304612, + "tokens_seen": 719847424 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007896806291125021, + "loss": 0.0805, + "theoretical_loss": 3.7970948144385868, + "tokens_seen": 720109568 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007896003851709196, + "loss": 0.0764, + "theoretical_loss": 3.796956250153421, + "tokens_seen": 720371712 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007895201412293372, + "loss": 0.0797, + "theoretical_loss": 3.796817750395544, + "tokens_seen": 720633856 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0022220478858798742, + "objective/train/docs_used": 267624, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4708417654037476, + "objective/train/original_loss": 1.4708420038223267, + "objective/train/theoretical_loss": 3.7966793151114504, + "objective/train/tokens_used": 741356000, + "objective/train/value_avg": -0.0073699951171875, + "objective/train/value_loss": 0.0003509256348479539, + "objective/train/value_max": -0.0001838207244873047, + "objective/train/value_min": -0.91015625, + "objective/train/value_reward_corr": 0.5371351152106856, + "objective/train/value_std": 0.0122222900390625, + "objective/train/weight_avg": 1.0023695230484009, + "objective/train/weighted_lm_loss": 1.4751582145690918, + "objective/train/weights_max": 1.2902494668960571, + "objective/train/weights_min": 0.27010083198547363, + "theoretical_loss": 3.7966793151114504, + "tokens_seen": 720896000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007894398972877548, + "loss": 0.0768, + "theoretical_loss": 3.7966793151114504, + "tokens_seen": 720896000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007893596533461724, + "loss": 0.0768, + "theoretical_loss": 3.796540944247698, + "tokens_seen": 721158144 + }, + { + "epoch": 0.22, + "learning_rate": 0.00078927940940459, + "loss": 0.0783, + "theoretical_loss": 3.796402637750908, + "tokens_seen": 721420288 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007891991654630075, + "loss": 0.0809, + "theoretical_loss": 3.796264395567766, + "tokens_seen": 721682432 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007891189215214251, + "loss": 0.0788, + "theoretical_loss": 3.7961262176450195, + "tokens_seen": 721944576 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007890386775798428, + "loss": 0.08, + "theoretical_loss": 3.795988103929482, + "tokens_seen": 722206720 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007889584336382604, + "loss": 0.0762, + "theoretical_loss": 3.7958500543680276, + "tokens_seen": 722468864 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007888781896966779, + "loss": 0.079, + "theoretical_loss": 3.795712068907596, + "tokens_seen": 722731008 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007887979457550956, + "loss": 0.0825, + "theoretical_loss": 3.795574147495188, + "tokens_seen": 722993152 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007887177018135131, + "loss": 0.0807, + "theoretical_loss": 3.795436290077868, + "tokens_seen": 723255296 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007886374578719306, + "loss": 0.081, + "theoretical_loss": 3.795298496602765, + "tokens_seen": 723517440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007885572139303483, + "loss": 0.0778, + "theoretical_loss": 3.795160767017068, + "tokens_seen": 723779584 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007884769699887658, + "loss": 0.0787, + "theoretical_loss": 3.795023101268031, + "tokens_seen": 724041728 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0011378737399354577, + "objective/train/docs_used": 268786, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.588625431060791, + "objective/train/original_loss": 1.588625431060791, + "objective/train/theoretical_loss": 3.7949542923157935, + "objective/train/tokens_used": 744632800, + "objective/train/value_avg": -0.007007598876953125, + "objective/train/value_loss": 0.00012135379802202806, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.343505859375, + "objective/train/value_reward_corr": 0.6047769086501988, + "objective/train/value_std": 0.00875091552734375, + "objective/train/weight_avg": 1.0011974573135376, + "objective/train/weighted_lm_loss": 1.5900264978408813, + "objective/train/weights_max": 1.1416176557540894, + "objective/train/weights_min": 0.7288485765457153, + "theoretical_loss": 3.7949542923157935, + "tokens_seen": 724172800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007883967260471834, + "loss": 0.0827, + "theoretical_loss": 3.7948854993029695, + "tokens_seen": 724303872 + }, + { + "epoch": 0.22, + "learning_rate": 0.000788316482105601, + "loss": 0.0767, + "theoretical_loss": 3.7947479610692616, + "tokens_seen": 724566016 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007882362381640187, + "loss": 0.0784, + "theoretical_loss": 3.794610486514348, + "tokens_seen": 724828160 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007881559942224363, + "loss": 0.0788, + "theoretical_loss": 3.7944730755857323, + "tokens_seen": 725090304 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007880757502808538, + "loss": 0.0773, + "theoretical_loss": 3.794335728230979, + "tokens_seen": 725352448 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007879955063392714, + "loss": 0.078, + "theoretical_loss": 3.7941984443977157, + "tokens_seen": 725614592 + }, + { + "epoch": 0.22, + "learning_rate": 0.000787915262397689, + "loss": 0.0762, + "theoretical_loss": 3.7940612240336327, + "tokens_seen": 725876736 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007878350184561066, + "loss": 0.0786, + "theoretical_loss": 3.793924067086481, + "tokens_seen": 726138880 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007877547745145241, + "loss": 0.0781, + "theoretical_loss": 3.793786973504073, + "tokens_seen": 726401024 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007876745305729418, + "loss": 0.0772, + "theoretical_loss": 3.7936499432342847, + "tokens_seen": 726663168 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007875942866313593, + "loss": 0.0782, + "theoretical_loss": 3.7935129762250526, + "tokens_seen": 726925312 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007875140426897769, + "loss": 0.0813, + "theoretical_loss": 3.7933760724243752, + "tokens_seen": 727187456 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0019741442520171404, + "objective/train/docs_used": 269904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5811312198638916, + "objective/train/original_loss": 1.5811312198638916, + "objective/train/theoretical_loss": 3.7932392317803116, + "objective/train/tokens_used": 747909600, + "objective/train/value_avg": -0.006404876708984375, + "objective/train/value_loss": 0.00015449750935658813, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.30126953125, + "objective/train/value_reward_corr": 0.5738474794753151, + "objective/train/value_std": 0.00901031494140625, + "objective/train/weight_avg": 1.002044916152954, + "objective/train/weighted_lm_loss": 1.5848934650421143, + "objective/train/weights_max": 1.2912355661392212, + "objective/train/weights_min": 0.36876535415649414, + "theoretical_loss": 3.7932392317803116, + "tokens_seen": 727449600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007874337987481946, + "loss": 0.0798, + "theoretical_loss": 3.7932392317803116, + "tokens_seen": 727449600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007873535548066121, + "loss": 0.0773, + "theoretical_loss": 3.7931024542409837, + "tokens_seen": 727711744 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007872733108650297, + "loss": 0.0784, + "theoretical_loss": 3.7929657397545733, + "tokens_seen": 727973888 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007871930669234473, + "loss": 0.0767, + "theoretical_loss": 3.792829088269324, + "tokens_seen": 728236032 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007871128229818649, + "loss": 0.077, + "theoretical_loss": 3.792692499733541, + "tokens_seen": 728498176 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007870325790402824, + "loss": 0.0785, + "theoretical_loss": 3.7925559740955896, + "tokens_seen": 728760320 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007869523350987, + "loss": 0.0775, + "theoretical_loss": 3.7924195113038968, + "tokens_seen": 729022464 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007868720911571176, + "loss": 0.0788, + "theoretical_loss": 3.7922831113069493, + "tokens_seen": 729284608 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007867918472155353, + "loss": 0.0762, + "theoretical_loss": 3.792146774053296, + "tokens_seen": 729546752 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007867116032739529, + "loss": 0.077, + "theoretical_loss": 3.792010499491545, + "tokens_seen": 729808896 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007866313593323704, + "loss": 0.0754, + "theoretical_loss": 3.7918742875703657, + "tokens_seen": 730071040 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007865511153907881, + "loss": 0.0763, + "theoretical_loss": 3.7917381382384883, + "tokens_seen": 730333184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007864708714492056, + "loss": 0.0769, + "theoretical_loss": 3.791602051444703, + "tokens_seen": 730595328 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0019119989592581987, + "objective/train/docs_used": 271086, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5188298225402832, + "objective/train/original_loss": 1.5188297033309937, + "objective/train/theoretical_loss": 3.7915340314836077, + "objective/train/tokens_used": 751186400, + "objective/train/value_avg": -0.00708770751953125, + "objective/train/value_loss": 0.00019582056847866625, + "objective/train/value_max": -0.00018966197967529297, + "objective/train/value_min": -0.70556640625, + "objective/train/value_reward_corr": 0.6300847957470939, + "objective/train/value_std": 0.0132293701171875, + "objective/train/weight_avg": 1.0020005702972412, + "objective/train/weighted_lm_loss": 1.5218416452407837, + "objective/train/weights_max": 2.00211238861084, + "objective/train/weights_min": 0.2432425171136856, + "theoretical_loss": 3.7915340314836077, + "tokens_seen": 730726400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007863906275076232, + "loss": 0.0753, + "theoretical_loss": 3.791466027137859, + "tokens_seen": 730857472 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007863103835660408, + "loss": 0.0783, + "theoretical_loss": 3.7913300652668678, + "tokens_seen": 731119616 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007862301396244583, + "loss": 0.0779, + "theoretical_loss": 3.7911941657807002, + "tokens_seen": 731381760 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007861498956828759, + "loss": 0.0754, + "theoretical_loss": 3.7910583286283854, + "tokens_seen": 731643904 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007860696517412936, + "loss": 0.0786, + "theoretical_loss": 3.7909225537590157, + "tokens_seen": 731906048 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007859894077997112, + "loss": 0.0766, + "theoretical_loss": 3.790786841121739, + "tokens_seen": 732168192 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007859091638581287, + "loss": 0.0767, + "theoretical_loss": 3.7906511906657676, + "tokens_seen": 732430336 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007858289199165464, + "loss": 0.0766, + "theoretical_loss": 3.7905156023403697, + "tokens_seen": 732692480 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007857486759749639, + "loss": 0.0781, + "theoretical_loss": 3.7903800760948743, + "tokens_seen": 732954624 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007856684320333814, + "loss": 0.0778, + "theoretical_loss": 3.790244611878671, + "tokens_seen": 733216768 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007855881880917991, + "loss": 0.078, + "theoretical_loss": 3.790109209641206, + "tokens_seen": 733478912 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007855079441502166, + "loss": 0.0769, + "theoretical_loss": 3.7899738693319875, + "tokens_seen": 733741056 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0013161771930754185, + "objective/train/docs_used": 272255, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.631644606590271, + "objective/train/original_loss": 1.6316447257995605, + "objective/train/theoretical_loss": 3.7898385909005814, + "objective/train/tokens_used": 754463200, + "objective/train/value_avg": -0.006435394287109375, + "objective/train/value_loss": 0.0001979928492801264, + "objective/train/value_max": -0.00013446807861328125, + "objective/train/value_min": -0.2325439453125, + "objective/train/value_reward_corr": 0.5276408179266261, + "objective/train/value_std": 0.00835418701171875, + "objective/train/weight_avg": 1.0014052391052246, + "objective/train/weighted_lm_loss": 1.6336127519607544, + "objective/train/weights_max": 1.1831884384155273, + "objective/train/weights_min": 0.3687118589878082, + "theoretical_loss": 3.7898385909005814, + "tokens_seen": 734003200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007854277002086343, + "loss": 0.0802, + "theoretical_loss": 3.7898385909005814, + "tokens_seen": 734003200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007853474562670519, + "loss": 0.0776, + "theoretical_loss": 3.7897033742966135, + "tokens_seen": 734265344 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007852672123254695, + "loss": 0.0776, + "theoretical_loss": 3.789568219469767, + "tokens_seen": 734527488 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007851869683838871, + "loss": 0.08, + "theoretical_loss": 3.789433126369786, + "tokens_seen": 734789632 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007851067244423046, + "loss": 0.0767, + "theoretical_loss": 3.7892980949464716, + "tokens_seen": 735051776 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007850264805007222, + "loss": 0.0781, + "theoretical_loss": 3.7891631251496856, + "tokens_seen": 735313920 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007849462365591398, + "loss": 0.0775, + "theoretical_loss": 3.7890282169293465, + "tokens_seen": 735576064 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007848659926175574, + "loss": 0.0763, + "theoretical_loss": 3.7888933702354324, + "tokens_seen": 735838208 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007847857486759749, + "loss": 0.0796, + "theoretical_loss": 3.7887585850179786, + "tokens_seen": 736100352 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007847055047343926, + "loss": 0.0786, + "theoretical_loss": 3.788623861227081, + "tokens_seen": 736362496 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007846252607928101, + "loss": 0.0752, + "theoretical_loss": 3.7884891988128926, + "tokens_seen": 736624640 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007845450168512277, + "loss": 0.0782, + "theoretical_loss": 3.7883545977256228, + "tokens_seen": 736886784 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007844647729096454, + "loss": 0.0781, + "theoretical_loss": 3.7882200579155416, + "tokens_seen": 737148928 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0006363503634929657, + "objective/train/docs_used": 273341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6099265813827515, + "objective/train/original_loss": 1.609926462173462, + "objective/train/theoretical_loss": 3.788152810973922, + "objective/train/tokens_used": 757740000, + "objective/train/value_avg": -0.0074005126953125, + "objective/train/value_loss": 0.00028310378547757864, + "objective/train/value_max": -0.0001596212387084961, + "objective/train/value_min": -0.5478515625, + "objective/train/value_reward_corr": 0.5414502644341082, + "objective/train/value_std": 0.0122528076171875, + "objective/train/weight_avg": 1.000755786895752, + "objective/train/weighted_lm_loss": 1.6105283498764038, + "objective/train/weights_max": 1.28411865234375, + "objective/train/weights_min": 0.3686028718948364, + "theoretical_loss": 3.788152810973922, + "tokens_seen": 737280000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007843845289680629, + "loss": 0.0792, + "theoretical_loss": 3.788085579332977, + "tokens_seen": 737411072 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007843042850264806, + "loss": 0.0773, + "theoretical_loss": 3.787951161928312, + "tokens_seen": 737673216 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007842240410848981, + "loss": 0.0786, + "theoretical_loss": 3.7878168056519916, + "tokens_seen": 737935360 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007841437971433157, + "loss": 0.0767, + "theoretical_loss": 3.787682510454515, + "tokens_seen": 738197504 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007840635532017333, + "loss": 0.0779, + "theoretical_loss": 3.7875482762864405, + "tokens_seen": 738459648 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007839833092601508, + "loss": 0.0761, + "theoretical_loss": 3.787414103098384, + "tokens_seen": 738721792 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007839030653185684, + "loss": 0.0768, + "theoretical_loss": 3.7872799908410193, + "tokens_seen": 738983936 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007838228213769861, + "loss": 0.0765, + "theoretical_loss": 3.787145939465076, + "tokens_seen": 739246080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007837425774354037, + "loss": 0.0773, + "theoretical_loss": 3.7870119489213425, + "tokens_seen": 739508224 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007836623334938212, + "loss": 0.0795, + "theoretical_loss": 3.786878019160664, + "tokens_seen": 739770368 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007835820895522389, + "loss": 0.0763, + "theoretical_loss": 3.7867441501339427, + "tokens_seen": 740032512 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007835018456106564, + "loss": 0.0783, + "theoretical_loss": 3.7866103417921373, + "tokens_seen": 740294656 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0014227567007765174, + "objective/train/docs_used": 274540, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.675157070159912, + "objective/train/original_loss": 1.6751569509506226, + "objective/train/theoretical_loss": 3.786476594086265, + "objective/train/tokens_used": 761016800, + "objective/train/value_avg": -0.00887298583984375, + "objective/train/value_loss": 0.0004343730106484145, + "objective/train/value_max": -0.00012731552124023438, + "objective/train/value_min": -0.580078125, + "objective/train/value_reward_corr": 0.5689424667790532, + "objective/train/value_std": 0.0146942138671875, + "objective/train/weight_avg": 1.0016205310821533, + "objective/train/weighted_lm_loss": 1.6775554418563843, + "objective/train/weights_max": 1.65679132938385, + "objective/train/weights_min": 0.3746204078197479, + "theoretical_loss": 3.786476594086265, + "tokens_seen": 740556800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007834216016690739, + "loss": 0.0818, + "theoretical_loss": 3.786476594086265, + "tokens_seen": 740556800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007833413577274916, + "loss": 0.0761, + "theoretical_loss": 3.7863429069673984, + "tokens_seen": 740818944 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007832611137859091, + "loss": 0.0774, + "theoretical_loss": 3.7862092803866663, + "tokens_seen": 741081088 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007831808698443267, + "loss": 0.0745, + "theoretical_loss": 3.786075714295257, + "tokens_seen": 741343232 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007831006259027444, + "loss": 0.0797, + "theoretical_loss": 3.7859422086444123, + "tokens_seen": 741605376 + }, + { + "epoch": 0.22, + "learning_rate": 0.000783020381961162, + "loss": 0.078, + "theoretical_loss": 3.7858087633854325, + "tokens_seen": 741867520 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007829401380195796, + "loss": 0.0772, + "theoretical_loss": 3.785675378469673, + "tokens_seen": 742129664 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007828598940779972, + "loss": 0.0772, + "theoretical_loss": 3.7855420538485474, + "tokens_seen": 742391808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007827796501364147, + "loss": 0.0787, + "theoretical_loss": 3.7854087894735233, + "tokens_seen": 742653952 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007826994061948323, + "loss": 0.0751, + "theoretical_loss": 3.7852755852961257, + "tokens_seen": 742916096 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007826191622532499, + "loss": 0.0791, + "theoretical_loss": 3.785142441267936, + "tokens_seen": 743178240 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007825389183116674, + "loss": 0.0778, + "theoretical_loss": 3.7850093573405905, + "tokens_seen": 743440384 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007824586743700851, + "loss": 0.076, + "theoretical_loss": 3.7848763334657827, + "tokens_seen": 743702528 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0006977500161156058, + "objective/train/docs_used": 275779, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6511344909667969, + "objective/train/original_loss": 1.6511344909667969, + "objective/train/theoretical_loss": 3.784809844033, + "objective/train/tokens_used": 764293600, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.0002167196071241051, + "objective/train/value_max": -0.00013768672943115234, + "objective/train/value_min": -0.63427734375, + "objective/train/value_reward_corr": 0.6324548991472896, + "objective/train/value_std": 0.01303863525390625, + "objective/train/weight_avg": 1.0008044242858887, + "objective/train/weighted_lm_loss": 1.6532789468765259, + "objective/train/weights_max": 1.774770736694336, + "objective/train/weights_min": 0.6139143705368042, + "theoretical_loss": 3.784809844033, + "tokens_seen": 743833600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007823784304285027, + "loss": 0.0807, + "theoretical_loss": 3.7847433695952617, + "tokens_seen": 743964672 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007822981864869203, + "loss": 0.0785, + "theoretical_loss": 3.7846104656808306, + "tokens_seen": 744226816 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007822179425453379, + "loss": 0.0759, + "theoretical_loss": 3.7844776216743505, + "tokens_seen": 744488960 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007821376986037554, + "loss": 0.0772, + "theoretical_loss": 3.784344837527737, + "tokens_seen": 744751104 + }, + { + "epoch": 0.23, + "learning_rate": 0.000782057454662173, + "loss": 0.0761, + "theoretical_loss": 3.784212113192961, + "tokens_seen": 745013248 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007819772107205906, + "loss": 0.0775, + "theoretical_loss": 3.7840794486220495, + "tokens_seen": 745275392 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007818969667790082, + "loss": 0.078, + "theoretical_loss": 3.783946843767084, + "tokens_seen": 745537536 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007818167228374257, + "loss": 0.0786, + "theoretical_loss": 3.783814298580203, + "tokens_seen": 745799680 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007817364788958434, + "loss": 0.0774, + "theoretical_loss": 3.7836818130135974, + "tokens_seen": 746061824 + }, + { + "epoch": 0.23, + "learning_rate": 0.000781656234954261, + "loss": 0.0759, + "theoretical_loss": 3.783549387019515, + "tokens_seen": 746323968 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007815759910126786, + "loss": 0.0781, + "theoretical_loss": 3.7834170205502584, + "tokens_seen": 746586112 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007814957470710962, + "loss": 0.0767, + "theoretical_loss": 3.783284713558186, + "tokens_seen": 746848256 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0004192414053250104, + "objective/train/docs_used": 277055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5548205375671387, + "objective/train/original_loss": 1.5548205375671387, + "objective/train/theoretical_loss": 3.783152465995708, + "objective/train/tokens_used": 767570400, + "objective/train/value_avg": -0.0106964111328125, + "objective/train/value_loss": 0.0003498530713841319, + "objective/train/value_max": -0.0001881122589111328, + "objective/train/value_min": -0.37255859375, + "objective/train/value_reward_corr": 0.7141878653817549, + "objective/train/value_std": 0.01702880859375, + "objective/train/weight_avg": 1.0005861520767212, + "objective/train/weighted_lm_loss": 1.5560520887374878, + "objective/train/weights_max": 1.2006479501724243, + "objective/train/weights_min": 0.36900386214256287, + "theoretical_loss": 3.783152465995708, + "tokens_seen": 747110400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007814155031295137, + "loss": 0.0792, + "theoretical_loss": 3.783152465995708, + "tokens_seen": 747110400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007813352591879314, + "loss": 0.0773, + "theoretical_loss": 3.7830202778152935, + "tokens_seen": 747372544 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007812550152463489, + "loss": 0.0751, + "theoretical_loss": 3.7828881489694632, + "tokens_seen": 747634688 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007811747713047665, + "loss": 0.0757, + "theoretical_loss": 3.7827560794107926, + "tokens_seen": 747896832 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007810945273631841, + "loss": 0.0767, + "theoretical_loss": 3.7826240690919137, + "tokens_seen": 748158976 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007810142834216016, + "loss": 0.0792, + "theoretical_loss": 3.7824921179655115, + "tokens_seen": 748421120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007809340394800192, + "loss": 0.0775, + "theoretical_loss": 3.782360225984325, + "tokens_seen": 748683264 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007808537955384369, + "loss": 0.0771, + "theoretical_loss": 3.782228393101149, + "tokens_seen": 748945408 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007807735515968545, + "loss": 0.0775, + "theoretical_loss": 3.78209661926883, + "tokens_seen": 749207552 + }, + { + "epoch": 0.23, + "learning_rate": 0.000780693307655272, + "loss": 0.0774, + "theoretical_loss": 3.781964904440271, + "tokens_seen": 749469696 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007806130637136897, + "loss": 0.0796, + "theoretical_loss": 3.7818332485684283, + "tokens_seen": 749731840 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007805328197721072, + "loss": 0.0772, + "theoretical_loss": 3.781701651606311, + "tokens_seen": 749993984 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007804525758305248, + "loss": 0.0795, + "theoretical_loss": 3.7815701135069846, + "tokens_seen": 750256128 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.002483784221112728, + "objective/train/docs_used": 278273, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4896938800811768, + "objective/train/original_loss": 1.4896938800811768, + "objective/train/theoretical_loss": 3.7815043665162147, + "objective/train/tokens_used": 770847200, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.00018718333740253001, + "objective/train/value_max": -0.00016736984252929688, + "objective/train/value_min": -0.252685546875, + "objective/train/value_reward_corr": 0.7167813323322847, + "objective/train/value_std": 0.014862060546875, + "objective/train/weight_avg": 1.002573013305664, + "objective/train/weighted_lm_loss": 1.4927213191986084, + "objective/train/weights_max": 1.2056413888931274, + "objective/train/weights_min": 0.3711751401424408, + "theoretical_loss": 3.7815043665162147, + "tokens_seen": 750387200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007803723318889424, + "loss": 0.0767, + "theoretical_loss": 3.7814386342235653, + "tokens_seen": 750518272 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007802920879473599, + "loss": 0.0727, + "theoretical_loss": 3.7813072137092254, + "tokens_seen": 750780416 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007802118440057776, + "loss": 0.0752, + "theoretical_loss": 3.7811758519171894, + "tokens_seen": 751042560 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007801316000641952, + "loss": 0.079, + "theoretical_loss": 3.781044548800736, + "tokens_seen": 751304704 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007800513561226128, + "loss": 0.0795, + "theoretical_loss": 3.7809133043131973, + "tokens_seen": 751566848 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007799711121810304, + "loss": 0.0786, + "theoretical_loss": 3.7807821184079584, + "tokens_seen": 751828992 + }, + { + "epoch": 0.23, + "learning_rate": 0.000779890868239448, + "loss": 0.0776, + "theoretical_loss": 3.780650991038459, + "tokens_seen": 752091136 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007798106242978655, + "loss": 0.0775, + "theoretical_loss": 3.7805199221581893, + "tokens_seen": 752353280 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007797303803562831, + "loss": 0.0797, + "theoretical_loss": 3.7803889117206957, + "tokens_seen": 752615424 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007796501364147007, + "loss": 0.079, + "theoretical_loss": 3.7802579596795756, + "tokens_seen": 752877568 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007795698924731182, + "loss": 0.0755, + "theoretical_loss": 3.7801270659884807, + "tokens_seen": 753139712 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007794896485315359, + "loss": 0.078, + "theoretical_loss": 3.7799962306011143, + "tokens_seen": 753401856 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0004187932063359767, + "objective/train/docs_used": 279492, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5806915760040283, + "objective/train/original_loss": 1.5806913375854492, + "objective/train/theoretical_loss": 3.779865453471234, + "objective/train/tokens_used": 774124000, + "objective/train/value_avg": -0.01265716552734375, + "objective/train/value_loss": 0.00038256868720054626, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.55712890625, + "objective/train/value_reward_corr": 0.7937497238851523, + "objective/train/value_std": 0.0217437744140625, + "objective/train/weight_avg": 1.0005953311920166, + "objective/train/weighted_lm_loss": 1.5813276767730713, + "objective/train/weights_max": 1.2988438606262207, + "objective/train/weights_min": 0.3966088891029358, + "theoretical_loss": 3.779865453471234, + "tokens_seen": 753664000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007794094045899534, + "loss": 0.0791, + "theoretical_loss": 3.779865453471234, + "tokens_seen": 753664000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007793291606483711, + "loss": 0.0777, + "theoretical_loss": 3.7797347345526484, + "tokens_seen": 753926144 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007792489167067887, + "loss": 0.0777, + "theoretical_loss": 3.7796040737992205, + "tokens_seen": 754188288 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007791686727652062, + "loss": 0.0784, + "theoretical_loss": 3.7794734711648648, + "tokens_seen": 754450432 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790884288236239, + "loss": 0.0792, + "theoretical_loss": 3.779342926603549, + "tokens_seen": 754712576 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790081848820414, + "loss": 0.0769, + "theoretical_loss": 3.7792124400692924, + "tokens_seen": 754974720 + }, + { + "epoch": 0.23, + "learning_rate": 0.000778927940940459, + "loss": 0.0763, + "theoretical_loss": 3.7790820115161674, + "tokens_seen": 755236864 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007788476969988766, + "loss": 0.0792, + "theoretical_loss": 3.778951640898298, + "tokens_seen": 755499008 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007787674530572942, + "loss": 0.0781, + "theoretical_loss": 3.7788213281698617, + "tokens_seen": 755761152 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007786872091157117, + "loss": 0.0759, + "theoretical_loss": 3.778691073285086, + "tokens_seen": 756023296 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007786069651741294, + "loss": 0.0796, + "theoretical_loss": 3.7785608761982523, + "tokens_seen": 756285440 + }, + { + "epoch": 0.23, + "learning_rate": 0.000778526721232547, + "loss": 0.0765, + "theoretical_loss": 3.7784307368636934, + "tokens_seen": 756547584 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007784464772909645, + "loss": 0.0756, + "theoretical_loss": 3.7783006552357934, + "tokens_seen": 756809728 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0009907586500048637, + "objective/train/docs_used": 280659, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6565080881118774, + "objective/train/original_loss": 1.6565080881118774, + "objective/train/theoretical_loss": 3.7782356360476, + "objective/train/tokens_used": 777400800, + "objective/train/value_avg": -0.006893157958984375, + "objective/train/value_loss": 0.00026105440338142216, + "objective/train/value_max": -9.316205978393555e-05, + "objective/train/value_min": -0.25146484375, + "objective/train/value_reward_corr": 0.6604508877653548, + "objective/train/value_std": 0.011566162109375, + "objective/train/weight_avg": 1.0011118650436401, + "objective/train/weighted_lm_loss": 1.6582515239715576, + "objective/train/weights_max": 1.2315527200698853, + "objective/train/weights_min": 0.3999635875225067, + "theoretical_loss": 3.7782356360476, + "tokens_seen": 756940800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007783662333493822, + "loss": 0.0793, + "theoretical_loss": 3.7781706312689893, + "tokens_seen": 757071872 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007782859894077997, + "loss": 0.078, + "theoretical_loss": 3.7780406649177696, + "tokens_seen": 757334016 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007782057454662173, + "loss": 0.0774, + "theoretical_loss": 3.777910756136673, + "tokens_seen": 757596160 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007781255015246349, + "loss": 0.0779, + "theoretical_loss": 3.777780904880292, + "tokens_seen": 757858304 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007780452575830524, + "loss": 0.0792, + "theoretical_loss": 3.777651111103269, + "tokens_seen": 758120448 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007779650136414702, + "loss": 0.0768, + "theoretical_loss": 3.777521374760298, + "tokens_seen": 758382592 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007778847696998877, + "loss": 0.0795, + "theoretical_loss": 3.7773916958061253, + "tokens_seen": 758644736 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007778045257583053, + "loss": 0.0812, + "theoretical_loss": 3.777262074195548, + "tokens_seen": 758906880 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007777242818167229, + "loss": 0.0791, + "theoretical_loss": 3.777132509883413, + "tokens_seen": 759169024 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007776440378751405, + "loss": 0.0789, + "theoretical_loss": 3.7770030028246215, + "tokens_seen": 759431168 + }, + { + "epoch": 0.23, + "learning_rate": 0.000777563793933558, + "loss": 0.078, + "theoretical_loss": 3.7768735529741226, + "tokens_seen": 759693312 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007774835499919756, + "loss": 0.0757, + "theoretical_loss": 3.776744160286918, + "tokens_seen": 759955456 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.0008638743311166763, + "objective/train/docs_used": 281753, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.592660665512085, + "objective/train/original_loss": 1.592660665512085, + "objective/train/theoretical_loss": 3.77661482471806, + "objective/train/tokens_used": 780677600, + "objective/train/value_avg": -0.0077362060546875, + "objective/train/value_loss": 0.0008141609723679721, + "objective/train/value_max": -8.416175842285156e-05, + "objective/train/value_min": -0.5966796875, + "objective/train/value_reward_corr": 0.6082159732147249, + "objective/train/value_std": 0.015960693359375, + "objective/train/weight_avg": 0.9994794726371765, + "objective/train/weighted_lm_loss": 1.5920522212982178, + "objective/train/weights_max": 1.4571453332901, + "objective/train/weights_min": 0.26885083317756653, + "theoretical_loss": 3.77661482471806, + "tokens_seen": 760217600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007774033060503932, + "loss": 0.0807, + "theoretical_loss": 3.77661482471806, + "tokens_seen": 760217600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007773230621088107, + "loss": 0.0799, + "theoretical_loss": 3.776485546222651, + "tokens_seen": 760479744 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007772428181672284, + "loss": 0.0785, + "theoretical_loss": 3.776356324755847, + "tokens_seen": 760741888 + }, + { + "epoch": 0.23, + "learning_rate": 0.000777162574225646, + "loss": 0.0768, + "theoretical_loss": 3.7762271602728497, + "tokens_seen": 761004032 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007770823302840636, + "loss": 0.0786, + "theoretical_loss": 3.7760980527289156, + "tokens_seen": 761266176 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007770020863424812, + "loss": 0.0756, + "theoretical_loss": 3.77596900207935, + "tokens_seen": 761528320 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007769218424008987, + "loss": 0.0763, + "theoretical_loss": 3.775840008279509, + "tokens_seen": 761790464 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007768415984593163, + "loss": 0.0788, + "theoretical_loss": 3.7757110712847997, + "tokens_seen": 762052608 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007767613545177339, + "loss": 0.0792, + "theoretical_loss": 3.775582191050678, + "tokens_seen": 762314752 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007766811105761515, + "loss": 0.0783, + "theoretical_loss": 3.775453367532651, + "tokens_seen": 762576896 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007766008666345691, + "loss": 0.0783, + "theoretical_loss": 3.775324600686276, + "tokens_seen": 762839040 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007765206226929867, + "loss": 0.0776, + "theoretical_loss": 3.7751958904671614, + "tokens_seen": 763101184 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007764403787514042, + "loss": 0.0786, + "theoretical_loss": 3.7750672368309623, + "tokens_seen": 763363328 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0011741745984181762, + "objective/train/docs_used": 282838, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.454331398010254, + "objective/train/original_loss": 1.454331398010254, + "objective/train/theoretical_loss": 3.775002931217613, + "objective/train/tokens_used": 783954400, + "objective/train/value_avg": -0.00695037841796875, + "objective/train/value_loss": 0.0001637246459722519, + "objective/train/value_max": -6.657838821411133e-05, + "objective/train/value_min": -0.275146484375, + "objective/train/value_reward_corr": 0.6056801602468862, + "objective/train/value_std": 0.01113128662109375, + "objective/train/weight_avg": 1.001247763633728, + "objective/train/weighted_lm_loss": 1.45583176612854, + "objective/train/weights_max": 1.1704035997390747, + "objective/train/weights_min": 0.3682715594768524, + "theoretical_loss": 3.775002931217613, + "tokens_seen": 763494400 + }, + { + "epoch": 0.23, + "learning_rate": 0.000776360134809822, + "loss": 0.0812, + "theoretical_loss": 3.7749386397333873, + "tokens_seen": 763625472 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007762798908682395, + "loss": 0.0781, + "theoretical_loss": 3.774810099130193, + "tokens_seen": 763887616 + }, + { + "epoch": 0.23, + "learning_rate": 0.000776199646926657, + "loss": 0.0786, + "theoretical_loss": 3.7746816149771862, + "tokens_seen": 764149760 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007761194029850747, + "loss": 0.0774, + "theoretical_loss": 3.7745531872302234, + "tokens_seen": 764411904 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007760391590434922, + "loss": 0.0782, + "theoretical_loss": 3.774424815845211, + "tokens_seen": 764674048 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007759589151019098, + "loss": 0.0781, + "theoretical_loss": 3.774296500778105, + "tokens_seen": 764936192 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007758786711603274, + "loss": 0.0784, + "theoretical_loss": 3.77416824198491, + "tokens_seen": 765198336 + }, + { + "epoch": 0.23, + "learning_rate": 0.000775798427218745, + "loss": 0.0764, + "theoretical_loss": 3.7740400394216813, + "tokens_seen": 765460480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007757181832771625, + "loss": 0.0811, + "theoretical_loss": 3.7739118930445223, + "tokens_seen": 765722624 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007756379393355802, + "loss": 0.0799, + "theoretical_loss": 3.7737838028095867, + "tokens_seen": 765984768 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007755576953939978, + "loss": 0.0772, + "theoretical_loss": 3.773655768673077, + "tokens_seen": 766246912 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007754774514524153, + "loss": 0.0806, + "theoretical_loss": 3.7735277905912445, + "tokens_seen": 766509056 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.0006871665827929974, + "objective/train/docs_used": 283988, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5568292140960693, + "objective/train/original_loss": 1.5568292140960693, + "objective/train/theoretical_loss": 3.773399868520391, + "objective/train/tokens_used": 787231200, + "objective/train/value_avg": -0.0089263916015625, + "objective/train/value_loss": 0.000296746235108003, + "objective/train/value_max": -0.00017535686492919922, + "objective/train/value_min": -0.2440185546875, + "objective/train/value_reward_corr": 0.6491560262447342, + "objective/train/value_std": 0.01305389404296875, + "objective/train/weight_avg": 0.9994507431983948, + "objective/train/weighted_lm_loss": 1.5556931495666504, + "objective/train/weights_max": 1.159029483795166, + "objective/train/weights_min": 0.37362140417099, + "theoretical_loss": 3.773399868520391, + "tokens_seen": 766771200 + }, + { + "epoch": 0.23, + "learning_rate": 0.000775397207510833, + "loss": 0.0784, + "theoretical_loss": 3.773399868520391, + "tokens_seen": 766771200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007753169635692505, + "loss": 0.0784, + "theoretical_loss": 3.7732720024168644, + "tokens_seen": 767033344 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007752367196276682, + "loss": 0.0773, + "theoretical_loss": 3.773144192237065, + "tokens_seen": 767295488 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007751564756860857, + "loss": 0.0783, + "theoretical_loss": 3.7730164379374402, + "tokens_seen": 767557632 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007750762317445032, + "loss": 0.0777, + "theoretical_loss": 3.772888739474485, + "tokens_seen": 767819776 + }, + { + "epoch": 0.23, + "learning_rate": 0.000774995987802921, + "loss": 0.0769, + "theoretical_loss": 3.772761096804745, + "tokens_seen": 768081920 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007749157438613385, + "loss": 0.0777, + "theoretical_loss": 3.7726335098848143, + "tokens_seen": 768344064 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007748354999197561, + "loss": 0.0775, + "theoretical_loss": 3.772505978671334, + "tokens_seen": 768606208 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007747552559781737, + "loss": 0.0812, + "theoretical_loss": 3.772378503120996, + "tokens_seen": 768868352 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007746750120365913, + "loss": 0.0787, + "theoretical_loss": 3.7722510831905387, + "tokens_seen": 769130496 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007745947680950088, + "loss": 0.0793, + "theoretical_loss": 3.7721237188367494, + "tokens_seen": 769392640 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007745145241534264, + "loss": 0.0775, + "theoretical_loss": 3.771996410016464, + "tokens_seen": 769654784 + }, + { + "epoch": 0.23, + "learning_rate": 0.000774434280211844, + "loss": 0.0759, + "theoretical_loss": 3.7718691566865665, + "tokens_seen": 769916928 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.001441980479285121, + "objective/train/docs_used": 285055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.580965518951416, + "objective/train/original_loss": 1.580965518951416, + "objective/train/theoretical_loss": 3.7718055508170525, + "objective/train/tokens_used": 790508000, + "objective/train/value_avg": -0.00901031494140625, + "objective/train/value_loss": 0.00021419981203507632, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.2083740234375, + "objective/train/value_reward_corr": 0.6527358181867582, + "objective/train/value_std": 0.0116729736328125, + "objective/train/weight_avg": 1.0015414953231812, + "objective/train/weighted_lm_loss": 1.5829206705093384, + "objective/train/weights_max": 1.149869441986084, + "objective/train/weights_min": 0.4065597653388977, + "theoretical_loss": 3.7718055508170525, + "tokens_seen": 770048000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007743540362702615, + "loss": 0.0782, + "theoretical_loss": 3.7717419588039887, + "tokens_seen": 770179072 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007742737923286792, + "loss": 0.078, + "theoretical_loss": 3.7716148163257115, + "tokens_seen": 770441216 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007741935483870968, + "loss": 0.0749, + "theoretical_loss": 3.7714877292087623, + "tokens_seen": 770703360 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007741133044455145, + "loss": 0.0774, + "theoretical_loss": 3.7713606974102167, + "tokens_seen": 770965504 + }, + { + "epoch": 0.23, + "learning_rate": 0.000774033060503932, + "loss": 0.0808, + "theoretical_loss": 3.7712337208872, + "tokens_seen": 771227648 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007739528165623495, + "loss": 0.0765, + "theoretical_loss": 3.7711067995968826, + "tokens_seen": 771489792 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007738725726207672, + "loss": 0.0798, + "theoretical_loss": 3.770979933496485, + "tokens_seen": 771751936 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007737923286791847, + "loss": 0.0771, + "theoretical_loss": 3.770853122543274, + "tokens_seen": 772014080 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007737120847376023, + "loss": 0.0752, + "theoretical_loss": 3.770726366694564, + "tokens_seen": 772276224 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007736318407960199, + "loss": 0.0788, + "theoretical_loss": 3.7705996659077172, + "tokens_seen": 772538368 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007735515968544375, + "loss": 0.0766, + "theoretical_loss": 3.770473020140143, + "tokens_seen": 772800512 + }, + { + "epoch": 0.23, + "learning_rate": 0.000773471352912855, + "loss": 0.0817, + "theoretical_loss": 3.770346429349299, + "tokens_seen": 773062656 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0003448604547884315, + "objective/train/docs_used": 286255, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4529708623886108, + "objective/train/original_loss": 1.4529707431793213, + "objective/train/theoretical_loss": 3.7702198934926896, + "objective/train/tokens_used": 793784800, + "objective/train/value_avg": -0.009613037109375, + "objective/train/value_loss": 0.000516877043992281, + "objective/train/value_max": -0.0001233816146850586, + "objective/train/value_min": -0.37451171875, + "objective/train/value_reward_corr": 0.5522317075530438, + "objective/train/value_std": 0.016510009765625, + "objective/train/weight_avg": 1.0005626678466797, + "objective/train/weighted_lm_loss": 1.4538027048110962, + "objective/train/weights_max": 1.3192977905273438, + "objective/train/weights_min": 0.36989736557006836, + "theoretical_loss": 3.7702198934926896, + "tokens_seen": 773324800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007733911089712728, + "loss": 0.0771, + "theoretical_loss": 3.7702198934926896, + "tokens_seen": 773324800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007733108650296903, + "loss": 0.0767, + "theoretical_loss": 3.7700934125278653, + "tokens_seen": 773586944 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007732306210881078, + "loss": 0.0805, + "theoretical_loss": 3.7699669864124266, + "tokens_seen": 773849088 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007731503771465255, + "loss": 0.0776, + "theoretical_loss": 3.769840615104018, + "tokens_seen": 774111232 + }, + { + "epoch": 0.23, + "learning_rate": 0.000773070133204943, + "loss": 0.0808, + "theoretical_loss": 3.7697142985603325, + "tokens_seen": 774373376 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007729898892633606, + "loss": 0.0797, + "theoretical_loss": 3.76958803673911, + "tokens_seen": 774635520 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007729096453217782, + "loss": 0.08, + "theoretical_loss": 3.7694618295981375, + "tokens_seen": 774897664 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007728294013801958, + "loss": 0.0779, + "theoretical_loss": 3.769335677095248, + "tokens_seen": 775159808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007727491574386135, + "loss": 0.0805, + "theoretical_loss": 3.769209579188323, + "tokens_seen": 775421952 + }, + { + "epoch": 0.24, + "learning_rate": 0.000772668913497031, + "loss": 0.078, + "theoretical_loss": 3.7690835358352883, + "tokens_seen": 775684096 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007725886695554486, + "loss": 0.0805, + "theoretical_loss": 3.7689575469941183, + "tokens_seen": 775946240 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007725084256138662, + "loss": 0.0791, + "theoretical_loss": 3.768831612622833, + "tokens_seen": 776208384 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007724281816722838, + "loss": 0.0774, + "theoretical_loss": 3.7687057326794986, + "tokens_seen": 776470528 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0015384306898340583, + "objective/train/docs_used": 287303, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5866420269012451, + "objective/train/original_loss": 1.5866421461105347, + "objective/train/theoretical_loss": 3.768642813105222, + "objective/train/tokens_used": 797061600, + "objective/train/value_avg": -0.006908416748046875, + "objective/train/value_loss": 0.00025792099768295884, + "objective/train/value_max": -9.995698928833008e-05, + "objective/train/value_min": -0.2237548828125, + "objective/train/value_reward_corr": 0.42940502937583597, + "objective/train/value_std": 0.0088653564453125, + "objective/train/weight_avg": 1.0016469955444336, + "objective/train/weighted_lm_loss": 1.5896674394607544, + "objective/train/weights_max": 1.1481863260269165, + "objective/train/weights_min": 0.3687625229358673, + "theoretical_loss": 3.768642813105222, + "tokens_seen": 776601600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007723479377307013, + "loss": 0.0805, + "theoretical_loss": 3.768579907122229, + "tokens_seen": 776732672 + }, + { + "epoch": 0.24, + "learning_rate": 0.000772267693789119, + "loss": 0.078, + "theoretical_loss": 3.768454135909183, + "tokens_seen": 776994816 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007721874498475365, + "loss": 0.078, + "theoretical_loss": 3.768328418998567, + "tokens_seen": 777256960 + }, + { + "epoch": 0.24, + "learning_rate": 0.000772107205905954, + "loss": 0.0778, + "theoretical_loss": 3.7682027563486327, + "tokens_seen": 777519104 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007720269619643718, + "loss": 0.0806, + "theoretical_loss": 3.768077147917678, + "tokens_seen": 777781248 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007719467180227893, + "loss": 0.0772, + "theoretical_loss": 3.7679515936640477, + "tokens_seen": 778043392 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007718664740812069, + "loss": 0.0758, + "theoretical_loss": 3.7678260935461316, + "tokens_seen": 778305536 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007717862301396245, + "loss": 0.0741, + "theoretical_loss": 3.767700647522366, + "tokens_seen": 778567680 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007717059861980421, + "loss": 0.0735, + "theoretical_loss": 3.7675752555512334, + "tokens_seen": 778829824 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007716257422564597, + "loss": 0.0783, + "theoretical_loss": 3.7674499175912617, + "tokens_seen": 779091968 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007715454983148772, + "loss": 0.0776, + "theoretical_loss": 3.767324633601024, + "tokens_seen": 779354112 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007714652543732948, + "loss": 0.0788, + "theoretical_loss": 3.7671994035391405, + "tokens_seen": 779616256 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0009066300117410719, + "objective/train/docs_used": 288522, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5356212854385376, + "objective/train/original_loss": 1.535621166229248, + "objective/train/theoretical_loss": 3.767074227364275, + "objective/train/tokens_used": 800338400, + "objective/train/value_avg": -0.005023956298828125, + "objective/train/value_loss": 0.0001440069027012214, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.1871337890625, + "objective/train/value_reward_corr": 0.5712434670773441, + "objective/train/value_std": 0.006877899169921875, + "objective/train/weight_avg": 1.0009735822677612, + "objective/train/weighted_lm_loss": 1.5371370315551758, + "objective/train/weights_max": 1.0837024450302124, + "objective/train/weights_min": 0.3689109683036804, + "theoretical_loss": 3.767074227364275, + "tokens_seen": 779878400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007713850104317124, + "loss": 0.0805, + "theoretical_loss": 3.767074227364275, + "tokens_seen": 779878400 + }, + { + "epoch": 0.24, + "learning_rate": 0.00077130476649013, + "loss": 0.0804, + "theoretical_loss": 3.7669491050351396, + "tokens_seen": 780140544 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007712245225485476, + "loss": 0.0793, + "theoretical_loss": 3.7668240365104895, + "tokens_seen": 780402688 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007711442786069653, + "loss": 0.08, + "theoretical_loss": 3.7666990217491265, + "tokens_seen": 780664832 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007710640346653828, + "loss": 0.0778, + "theoretical_loss": 3.7665740607098974, + "tokens_seen": 780926976 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007709837907238003, + "loss": 0.0771, + "theoretical_loss": 3.7664491533516946, + "tokens_seen": 781189120 + }, + { + "epoch": 0.24, + "learning_rate": 0.000770903546782218, + "loss": 0.0773, + "theoretical_loss": 3.766324299633455, + "tokens_seen": 781451264 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007708233028406355, + "loss": 0.0787, + "theoretical_loss": 3.766199499514162, + "tokens_seen": 781713408 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007707430588990531, + "loss": 0.0755, + "theoretical_loss": 3.7660747529528424, + "tokens_seen": 781975552 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007706628149574707, + "loss": 0.079, + "theoretical_loss": 3.76595005990857, + "tokens_seen": 782237696 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007705825710158883, + "loss": 0.0752, + "theoretical_loss": 3.7658254203404615, + "tokens_seen": 782499840 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007705023270743058, + "loss": 0.0773, + "theoretical_loss": 3.7657008342076796, + "tokens_seen": 782761984 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007704220831327236, + "loss": 0.0761, + "theoretical_loss": 3.765576301469433, + "tokens_seen": 783024128 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0012172440765425563, + "objective/train/docs_used": 289672, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6650699377059937, + "objective/train/original_loss": 1.6650700569152832, + "objective/train/theoretical_loss": 3.7655140551105246, + "objective/train/tokens_used": 803615200, + "objective/train/value_avg": -0.0089111328125, + "objective/train/value_loss": 0.000253047764999792, + "objective/train/value_max": -0.00019872188568115234, + "objective/train/value_min": -0.36279296875, + "objective/train/value_reward_corr": 0.6359651215028251, + "objective/train/value_std": 0.0158538818359375, + "objective/train/weight_avg": 1.0013352632522583, + "objective/train/weighted_lm_loss": 1.6672289371490479, + "objective/train/weights_max": 1.2659153938293457, + "objective/train/weights_min": 0.3686739206314087, + "theoretical_loss": 3.7655140551105246, + "tokens_seen": 783155200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007703418391911411, + "loss": 0.0775, + "theoretical_loss": 3.7654518220849726, + "tokens_seen": 783286272 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007702615952495587, + "loss": 0.0769, + "theoretical_loss": 3.7653273960135962, + "tokens_seen": 783548416 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007701813513079763, + "loss": 0.081, + "theoretical_loss": 3.765203023214645, + "tokens_seen": 783810560 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007701011073663938, + "loss": 0.0755, + "theoretical_loss": 3.7650787036475055, + "tokens_seen": 784072704 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007700208634248115, + "loss": 0.0768, + "theoretical_loss": 3.764954437271608, + "tokens_seen": 784334848 + }, + { + "epoch": 0.24, + "learning_rate": 0.000769940619483229, + "loss": 0.0718, + "theoretical_loss": 3.7648302240464284, + "tokens_seen": 784596992 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007698603755416466, + "loss": 0.0764, + "theoretical_loss": 3.764706063931486, + "tokens_seen": 784859136 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007697801316000643, + "loss": 0.0786, + "theoretical_loss": 3.764581956886345, + "tokens_seen": 785121280 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007696998876584818, + "loss": 0.0778, + "theoretical_loss": 3.7644579028706135, + "tokens_seen": 785383424 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007696196437168994, + "loss": 0.0759, + "theoretical_loss": 3.764333901843944, + "tokens_seen": 785645568 + }, + { + "epoch": 0.24, + "learning_rate": 0.000769539399775317, + "loss": 0.0775, + "theoretical_loss": 3.764209953766033, + "tokens_seen": 785907712 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007694591558337346, + "loss": 0.0755, + "theoretical_loss": 3.7640860585966207, + "tokens_seen": 786169856 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0007303394959308207, + "objective/train/docs_used": 290810, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4556148052215576, + "objective/train/original_loss": 1.4556148052215576, + "objective/train/theoretical_loss": 3.763962216295493, + "objective/train/tokens_used": 806892000, + "objective/train/value_avg": -0.0081787109375, + "objective/train/value_loss": 0.00028056377777829766, + "objective/train/value_max": -0.0001881122589111328, + "objective/train/value_min": -0.37158203125, + "objective/train/value_reward_corr": 0.6735432986137638, + "objective/train/value_std": 0.01500701904296875, + "objective/train/weight_avg": 1.0008543729782104, + "objective/train/weighted_lm_loss": 1.4567698240280151, + "objective/train/weights_max": 1.129281997680664, + "objective/train/weights_min": 0.36916786432266235, + "theoretical_loss": 3.763962216295493, + "tokens_seen": 786432000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007693789118921521, + "loss": 0.0757, + "theoretical_loss": 3.763962216295493, + "tokens_seen": 786432000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007692986679505698, + "loss": 0.0762, + "theoretical_loss": 3.7638384268224776, + "tokens_seen": 786694144 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007692184240089873, + "loss": 0.0775, + "theoretical_loss": 3.7637146901374474, + "tokens_seen": 786956288 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007691381800674048, + "loss": 0.0788, + "theoretical_loss": 3.7635910062003193, + "tokens_seen": 787218432 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007690579361258225, + "loss": 0.0768, + "theoretical_loss": 3.7634673749710523, + "tokens_seen": 787480576 + }, + { + "epoch": 0.24, + "learning_rate": 0.00076897769218424, + "loss": 0.0755, + "theoretical_loss": 3.763343796409651, + "tokens_seen": 787742720 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007688974482426578, + "loss": 0.0761, + "theoretical_loss": 3.7632202704761637, + "tokens_seen": 788004864 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007688172043010753, + "loss": 0.0757, + "theoretical_loss": 3.7630967971306797, + "tokens_seen": 788267008 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007687369603594929, + "loss": 0.0768, + "theoretical_loss": 3.762973376333335, + "tokens_seen": 788529152 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007686567164179105, + "loss": 0.0777, + "theoretical_loss": 3.7628500080443077, + "tokens_seen": 788791296 + }, + { + "epoch": 0.24, + "learning_rate": 0.000768576472476328, + "loss": 0.0771, + "theoretical_loss": 3.7627266922238185, + "tokens_seen": 789053440 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007684962285347456, + "loss": 0.0769, + "theoretical_loss": 3.762603428832133, + "tokens_seen": 789315584 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007684159845931632, + "loss": 0.0751, + "theoretical_loss": 3.7624802178295584, + "tokens_seen": 789577728 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0018347672885283828, + "objective/train/docs_used": 292000, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7165638208389282, + "objective/train/original_loss": 1.7165637016296387, + "objective/train/theoretical_loss": 3.762418631961796, + "objective/train/tokens_used": 810168800, + "objective/train/value_avg": -0.00826263427734375, + "objective/train/value_loss": 9.183614747598767e-05, + "objective/train/value_max": -0.00014090538024902344, + "objective/train/value_min": -0.263671875, + "objective/train/value_reward_corr": 0.8270739313759928, + "objective/train/value_std": 0.01273345947265625, + "objective/train/weight_avg": 1.0018802881240845, + "objective/train/weighted_lm_loss": 1.7201040983200073, + "objective/train/weights_max": 1.1363343000411987, + "objective/train/weights_min": 0.8306474089622498, + "theoretical_loss": 3.762418631961796, + "tokens_seen": 789708800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007683357406515808, + "loss": 0.078, + "theoretical_loss": 3.762357059176447, + "tokens_seen": 789839872 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007682554967099984, + "loss": 0.0776, + "theoretical_loss": 3.762233952833193, + "tokens_seen": 790102016 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007681752527684161, + "loss": 0.0799, + "theoretical_loss": 3.7621108987602336, + "tokens_seen": 790364160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007680950088268336, + "loss": 0.0766, + "theoretical_loss": 3.76198789691805, + "tokens_seen": 790626304 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007680147648852511, + "loss": 0.0798, + "theoretical_loss": 3.7618649472671652, + "tokens_seen": 790888448 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007679345209436688, + "loss": 0.0783, + "theoretical_loss": 3.761742049768146, + "tokens_seen": 791150592 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007678542770020863, + "loss": 0.0789, + "theoretical_loss": 3.761619204381602, + "tokens_seen": 791412736 + }, + { + "epoch": 0.24, + "learning_rate": 0.000767774033060504, + "loss": 0.0753, + "theoretical_loss": 3.7614964110681846, + "tokens_seen": 791674880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007676937891189215, + "loss": 0.0771, + "theoretical_loss": 3.761373669788589, + "tokens_seen": 791937024 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007676135451773391, + "loss": 0.0766, + "theoretical_loss": 3.7612509805035526, + "tokens_seen": 792199168 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007675333012357568, + "loss": 0.0751, + "theoretical_loss": 3.761128343173856, + "tokens_seen": 792461312 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007674530572941743, + "loss": 0.0757, + "theoretical_loss": 3.7610057577603215, + "tokens_seen": 792723456 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.00022212705516722053, + "objective/train/docs_used": 293128, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.473114252090454, + "objective/train/original_loss": 1.473114252090454, + "objective/train/theoretical_loss": 3.7608832242238144, + "objective/train/tokens_used": 813445600, + "objective/train/value_avg": -0.0098419189453125, + "objective/train/value_loss": 0.00027089385548606515, + "objective/train/value_max": -0.00020182132720947266, + "objective/train/value_min": -0.5673828125, + "objective/train/value_reward_corr": 0.6538193866153705, + "objective/train/value_std": 0.01425933837890625, + "objective/train/weight_avg": 1.0003442764282227, + "objective/train/weighted_lm_loss": 1.473488211631775, + "objective/train/weights_max": 1.16512930393219, + "objective/train/weights_min": 0.36934465169906616, + "theoretical_loss": 3.7608832242238144, + "tokens_seen": 792985600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007673728133525919, + "loss": 0.0782, + "theoretical_loss": 3.7608832242238144, + "tokens_seen": 792985600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007672925694110095, + "loss": 0.0779, + "theoretical_loss": 3.7607607425252416, + "tokens_seen": 793247744 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007672123254694271, + "loss": 0.079, + "theoretical_loss": 3.7606383126255536, + "tokens_seen": 793509888 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007671320815278446, + "loss": 0.0794, + "theoretical_loss": 3.760515934485743, + "tokens_seen": 793772032 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007670518375862623, + "loss": 0.0777, + "theoretical_loss": 3.760393608066843, + "tokens_seen": 794034176 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007669715936446798, + "loss": 0.076, + "theoretical_loss": 3.760271333329932, + "tokens_seen": 794296320 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007668913497030974, + "loss": 0.078, + "theoretical_loss": 3.7601491102361275, + "tokens_seen": 794558464 + }, + { + "epoch": 0.24, + "learning_rate": 0.000766811105761515, + "loss": 0.0777, + "theoretical_loss": 3.7600269387465914, + "tokens_seen": 794820608 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007667308618199326, + "loss": 0.0781, + "theoretical_loss": 3.759904818822525, + "tokens_seen": 795082752 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007666506178783502, + "loss": 0.0789, + "theoretical_loss": 3.759782750425175, + "tokens_seen": 795344896 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007665703739367678, + "loss": 0.076, + "theoretical_loss": 3.759660733515826, + "tokens_seen": 795607040 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007664901299951854, + "loss": 0.0767, + "theoretical_loss": 3.7595387680558088, + "tokens_seen": 795869184 + }, + { + "epoch": 0.24, + "learning_rate": 0.000766409886053603, + "loss": 0.0792, + "theoretical_loss": 3.759416854006492, + "tokens_seen": 796131328 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.001882580341771245, + "objective/train/docs_used": 294439, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6294305324554443, + "objective/train/original_loss": 1.6294307708740234, + "objective/train/theoretical_loss": 3.7593559162487864, + "objective/train/tokens_used": 816722400, + "objective/train/value_avg": -0.00921630859375, + "objective/train/value_loss": 0.00032186240423470736, + "objective/train/value_max": -0.00012636184692382812, + "objective/train/value_min": -0.79052734375, + "objective/train/value_reward_corr": 0.6324796724873081, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.0020285844802856, + "objective/train/weighted_lm_loss": 1.6332080364227295, + "objective/train/weights_max": 1.9955464601516724, + "objective/train/weights_min": 0.39590752124786377, + "theoretical_loss": 3.7593559162487864, + "tokens_seen": 796262400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007663296421120206, + "loss": 0.0795, + "theoretical_loss": 3.7592949913292886, + "tokens_seen": 796393472 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007662493981704381, + "loss": 0.0787, + "theoretical_loss": 3.759173179985652, + "tokens_seen": 796655616 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007661691542288557, + "loss": 0.0749, + "theoretical_loss": 3.7590514199370775, + "tokens_seen": 796917760 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007660889102872733, + "loss": 0.0737, + "theoretical_loss": 3.758929711145101, + "tokens_seen": 797179904 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007660086663456909, + "loss": 0.0756, + "theoretical_loss": 3.758808053571302, + "tokens_seen": 797442048 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007659284224041086, + "loss": 0.0805, + "theoretical_loss": 3.7586864471772996, + "tokens_seen": 797704192 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007658481784625261, + "loss": 0.0772, + "theoretical_loss": 3.758564891924755, + "tokens_seen": 797966336 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007657679345209437, + "loss": 0.0794, + "theoretical_loss": 3.758443387775371, + "tokens_seen": 798228480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007656876905793613, + "loss": 0.0764, + "theoretical_loss": 3.7583219346908905, + "tokens_seen": 798490624 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007656074466377788, + "loss": 0.0772, + "theoretical_loss": 3.758200532633099, + "tokens_seen": 798752768 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007655272026961964, + "loss": 0.0768, + "theoretical_loss": 3.7580791815638213, + "tokens_seen": 799014912 + }, + { + "epoch": 0.24, + "learning_rate": 0.000765446958754614, + "loss": 0.0785, + "theoretical_loss": 3.7579578814449253, + "tokens_seen": 799277056 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0015031647635623813, + "objective/train/docs_used": 295724, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.440466046333313, + "objective/train/original_loss": 1.4404661655426025, + "objective/train/theoretical_loss": 3.7578366322383188, + "objective/train/tokens_used": 819999200, + "objective/train/value_avg": -0.005229949951171875, + "objective/train/value_loss": 0.00010367185313953087, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.90234375, + "objective/train/value_reward_corr": 0.7276015339903671, + "objective/train/value_std": 0.01129150390625, + "objective/train/weight_avg": 1.0015531778335571, + "objective/train/weighted_lm_loss": 1.4429603815078735, + "objective/train/weights_max": 1.3412903547286987, + "objective/train/weights_min": 0.5442660450935364, + "theoretical_loss": 3.7578366322383188, + "tokens_seen": 799539200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007653667148130316, + "loss": 0.0781, + "theoretical_loss": 3.7578366322383188, + "tokens_seen": 799539200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007652864708714491, + "loss": 0.0763, + "theoretical_loss": 3.7577154339059504, + "tokens_seen": 799801344 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007652062269298669, + "loss": 0.077, + "theoretical_loss": 3.7575942864098106, + "tokens_seen": 800063488 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007651259829882844, + "loss": 0.0758, + "theoretical_loss": 3.75747318971193, + "tokens_seen": 800325632 + }, + { + "epoch": 0.24, + "learning_rate": 0.000765045739046702, + "loss": 0.0757, + "theoretical_loss": 3.7573521437743795, + "tokens_seen": 800587776 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007649654951051196, + "loss": 0.0775, + "theoretical_loss": 3.7572311485592715, + "tokens_seen": 800849920 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007648852511635371, + "loss": 0.0755, + "theoretical_loss": 3.7571102040287596, + "tokens_seen": 801112064 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007648050072219548, + "loss": 0.0804, + "theoretical_loss": 3.7569893101450367, + "tokens_seen": 801374208 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007647247632803723, + "loss": 0.0805, + "theoretical_loss": 3.756868466870337, + "tokens_seen": 801636352 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007646445193387899, + "loss": 0.0782, + "theoretical_loss": 3.7567476741669346, + "tokens_seen": 801898496 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007645642753972076, + "loss": 0.078, + "theoretical_loss": 3.756626931997145, + "tokens_seen": 802160640 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007644840314556251, + "loss": 0.0746, + "theoretical_loss": 3.7565062403233234, + "tokens_seen": 802422784 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007644037875140427, + "loss": 0.0743, + "theoretical_loss": 3.7563855991078654, + "tokens_seen": 802684928 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": -0.0006523468182422221, + "objective/train/docs_used": 296852, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5035277605056763, + "objective/train/original_loss": 1.5035279989242554, + "objective/train/theoretical_loss": 3.7563252974102825, + "objective/train/tokens_used": 823276000, + "objective/train/value_avg": -0.00974273681640625, + "objective/train/value_loss": 0.00043232380994595587, + "objective/train/value_max": -0.0001823902130126953, + "objective/train/value_min": -0.93505859375, + "objective/train/value_reward_corr": 0.643231403068373, + "objective/train/value_std": 0.01873779296875, + "objective/train/weight_avg": 0.9995439648628235, + "objective/train/weighted_lm_loss": 1.5028263330459595, + "objective/train/weights_max": 2.1485886573791504, + "objective/train/weights_min": 0.3683769106864929, + "theoretical_loss": 3.7563252974102825, + "tokens_seen": 802816000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007643235435724603, + "loss": 0.0728, + "theoretical_loss": 3.7562650083132074, + "tokens_seen": 802947072 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007642432996308779, + "loss": 0.0785, + "theoretical_loss": 3.756144467901825, + "tokens_seen": 803209216 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007641630556892954, + "loss": 0.0779, + "theoretical_loss": 3.756023977836235, + "tokens_seen": 803471360 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007640828117477131, + "loss": 0.0768, + "theoretical_loss": 3.755903538078994, + "tokens_seen": 803733504 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007640025678061306, + "loss": 0.0804, + "theoretical_loss": 3.7557831485926982, + "tokens_seen": 803995648 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007639223238645483, + "loss": 0.0787, + "theoretical_loss": 3.7556628093399835, + "tokens_seen": 804257792 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007638420799229659, + "loss": 0.0803, + "theoretical_loss": 3.7555425202835275, + "tokens_seen": 804519936 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007637618359813834, + "loss": 0.0738, + "theoretical_loss": 3.7554222813860463, + "tokens_seen": 804782080 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007636815920398011, + "loss": 0.0771, + "theoretical_loss": 3.7553020926102954, + "tokens_seen": 805044224 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007636013480982186, + "loss": 0.0785, + "theoretical_loss": 3.755181953919071, + "tokens_seen": 805306368 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007635211041566362, + "loss": 0.0768, + "theoretical_loss": 3.755061865275209, + "tokens_seen": 805568512 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007634408602150538, + "loss": 0.078, + "theoretical_loss": 3.754941826641584, + "tokens_seen": 805830656 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.001965285511687398, + "objective/train/docs_used": 297892, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5463677644729614, + "objective/train/original_loss": 1.546367883682251, + "objective/train/theoretical_loss": 3.754821837981112, + "objective/train/tokens_used": 826552800, + "objective/train/value_avg": -0.005489349365234375, + "objective/train/value_loss": 7.032585563138127e-05, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.2242431640625, + "objective/train/value_reward_corr": 0.6211484433775776, + "objective/train/value_std": 0.007572174072265625, + "objective/train/weight_avg": 1.0019999742507935, + "objective/train/weighted_lm_loss": 1.5493464469909668, + "objective/train/weights_max": 1.1479060649871826, + "objective/train/weights_min": 0.7884321212768555, + "theoretical_loss": 3.754821837981112, + "tokens_seen": 806092800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007633606162734714, + "loss": 0.0756, + "theoretical_loss": 3.754821837981112, + "tokens_seen": 806092800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007632803723318889, + "loss": 0.0761, + "theoretical_loss": 3.7547018992567462, + "tokens_seen": 806354944 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007632001283903065, + "loss": 0.0792, + "theoretical_loss": 3.7545820104314815, + "tokens_seen": 806617088 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007631198844487241, + "loss": 0.0745, + "theoretical_loss": 3.7544621714683517, + "tokens_seen": 806879232 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007630396405071417, + "loss": 0.0765, + "theoretical_loss": 3.754342382330428, + "tokens_seen": 807141376 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007629593965655594, + "loss": 0.0749, + "theoretical_loss": 3.7542226429808236, + "tokens_seen": 807403520 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007628791526239769, + "loss": 0.0744, + "theoretical_loss": 3.7541029533826893, + "tokens_seen": 807665664 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007627989086823945, + "loss": 0.0755, + "theoretical_loss": 3.7539833134992158, + "tokens_seen": 807927808 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007627186647408121, + "loss": 0.0784, + "theoretical_loss": 3.753863723293634, + "tokens_seen": 808189952 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007626384207992296, + "loss": 0.0768, + "theoretical_loss": 3.7537441827292106, + "tokens_seen": 808452096 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007625581768576473, + "loss": 0.0773, + "theoretical_loss": 3.753624691769255, + "tokens_seen": 808714240 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007624779329160648, + "loss": 0.0793, + "theoretical_loss": 3.7535052503771142, + "tokens_seen": 808976384 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007623976889744824, + "loss": 0.0757, + "theoretical_loss": 3.7533858585161735, + "tokens_seen": 809238528 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0006120595498941839, + "objective/train/docs_used": 298985, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5744049549102783, + "objective/train/original_loss": 1.5744049549102783, + "objective/train/theoretical_loss": 3.753326181148472, + "objective/train/tokens_used": 829829600, + "objective/train/value_avg": -0.0063629150390625, + "objective/train/value_loss": 0.00018711324082687497, + "objective/train/value_max": -8.153915405273438e-05, + "objective/train/value_min": -0.1954345703125, + "objective/train/value_reward_corr": 0.5172567162546362, + "objective/train/value_std": 0.0081634521484375, + "objective/train/weight_avg": 1.0006967782974243, + "objective/train/weighted_lm_loss": 1.575119972229004, + "objective/train/weights_max": 1.1109601259231567, + "objective/train/weights_min": 0.37364134192466736, + "theoretical_loss": 3.753326181148472, + "tokens_seen": 809369600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007623174450329001, + "loss": 0.0797, + "theoretical_loss": 3.753266516149858, + "tokens_seen": 809500672 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007622372010913177, + "loss": 0.0768, + "theoretical_loss": 3.7531472232416316, + "tokens_seen": 809762816 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007621569571497352, + "loss": 0.0739, + "theoretical_loss": 3.7530279797549957, + "tokens_seen": 810024960 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007620767132081528, + "loss": 0.075, + "theoretical_loss": 3.752908785653492, + "tokens_seen": 810287104 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007619964692665704, + "loss": 0.0758, + "theoretical_loss": 3.7527896409007004, + "tokens_seen": 810549248 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007619162253249879, + "loss": 0.0781, + "theoretical_loss": 3.7526705454602394, + "tokens_seen": 810811392 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007618359813834056, + "loss": 0.0772, + "theoretical_loss": 3.752551499295766, + "tokens_seen": 811073536 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007617557374418231, + "loss": 0.0767, + "theoretical_loss": 3.7524325023709757, + "tokens_seen": 811335680 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007616754935002407, + "loss": 0.0738, + "theoretical_loss": 3.7523135546496023, + "tokens_seen": 811597824 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007615952495586584, + "loss": 0.0773, + "theoretical_loss": 3.7521946560954182, + "tokens_seen": 811859968 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007615150056170759, + "loss": 0.0768, + "theoretical_loss": 3.7520758066722344, + "tokens_seen": 812122112 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007614347616754936, + "loss": 0.0751, + "theoretical_loss": 3.7519570063438996, + "tokens_seen": 812384256 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.002464708173647523, + "objective/train/docs_used": 300102, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4953174591064453, + "objective/train/original_loss": 1.4953173398971558, + "objective/train/theoretical_loss": 3.7518382550743024, + "objective/train/tokens_used": 833106400, + "objective/train/value_avg": -0.0084686279296875, + "objective/train/value_loss": 0.00014236473361961544, + "objective/train/value_max": -0.0001323223114013672, + "objective/train/value_min": -0.236572265625, + "objective/train/value_reward_corr": 0.6225013810544121, + "objective/train/value_std": 0.01258087158203125, + "objective/train/weight_avg": 1.00253164768219, + "objective/train/weighted_lm_loss": 1.4988830089569092, + "objective/train/weights_max": 1.1583771705627441, + "objective/train/weights_min": 0.36844298243522644, + "theoretical_loss": 3.7518382550743024, + "tokens_seen": 812646400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007613545177339111, + "loss": 0.0764, + "theoretical_loss": 3.7518382550743024, + "tokens_seen": 812646400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007612742737923287, + "loss": 0.0778, + "theoretical_loss": 3.7517195528273666, + "tokens_seen": 812908544 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007611940298507463, + "loss": 0.0758, + "theoretical_loss": 3.751600899567057, + "tokens_seen": 813170688 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007611137859091639, + "loss": 0.0725, + "theoretical_loss": 3.7514822952573743, + "tokens_seen": 813432832 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007610335419675814, + "loss": 0.0773, + "theoretical_loss": 3.7513637398623603, + "tokens_seen": 813694976 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007609532980259991, + "loss": 0.0759, + "theoretical_loss": 3.751245233346091, + "tokens_seen": 813957120 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007608730540844167, + "loss": 0.077, + "theoretical_loss": 3.7511267756726823, + "tokens_seen": 814219264 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007607928101428342, + "loss": 0.0786, + "theoretical_loss": 3.7510083668062886, + "tokens_seen": 814481408 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007607125662012519, + "loss": 0.075, + "theoretical_loss": 3.750890006711101, + "tokens_seen": 814743552 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007606323222596694, + "loss": 0.0752, + "theoretical_loss": 3.7507716953513492, + "tokens_seen": 815005696 + }, + { + "epoch": 0.25, + "learning_rate": 0.000760552078318087, + "loss": 0.0787, + "theoretical_loss": 3.7506534326912995, + "tokens_seen": 815267840 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007604718343765046, + "loss": 0.0772, + "theoretical_loss": 3.7505352186952567, + "tokens_seen": 815529984 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007603915904349222, + "loss": 0.074, + "theoretical_loss": 3.7504170533275634, + "tokens_seen": 815792128 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0003943704068660736, + "objective/train/docs_used": 301260, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4938346147537231, + "objective/train/original_loss": 1.4938344955444336, + "objective/train/theoretical_loss": 3.7503579888682155, + "objective/train/tokens_used": 836383200, + "objective/train/value_avg": -0.00901031494140625, + "objective/train/value_loss": 0.0003670316655188799, + "objective/train/value_max": -0.0001080632209777832, + "objective/train/value_min": -0.38720703125, + "objective/train/value_reward_corr": 0.6623424603359667, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.0005619525909424, + "objective/train/weighted_lm_loss": 1.4935811758041382, + "objective/train/weights_max": 1.3037108182907104, + "objective/train/weights_min": 0.36955323815345764, + "theoretical_loss": 3.7503579888682155, + "tokens_seen": 815923200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007603113464933397, + "loss": 0.0733, + "theoretical_loss": 3.7502989365526, + "tokens_seen": 816054272 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007602311025517573, + "loss": 0.0764, + "theoretical_loss": 3.7501808683347826, + "tokens_seen": 816316416 + }, + { + "epoch": 0.25, + "learning_rate": 0.000760150858610175, + "loss": 0.0753, + "theoretical_loss": 3.7500628486385668, + "tokens_seen": 816578560 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007600706146685926, + "loss": 0.0747, + "theoretical_loss": 3.7499448774284447, + "tokens_seen": 816840704 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007599903707270102, + "loss": 0.0771, + "theoretical_loss": 3.749826954668946, + "tokens_seen": 817102848 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007599101267854277, + "loss": 0.0774, + "theoretical_loss": 3.7497090803246387, + "tokens_seen": 817364992 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007598298828438454, + "loss": 0.075, + "theoretical_loss": 3.7495912543601246, + "tokens_seen": 817627136 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007597496389022629, + "loss": 0.0778, + "theoretical_loss": 3.7494734767400475, + "tokens_seen": 817889280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007596693949606804, + "loss": 0.0766, + "theoretical_loss": 3.7493557474290853, + "tokens_seen": 818151424 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007595891510190981, + "loss": 0.0745, + "theoretical_loss": 3.7492380663919533, + "tokens_seen": 818413568 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007595089070775156, + "loss": 0.0762, + "theoretical_loss": 3.7491204335934043, + "tokens_seen": 818675712 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007594286631359332, + "loss": 0.0761, + "theoretical_loss": 3.7490028489982286, + "tokens_seen": 818937856 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0018716322956606746, + "objective/train/docs_used": 302476, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5703623294830322, + "objective/train/original_loss": 1.5703623294830322, + "objective/train/theoretical_loss": 3.7488853125712525, + "objective/train/tokens_used": 839660000, + "objective/train/value_avg": -0.011810302734375, + "objective/train/value_loss": 0.0005407900898717344, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.86669921875, + "objective/train/value_reward_corr": 0.6016890575229221, + "objective/train/value_std": 0.01898193359375, + "objective/train/weight_avg": 1.0021065473556519, + "objective/train/weighted_lm_loss": 1.5734965801239014, + "objective/train/weights_max": 1.6662120819091797, + "objective/train/weights_min": 0.3839375674724579, + "theoretical_loss": 3.7488853125712525, + "tokens_seen": 819200000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007593484191943509, + "loss": 0.073, + "theoretical_loss": 3.7488853125712525, + "tokens_seen": 819200000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007592681752527685, + "loss": 0.0737, + "theoretical_loss": 3.7487678242773406, + "tokens_seen": 819462144 + }, + { + "epoch": 0.25, + "learning_rate": 0.000759187931311186, + "loss": 0.0774, + "theoretical_loss": 3.748650384081392, + "tokens_seen": 819724288 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007591076873696036, + "loss": 0.0769, + "theoretical_loss": 3.7485329919483448, + "tokens_seen": 819986432 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007590274434280212, + "loss": 0.0746, + "theoretical_loss": 3.7484156478431734, + "tokens_seen": 820248576 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007589471994864387, + "loss": 0.0751, + "theoretical_loss": 3.748298351730888, + "tokens_seen": 820510720 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007588669555448564, + "loss": 0.0756, + "theoretical_loss": 3.748181103576537, + "tokens_seen": 820772864 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007587867116032739, + "loss": 0.0773, + "theoretical_loss": 3.7480639033452032, + "tokens_seen": 821035008 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007587064676616916, + "loss": 0.078, + "theoretical_loss": 3.747946751002009, + "tokens_seen": 821297152 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007586262237201092, + "loss": 0.0783, + "theoretical_loss": 3.747829646512109, + "tokens_seen": 821559296 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007585459797785267, + "loss": 0.0765, + "theoretical_loss": 3.747712589840699, + "tokens_seen": 821821440 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007584657358369444, + "loss": 0.075, + "theoretical_loss": 3.7475955809530084, + "tokens_seen": 822083584 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007583854918953619, + "loss": 0.0732, + "theoretical_loss": 3.747478619814303, + "tokens_seen": 822345728 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.000402974575990811, + "objective/train/docs_used": 303734, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5402162075042725, + "objective/train/original_loss": 1.540216088294983, + "objective/train/theoretical_loss": 3.7474201571399757, + "objective/train/tokens_used": 842936800, + "objective/train/value_avg": -0.011871337890625, + "objective/train/value_loss": 0.00018918554997071624, + "objective/train/value_max": -0.00019872188568115234, + "objective/train/value_min": -0.302734375, + "objective/train/value_reward_corr": 0.9061185714934559, + "objective/train/value_std": 0.0229949951171875, + "objective/train/weight_avg": 1.000493049621582, + "objective/train/weighted_lm_loss": 1.5424861907958984, + "objective/train/weights_max": 1.3256474733352661, + "objective/train/weights_min": 0.37776029109954834, + "theoretical_loss": 3.7474201571399757, + "tokens_seen": 822476800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007583052479537795, + "loss": 0.0791, + "theoretical_loss": 3.7473617063898863, + "tokens_seen": 822607872 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007582250040121971, + "loss": 0.0768, + "theoretical_loss": 3.747244840645097, + "tokens_seen": 822870016 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007581447600706147, + "loss": 0.0752, + "theoretical_loss": 3.7471280225453096, + "tokens_seen": 823132160 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007580645161290322, + "loss": 0.0766, + "theoretical_loss": 3.747011252055936, + "tokens_seen": 823394304 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007579842721874498, + "loss": 0.075, + "theoretical_loss": 3.746894529142424, + "tokens_seen": 823656448 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007579040282458675, + "loss": 0.0764, + "theoretical_loss": 3.746777853770256, + "tokens_seen": 823918592 + }, + { + "epoch": 0.25, + "learning_rate": 0.000757823784304285, + "loss": 0.0752, + "theoretical_loss": 3.746661225904953, + "tokens_seen": 824180736 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007577435403627027, + "loss": 0.0776, + "theoretical_loss": 3.746544645512069, + "tokens_seen": 824442880 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007576632964211202, + "loss": 0.0754, + "theoretical_loss": 3.7464281125571963, + "tokens_seen": 824705024 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007575830524795379, + "loss": 0.0761, + "theoretical_loss": 3.7463116270059618, + "tokens_seen": 824967168 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007575028085379554, + "loss": 0.0751, + "theoretical_loss": 3.7461951888240286, + "tokens_seen": 825229312 + }, + { + "epoch": 0.25, + "learning_rate": 0.000757422564596373, + "loss": 0.077, + "theoretical_loss": 3.7460787979770958, + "tokens_seen": 825491456 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.001048018573783338, + "objective/train/docs_used": 304950, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.441100835800171, + "objective/train/original_loss": 1.441100835800171, + "objective/train/theoretical_loss": 3.745962454430897, + "objective/train/tokens_used": 846213600, + "objective/train/value_avg": -0.00592803955078125, + "objective/train/value_loss": 0.00011939887917833403, + "objective/train/value_max": -0.00017404556274414062, + "objective/train/value_min": -0.1895751953125, + "objective/train/value_reward_corr": 0.6019524468909858, + "objective/train/value_std": 0.0082244873046875, + "objective/train/weight_avg": 1.0011035203933716, + "objective/train/weighted_lm_loss": 1.4430818557739258, + "objective/train/weights_max": 1.2087359428405762, + "objective/train/weights_min": 0.3807637691497803, + "theoretical_loss": 3.745962454430897, + "tokens_seen": 825753600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007573423206547906, + "loss": 0.0766, + "theoretical_loss": 3.745962454430897, + "tokens_seen": 825753600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007572620767132081, + "loss": 0.0787, + "theoretical_loss": 3.745846158151204, + "tokens_seen": 826015744 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007571818327716257, + "loss": 0.0766, + "theoretical_loss": 3.7457299091038214, + "tokens_seen": 826277888 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007571015888300434, + "loss": 0.0748, + "theoretical_loss": 3.745613707254591, + "tokens_seen": 826540032 + }, + { + "epoch": 0.25, + "learning_rate": 0.000757021344888461, + "loss": 0.0764, + "theoretical_loss": 3.7454975525693897, + "tokens_seen": 826802176 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007569411009468785, + "loss": 0.076, + "theoretical_loss": 3.7453814450141305, + "tokens_seen": 827064320 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007568608570052962, + "loss": 0.0808, + "theoretical_loss": 3.7452653845547603, + "tokens_seen": 827326464 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007567806130637137, + "loss": 0.08, + "theoretical_loss": 3.745149371157263, + "tokens_seen": 827588608 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007567003691221312, + "loss": 0.078, + "theoretical_loss": 3.7450334047876574, + "tokens_seen": 827850752 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007566201251805489, + "loss": 0.0779, + "theoretical_loss": 3.744917485411997, + "tokens_seen": 828112896 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007565398812389664, + "loss": 0.0803, + "theoretical_loss": 3.744801612996371, + "tokens_seen": 828375040 + }, + { + "epoch": 0.25, + "learning_rate": 0.000756459637297384, + "loss": 0.0767, + "theoretical_loss": 3.744685787506903, + "tokens_seen": 828637184 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007563793933558017, + "loss": 0.0776, + "theoretical_loss": 3.7445700089097533, + "tokens_seen": 828899328 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0003027545753866434, + "objective/train/docs_used": 306096, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4149469137191772, + "objective/train/original_loss": 1.4149470329284668, + "objective/train/theoretical_loss": 3.7445121371852323, + "objective/train/tokens_used": 849490400, + "objective/train/value_avg": -0.00829315185546875, + "objective/train/value_loss": 0.00021588837262243032, + "objective/train/value_max": -0.0001926422119140625, + "objective/train/value_min": -0.2091064453125, + "objective/train/value_reward_corr": 0.6806189676548946, + "objective/train/value_std": 0.013153076171875, + "objective/train/weight_avg": 1.0003987550735474, + "objective/train/weighted_lm_loss": 1.4160531759262085, + "objective/train/weights_max": 1.1402249336242676, + "objective/train/weights_min": 0.3719916045665741, + "theoretical_loss": 3.7445121371852323, + "tokens_seen": 829030400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007562991494142193, + "loss": 0.0764, + "theoretical_loss": 3.7444542771711165, + "tokens_seen": 829161472 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007562189054726369, + "loss": 0.0747, + "theoretical_loss": 3.744338592257222, + "tokens_seen": 829423616 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007561386615310544, + "loss": 0.0762, + "theoretical_loss": 3.744222954134334, + "tokens_seen": 829685760 + }, + { + "epoch": 0.25, + "learning_rate": 0.000756058417589472, + "loss": 0.0747, + "theoretical_loss": 3.7441073627687524, + "tokens_seen": 829947904 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007559781736478896, + "loss": 0.0769, + "theoretical_loss": 3.743991818126812, + "tokens_seen": 830210048 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007558979297063072, + "loss": 0.0764, + "theoretical_loss": 3.7438763201748815, + "tokens_seen": 830472192 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007558176857647247, + "loss": 0.0766, + "theoretical_loss": 3.743760868879365, + "tokens_seen": 830734336 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007557374418231424, + "loss": 0.075, + "theoretical_loss": 3.743645464206702, + "tokens_seen": 830996480 + }, + { + "epoch": 0.25, + "learning_rate": 0.00075565719788156, + "loss": 0.0759, + "theoretical_loss": 3.743530106123365, + "tokens_seen": 831258624 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007555769539399775, + "loss": 0.0757, + "theoretical_loss": 3.7434147945958642, + "tokens_seen": 831520768 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007554967099983952, + "loss": 0.0767, + "theoretical_loss": 3.7432995295907405, + "tokens_seen": 831782912 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007554164660568127, + "loss": 0.0782, + "theoretical_loss": 3.7431843110745726, + "tokens_seen": 832045056 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0011427036952227354, + "objective/train/docs_used": 307306, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.53184974193573, + "objective/train/original_loss": 1.5318498611450195, + "objective/train/theoretical_loss": 3.743069139013972, + "objective/train/tokens_used": 852767200, + "objective/train/value_avg": -0.0099029541015625, + "objective/train/value_loss": 0.0002651831309776753, + "objective/train/value_max": -0.0002551078796386719, + "objective/train/value_min": -0.54150390625, + "objective/train/value_reward_corr": 0.6319940300493742, + "objective/train/value_std": 0.01386260986328125, + "objective/train/weight_avg": 1.0012651681900024, + "objective/train/weighted_lm_loss": 1.5327504873275757, + "objective/train/weights_max": 1.5147995948791504, + "objective/train/weights_min": 0.38184595108032227, + "theoretical_loss": 3.743069139013972, + "tokens_seen": 832307200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007553362221152303, + "loss": 0.0752, + "theoretical_loss": 3.743069139013972, + "tokens_seen": 832307200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007552559781736479, + "loss": 0.0763, + "theoretical_loss": 3.742954013375586, + "tokens_seen": 832569344 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007551757342320655, + "loss": 0.0769, + "theoretical_loss": 3.742838934126094, + "tokens_seen": 832831488 + }, + { + "epoch": 0.25, + "learning_rate": 0.000755095490290483, + "loss": 0.0764, + "theoretical_loss": 3.742723901232213, + "tokens_seen": 833093632 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007550152463489006, + "loss": 0.0767, + "theoretical_loss": 3.742608914660692, + "tokens_seen": 833355776 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007549350024073182, + "loss": 0.0781, + "theoretical_loss": 3.742493974378314, + "tokens_seen": 833617920 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007548547584657359, + "loss": 0.0753, + "theoretical_loss": 3.742379080351899, + "tokens_seen": 833880064 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007547745145241535, + "loss": 0.079, + "theoretical_loss": 3.7422642325482975, + "tokens_seen": 834142208 + }, + { + "epoch": 0.25, + "learning_rate": 0.000754694270582571, + "loss": 0.081, + "theoretical_loss": 3.742149430934398, + "tokens_seen": 834404352 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007546140266409887, + "loss": 0.0807, + "theoretical_loss": 3.7420346754771208, + "tokens_seen": 834666496 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007545337826994062, + "loss": 0.0763, + "theoretical_loss": 3.7419199661434197, + "tokens_seen": 834928640 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007544535387578238, + "loss": 0.0782, + "theoretical_loss": 3.7418053029002842, + "tokens_seen": 835190784 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007543732948162414, + "loss": 0.074, + "theoretical_loss": 3.7416906857147367, + "tokens_seen": 835452928 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 4.4281885493546724e-05, + "objective/train/docs_used": 308483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.479179859161377, + "objective/train/original_loss": 1.479179859161377, + "objective/train/theoretical_loss": 3.741633394383263, + "objective/train/tokens_used": 856044000, + "objective/train/value_avg": -0.01134490966796875, + "objective/train/value_loss": 0.0005562845035456121, + "objective/train/value_max": -0.00019562244415283203, + "objective/train/value_min": -0.66552734375, + "objective/train/value_reward_corr": 0.6574823323493614, + "objective/train/value_std": 0.0205535888671875, + "objective/train/weight_avg": 1.0002927780151367, + "objective/train/weighted_lm_loss": 1.477531909942627, + "objective/train/weights_max": 1.606209397315979, + "objective/train/weights_min": 0.37909993529319763, + "theoretical_loss": 3.741633394383263, + "tokens_seen": 835584000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007542930508746589, + "loss": 0.0756, + "theoretical_loss": 3.741576114553835, + "tokens_seen": 835715072 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007542128069330765, + "loss": 0.0778, + "theoretical_loss": 3.7414615893846683, + "tokens_seen": 835977216 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007541325629914942, + "loss": 0.0768, + "theoretical_loss": 3.741347110174362, + "tokens_seen": 836239360 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007540523190499118, + "loss": 0.0776, + "theoretical_loss": 3.741232676890074, + "tokens_seen": 836501504 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007539720751083293, + "loss": 0.0785, + "theoretical_loss": 3.7411182894989965, + "tokens_seen": 836763648 + }, + { + "epoch": 0.25, + "learning_rate": 0.000753891831166747, + "loss": 0.078, + "theoretical_loss": 3.7410039479683546, + "tokens_seen": 837025792 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007538115872251645, + "loss": 0.0758, + "theoretical_loss": 3.740889652265408, + "tokens_seen": 837287936 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007537313432835821, + "loss": 0.0775, + "theoretical_loss": 3.7407754023574507, + "tokens_seen": 837550080 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007536510993419997, + "loss": 0.0768, + "theoretical_loss": 3.7406611982118076, + "tokens_seen": 837812224 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007535708554004172, + "loss": 0.0768, + "theoretical_loss": 3.74054703979584, + "tokens_seen": 838074368 + }, + { + "epoch": 0.25, + "learning_rate": 0.000753490611458835, + "loss": 0.0783, + "theoretical_loss": 3.7404329270769403, + "tokens_seen": 838336512 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007534103675172525, + "loss": 0.076, + "theoretical_loss": 3.740318860022537, + "tokens_seen": 838598656 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0013807304203510284, + "objective/train/docs_used": 309674, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6753649711608887, + "objective/train/original_loss": 1.6753649711608887, + "objective/train/theoretical_loss": 3.7402048386000892, + "objective/train/tokens_used": 859320800, + "objective/train/value_avg": -0.01296234130859375, + "objective/train/value_loss": 0.0006226726691238582, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.6162109375, + "objective/train/value_reward_corr": 0.639937529812378, + "objective/train/value_std": 0.019622802734375, + "objective/train/weight_avg": 1.001650094985962, + "objective/train/weighted_lm_loss": 1.6781058311462402, + "objective/train/weights_max": 1.614859700202942, + "objective/train/weights_min": 0.37846994400024414, + "theoretical_loss": 3.7402048386000892, + "tokens_seen": 838860800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007533301235756701, + "loss": 0.0754, + "theoretical_loss": 3.7402048386000892, + "tokens_seen": 838860800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007532498796340877, + "loss": 0.0743, + "theoretical_loss": 3.740090862777091, + "tokens_seen": 839122944 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007531696356925052, + "loss": 0.0779, + "theoretical_loss": 3.7399769325210697, + "tokens_seen": 839385088 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007530893917509228, + "loss": 0.0771, + "theoretical_loss": 3.7398630477995853, + "tokens_seen": 839647232 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007530091478093404, + "loss": 0.0751, + "theoretical_loss": 3.7397492085802315, + "tokens_seen": 839909376 + }, + { + "epoch": 0.25, + "learning_rate": 0.000752928903867758, + "loss": 0.0765, + "theoretical_loss": 3.739635414830635, + "tokens_seen": 840171520 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007528486599261755, + "loss": 0.0763, + "theoretical_loss": 3.7395216665184554, + "tokens_seen": 840433664 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007527684159845932, + "loss": 0.0793, + "theoretical_loss": 3.739407963611386, + "tokens_seen": 840695808 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007526881720430108, + "loss": 0.074, + "theoretical_loss": 3.739294306077152, + "tokens_seen": 840957952 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007526079281014283, + "loss": 0.0785, + "theoretical_loss": 3.7391806938835126, + "tokens_seen": 841220096 + }, + { + "epoch": 0.26, + "learning_rate": 0.000752527684159846, + "loss": 0.0755, + "theoretical_loss": 3.7390671269982603, + "tokens_seen": 841482240 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007524474402182635, + "loss": 0.0764, + "theoretical_loss": 3.7389536053892187, + "tokens_seen": 841744384 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007523671962766812, + "loss": 0.0744, + "theoretical_loss": 3.738840129024246, + "tokens_seen": 842006528 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.002575838938355446, + "objective/train/docs_used": 310992, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4314990043640137, + "objective/train/original_loss": 1.4314990043640137, + "objective/train/theoretical_loss": 3.73878340779825, + "objective/train/tokens_used": 862597600, + "objective/train/value_avg": -0.00510406494140625, + "objective/train/value_loss": 6.991349073359743e-05, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.186279296875, + "objective/train/value_reward_corr": 0.4218440665138546, + "objective/train/value_std": 0.005443572998046875, + "objective/train/weight_avg": 1.0026094913482666, + "objective/train/weighted_lm_loss": 1.4359185695648193, + "objective/train/weights_max": 1.1117740869522095, + "objective/train/weights_min": 0.5176728963851929, + "theoretical_loss": 3.73878340779825, + "tokens_seen": 842137600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007522869523350987, + "loss": 0.0771, + "theoretical_loss": 3.738726697871233, + "tokens_seen": 842268672 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007522067083935163, + "loss": 0.0796, + "theoretical_loss": 3.738613311898103, + "tokens_seen": 842530816 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007521264644519339, + "loss": 0.0764, + "theoretical_loss": 3.7384999710728106, + "tokens_seen": 842792960 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007520462205103514, + "loss": 0.0769, + "theoretical_loss": 3.738386675363346, + "tokens_seen": 843055104 + }, + { + "epoch": 0.26, + "learning_rate": 0.000751965976568769, + "loss": 0.0764, + "theoretical_loss": 3.738273424737729, + "tokens_seen": 843317248 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007518857326271867, + "loss": 0.0792, + "theoretical_loss": 3.7381602191640146, + "tokens_seen": 843579392 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007518054886856043, + "loss": 0.077, + "theoretical_loss": 3.738047058610289, + "tokens_seen": 843841536 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007517252447440218, + "loss": 0.0768, + "theoretical_loss": 3.7379339430446707, + "tokens_seen": 844103680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007516450008024395, + "loss": 0.0752, + "theoretical_loss": 3.7378208724353117, + "tokens_seen": 844365824 + }, + { + "epoch": 0.26, + "learning_rate": 0.000751564756860857, + "loss": 0.0776, + "theoretical_loss": 3.7377078467503955, + "tokens_seen": 844627968 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007514845129192746, + "loss": 0.0782, + "theoretical_loss": 3.737594865958138, + "tokens_seen": 844890112 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007514042689776922, + "loss": 0.076, + "theoretical_loss": 3.7374819300267883, + "tokens_seen": 845152256 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.000701575365383178, + "objective/train/docs_used": 312167, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.512547492980957, + "objective/train/original_loss": 1.512547492980957, + "objective/train/theoretical_loss": 3.7373690389246272, + "objective/train/tokens_used": 865874400, + "objective/train/value_avg": -0.00689697265625, + "objective/train/value_loss": 0.00012069241347489879, + "objective/train/value_max": -0.00014317035675048828, + "objective/train/value_min": -0.318115234375, + "objective/train/value_reward_corr": 0.6602302605762631, + "objective/train/value_std": 0.01007843017578125, + "objective/train/weight_avg": 1.0007611513137817, + "objective/train/weighted_lm_loss": 1.5141348838806152, + "objective/train/weights_max": 1.115106463432312, + "objective/train/weights_min": 0.8200203776359558, + "theoretical_loss": 3.7373690389246272, + "tokens_seen": 845414400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007513240250361097, + "loss": 0.0768, + "theoretical_loss": 3.7373690389246272, + "tokens_seen": 845414400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007512437810945275, + "loss": 0.0783, + "theoretical_loss": 3.737256192619967, + "tokens_seen": 845676544 + }, + { + "epoch": 0.26, + "learning_rate": 0.000751163537152945, + "loss": 0.0753, + "theoretical_loss": 3.737143391081154, + "tokens_seen": 845938688 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007510832932113626, + "loss": 0.0762, + "theoretical_loss": 3.7370306342765653, + "tokens_seen": 846200832 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007510030492697802, + "loss": 0.0737, + "theoretical_loss": 3.73691792217461, + "tokens_seen": 846462976 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007509228053281978, + "loss": 0.0779, + "theoretical_loss": 3.7368052547437305, + "tokens_seen": 846725120 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007508425613866153, + "loss": 0.0772, + "theoretical_loss": 3.7366926319524003, + "tokens_seen": 846987264 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007507623174450329, + "loss": 0.0747, + "theoretical_loss": 3.736580053769125, + "tokens_seen": 847249408 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007506820735034505, + "loss": 0.0728, + "theoretical_loss": 3.736467520162442, + "tokens_seen": 847511552 + }, + { + "epoch": 0.26, + "learning_rate": 0.000750601829561868, + "loss": 0.0772, + "theoretical_loss": 3.736355031100922, + "tokens_seen": 847773696 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007505215856202858, + "loss": 0.0765, + "theoretical_loss": 3.7362425865531654, + "tokens_seen": 848035840 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007504413416787033, + "loss": 0.0773, + "theoretical_loss": 3.736130186487806, + "tokens_seen": 848297984 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007503610977371209, + "loss": 0.0749, + "theoretical_loss": 3.736017830873508, + "tokens_seen": 848560128 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0018955293344333768, + "objective/train/docs_used": 313372, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.561277151107788, + "objective/train/original_loss": 1.561277151107788, + "objective/train/theoretical_loss": 3.7359616697257243, + "objective/train/tokens_used": 869151200, + "objective/train/value_avg": -0.00714111328125, + "objective/train/value_loss": 0.00045251031406223774, + "objective/train/value_max": -0.00013136863708496094, + "objective/train/value_min": -0.984375, + "objective/train/value_reward_corr": 0.6359393678379087, + "objective/train/value_std": 0.0159912109375, + "objective/train/weight_avg": 1.0020655393600464, + "objective/train/weighted_lm_loss": 1.5643846988677979, + "objective/train/weights_max": 2.523839235305786, + "objective/train/weights_min": 0.07859423011541367, + "theoretical_loss": 3.7359616697257243, + "tokens_seen": 848691200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007502808537955385, + "loss": 0.0772, + "theoretical_loss": 3.7359055196789694, + "tokens_seen": 848822272 + }, + { + "epoch": 0.26, + "learning_rate": 0.000750200609853956, + "loss": 0.075, + "theoretical_loss": 3.7357932528729183, + "tokens_seen": 849084416 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007501203659123736, + "loss": 0.0787, + "theoretical_loss": 3.7356810304241144, + "tokens_seen": 849346560 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007500401219707912, + "loss": 0.0783, + "theoretical_loss": 3.73556885230135, + "tokens_seen": 849608704 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007499598780292088, + "loss": 0.0756, + "theoretical_loss": 3.735456718473449, + "tokens_seen": 849870848 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007498796340876264, + "loss": 0.0718, + "theoretical_loss": 3.7353446289092647, + "tokens_seen": 850132992 + }, + { + "epoch": 0.26, + "learning_rate": 0.000749799390146044, + "loss": 0.0782, + "theoretical_loss": 3.7352325835776856, + "tokens_seen": 850395136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007497191462044616, + "loss": 0.0773, + "theoretical_loss": 3.7351205824476277, + "tokens_seen": 850657280 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007496389022628792, + "loss": 0.0791, + "theoretical_loss": 3.7350086254880415, + "tokens_seen": 850919424 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007495586583212968, + "loss": 0.0743, + "theoretical_loss": 3.734896712667907, + "tokens_seen": 851181568 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007494784143797143, + "loss": 0.0754, + "theoretical_loss": 3.734784843956236, + "tokens_seen": 851443712 + }, + { + "epoch": 0.26, + "learning_rate": 0.000749398170438132, + "loss": 0.0747, + "theoretical_loss": 3.7346730193220727, + "tokens_seen": 851705856 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0007218181854113936, + "objective/train/docs_used": 314587, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5503199100494385, + "objective/train/original_loss": 1.5503199100494385, + "objective/train/theoretical_loss": 3.7345612387344906, + "objective/train/tokens_used": 872428000, + "objective/train/value_avg": -0.007656097412109375, + "objective/train/value_loss": 0.00033693682053126395, + "objective/train/value_max": -0.00014650821685791016, + "objective/train/value_min": -0.345947265625, + "objective/train/value_reward_corr": 0.6609246880890598, + "objective/train/value_std": 0.01441192626953125, + "objective/train/weight_avg": 1.0008713006973267, + "objective/train/weighted_lm_loss": 1.550735354423523, + "objective/train/weights_max": 1.2893656492233276, + "objective/train/weights_min": 0.3895317018032074, + "theoretical_loss": 3.7345612387344906, + "tokens_seen": 851968000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007493179264965495, + "loss": 0.0758, + "theoretical_loss": 3.7345612387344906, + "tokens_seen": 851968000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007492376825549671, + "loss": 0.0782, + "theoretical_loss": 3.734449502162596, + "tokens_seen": 852230144 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007491574386133847, + "loss": 0.0773, + "theoretical_loss": 3.7343378095755257, + "tokens_seen": 852492288 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007490771946718022, + "loss": 0.0745, + "theoretical_loss": 3.7342261609424483, + "tokens_seen": 852754432 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007489969507302198, + "loss": 0.0788, + "theoretical_loss": 3.7341145562325613, + "tokens_seen": 853016576 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007489167067886375, + "loss": 0.0761, + "theoretical_loss": 3.734002995415096, + "tokens_seen": 853278720 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007488364628470551, + "loss": 0.0772, + "theoretical_loss": 3.7338914784593134, + "tokens_seen": 853540864 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007487562189054726, + "loss": 0.0755, + "theoretical_loss": 3.733780005334505, + "tokens_seen": 853803008 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007486759749638903, + "loss": 0.0789, + "theoretical_loss": 3.733668576009995, + "tokens_seen": 854065152 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007485957310223078, + "loss": 0.0768, + "theoretical_loss": 3.733557190455136, + "tokens_seen": 854327296 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007485154870807254, + "loss": 0.0742, + "theoretical_loss": 3.733445848639313, + "tokens_seen": 854589440 + }, + { + "epoch": 0.26, + "learning_rate": 0.000748435243139143, + "loss": 0.0754, + "theoretical_loss": 3.733334550531942, + "tokens_seen": 854851584 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007483549991975605, + "loss": 0.0776, + "theoretical_loss": 3.7332232961024694, + "tokens_seen": 855113728 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.001274409587495029, + "objective/train/docs_used": 315806, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4173961877822876, + "objective/train/original_loss": 1.417395830154419, + "objective/train/theoretical_loss": 3.733167685257405, + "objective/train/tokens_used": 875704800, + "objective/train/value_avg": -0.006504058837890625, + "objective/train/value_loss": 0.00017858008504845202, + "objective/train/value_max": -0.0001647472381591797, + "objective/train/value_min": -0.2578125, + "objective/train/value_reward_corr": 0.5306423754138953, + "objective/train/value_std": 0.00859832763671875, + "objective/train/weight_avg": 1.001355767250061, + "objective/train/weighted_lm_loss": 1.420598030090332, + "objective/train/weights_max": 1.1989800930023193, + "objective/train/weights_min": 0.38221901655197144, + "theoretical_loss": 3.733167685257405, + "tokens_seen": 855244800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007482747552559783, + "loss": 0.0759, + "theoretical_loss": 3.7331120853203714, + "tokens_seen": 855375872 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007481945113143958, + "loss": 0.0752, + "theoretical_loss": 3.733000918155156, + "tokens_seen": 855638016 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007481142673728134, + "loss": 0.0726, + "theoretical_loss": 3.7328897945763617, + "tokens_seen": 855900160 + }, + { + "epoch": 0.26, + "learning_rate": 0.000748034023431231, + "loss": 0.0781, + "theoretical_loss": 3.7327787145535574, + "tokens_seen": 856162304 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007479537794896486, + "loss": 0.0738, + "theoretical_loss": 3.732667678056342, + "tokens_seen": 856424448 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007478735355480661, + "loss": 0.073, + "theoretical_loss": 3.732556685054346, + "tokens_seen": 856686592 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007477932916064837, + "loss": 0.0745, + "theoretical_loss": 3.7324457355172296, + "tokens_seen": 856948736 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007477130476649013, + "loss": 0.0748, + "theoretical_loss": 3.7323348294146843, + "tokens_seen": 857210880 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007476328037233188, + "loss": 0.0758, + "theoretical_loss": 3.73222396671643, + "tokens_seen": 857473024 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007475525597817365, + "loss": 0.0766, + "theoretical_loss": 3.73211314739222, + "tokens_seen": 857735168 + }, + { + "epoch": 0.26, + "learning_rate": 0.000747472315840154, + "loss": 0.0748, + "theoretical_loss": 3.732002371411835, + "tokens_seen": 857997312 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007473920718985718, + "loss": 0.0738, + "theoretical_loss": 3.7318916387450876, + "tokens_seen": 858259456 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.00041493060416541994, + "objective/train/docs_used": 317142, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5313217639923096, + "objective/train/original_loss": 1.5313217639923096, + "objective/train/theoretical_loss": 3.7317809493618204, + "objective/train/tokens_used": 878981600, + "objective/train/value_avg": -0.007602691650390625, + "objective/train/value_loss": 0.00031841802410781384, + "objective/train/value_max": -0.0001596212387084961, + "objective/train/value_min": -0.71923828125, + "objective/train/value_reward_corr": 0.6602214596414195, + "objective/train/value_std": 0.0145111083984375, + "objective/train/weight_avg": 1.0005568265914917, + "objective/train/weighted_lm_loss": 1.5318763256072998, + "objective/train/weights_max": 1.2434570789337158, + "objective/train/weights_min": 0.3680877387523651, + "theoretical_loss": 3.7317809493618204, + "tokens_seen": 858521600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007473118279569893, + "loss": 0.0776, + "theoretical_loss": 3.7317809493618204, + "tokens_seen": 858521600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007472315840154068, + "loss": 0.0752, + "theoretical_loss": 3.7316703032319056, + "tokens_seen": 858783744 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007471513400738245, + "loss": 0.0787, + "theoretical_loss": 3.7315597003252474, + "tokens_seen": 859045888 + }, + { + "epoch": 0.26, + "learning_rate": 0.000747071096132242, + "loss": 0.0784, + "theoretical_loss": 3.731449140611777, + "tokens_seen": 859308032 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007469908521906596, + "loss": 0.0747, + "theoretical_loss": 3.7313386240614577, + "tokens_seen": 859570176 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007469106082490772, + "loss": 0.0761, + "theoretical_loss": 3.7312281506442835, + "tokens_seen": 859832320 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007468303643074948, + "loss": 0.0758, + "theoretical_loss": 3.7311177203302766, + "tokens_seen": 860094464 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007467501203659124, + "loss": 0.0769, + "theoretical_loss": 3.7310073330894906, + "tokens_seen": 860356608 + }, + { + "epoch": 0.26, + "learning_rate": 0.00074666987642433, + "loss": 0.074, + "theoretical_loss": 3.730896988892008, + "tokens_seen": 860618752 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007465896324827476, + "loss": 0.0809, + "theoretical_loss": 3.7307866877079414, + "tokens_seen": 860880896 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007465093885411651, + "loss": 0.0758, + "theoretical_loss": 3.730676429507435, + "tokens_seen": 861143040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007464291445995828, + "loss": 0.0736, + "theoretical_loss": 3.730566214260659, + "tokens_seen": 861405184 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007463489006580003, + "loss": 0.0762, + "theoretical_loss": 3.730456041937817, + "tokens_seen": 861667328 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0012228551786392927, + "objective/train/docs_used": 318349, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4355342388153076, + "objective/train/original_loss": 1.4355342388153076, + "objective/train/theoretical_loss": 3.730400971863568, + "objective/train/tokens_used": 882258400, + "objective/train/value_avg": -0.00885772705078125, + "objective/train/value_loss": 0.0004532379098236561, + "objective/train/value_max": -0.00011235475540161133, + "objective/train/value_min": -0.8740234375, + "objective/train/value_reward_corr": 0.5636366361372568, + "objective/train/value_std": 0.015533447265625, + "objective/train/weight_avg": 1.001416563987732, + "objective/train/weighted_lm_loss": 1.4377250671386719, + "objective/train/weights_max": 1.8811969757080078, + "objective/train/weights_min": 0.22597576677799225, + "theoretical_loss": 3.730400971863568, + "tokens_seen": 861798400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007462686567164179, + "loss": 0.0767, + "theoretical_loss": 3.730345912509141, + "tokens_seen": 861929472 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007461884127748355, + "loss": 0.0742, + "theoretical_loss": 3.7302358259448924, + "tokens_seen": 862191616 + }, + { + "epoch": 0.26, + "learning_rate": 0.000746108168833253, + "loss": 0.0778, + "theoretical_loss": 3.730125782215362, + "tokens_seen": 862453760 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007460279248916708, + "loss": 0.0766, + "theoretical_loss": 3.730015781290872, + "tokens_seen": 862715904 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007459476809500883, + "loss": 0.0784, + "theoretical_loss": 3.729905823141771, + "tokens_seen": 862978048 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007458674370085059, + "loss": 0.0739, + "theoretical_loss": 3.729795907738441, + "tokens_seen": 863240192 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007457871930669235, + "loss": 0.0748, + "theoretical_loss": 3.729686035051291, + "tokens_seen": 863502336 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007457069491253411, + "loss": 0.0762, + "theoretical_loss": 3.7295762050507593, + "tokens_seen": 863764480 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007456267051837586, + "loss": 0.0754, + "theoretical_loss": 3.7294664177073145, + "tokens_seen": 864026624 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007455464612421762, + "loss": 0.0773, + "theoretical_loss": 3.7293566729914547, + "tokens_seen": 864288768 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007454662173005938, + "loss": 0.0756, + "theoretical_loss": 3.7292469708737066, + "tokens_seen": 864550912 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007453859733590113, + "loss": 0.0767, + "theoretical_loss": 3.729137311324627, + "tokens_seen": 864813056 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 6.832103099441156e-05, + "objective/train/docs_used": 319421, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.434070348739624, + "objective/train/original_loss": 1.4340702295303345, + "objective/train/theoretical_loss": 3.7290276943148015, + "objective/train/tokens_used": 885535200, + "objective/train/value_avg": -0.00914764404296875, + "objective/train/value_loss": 0.0007328266510739923, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.91455078125, + "objective/train/value_reward_corr": 0.5348672218461172, + "objective/train/value_std": 0.0240936279296875, + "objective/train/weight_avg": 1.0004147291183472, + "objective/train/weighted_lm_loss": 1.43430757522583, + "objective/train/weights_max": 2.415832042694092, + "objective/train/weights_min": 0.3684886395931244, + "theoretical_loss": 3.7290276943148015, + "tokens_seen": 865075200 + }, + { + "epoch": 0.26, + "learning_rate": 0.000745305729417429, + "loss": 0.073, + "theoretical_loss": 3.7290276943148015, + "tokens_seen": 865075200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007452254854758466, + "loss": 0.0777, + "theoretical_loss": 3.7289181198148458, + "tokens_seen": 865337344 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007451452415342642, + "loss": 0.0774, + "theoretical_loss": 3.7288085877954025, + "tokens_seen": 865599488 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007450649975926818, + "loss": 0.0766, + "theoretical_loss": 3.728699098227146, + "tokens_seen": 865861632 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007449847536510994, + "loss": 0.0757, + "theoretical_loss": 3.728589651080779, + "tokens_seen": 866123776 + }, + { + "epoch": 0.26, + "learning_rate": 0.000744904509709517, + "loss": 0.0754, + "theoretical_loss": 3.728480246327032, + "tokens_seen": 866385920 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007448242657679345, + "loss": 0.0745, + "theoretical_loss": 3.7283708839366656, + "tokens_seen": 866648064 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007447440218263521, + "loss": 0.0766, + "theoretical_loss": 3.72826156388047, + "tokens_seen": 866910208 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007446637778847697, + "loss": 0.0763, + "theoretical_loss": 3.728152286129263, + "tokens_seen": 867172352 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007445835339431873, + "loss": 0.0747, + "theoretical_loss": 3.728043050653893, + "tokens_seen": 867434496 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007445032900016049, + "loss": 0.0798, + "theoretical_loss": 3.7279338574252354, + "tokens_seen": 867696640 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007444230460600226, + "loss": 0.0782, + "theoretical_loss": 3.7278247064141956, + "tokens_seen": 867958784 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007443428021184401, + "loss": 0.0772, + "theoretical_loss": 3.7277155975917076, + "tokens_seen": 868220928 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0010427895467728376, + "objective/train/docs_used": 320668, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5272066593170166, + "objective/train/original_loss": 1.5272066593170166, + "objective/train/theoretical_loss": 3.727661058992095, + "objective/train/tokens_used": 888812000, + "objective/train/value_avg": -0.0088348388671875, + "objective/train/value_loss": 0.00025395132252015173, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.421630859375, + "objective/train/value_reward_corr": 0.7319601341651985, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.0011564493179321, + "objective/train/weighted_lm_loss": 1.5287580490112305, + "objective/train/weights_max": 1.296626091003418, + "objective/train/weights_min": 0.3688490390777588, + "theoretical_loss": 3.727661058992095, + "tokens_seen": 868352000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007442625581768576, + "loss": 0.0782, + "theoretical_loss": 3.7276065309287345, + "tokens_seen": 868483072 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007441823142352753, + "loss": 0.0775, + "theoretical_loss": 3.727497506396267, + "tokens_seen": 868745216 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007441020702936928, + "loss": 0.0752, + "theoretical_loss": 3.7273885239653266, + "tokens_seen": 869007360 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007440218263521104, + "loss": 0.0753, + "theoretical_loss": 3.727279583606961, + "tokens_seen": 869269504 + }, + { + "epoch": 0.26, + "learning_rate": 0.000743941582410528, + "loss": 0.0763, + "theoretical_loss": 3.727170685292248, + "tokens_seen": 869531648 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007438613384689456, + "loss": 0.077, + "theoretical_loss": 3.7270618289922943, + "tokens_seen": 869793792 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007437810945273631, + "loss": 0.0753, + "theoretical_loss": 3.7269530146782337, + "tokens_seen": 870055936 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007437008505857808, + "loss": 0.0809, + "theoretical_loss": 3.72684424232123, + "tokens_seen": 870318080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007436206066441984, + "loss": 0.0772, + "theoretical_loss": 3.7267355118924748, + "tokens_seen": 870580224 + }, + { + "epoch": 0.26, + "learning_rate": 0.000743540362702616, + "loss": 0.0753, + "theoretical_loss": 3.726626823363188, + "tokens_seen": 870842368 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007434601187610336, + "loss": 0.0754, + "theoretical_loss": 3.7265181767046176, + "tokens_seen": 871104512 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007433798748194511, + "loss": 0.0773, + "theoretical_loss": 3.726409571888042, + "tokens_seen": 871366656 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0008427270222455263, + "objective/train/docs_used": 321822, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6887977123260498, + "objective/train/original_loss": 1.6887975931167603, + "objective/train/theoretical_loss": 3.7263010088847652, + "objective/train/tokens_used": 892088800, + "objective/train/value_avg": -0.00997161865234375, + "objective/train/value_loss": 0.00024173302517738193, + "objective/train/value_max": -0.00015115737915039062, + "objective/train/value_min": -0.336181640625, + "objective/train/value_reward_corr": 0.7177472974340894, + "objective/train/value_std": 0.01708984375, + "objective/train/weight_avg": 1.000956654548645, + "objective/train/weighted_lm_loss": 1.689243197441101, + "objective/train/weights_max": 1.1999152898788452, + "objective/train/weights_min": 0.40030547976493835, + "theoretical_loss": 3.7263010088847652, + "tokens_seen": 871628800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007432996308778688, + "loss": 0.0784, + "theoretical_loss": 3.7263010088847652, + "tokens_seen": 871628800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007432193869362863, + "loss": 0.0753, + "theoretical_loss": 3.726192487666121, + "tokens_seen": 871890944 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007431391429947038, + "loss": 0.0765, + "theoretical_loss": 3.7260840082034714, + "tokens_seen": 872153088 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007430588990531216, + "loss": 0.0759, + "theoretical_loss": 3.7259755704682065, + "tokens_seen": 872415232 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007429786551115391, + "loss": 0.0766, + "theoretical_loss": 3.7258671744317446, + "tokens_seen": 872677376 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007428984111699567, + "loss": 0.0768, + "theoretical_loss": 3.725758820065531, + "tokens_seen": 872939520 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007428181672283743, + "loss": 0.0761, + "theoretical_loss": 3.725650507341042, + "tokens_seen": 873201664 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007427379232867919, + "loss": 0.0774, + "theoretical_loss": 3.7255422362297788, + "tokens_seen": 873463808 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007426576793452094, + "loss": 0.0759, + "theoretical_loss": 3.7254340067032725, + "tokens_seen": 873725952 + }, + { + "epoch": 0.26, + "learning_rate": 0.000742577435403627, + "loss": 0.0766, + "theoretical_loss": 3.7253258187330816, + "tokens_seen": 873988096 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007424971914620446, + "loss": 0.0763, + "theoretical_loss": 3.7252176722907926, + "tokens_seen": 874250240 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007424169475204621, + "loss": 0.0788, + "theoretical_loss": 3.725109567348021, + "tokens_seen": 874512384 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007423367035788799, + "loss": 0.0778, + "theoretical_loss": 3.725001503876408, + "tokens_seen": 874774528 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.00147084123454988, + "objective/train/docs_used": 323008, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5208227634429932, + "objective/train/original_loss": 1.520822525024414, + "objective/train/theoretical_loss": 3.7249474876834325, + "objective/train/tokens_used": 895365600, + "objective/train/value_avg": -0.007843017578125, + "objective/train/value_loss": 0.00020401620713528246, + "objective/train/value_max": -0.00010311603546142578, + "objective/train/value_min": -0.30029296875, + "objective/train/value_reward_corr": 0.789625392294421, + "objective/train/value_std": 0.017059326171875, + "objective/train/weight_avg": 1.0015629529953003, + "objective/train/weighted_lm_loss": 1.5230070352554321, + "objective/train/weights_max": 1.1601619720458984, + "objective/train/weights_min": 0.37002718448638916, + "theoretical_loss": 3.7249474876834325, + "tokens_seen": 874905600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007422564596372974, + "loss": 0.0756, + "theoretical_loss": 3.7248934818476247, + "tokens_seen": 875036672 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007421762156957151, + "loss": 0.0757, + "theoretical_loss": 3.7247855012333693, + "tokens_seen": 875298816 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007420959717541326, + "loss": 0.0759, + "theoretical_loss": 3.7246775620053665, + "tokens_seen": 875560960 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007420157278125502, + "loss": 0.0768, + "theoretical_loss": 3.724569664135372, + "tokens_seen": 875823104 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007419354838709678, + "loss": 0.0763, + "theoretical_loss": 3.7244618075951657, + "tokens_seen": 876085248 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007418552399293853, + "loss": 0.0782, + "theoretical_loss": 3.7243539923565576, + "tokens_seen": 876347392 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007417749959878029, + "loss": 0.0773, + "theoretical_loss": 3.724246218391384, + "tokens_seen": 876609536 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007416947520462205, + "loss": 0.0778, + "theoretical_loss": 3.7241384856715096, + "tokens_seen": 876871680 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007416145081046381, + "loss": 0.0754, + "theoretical_loss": 3.724030794168826, + "tokens_seen": 877133824 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007415342641630557, + "loss": 0.0752, + "theoretical_loss": 3.723923143855253, + "tokens_seen": 877395968 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007414540202214734, + "loss": 0.0746, + "theoretical_loss": 3.723815534702738, + "tokens_seen": 877658112 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007413737762798909, + "loss": 0.0785, + "theoretical_loss": 3.7237079666832553, + "tokens_seen": 877920256 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.0005045576835982502, + "objective/train/docs_used": 324331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4083878993988037, + "objective/train/original_loss": 1.4083881378173828, + "objective/train/theoretical_loss": 3.7236004397688065, + "objective/train/tokens_used": 898642400, + "objective/train/value_avg": -0.01111602783203125, + "objective/train/value_loss": 0.0004869569675065577, + "objective/train/value_max": -0.00018668174743652344, + "objective/train/value_min": -0.7138671875, + "objective/train/value_reward_corr": 0.598061434276012, + "objective/train/value_std": 0.0183563232421875, + "objective/train/weight_avg": 0.9997174143791199, + "objective/train/weighted_lm_loss": 1.406802773475647, + "objective/train/weights_max": 1.8978021144866943, + "objective/train/weights_min": 0.38027602434158325, + "theoretical_loss": 3.7236004397688065, + "tokens_seen": 878182400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007412935323383084, + "loss": 0.078, + "theoretical_loss": 3.7236004397688065, + "tokens_seen": 878182400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007412132883967261, + "loss": 0.0754, + "theoretical_loss": 3.723492953931421, + "tokens_seen": 878444544 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007411330444551436, + "loss": 0.0759, + "theoretical_loss": 3.7233855091431565, + "tokens_seen": 878706688 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007410528005135613, + "loss": 0.0759, + "theoretical_loss": 3.723278105376096, + "tokens_seen": 878968832 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007409725565719788, + "loss": 0.0748, + "theoretical_loss": 3.723170742602351, + "tokens_seen": 879230976 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007408923126303964, + "loss": 0.0775, + "theoretical_loss": 3.7230634207940607, + "tokens_seen": 879493120 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007408120686888141, + "loss": 0.0773, + "theoretical_loss": 3.7229561399233906, + "tokens_seen": 879755264 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007407318247472316, + "loss": 0.0791, + "theoretical_loss": 3.7228488999625338, + "tokens_seen": 880017408 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007406515808056492, + "loss": 0.0775, + "theoretical_loss": 3.722741700883711, + "tokens_seen": 880279552 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007405713368640668, + "loss": 0.0755, + "theoretical_loss": 3.7226345426591694, + "tokens_seen": 880541696 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007404910929224844, + "loss": 0.0732, + "theoretical_loss": 3.722527425261183, + "tokens_seen": 880803840 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007404108489809019, + "loss": 0.0783, + "theoretical_loss": 3.7224203486620535, + "tokens_seen": 881065984 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007403306050393196, + "loss": 0.0777, + "theoretical_loss": 3.7223133128341104, + "tokens_seen": 881328128 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0017001149244606495, + "objective/train/docs_used": 325483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4574103355407715, + "objective/train/original_loss": 1.4574103355407715, + "objective/train/theoretical_loss": 3.722259810200693, + "objective/train/tokens_used": 901919200, + "objective/train/value_avg": -0.007709503173828125, + "objective/train/value_loss": 0.00018067251949105412, + "objective/train/value_max": -0.00011593103408813477, + "objective/train/value_min": -0.2108154296875, + "objective/train/value_reward_corr": 0.625343074103676, + "objective/train/value_std": 0.011138916015625, + "objective/train/weight_avg": 1.0017832517623901, + "objective/train/weighted_lm_loss": 1.4603242874145508, + "objective/train/weights_max": 1.2097421884536743, + "objective/train/weights_min": 0.36907634139060974, + "theoretical_loss": 3.722259810200693, + "tokens_seen": 881459200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007402503610977371, + "loss": 0.0752, + "theoretical_loss": 3.722206317749708, + "tokens_seen": 881590272 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007401701171561546, + "loss": 0.0778, + "theoretical_loss": 3.722099363381229, + "tokens_seen": 881852416 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007400898732145724, + "loss": 0.0769, + "theoretical_loss": 3.7219924497010837, + "tokens_seen": 882114560 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007400096292729899, + "loss": 0.0766, + "theoretical_loss": 3.721885576681708, + "tokens_seen": 882376704 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007399293853314075, + "loss": 0.078, + "theoretical_loss": 3.7217787442955643, + "tokens_seen": 882638848 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007398491413898251, + "loss": 0.0767, + "theoretical_loss": 3.721671952515144, + "tokens_seen": 882900992 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007397688974482427, + "loss": 0.0762, + "theoretical_loss": 3.7215652013129628, + "tokens_seen": 883163136 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007396886535066603, + "loss": 0.0757, + "theoretical_loss": 3.7214584906615644, + "tokens_seen": 883425280 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007396084095650778, + "loss": 0.0732, + "theoretical_loss": 3.7213518205335196, + "tokens_seen": 883687424 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007395281656234954, + "loss": 0.0764, + "theoretical_loss": 3.721245190901425, + "tokens_seen": 883949568 + }, + { + "epoch": 0.27, + "learning_rate": 0.000739447921681913, + "loss": 0.077, + "theoretical_loss": 3.721138601737904, + "tokens_seen": 884211712 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007393676777403307, + "loss": 0.0755, + "theoretical_loss": 3.721032053015607, + "tokens_seen": 884473856 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0006489785737358034, + "objective/train/docs_used": 326745, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7073997259140015, + "objective/train/original_loss": 1.707399606704712, + "objective/train/theoretical_loss": 3.720925544707211, + "objective/train/tokens_used": 905196000, + "objective/train/value_avg": -0.0076141357421875, + "objective/train/value_loss": 0.00017974516958929598, + "objective/train/value_max": -0.00010389089584350586, + "objective/train/value_min": -0.5849609375, + "objective/train/value_reward_corr": 0.7308202130586332, + "objective/train/value_std": 0.0149078369140625, + "objective/train/weight_avg": 1.0007365942001343, + "objective/train/weighted_lm_loss": 1.7088409662246704, + "objective/train/weights_max": 1.28644597530365, + "objective/train/weights_min": 0.5981242656707764, + "theoretical_loss": 3.720925544707211, + "tokens_seen": 884736000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007392874337987482, + "loss": 0.0761, + "theoretical_loss": 3.720925544707211, + "tokens_seen": 884736000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007392071898571659, + "loss": 0.078, + "theoretical_loss": 3.720819076785419, + "tokens_seen": 884998144 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007391269459155834, + "loss": 0.0741, + "theoretical_loss": 3.720712649222961, + "tokens_seen": 885260288 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007390467019740009, + "loss": 0.0752, + "theoretical_loss": 3.720606261992593, + "tokens_seen": 885522432 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007389664580324186, + "loss": 0.0777, + "theoretical_loss": 3.7204999150670988, + "tokens_seen": 885784576 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007388862140908361, + "loss": 0.0753, + "theoretical_loss": 3.7203936084192866, + "tokens_seen": 886046720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007388059701492537, + "loss": 0.0775, + "theoretical_loss": 3.720287342021992, + "tokens_seen": 886308864 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007387257262076713, + "loss": 0.0748, + "theoretical_loss": 3.720181115848078, + "tokens_seen": 886571008 + }, + { + "epoch": 0.27, + "learning_rate": 0.000738645482266089, + "loss": 0.0742, + "theoretical_loss": 3.7200749298704316, + "tokens_seen": 886833152 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007385652383245065, + "loss": 0.0778, + "theoretical_loss": 3.7199687840619675, + "tokens_seen": 887095296 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007384849943829242, + "loss": 0.0767, + "theoretical_loss": 3.719862678395627, + "tokens_seen": 887357440 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007384047504413417, + "loss": 0.0755, + "theoretical_loss": 3.719756612844377, + "tokens_seen": 887619584 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007383245064997593, + "loss": 0.0738, + "theoretical_loss": 3.7196505873812105, + "tokens_seen": 887881728 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0009603702928870916, + "objective/train/docs_used": 327973, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3757928609848022, + "objective/train/original_loss": 1.3757928609848022, + "objective/train/theoretical_loss": 3.719597589674226, + "objective/train/tokens_used": 908472800, + "objective/train/value_avg": -0.006237030029296875, + "objective/train/value_loss": 0.00027475733077153563, + "objective/train/value_max": -0.00013554096221923828, + "objective/train/value_min": -0.5302734375, + "objective/train/value_reward_corr": 0.5065483364707875, + "objective/train/value_std": 0.0123748779296875, + "objective/train/weight_avg": 1.0010859966278076, + "objective/train/weighted_lm_loss": 1.3766732215881348, + "objective/train/weights_max": 1.5861471891403198, + "objective/train/weights_min": 0.37259092926979065, + "theoretical_loss": 3.719597589674226, + "tokens_seen": 888012800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007382442625581769, + "loss": 0.0765, + "theoretical_loss": 3.7195446019791465, + "tokens_seen": 888143872 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007381640186165944, + "loss": 0.0757, + "theoretical_loss": 3.7194386566112314, + "tokens_seen": 888406016 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007380837746750121, + "loss": 0.0739, + "theoretical_loss": 3.7193327512505356, + "tokens_seen": 888668160 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007380035307334296, + "loss": 0.0773, + "theoretical_loss": 3.7192268858701576, + "tokens_seen": 888930304 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007379232867918472, + "loss": 0.0743, + "theoretical_loss": 3.7191210604432205, + "tokens_seen": 889192448 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007378430428502649, + "loss": 0.0776, + "theoretical_loss": 3.7190152749428735, + "tokens_seen": 889454592 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007377627989086824, + "loss": 0.0758, + "theoretical_loss": 3.7189095293422927, + "tokens_seen": 889716736 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007376825549671, + "loss": 0.0774, + "theoretical_loss": 3.71880382361468, + "tokens_seen": 889978880 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007376023110255176, + "loss": 0.0768, + "theoretical_loss": 3.7186981577332614, + "tokens_seen": 890241024 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007375220670839352, + "loss": 0.0737, + "theoretical_loss": 3.718592531671291, + "tokens_seen": 890503168 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007374418231423527, + "loss": 0.0761, + "theoretical_loss": 3.7184869454020477, + "tokens_seen": 890765312 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007373615792007704, + "loss": 0.075, + "theoretical_loss": 3.7183813988988357, + "tokens_seen": 891027456 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0005341742653399706, + "objective/train/docs_used": 329220, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4387054443359375, + "objective/train/original_loss": 1.4387052059173584, + "objective/train/theoretical_loss": 3.7182758921349865, + "objective/train/tokens_used": 911749600, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.0006961746839806437, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.74853515625, + "objective/train/value_reward_corr": 0.5610245926975079, + "objective/train/value_std": 0.018096923828125, + "objective/train/weight_avg": 1.0008351802825928, + "objective/train/weighted_lm_loss": 1.439525842666626, + "objective/train/weights_max": 1.8936995267868042, + "objective/train/weights_min": 0.36917349696159363, + "theoretical_loss": 3.7182758921349865, + "tokens_seen": 891289600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007372813352591879, + "loss": 0.0782, + "theoretical_loss": 3.7182758921349865, + "tokens_seen": 891289600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007372010913176055, + "loss": 0.0762, + "theoretical_loss": 3.718170425083856, + "tokens_seen": 891551744 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007371208473760232, + "loss": 0.0755, + "theoretical_loss": 3.718064997718826, + "tokens_seen": 891813888 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007370406034344407, + "loss": 0.0755, + "theoretical_loss": 3.7179596100133034, + "tokens_seen": 892076032 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007369603594928584, + "loss": 0.0782, + "theoretical_loss": 3.7178542619407233, + "tokens_seen": 892338176 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007368801155512759, + "loss": 0.0786, + "theoretical_loss": 3.7177489534745427, + "tokens_seen": 892600320 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007367998716096935, + "loss": 0.0789, + "theoretical_loss": 3.717643684588247, + "tokens_seen": 892862464 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007367196276681111, + "loss": 0.077, + "theoretical_loss": 3.7175384552553457, + "tokens_seen": 893124608 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007366393837265286, + "loss": 0.0743, + "theoretical_loss": 3.7174332654493742, + "tokens_seen": 893386752 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007365591397849462, + "loss": 0.0768, + "theoretical_loss": 3.717328115143894, + "tokens_seen": 893648896 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007364788958433638, + "loss": 0.0768, + "theoretical_loss": 3.717223004312491, + "tokens_seen": 893911040 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007363986519017815, + "loss": 0.077, + "theoretical_loss": 3.717117932928777, + "tokens_seen": 894173184 + }, + { + "epoch": 0.27, + "learning_rate": 0.000736318407960199, + "loss": 0.0754, + "theoretical_loss": 3.7170129009663886, + "tokens_seen": 894435328 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0014942382695153356, + "objective/train/docs_used": 330336, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3993629217147827, + "objective/train/original_loss": 1.3993628025054932, + "objective/train/theoretical_loss": 3.7169603997599605, + "objective/train/tokens_used": 915026400, + "objective/train/value_avg": -0.006420135498046875, + "objective/train/value_loss": 0.00015116189024411142, + "objective/train/value_max": -0.00015473365783691406, + "objective/train/value_min": -0.1751708984375, + "objective/train/value_reward_corr": 0.49777587233171894, + "objective/train/value_std": 0.0080718994140625, + "objective/train/weight_avg": 1.0015615224838257, + "objective/train/weighted_lm_loss": 1.4017528295516968, + "objective/train/weights_max": 1.12247896194458, + "objective/train/weights_min": 0.36870554089546204, + "theoretical_loss": 3.7169603997599605, + "tokens_seen": 894566400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007362381640186167, + "loss": 0.0778, + "theoretical_loss": 3.716907908398989, + "tokens_seen": 894697472 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007361579200770342, + "loss": 0.0757, + "theoretical_loss": 3.7168029552002655, + "tokens_seen": 894959616 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007360776761354517, + "loss": 0.0766, + "theoretical_loss": 3.716698041343931, + "tokens_seen": 895221760 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007359974321938694, + "loss": 0.078, + "theoretical_loss": 3.716593166803724, + "tokens_seen": 895483904 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007359171882522869, + "loss": 0.0735, + "theoretical_loss": 3.7164883315534087, + "tokens_seen": 895746048 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007358369443107046, + "loss": 0.0775, + "theoretical_loss": 3.7163835355667723, + "tokens_seen": 896008192 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007357567003691221, + "loss": 0.074, + "theoretical_loss": 3.7162787788176295, + "tokens_seen": 896270336 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007356764564275397, + "loss": 0.0763, + "theoretical_loss": 3.716174061279819, + "tokens_seen": 896532480 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007355962124859574, + "loss": 0.0741, + "theoretical_loss": 3.7160693829272047, + "tokens_seen": 896794624 + }, + { + "epoch": 0.27, + "learning_rate": 0.000735515968544375, + "loss": 0.0747, + "theoretical_loss": 3.715964743733676, + "tokens_seen": 897056768 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007354357246027925, + "loss": 0.0779, + "theoretical_loss": 3.7158601436731464, + "tokens_seen": 897318912 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007353554806612101, + "loss": 0.0751, + "theoretical_loss": 3.715755582719556, + "tokens_seen": 897581056 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0011574587551876903, + "objective/train/docs_used": 331577, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4930155277252197, + "objective/train/original_loss": 1.4930155277252197, + "objective/train/theoretical_loss": 3.7156510608468674, + "objective/train/tokens_used": 918303200, + "objective/train/value_avg": -0.007770538330078125, + "objective/train/value_loss": 0.0005507190944626927, + "objective/train/value_max": -7.843971252441406e-05, + "objective/train/value_min": -0.568359375, + "objective/train/value_reward_corr": 0.6120014802789004, + "objective/train/value_std": 0.01477813720703125, + "objective/train/weight_avg": 1.0013858079910278, + "objective/train/weighted_lm_loss": 1.4944040775299072, + "objective/train/weights_max": 1.3329410552978516, + "objective/train/weights_min": 0.22743035852909088, + "theoretical_loss": 3.7156510608468674, + "tokens_seen": 897843200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007352752367196277, + "loss": 0.0722, + "theoretical_loss": 3.7156510608468674, + "tokens_seen": 897843200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007351949927780452, + "loss": 0.0771, + "theoretical_loss": 3.7155465780290706, + "tokens_seen": 898105344 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007351147488364629, + "loss": 0.0756, + "theoretical_loss": 3.7154421342401793, + "tokens_seen": 898367488 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007350345048948804, + "loss": 0.076, + "theoretical_loss": 3.7153377294542325, + "tokens_seen": 898629632 + }, + { + "epoch": 0.27, + "learning_rate": 0.000734954260953298, + "loss": 0.0757, + "theoretical_loss": 3.715233363645293, + "tokens_seen": 898891776 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007348740170117157, + "loss": 0.0759, + "theoretical_loss": 3.7151290367874497, + "tokens_seen": 899153920 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007347937730701332, + "loss": 0.0747, + "theoretical_loss": 3.715024748854815, + "tokens_seen": 899416064 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007347135291285509, + "loss": 0.0754, + "theoretical_loss": 3.714920499821528, + "tokens_seen": 899678208 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007346332851869684, + "loss": 0.0746, + "theoretical_loss": 3.7148162896617505, + "tokens_seen": 899940352 + }, + { + "epoch": 0.27, + "learning_rate": 0.000734553041245386, + "loss": 0.0742, + "theoretical_loss": 3.714712118349669, + "tokens_seen": 900202496 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007344727973038036, + "loss": 0.0789, + "theoretical_loss": 3.7146079858594976, + "tokens_seen": 900464640 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007343925533622212, + "loss": 0.0753, + "theoretical_loss": 3.714503892165471, + "tokens_seen": 900726784 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007343123094206387, + "loss": 0.0761, + "theoretical_loss": 3.714399837241851, + "tokens_seen": 900988928 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0022575294133275747, + "objective/train/docs_used": 332819, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6288766860961914, + "objective/train/original_loss": 1.6288766860961914, + "objective/train/theoretical_loss": 3.714347824310907, + "objective/train/tokens_used": 921580000, + "objective/train/value_avg": -0.00806427001953125, + "objective/train/value_loss": 0.00021155290596652776, + "objective/train/value_max": -0.00020182132720947266, + "objective/train/value_min": -0.2548828125, + "objective/train/value_reward_corr": 0.5714205851186827, + "objective/train/value_std": 0.01141357421875, + "objective/train/weight_avg": 1.0023540258407593, + "objective/train/weighted_lm_loss": 1.632721185684204, + "objective/train/weights_max": 1.173399567604065, + "objective/train/weights_min": 0.3694700598716736, + "theoretical_loss": 3.714347824310907, + "tokens_seen": 901120000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007342320654790563, + "loss": 0.0751, + "theoretical_loss": 3.7142958210629233, + "tokens_seen": 901251072 + }, + { + "epoch": 0.27, + "learning_rate": 0.000734151821537474, + "loss": 0.0769, + "theoretical_loss": 3.714191843602998, + "tokens_seen": 901513216 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007340715775958915, + "loss": 0.077, + "theoretical_loss": 3.7140879048364104, + "tokens_seen": 901775360 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007339913336543092, + "loss": 0.0756, + "theoretical_loss": 3.7139840047375183, + "tokens_seen": 902037504 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007339110897127267, + "loss": 0.0761, + "theoretical_loss": 3.713880143280707, + "tokens_seen": 902299648 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007338308457711443, + "loss": 0.076, + "theoretical_loss": 3.713776320440383, + "tokens_seen": 902561792 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007337506018295619, + "loss": 0.0757, + "theoretical_loss": 3.7136725361909795, + "tokens_seen": 902823936 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007336703578879794, + "loss": 0.0765, + "theoretical_loss": 3.713568790506953, + "tokens_seen": 903086080 + }, + { + "epoch": 0.27, + "learning_rate": 0.000733590113946397, + "loss": 0.0747, + "theoretical_loss": 3.7134650833627854, + "tokens_seen": 903348224 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007335098700048146, + "loss": 0.0736, + "theoretical_loss": 3.7133614147329808, + "tokens_seen": 903610368 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007334296260632322, + "loss": 0.0777, + "theoretical_loss": 3.7132577845920696, + "tokens_seen": 903872512 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007333493821216499, + "loss": 0.0761, + "theoretical_loss": 3.7131541929146055, + "tokens_seen": 904134656 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.001184366992674768, + "objective/train/docs_used": 333943, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4446207284927368, + "objective/train/original_loss": 1.4446208477020264, + "objective/train/theoretical_loss": 3.713050639675166, + "objective/train/tokens_used": 924856800, + "objective/train/value_avg": -0.00861358642578125, + "objective/train/value_loss": 0.00047513179015368223, + "objective/train/value_max": -0.00012826919555664062, + "objective/train/value_min": -0.440185546875, + "objective/train/value_reward_corr": 0.6907593542665094, + "objective/train/value_std": 0.01509857177734375, + "objective/train/weight_avg": 0.9990274310112, + "objective/train/weighted_lm_loss": 1.4427376985549927, + "objective/train/weights_max": 1.3462886810302734, + "objective/train/weights_min": 0.37275585532188416, + "theoretical_loss": 3.713050639675166, + "tokens_seen": 904396800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007332691381800675, + "loss": 0.073, + "theoretical_loss": 3.713050639675166, + "tokens_seen": 904396800 + }, + { + "epoch": 0.27, + "learning_rate": 0.000733188894238485, + "loss": 0.0732, + "theoretical_loss": 3.712947124848354, + "tokens_seen": 904658944 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007331086502969026, + "loss": 0.0772, + "theoretical_loss": 3.7128436484087954, + "tokens_seen": 904921088 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007330284063553202, + "loss": 0.0751, + "theoretical_loss": 3.712740210331141, + "tokens_seen": 905183232 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007329481624137377, + "loss": 0.0769, + "theoretical_loss": 3.712636810590065, + "tokens_seen": 905445376 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007328679184721554, + "loss": 0.0737, + "theoretical_loss": 3.7125334491602664, + "tokens_seen": 905707520 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007327876745305729, + "loss": 0.0745, + "theoretical_loss": 3.712430126016467, + "tokens_seen": 905969664 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007327074305889905, + "loss": 0.0752, + "theoretical_loss": 3.7123268411334136, + "tokens_seen": 906231808 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007326271866474082, + "loss": 0.0738, + "theoretical_loss": 3.7122235944858772, + "tokens_seen": 906493952 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007325469427058258, + "loss": 0.0736, + "theoretical_loss": 3.712120386048652, + "tokens_seen": 906756096 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007324666987642433, + "loss": 0.0761, + "theoretical_loss": 3.712017215796556, + "tokens_seen": 907018240 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007323864548226609, + "loss": 0.0744, + "theoretical_loss": 3.7119140837044315, + "tokens_seen": 907280384 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007323062108810785, + "loss": 0.0736, + "theoretical_loss": 3.7118109897471445, + "tokens_seen": 907542528 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.00023626594338566065, + "objective/train/docs_used": 335091, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4100664854049683, + "objective/train/original_loss": 1.4100666046142578, + "objective/train/theoretical_loss": 3.7117594570612176, + "objective/train/tokens_used": 928133600, + "objective/train/value_avg": -0.006671905517578125, + "objective/train/value_loss": 0.0001805531937861815, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.2276611328125, + "objective/train/value_reward_corr": 0.687647519301903, + "objective/train/value_std": 0.0102081298828125, + "objective/train/weight_avg": 0.9998487234115601, + "objective/train/weighted_lm_loss": 1.4099451303482056, + "objective/train/weights_max": 1.0809279680252075, + "objective/train/weights_min": 0.37694263458251953, + "theoretical_loss": 3.7117594570612176, + "tokens_seen": 907673600 + }, + { + "epoch": 0.28, + "learning_rate": 0.000732225966939496, + "loss": 0.0734, + "theoretical_loss": 3.7117079338995858, + "tokens_seen": 907804672 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007321457229979137, + "loss": 0.0769, + "theoretical_loss": 3.7116049161366673, + "tokens_seen": 908066816 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007320654790563312, + "loss": 0.0729, + "theoretical_loss": 3.7115019364333275, + "tokens_seen": 908328960 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731985235114749, + "loss": 0.0762, + "theoretical_loss": 3.7113989947645276, + "tokens_seen": 908591104 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007319049911731665, + "loss": 0.0758, + "theoretical_loss": 3.711296091105252, + "tokens_seen": 908853248 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731824747231584, + "loss": 0.0724, + "theoretical_loss": 3.7111932254305096, + "tokens_seen": 909115392 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007317445032900017, + "loss": 0.075, + "theoretical_loss": 3.7110903977153313, + "tokens_seen": 909377536 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007316642593484192, + "loss": 0.0759, + "theoretical_loss": 3.710987607934774, + "tokens_seen": 909639680 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007315840154068368, + "loss": 0.0759, + "theoretical_loss": 3.7108848560639167, + "tokens_seen": 909901824 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007315037714652544, + "loss": 0.0752, + "theoretical_loss": 3.7107821420778615, + "tokens_seen": 910163968 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731423527523672, + "loss": 0.0747, + "theoretical_loss": 3.7106794659517357, + "tokens_seen": 910426112 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007313432835820895, + "loss": 0.073, + "theoretical_loss": 3.7105768276606885, + "tokens_seen": 910688256 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0003594663867261261, + "objective/train/docs_used": 336141, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.548851728439331, + "objective/train/original_loss": 1.548851490020752, + "objective/train/theoretical_loss": 3.710474227179893, + "objective/train/tokens_used": 931410400, + "objective/train/value_avg": -0.005245208740234375, + "objective/train/value_loss": 0.00010473921429365873, + "objective/train/value_max": -6.973743438720703e-05, + "objective/train/value_min": -0.466552734375, + "objective/train/value_reward_corr": 0.6725354994272675, + "objective/train/value_std": 0.0080413818359375, + "objective/train/weight_avg": 1.0004104375839233, + "objective/train/weighted_lm_loss": 1.550046443939209, + "objective/train/weights_max": 1.089442253112793, + "objective/train/weights_min": 0.6256026029586792, + "theoretical_loss": 3.710474227179893, + "tokens_seen": 910950400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007312630396405071, + "loss": 0.0742, + "theoretical_loss": 3.710474227179893, + "tokens_seen": 910950400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007311827956989248, + "loss": 0.0742, + "theoretical_loss": 3.710371664484547, + "tokens_seen": 911212544 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007311025517573423, + "loss": 0.0715, + "theoretical_loss": 3.710269139549869, + "tokens_seen": 911474688 + }, + { + "epoch": 0.28, + "learning_rate": 0.00073102230781576, + "loss": 0.0725, + "theoretical_loss": 3.7101666523511034, + "tokens_seen": 911736832 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007309420638741775, + "loss": 0.0749, + "theoretical_loss": 3.710064202863517, + "tokens_seen": 911998976 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007308618199325952, + "loss": 0.077, + "theoretical_loss": 3.7099617910623994, + "tokens_seen": 912261120 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007307815759910127, + "loss": 0.0755, + "theoretical_loss": 3.7098594169230648, + "tokens_seen": 912523264 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007307013320494302, + "loss": 0.0731, + "theoretical_loss": 3.7097570804208497, + "tokens_seen": 912785408 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007306210881078479, + "loss": 0.0741, + "theoretical_loss": 3.709654781531113, + "tokens_seen": 913047552 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007305408441662654, + "loss": 0.0752, + "theoretical_loss": 3.709552520229239, + "tokens_seen": 913309696 + }, + { + "epoch": 0.28, + "learning_rate": 0.000730460600224683, + "loss": 0.0742, + "theoretical_loss": 3.7094502964906337, + "tokens_seen": 913571840 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007303803562831007, + "loss": 0.0742, + "theoretical_loss": 3.709348110290726, + "tokens_seen": 913833984 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007303001123415183, + "loss": 0.0744, + "theoretical_loss": 3.7092459616049682, + "tokens_seen": 914096128 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0010315380059182644, + "objective/train/docs_used": 337261, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.495456576347351, + "objective/train/original_loss": 1.4954564571380615, + "objective/train/theoretical_loss": 3.709194901322231, + "objective/train/tokens_used": 934687200, + "objective/train/value_avg": -0.01190185546875, + "objective/train/value_loss": 0.00031099331681616604, + "objective/train/value_max": -0.0002453327178955078, + "objective/train/value_min": -0.70556640625, + "objective/train/value_reward_corr": 0.7287494601771468, + "objective/train/value_std": 0.0191650390625, + "objective/train/weight_avg": 1.0011770725250244, + "objective/train/weighted_lm_loss": 1.498216986656189, + "objective/train/weights_max": 1.606416940689087, + "objective/train/weights_min": 0.3689524829387665, + "theoretical_loss": 3.709194901322231, + "tokens_seen": 914227200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007302198683999358, + "loss": 0.0747, + "theoretical_loss": 3.709143850408837, + "tokens_seen": 914358272 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007301396244583534, + "loss": 0.0742, + "theoretical_loss": 3.7090417766778305, + "tokens_seen": 914620416 + }, + { + "epoch": 0.28, + "learning_rate": 0.000730059380516771, + "loss": 0.0738, + "theoretical_loss": 3.7089397403874704, + "tokens_seen": 914882560 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007299791365751885, + "loss": 0.0742, + "theoretical_loss": 3.708837741513301, + "tokens_seen": 915144704 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007298988926336062, + "loss": 0.0752, + "theoretical_loss": 3.7087357800308904, + "tokens_seen": 915406848 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007298186486920237, + "loss": 0.0735, + "theoretical_loss": 3.708633855915829, + "tokens_seen": 915668992 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007297384047504413, + "loss": 0.0742, + "theoretical_loss": 3.708531969143731, + "tokens_seen": 915931136 + }, + { + "epoch": 0.28, + "learning_rate": 0.000729658160808859, + "loss": 0.0769, + "theoretical_loss": 3.708430119690232, + "tokens_seen": 916193280 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007295779168672765, + "loss": 0.075, + "theoretical_loss": 3.708328307530991, + "tokens_seen": 916455424 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007294976729256942, + "loss": 0.0736, + "theoretical_loss": 3.7082265326416914, + "tokens_seen": 916717568 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007294174289841117, + "loss": 0.0733, + "theoretical_loss": 3.708124794998037, + "tokens_seen": 916979712 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007293371850425293, + "loss": 0.0724, + "theoretical_loss": 3.708023094575756, + "tokens_seen": 917241856 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0011083170538768172, + "objective/train/docs_used": 338425, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5612324476242065, + "objective/train/original_loss": 1.561232328414917, + "objective/train/theoretical_loss": 3.7079214313505986, + "objective/train/tokens_used": 937964000, + "objective/train/value_avg": -0.0082550048828125, + "objective/train/value_loss": 0.0003109085373580456, + "objective/train/value_max": -0.00016868114471435547, + "objective/train/value_min": -0.955078125, + "objective/train/value_reward_corr": 0.687470457987905, + "objective/train/value_std": 0.0196990966796875, + "objective/train/weight_avg": 1.0012561082839966, + "objective/train/weighted_lm_loss": 1.562901258468628, + "objective/train/weights_max": 2.4972524642944336, + "objective/train/weights_min": 0.38558143377304077, + "theoretical_loss": 3.7079214313505986, + "tokens_seen": 917504000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007292569411009469, + "loss": 0.0757, + "theoretical_loss": 3.7079214313505986, + "tokens_seen": 917504000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007291766971593645, + "loss": 0.073, + "theoretical_loss": 3.707819805298338, + "tokens_seen": 917766144 + }, + { + "epoch": 0.28, + "learning_rate": 0.000729096453217782, + "loss": 0.0754, + "theoretical_loss": 3.7077182163947704, + "tokens_seen": 918028288 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007290162092761998, + "loss": 0.0749, + "theoretical_loss": 3.7076166646157134, + "tokens_seen": 918290432 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007289359653346173, + "loss": 0.075, + "theoretical_loss": 3.7075151499370094, + "tokens_seen": 918552576 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007288557213930348, + "loss": 0.0769, + "theoretical_loss": 3.7074136723345212, + "tokens_seen": 918814720 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007287754774514525, + "loss": 0.0771, + "theoretical_loss": 3.707312231784136, + "tokens_seen": 919076864 + }, + { + "epoch": 0.28, + "learning_rate": 0.00072869523350987, + "loss": 0.0746, + "theoretical_loss": 3.7072108282617617, + "tokens_seen": 919339008 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007286149895682876, + "loss": 0.0757, + "theoretical_loss": 3.7071094617433307, + "tokens_seen": 919601152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007285347456267052, + "loss": 0.075, + "theoretical_loss": 3.707008132204796, + "tokens_seen": 919863296 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007284545016851228, + "loss": 0.0733, + "theoretical_loss": 3.7069068396221345, + "tokens_seen": 920125440 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007283742577435403, + "loss": 0.0733, + "theoretical_loss": 3.706805583971345, + "tokens_seen": 920387584 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007282940138019579, + "loss": 0.0757, + "theoretical_loss": 3.7067043652284495, + "tokens_seen": 920649728 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.00018246793479193002, + "objective/train/docs_used": 339576, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.468998908996582, + "objective/train/original_loss": 1.468998908996582, + "objective/train/theoretical_loss": 3.706653769689974, + "objective/train/tokens_used": 941240800, + "objective/train/value_avg": -0.0093231201171875, + "objective/train/value_loss": 0.0003314831992611289, + "objective/train/value_max": -0.00017261505126953125, + "objective/train/value_min": -0.313720703125, + "objective/train/value_reward_corr": 0.6134654432248161, + "objective/train/value_std": 0.0128936767578125, + "objective/train/weight_avg": 0.9999735355377197, + "objective/train/weighted_lm_loss": 1.4704101085662842, + "objective/train/weights_max": 1.2405765056610107, + "objective/train/weights_min": 0.38339895009994507, + "theoretical_loss": 3.706653769689974, + "tokens_seen": 920780800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007282137698603756, + "loss": 0.0812, + "theoretical_loss": 3.7066031833694906, + "tokens_seen": 920911872 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007281335259187932, + "loss": 0.0766, + "theoretical_loss": 3.7065020383705347, + "tokens_seen": 921174016 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007280532819772108, + "loss": 0.072, + "theoretical_loss": 3.70640093020767, + "tokens_seen": 921436160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007279730380356283, + "loss": 0.0763, + "theoretical_loss": 3.7062998588570073, + "tokens_seen": 921698304 + }, + { + "epoch": 0.28, + "learning_rate": 0.000727892794094046, + "loss": 0.0741, + "theoretical_loss": 3.7061988242946793, + "tokens_seen": 921960448 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007278125501524635, + "loss": 0.0745, + "theoretical_loss": 3.7060978264968423, + "tokens_seen": 922222592 + }, + { + "epoch": 0.28, + "learning_rate": 0.000727732306210881, + "loss": 0.0754, + "theoretical_loss": 3.705996865439672, + "tokens_seen": 922484736 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007276520622692987, + "loss": 0.0728, + "theoretical_loss": 3.7058959410993695, + "tokens_seen": 922746880 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007275718183277162, + "loss": 0.0744, + "theoretical_loss": 3.7057950534521558, + "tokens_seen": 923009024 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007274915743861338, + "loss": 0.0764, + "theoretical_loss": 3.705694202474275, + "tokens_seen": 923271168 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007274113304445515, + "loss": 0.0743, + "theoretical_loss": 3.7055933881419936, + "tokens_seen": 923533312 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007273310865029691, + "loss": 0.0708, + "theoretical_loss": 3.7054926104315995, + "tokens_seen": 923795456 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0013274255907163024, + "objective/train/docs_used": 340768, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6183089017868042, + "objective/train/original_loss": 1.6183090209960938, + "objective/train/theoretical_loss": 3.705391869319403, + "objective/train/tokens_used": 944517600, + "objective/train/value_avg": -0.00782012939453125, + "objective/train/value_loss": 0.00026030809385702014, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.3779296875, + "objective/train/value_reward_corr": 0.6056620533963706, + "objective/train/value_std": 0.0130767822265625, + "objective/train/weight_avg": 1.0014489889144897, + "objective/train/weighted_lm_loss": 1.620178461074829, + "objective/train/weights_max": 1.4343163967132568, + "objective/train/weights_min": 0.37169942259788513, + "theoretical_loss": 3.705391869319403, + "tokens_seen": 924057600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007272508425613866, + "loss": 0.0733, + "theoretical_loss": 3.705391869319403, + "tokens_seen": 924057600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007271705986198042, + "loss": 0.0717, + "theoretical_loss": 3.7052911647817357, + "tokens_seen": 924319744 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007270903546782218, + "loss": 0.0754, + "theoretical_loss": 3.7051904967949527, + "tokens_seen": 924581888 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007270101107366394, + "loss": 0.0712, + "theoretical_loss": 3.7050898653354296, + "tokens_seen": 924844032 + }, + { + "epoch": 0.28, + "learning_rate": 0.000726929866795057, + "loss": 0.0747, + "theoretical_loss": 3.7049892703795653, + "tokens_seen": 925106176 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007268496228534745, + "loss": 0.0742, + "theoretical_loss": 3.70488871190378, + "tokens_seen": 925368320 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007267693789118923, + "loss": 0.0738, + "theoretical_loss": 3.704788189884515, + "tokens_seen": 925630464 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007266891349703098, + "loss": 0.0755, + "theoretical_loss": 3.7046877042982347, + "tokens_seen": 925892608 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007266088910287273, + "loss": 0.076, + "theoretical_loss": 3.7045872551214254, + "tokens_seen": 926154752 + }, + { + "epoch": 0.28, + "learning_rate": 0.000726528647087145, + "loss": 0.072, + "theoretical_loss": 3.704486842330594, + "tokens_seen": 926416896 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007264484031455625, + "loss": 0.074, + "theoretical_loss": 3.7043864659022696, + "tokens_seen": 926679040 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007263681592039801, + "loss": 0.0717, + "theoretical_loss": 3.704286125813004, + "tokens_seen": 926941184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007262879152623977, + "loss": 0.074, + "theoretical_loss": 3.7041858220393706, + "tokens_seen": 927203328 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.0009031386580318213, + "objective/train/docs_used": 341815, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.391129970550537, + "objective/train/original_loss": 1.391129970550537, + "objective/train/theoretical_loss": 3.7041356837636013, + "objective/train/tokens_used": 947794400, + "objective/train/value_avg": -0.0078125, + "objective/train/value_loss": 0.0002524466544855386, + "objective/train/value_max": -8.481740951538086e-05, + "objective/train/value_min": -0.256591796875, + "objective/train/value_reward_corr": 0.7173105114970126, + "objective/train/value_std": 0.012786865234375, + "objective/train/weight_avg": 0.9992140531539917, + "objective/train/weighted_lm_loss": 1.3906961679458618, + "objective/train/weights_max": 1.1406426429748535, + "objective/train/weights_min": 0.3694460988044739, + "theoretical_loss": 3.7041356837636013, + "tokens_seen": 927334400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007262076713208153, + "loss": 0.0731, + "theoretical_loss": 3.704085554557964, + "tokens_seen": 927465472 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007261274273792328, + "loss": 0.0729, + "theoretical_loss": 3.703985323345399, + "tokens_seen": 927727616 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007260471834376505, + "loss": 0.0707, + "theoretical_loss": 3.7038851283783156, + "tokens_seen": 927989760 + }, + { + "epoch": 0.28, + "learning_rate": 0.000725966939496068, + "loss": 0.0748, + "theoretical_loss": 3.7037849696333724, + "tokens_seen": 928251904 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007258866955544856, + "loss": 0.0767, + "theoretical_loss": 3.703684847087251, + "tokens_seen": 928514048 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007258064516129033, + "loss": 0.0762, + "theoretical_loss": 3.7035847607166534, + "tokens_seen": 928776192 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007257262076713208, + "loss": 0.0735, + "theoretical_loss": 3.703484710498306, + "tokens_seen": 929038336 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007256459637297385, + "loss": 0.0802, + "theoretical_loss": 3.703384696408953, + "tokens_seen": 929300480 + }, + { + "epoch": 0.28, + "learning_rate": 0.000725565719788156, + "loss": 0.0735, + "theoretical_loss": 3.7032847184253628, + "tokens_seen": 929562624 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007254854758465736, + "loss": 0.0732, + "theoretical_loss": 3.7031847765243233, + "tokens_seen": 929824768 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007254052319049912, + "loss": 0.0713, + "theoretical_loss": 3.7030848706826465, + "tokens_seen": 930086912 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007253249879634087, + "loss": 0.0751, + "theoretical_loss": 3.702985000877163, + "tokens_seen": 930349056 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.002034248784184456, + "objective/train/docs_used": 343003, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.581547737121582, + "objective/train/original_loss": 1.581547737121582, + "objective/train/theoretical_loss": 3.7028851670847267, + "objective/train/tokens_used": 951071200, + "objective/train/value_avg": -0.007381439208984375, + "objective/train/value_loss": 0.0002921258274000138, + "objective/train/value_max": -0.00011324882507324219, + "objective/train/value_min": -0.54833984375, + "objective/train/value_reward_corr": 0.569236282685358, + "objective/train/value_std": 0.01422882080078125, + "objective/train/weight_avg": 1.002168893814087, + "objective/train/weighted_lm_loss": 1.5849435329437256, + "objective/train/weights_max": 1.5144890546798706, + "objective/train/weights_min": 0.36928337812423706, + "theoretical_loss": 3.7028851670847267, + "tokens_seen": 930611200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007252447440218264, + "loss": 0.0746, + "theoretical_loss": 3.7028851670847267, + "tokens_seen": 930611200 + }, + { + "epoch": 0.28, + "learning_rate": 0.000725164500080244, + "loss": 0.0729, + "theoretical_loss": 3.7027853692822124, + "tokens_seen": 930873344 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007250842561386616, + "loss": 0.076, + "theoretical_loss": 3.702685607446516, + "tokens_seen": 931135488 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007250040121970791, + "loss": 0.0761, + "theoretical_loss": 3.7025858815545543, + "tokens_seen": 931397632 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007249237682554968, + "loss": 0.0746, + "theoretical_loss": 3.7024861915832665, + "tokens_seen": 931659776 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007248435243139143, + "loss": 0.0739, + "theoretical_loss": 3.7023865375096126, + "tokens_seen": 931921920 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007247632803723318, + "loss": 0.0758, + "theoretical_loss": 3.7022869193105734, + "tokens_seen": 932184064 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007246830364307495, + "loss": 0.073, + "theoretical_loss": 3.702187336963151, + "tokens_seen": 932446208 + }, + { + "epoch": 0.28, + "learning_rate": 0.000724602792489167, + "loss": 0.0742, + "theoretical_loss": 3.70208779044437, + "tokens_seen": 932708352 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007245225485475848, + "loss": 0.0719, + "theoretical_loss": 3.7019882797312746, + "tokens_seen": 932970496 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007244423046060023, + "loss": 0.0795, + "theoretical_loss": 3.701888804800931, + "tokens_seen": 933232640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007243620606644199, + "loss": 0.0709, + "theoretical_loss": 3.701789365630426, + "tokens_seen": 933494784 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007242818167228375, + "loss": 0.0728, + "theoretical_loss": 3.701689962196868, + "tokens_seen": 933756928 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0019412703113630414, + "objective/train/docs_used": 344002, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6019786596298218, + "objective/train/original_loss": 1.6019787788391113, + "objective/train/theoretical_loss": 3.7016402738742964, + "objective/train/tokens_used": 954348000, + "objective/train/value_avg": -0.00753021240234375, + "objective/train/value_loss": 0.00015285314293578267, + "objective/train/value_max": -0.00015234947204589844, + "objective/train/value_min": -0.267822265625, + "objective/train/value_reward_corr": 0.5820011602921469, + "objective/train/value_std": 0.0103759765625, + "objective/train/weight_avg": 1.0020129680633545, + "objective/train/weighted_lm_loss": 1.6059612035751343, + "objective/train/weights_max": 1.2121343612670898, + "objective/train/weights_min": 0.3773167133331299, + "theoretical_loss": 3.7016402738742964, + "tokens_seen": 933888000 + }, + { + "epoch": 0.28, + "learning_rate": 0.000724201572781255, + "loss": 0.0768, + "theoretical_loss": 3.701590594477387, + "tokens_seen": 934019072 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007241213288396726, + "loss": 0.0769, + "theoretical_loss": 3.701491262449131, + "tokens_seen": 934281216 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007240410848980902, + "loss": 0.0765, + "theoretical_loss": 3.7013919660892736, + "tokens_seen": 934543360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007239608409565078, + "loss": 0.0742, + "theoretical_loss": 3.701292705375006, + "tokens_seen": 934805504 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007238805970149253, + "loss": 0.0755, + "theoretical_loss": 3.701193480283542, + "tokens_seen": 935067648 + }, + { + "epoch": 0.28, + "learning_rate": 0.000723800353073343, + "loss": 0.0718, + "theoretical_loss": 3.701094290792116, + "tokens_seen": 935329792 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007237201091317606, + "loss": 0.0754, + "theoretical_loss": 3.7009951368779825, + "tokens_seen": 935591936 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007236398651901781, + "loss": 0.073, + "theoretical_loss": 3.700896018518418, + "tokens_seen": 935854080 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007235596212485958, + "loss": 0.0756, + "theoretical_loss": 3.700796935690719, + "tokens_seen": 936116224 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007234793773070133, + "loss": 0.0737, + "theoretical_loss": 3.700697888372204, + "tokens_seen": 936378368 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007233991333654309, + "loss": 0.0746, + "theoretical_loss": 3.7005988765402114, + "tokens_seen": 936640512 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007233188894238485, + "loss": 0.0737, + "theoretical_loss": 3.700499900172101, + "tokens_seen": 936902656 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0009387860773131251, + "objective/train/docs_used": 344763, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4261451959609985, + "objective/train/original_loss": 1.426145315170288, + "objective/train/theoretical_loss": 3.700400959245252, + "objective/train/tokens_used": 957624800, + "objective/train/value_avg": -0.01166534423828125, + "objective/train/value_loss": 0.0004122898681089282, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.81689453125, + "objective/train/value_reward_corr": 0.8483639387700215, + "objective/train/value_std": 0.03436279296875, + "objective/train/weight_avg": 1.0011343955993652, + "objective/train/weighted_lm_loss": 1.4269537925720215, + "objective/train/weights_max": 1.7235952615737915, + "objective/train/weights_min": 0.3695659041404724, + "theoretical_loss": 3.700400959245252, + "tokens_seen": 937164800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007232386454822661, + "loss": 0.0716, + "theoretical_loss": 3.700400959245252, + "tokens_seen": 937164800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007231584015406837, + "loss": 0.0732, + "theoretical_loss": 3.7003020537370657, + "tokens_seen": 937426944 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007230781575991013, + "loss": 0.0736, + "theoretical_loss": 3.7002031836249643, + "tokens_seen": 937689088 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007229979136575189, + "loss": 0.0756, + "theoretical_loss": 3.7001043488863896, + "tokens_seen": 937951232 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007229176697159365, + "loss": 0.0753, + "theoretical_loss": 3.7000055494988047, + "tokens_seen": 938213376 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007228374257743541, + "loss": 0.0772, + "theoretical_loss": 3.6999067854396936, + "tokens_seen": 938475520 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007227571818327716, + "loss": 0.0715, + "theoretical_loss": 3.6998080566865608, + "tokens_seen": 938737664 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007226769378911893, + "loss": 0.0738, + "theoretical_loss": 3.6997093632169307, + "tokens_seen": 938999808 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007225966939496068, + "loss": 0.0768, + "theoretical_loss": 3.699610705008349, + "tokens_seen": 939261952 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007225164500080244, + "loss": 0.0707, + "theoretical_loss": 3.6995120820383818, + "tokens_seen": 939524096 + }, + { + "epoch": 0.28, + "learning_rate": 0.000722436206066442, + "loss": 0.074, + "theoretical_loss": 3.6994134942846157, + "tokens_seen": 939786240 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007223559621248595, + "loss": 0.0758, + "theoretical_loss": 3.6993149417246576, + "tokens_seen": 940048384 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007222757181832772, + "loss": 0.0737, + "theoretical_loss": 3.699216424336135, + "tokens_seen": 940310528 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0008543147123418748, + "objective/train/docs_used": 345855, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.430222988128662, + "objective/train/original_loss": 1.430222988128662, + "objective/train/theoretical_loss": 3.6991671788241764, + "objective/train/tokens_used": 960901600, + "objective/train/value_avg": -0.00884246826171875, + "objective/train/value_loss": 0.00023561967827845365, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.6904296875, + "objective/train/value_reward_corr": 0.6824104864233338, + "objective/train/value_std": 0.0145416259765625, + "objective/train/weight_avg": 1.0009649991989136, + "objective/train/weighted_lm_loss": 1.4308393001556396, + "objective/train/weights_max": 1.3335462808609009, + "objective/train/weights_min": 0.3730488717556, + "theoretical_loss": 3.6991671788241764, + "tokens_seen": 940441600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007221954742416948, + "loss": 0.0746, + "theoretical_loss": 3.6991179420966964, + "tokens_seen": 940572672 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007221152303001124, + "loss": 0.0729, + "theoretical_loss": 3.69901949498401, + "tokens_seen": 940834816 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007220349863585299, + "loss": 0.0759, + "theoretical_loss": 3.6989210829757644, + "tokens_seen": 941096960 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007219547424169476, + "loss": 0.0758, + "theoretical_loss": 3.6988227060496692, + "tokens_seen": 941359104 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007218744984753651, + "loss": 0.0761, + "theoretical_loss": 3.6987243641834535, + "tokens_seen": 941621248 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007217942545337827, + "loss": 0.0743, + "theoretical_loss": 3.6986260573548675, + "tokens_seen": 941883392 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007217140105922003, + "loss": 0.0746, + "theoretical_loss": 3.698527785541682, + "tokens_seen": 942145536 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007216337666506178, + "loss": 0.0743, + "theoretical_loss": 3.6984295487216867, + "tokens_seen": 942407680 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007215535227090356, + "loss": 0.0746, + "theoretical_loss": 3.6983313468726924, + "tokens_seen": 942669824 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007214732787674531, + "loss": 0.0773, + "theoretical_loss": 3.6982331799725303, + "tokens_seen": 942931968 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007213930348258707, + "loss": 0.0734, + "theoretical_loss": 3.6981350479990525, + "tokens_seen": 943194112 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007213127908842883, + "loss": 0.0781, + "theoretical_loss": 3.6980369509301285, + "tokens_seen": 943456256 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0011019601952284575, + "objective/train/docs_used": 346959, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3961070775985718, + "objective/train/original_loss": 1.3961069583892822, + "objective/train/theoretical_loss": 3.6979388887436517, + "objective/train/tokens_used": 964178400, + "objective/train/value_avg": -0.00862884521484375, + "objective/train/value_loss": 0.0001508643908891827, + "objective/train/value_max": -0.00017535686492919922, + "objective/train/value_min": -0.24072265625, + "objective/train/value_reward_corr": 0.7373741548355937, + "objective/train/value_std": 0.012786865234375, + "objective/train/weight_avg": 1.0011765956878662, + "objective/train/weighted_lm_loss": 1.3976231813430786, + "objective/train/weights_max": 1.2201510667800903, + "objective/train/weights_min": 0.8218066692352295, + "theoretical_loss": 3.6979388887436517, + "tokens_seen": 943718400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007212325469427058, + "loss": 0.0758, + "theoretical_loss": 3.6979388887436517, + "tokens_seen": 943718400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007211523030011234, + "loss": 0.0759, + "theoretical_loss": 3.697840861417533, + "tokens_seen": 943980544 + }, + { + "epoch": 0.29, + "learning_rate": 0.000721072059059541, + "loss": 0.0755, + "theoretical_loss": 3.697742868929704, + "tokens_seen": 944242688 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007209918151179586, + "loss": 0.0745, + "theoretical_loss": 3.6976449112581173, + "tokens_seen": 944504832 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007209115711763761, + "loss": 0.0746, + "theoretical_loss": 3.697546988380744, + "tokens_seen": 944766976 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007208313272347939, + "loss": 0.076, + "theoretical_loss": 3.697449100275577, + "tokens_seen": 945029120 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007207510832932114, + "loss": 0.0726, + "theoretical_loss": 3.6973512469206278, + "tokens_seen": 945291264 + }, + { + "epoch": 0.29, + "learning_rate": 0.000720670839351629, + "loss": 0.0733, + "theoretical_loss": 3.6972534282939282, + "tokens_seen": 945553408 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007205905954100466, + "loss": 0.0726, + "theoretical_loss": 3.6971556443735314, + "tokens_seen": 945815552 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007205103514684641, + "loss": 0.0711, + "theoretical_loss": 3.697057895137508, + "tokens_seen": 946077696 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007204301075268818, + "loss": 0.0772, + "theoretical_loss": 3.696960180563951, + "tokens_seen": 946339840 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007203498635852993, + "loss": 0.0752, + "theoretical_loss": 3.6968625006309717, + "tokens_seen": 946601984 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007202696196437169, + "loss": 0.0746, + "theoretical_loss": 3.6967648553167014, + "tokens_seen": 946864128 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": -8.704046194907278e-05, + "objective/train/docs_used": 348186, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.480729579925537, + "objective/train/original_loss": 1.4807298183441162, + "objective/train/theoretical_loss": 3.696716045634754, + "objective/train/tokens_used": 967455200, + "objective/train/value_avg": -0.0102691650390625, + "objective/train/value_loss": 0.0004358619044069201, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.783203125, + "objective/train/value_reward_corr": 0.7756683391643069, + "objective/train/value_std": 0.0248260498046875, + "objective/train/weight_avg": 1.0001178979873657, + "objective/train/weighted_lm_loss": 1.4807584285736084, + "objective/train/weights_max": 2.014146089553833, + "objective/train/weights_min": 0.3976996839046478, + "theoretical_loss": 3.696716045634754, + "tokens_seen": 946995200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007201893757021345, + "loss": 0.0738, + "theoretical_loss": 3.696667244599292, + "tokens_seen": 947126272 + }, + { + "epoch": 0.29, + "learning_rate": 0.000720109131760552, + "loss": 0.0759, + "theoretical_loss": 3.6965696684569154, + "tokens_seen": 947388416 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007200288878189697, + "loss": 0.0763, + "theoretical_loss": 3.6964721268677616, + "tokens_seen": 947650560 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007199486438773873, + "loss": 0.0744, + "theoretical_loss": 3.696374619810043, + "tokens_seen": 947912704 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007198683999358049, + "loss": 0.0715, + "theoretical_loss": 3.6962771472619886, + "tokens_seen": 948174848 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007197881559942224, + "loss": 0.0743, + "theoretical_loss": 3.69617970920185, + "tokens_seen": 948436992 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007197079120526401, + "loss": 0.0766, + "theoretical_loss": 3.6960823056078973, + "tokens_seen": 948699136 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007196276681110576, + "loss": 0.0765, + "theoretical_loss": 3.6959849364584203, + "tokens_seen": 948961280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007195474241694752, + "loss": 0.0751, + "theoretical_loss": 3.695887601731728, + "tokens_seen": 949223424 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007194671802278928, + "loss": 0.0765, + "theoretical_loss": 3.69579030140615, + "tokens_seen": 949485568 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007193869362863103, + "loss": 0.0751, + "theoretical_loss": 3.6956930354600352, + "tokens_seen": 949747712 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007193066923447281, + "loss": 0.075, + "theoretical_loss": 3.6955958038717522, + "tokens_seen": 950009856 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0015586912631988525, + "objective/train/docs_used": 349336, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4084975719451904, + "objective/train/original_loss": 1.4084975719451904, + "objective/train/theoretical_loss": 3.695498606619688, + "objective/train/tokens_used": 970732000, + "objective/train/value_avg": -0.00797271728515625, + "objective/train/value_loss": 0.0002061313862213865, + "objective/train/value_max": -0.00015115737915039062, + "objective/train/value_min": -0.53125, + "objective/train/value_reward_corr": 0.6561778993169798, + "objective/train/value_std": 0.0132598876953125, + "objective/train/weight_avg": 1.0016520023345947, + "objective/train/weighted_lm_loss": 1.411009669303894, + "objective/train/weights_max": 1.2708643674850464, + "objective/train/weights_min": 0.36876535415649414, + "theoretical_loss": 3.695498606619688, + "tokens_seen": 950272000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007192264484031456, + "loss": 0.0747, + "theoretical_loss": 3.695498606619688, + "tokens_seen": 950272000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007191462044615632, + "loss": 0.0728, + "theoretical_loss": 3.6954014436822513, + "tokens_seen": 950534144 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007190659605199808, + "loss": 0.0749, + "theoretical_loss": 3.695304315037868, + "tokens_seen": 950796288 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007189857165783984, + "loss": 0.0747, + "theoretical_loss": 3.6952072206649857, + "tokens_seen": 951058432 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007189054726368159, + "loss": 0.075, + "theoretical_loss": 3.695110160542069, + "tokens_seen": 951320576 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007188252286952335, + "loss": 0.0744, + "theoretical_loss": 3.6950131346476054, + "tokens_seen": 951582720 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007187449847536511, + "loss": 0.0755, + "theoretical_loss": 3.694916142960098, + "tokens_seen": 951844864 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007186647408120686, + "loss": 0.0723, + "theoretical_loss": 3.6948191854580728, + "tokens_seen": 952107008 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007185844968704864, + "loss": 0.0738, + "theoretical_loss": 3.694722262120072, + "tokens_seen": 952369152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007185042529289039, + "loss": 0.0752, + "theoretical_loss": 3.6946253729246594, + "tokens_seen": 952631296 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007184240089873215, + "loss": 0.0731, + "theoretical_loss": 3.6945285178504172, + "tokens_seen": 952893440 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007183437650457391, + "loss": 0.0729, + "theoretical_loss": 3.694431696875948, + "tokens_seen": 953155584 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007182635211041566, + "loss": 0.0778, + "theoretical_loss": 3.6943349099798715, + "tokens_seen": 953417728 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0013693609507754445, + "objective/train/docs_used": 350619, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3620078563690186, + "objective/train/original_loss": 1.3620076179504395, + "objective/train/theoretical_loss": 3.694286529304555, + "objective/train/tokens_used": 974008800, + "objective/train/value_avg": -0.005817413330078125, + "objective/train/value_loss": 0.00011372018343536183, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.55126953125, + "objective/train/value_reward_corr": 0.6999231970698898, + "objective/train/value_std": 0.009918212890625, + "objective/train/weight_avg": 1.0014216899871826, + "objective/train/weighted_lm_loss": 1.3646951913833618, + "objective/train/weights_max": 1.1521174907684326, + "objective/train/weights_min": 0.3827209174633026, + "theoretical_loss": 3.694286529304555, + "tokens_seen": 953548800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007181832771625743, + "loss": 0.0708, + "theoretical_loss": 3.6942381571408287, + "tokens_seen": 953679872 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007181030332209918, + "loss": 0.0761, + "theoretical_loss": 3.6941414383374793, + "tokens_seen": 953942016 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007180227892794094, + "loss": 0.0726, + "theoretical_loss": 3.6940447535485026, + "tokens_seen": 954204160 + }, + { + "epoch": 0.29, + "learning_rate": 0.000717942545337827, + "loss": 0.0736, + "theoretical_loss": 3.6939481027525956, + "tokens_seen": 954466304 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007178623013962447, + "loss": 0.074, + "theoretical_loss": 3.6938514859284766, + "tokens_seen": 954728448 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007177820574546622, + "loss": 0.0741, + "theoretical_loss": 3.6937549030548813, + "tokens_seen": 954990592 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007177018135130798, + "loss": 0.0749, + "theoretical_loss": 3.693658354110565, + "tokens_seen": 955252736 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007176215695714974, + "loss": 0.0737, + "theoretical_loss": 3.6935618390743032, + "tokens_seen": 955514880 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007175413256299149, + "loss": 0.0743, + "theoretical_loss": 3.6934653579248886, + "tokens_seen": 955777024 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007174610816883326, + "loss": 0.0729, + "theoretical_loss": 3.693368910641135, + "tokens_seen": 956039168 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007173808377467501, + "loss": 0.0761, + "theoretical_loss": 3.693272497201874, + "tokens_seen": 956301312 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007173005938051677, + "loss": 0.0777, + "theoretical_loss": 3.6931761175859554, + "tokens_seen": 956563456 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0013215347426012158, + "objective/train/docs_used": 351696, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4953937530517578, + "objective/train/original_loss": 1.4953936338424683, + "objective/train/theoretical_loss": 3.693079771772251, + "objective/train/tokens_used": 977285600, + "objective/train/value_avg": -0.005706787109375, + "objective/train/value_loss": 0.00014413167082238942, + "objective/train/value_max": -0.00010889768600463867, + "objective/train/value_min": -0.2381591796875, + "objective/train/value_reward_corr": 0.5217617356406479, + "objective/train/value_std": 0.00839996337890625, + "objective/train/weight_avg": 1.0013859272003174, + "objective/train/weighted_lm_loss": 1.4972920417785645, + "objective/train/weights_max": 1.1233699321746826, + "objective/train/weights_min": 0.3683270514011383, + "theoretical_loss": 3.693079771772251, + "tokens_seen": 956825600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007172203498635853, + "loss": 0.0747, + "theoretical_loss": 3.693079771772251, + "tokens_seen": 956825600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007171401059220028, + "loss": 0.0763, + "theoretical_loss": 3.692983459739649, + "tokens_seen": 957087744 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007170598619804205, + "loss": 0.0776, + "theoretical_loss": 3.6928871814670563, + "tokens_seen": 957349888 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007169796180388381, + "loss": 0.0763, + "theoretical_loss": 3.6927909369334007, + "tokens_seen": 957612032 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007168993740972557, + "loss": 0.0725, + "theoretical_loss": 3.6926947261176277, + "tokens_seen": 957874176 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007168191301556733, + "loss": 0.0762, + "theoretical_loss": 3.692598548998702, + "tokens_seen": 958136320 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007167388862140909, + "loss": 0.0773, + "theoretical_loss": 3.692502405555606, + "tokens_seen": 958398464 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007166586422725084, + "loss": 0.077, + "theoretical_loss": 3.692406295767344, + "tokens_seen": 958660608 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007165783983309261, + "loss": 0.0753, + "theoretical_loss": 3.692310219612936, + "tokens_seen": 958922752 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007164981543893436, + "loss": 0.0743, + "theoretical_loss": 3.6922141770714214, + "tokens_seen": 959184896 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007164179104477611, + "loss": 0.0737, + "theoretical_loss": 3.6921181681218602, + "tokens_seen": 959447040 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007163376665061789, + "loss": 0.0755, + "theoretical_loss": 3.6920221927433294, + "tokens_seen": 959709184 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007162574225645964, + "loss": 0.0772, + "theoretical_loss": 3.691926250914925, + "tokens_seen": 959971328 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0015116332797333598, + "objective/train/docs_used": 352874, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4265995025634766, + "objective/train/original_loss": 1.4265992641448975, + "objective/train/theoretical_loss": 3.6918782925754936, + "objective/train/tokens_used": 980562400, + "objective/train/value_avg": -0.0121917724609375, + "objective/train/value_loss": 0.00023754734138492495, + "objective/train/value_max": -0.00017130374908447266, + "objective/train/value_min": -0.31884765625, + "objective/train/value_reward_corr": 0.9000245293805224, + "objective/train/value_std": 0.03515625, + "objective/train/weight_avg": 1.0016270875930786, + "objective/train/weighted_lm_loss": 1.4295847415924072, + "objective/train/weights_max": 1.215097427368164, + "objective/train/weights_min": 0.3761727213859558, + "theoretical_loss": 3.6918782925754936, + "tokens_seen": 960102400 + }, + { + "epoch": 0.29, + "learning_rate": 0.000716177178623014, + "loss": 0.0754, + "theoretical_loss": 3.691830342615763, + "tokens_seen": 960233472 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007160969346814316, + "loss": 0.0744, + "theoretical_loss": 3.6917344678249755, + "tokens_seen": 960495616 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007160166907398492, + "loss": 0.0734, + "theoretical_loss": 3.6916386265217156, + "tokens_seen": 960757760 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007159364467982667, + "loss": 0.0726, + "theoretical_loss": 3.6915428186851553, + "tokens_seen": 961019904 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007158562028566843, + "loss": 0.075, + "theoretical_loss": 3.6914470442944824, + "tokens_seen": 961282048 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007157759589151019, + "loss": 0.0722, + "theoretical_loss": 3.691351303328907, + "tokens_seen": 961544192 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007156957149735194, + "loss": 0.0754, + "theoretical_loss": 3.691255595767654, + "tokens_seen": 961806336 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007156154710319372, + "loss": 0.0766, + "theoretical_loss": 3.6911599215899704, + "tokens_seen": 962068480 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007155352270903547, + "loss": 0.0759, + "theoretical_loss": 3.6910642807751195, + "tokens_seen": 962330624 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007154549831487724, + "loss": 0.0749, + "theoretical_loss": 3.6909686733023843, + "tokens_seen": 962592768 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007153747392071899, + "loss": 0.0722, + "theoretical_loss": 3.690873099151065, + "tokens_seen": 962854912 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007152944952656074, + "loss": 0.0729, + "theoretical_loss": 3.690777558300482, + "tokens_seen": 963117056 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.000455680739833042, + "objective/train/docs_used": 353969, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4282293319702148, + "objective/train/original_loss": 1.428229570388794, + "objective/train/theoretical_loss": 3.690682050729972, + "objective/train/tokens_used": 983839200, + "objective/train/value_avg": -0.010162353515625, + "objective/train/value_loss": 0.0002770853752736002, + "objective/train/value_max": -0.00014543533325195312, + "objective/train/value_min": -0.2489013671875, + "objective/train/value_reward_corr": 0.6144828349212833, + "objective/train/value_std": 0.01318359375, + "objective/train/weight_avg": 1.0005788803100586, + "objective/train/weighted_lm_loss": 1.4285677671432495, + "objective/train/weights_max": 1.145114541053772, + "objective/train/weights_min": 0.37042829394340515, + "theoretical_loss": 3.690682050729972, + "tokens_seen": 963379200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007152142513240251, + "loss": 0.0734, + "theoretical_loss": 3.690682050729972, + "tokens_seen": 963379200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007151340073824426, + "loss": 0.0756, + "theoretical_loss": 3.6905865764188923, + "tokens_seen": 963641344 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007150537634408602, + "loss": 0.0722, + "theoretical_loss": 3.6904911353466177, + "tokens_seen": 963903488 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007149735194992778, + "loss": 0.0754, + "theoretical_loss": 3.690395727492541, + "tokens_seen": 964165632 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007148932755576955, + "loss": 0.0733, + "theoretical_loss": 3.690300352836074, + "tokens_seen": 964427776 + }, + { + "epoch": 0.29, + "learning_rate": 0.000714813031616113, + "loss": 0.077, + "theoretical_loss": 3.690205011356646, + "tokens_seen": 964689920 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007147327876745306, + "loss": 0.0755, + "theoretical_loss": 3.6901097030337056, + "tokens_seen": 964952064 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007146525437329482, + "loss": 0.0727, + "theoretical_loss": 3.6900144278467204, + "tokens_seen": 965214208 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007145722997913657, + "loss": 0.0755, + "theoretical_loss": 3.6899191857751736, + "tokens_seen": 965476352 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007144920558497834, + "loss": 0.0723, + "theoretical_loss": 3.6898239767985688, + "tokens_seen": 965738496 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007144118119082009, + "loss": 0.0754, + "theoretical_loss": 3.689728800896428, + "tokens_seen": 966000640 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007143315679666186, + "loss": 0.0735, + "theoretical_loss": 3.68963365804829, + "tokens_seen": 966262784 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007142513240250361, + "loss": 0.076, + "theoretical_loss": 3.689538548233713, + "tokens_seen": 966524928 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.00043955291039310396, + "objective/train/docs_used": 355190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5631569623947144, + "objective/train/original_loss": 1.5631568431854248, + "objective/train/theoretical_loss": 3.6894910057076267, + "objective/train/tokens_used": 987116000, + "objective/train/value_avg": -0.00716400146484375, + "objective/train/value_loss": 0.0002913126372732222, + "objective/train/value_max": -0.00010150671005249023, + "objective/train/value_min": -0.6953125, + "objective/train/value_reward_corr": 0.7267076806923944, + "objective/train/value_std": 0.0150299072265625, + "objective/train/weight_avg": 1.0005711317062378, + "objective/train/weighted_lm_loss": 1.5634570121765137, + "objective/train/weights_max": 1.3262733221054077, + "objective/train/weights_min": 0.3728724420070648, + "theoretical_loss": 3.6894910057076267, + "tokens_seen": 966656000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007141710800834536, + "loss": 0.0774, + "theoretical_loss": 3.6894434714322726, + "tokens_seen": 966787072 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007140908361418714, + "loss": 0.0731, + "theoretical_loss": 3.689348427623563, + "tokens_seen": 967049216 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007140105922002889, + "loss": 0.0726, + "theoretical_loss": 3.689253416787197, + "tokens_seen": 967311360 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007139303482587065, + "loss": 0.0725, + "theoretical_loss": 3.6891584389028047, + "tokens_seen": 967573504 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007138501043171241, + "loss": 0.0746, + "theoretical_loss": 3.689063493950034, + "tokens_seen": 967835648 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007137698603755417, + "loss": 0.0736, + "theoretical_loss": 3.6889685819085525, + "tokens_seen": 968097792 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007136896164339592, + "loss": 0.0744, + "theoretical_loss": 3.688873702758044, + "tokens_seen": 968359936 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007136093724923768, + "loss": 0.073, + "theoretical_loss": 3.688778856478211, + "tokens_seen": 968622080 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007135291285507944, + "loss": 0.0737, + "theoretical_loss": 3.6886840430487746, + "tokens_seen": 968884224 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007134488846092119, + "loss": 0.0726, + "theoretical_loss": 3.688589262449474, + "tokens_seen": 969146368 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007133686406676297, + "loss": 0.0754, + "theoretical_loss": 3.6884945146600643, + "tokens_seen": 969408512 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007132883967260472, + "loss": 0.0739, + "theoretical_loss": 3.6883997996603215, + "tokens_seen": 969670656 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0019111091969534755, + "objective/train/docs_used": 356486, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4429503679275513, + "objective/train/original_loss": 1.4429501295089722, + "objective/train/theoretical_loss": 3.688305117430038, + "objective/train/tokens_used": 990392800, + "objective/train/value_avg": -0.0103302001953125, + "objective/train/value_loss": 0.0004549300647340715, + "objective/train/value_max": -8.749961853027344e-05, + "objective/train/value_min": -0.64794921875, + "objective/train/value_reward_corr": 0.6670057748618838, + "objective/train/value_std": 0.0211181640625, + "objective/train/weight_avg": 1.0021229982376099, + "objective/train/weighted_lm_loss": 1.446812391281128, + "objective/train/weights_max": 1.8136069774627686, + "objective/train/weights_min": 0.3739864230155945, + "theoretical_loss": 3.688305117430038, + "tokens_seen": 969932800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007132081527844648, + "loss": 0.0744, + "theoretical_loss": 3.688305117430038, + "tokens_seen": 969932800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007131279088428824, + "loss": 0.0744, + "theoretical_loss": 3.688210467949023, + "tokens_seen": 970194944 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007130476649013, + "loss": 0.0741, + "theoretical_loss": 3.6881158511971055, + "tokens_seen": 970457088 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007129674209597176, + "loss": 0.0746, + "theoretical_loss": 3.6880212671541326, + "tokens_seen": 970719232 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007128871770181351, + "loss": 0.0752, + "theoretical_loss": 3.687926715799967, + "tokens_seen": 970981376 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007128069330765527, + "loss": 0.0746, + "theoretical_loss": 3.687832197114491, + "tokens_seen": 971243520 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007127266891349703, + "loss": 0.0762, + "theoretical_loss": 3.687737711077605, + "tokens_seen": 971505664 + }, + { + "epoch": 0.29, + "learning_rate": 0.000712646445193388, + "loss": 0.0739, + "theoretical_loss": 3.687643257669225, + "tokens_seen": 971767808 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007125662012518055, + "loss": 0.0747, + "theoretical_loss": 3.6875488368692877, + "tokens_seen": 972029952 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007124859573102232, + "loss": 0.0736, + "theoretical_loss": 3.687454448657745, + "tokens_seen": 972292096 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007124057133686407, + "loss": 0.0741, + "theoretical_loss": 3.687360093014568, + "tokens_seen": 972554240 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007123254694270582, + "loss": 0.0744, + "theoretical_loss": 3.687265769919745, + "tokens_seen": 972816384 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007122452254854759, + "loss": 0.0753, + "theoretical_loss": 3.6871714793532826, + "tokens_seen": 973078528 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.000875156547408551, + "objective/train/docs_used": 357717, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.565974235534668, + "objective/train/original_loss": 1.5659743547439575, + "objective/train/theoretical_loss": 3.6871243462619425, + "objective/train/tokens_used": 993669600, + "objective/train/value_avg": -0.00701141357421875, + "objective/train/value_loss": 0.000197466419194825, + "objective/train/value_max": -8.958578109741211e-05, + "objective/train/value_min": -0.229248046875, + "objective/train/value_reward_corr": 0.6185040343790946, + "objective/train/value_std": 0.0106201171875, + "objective/train/weight_avg": 1.0009616613388062, + "objective/train/weighted_lm_loss": 1.5670831203460693, + "objective/train/weights_max": 1.1158901453018188, + "objective/train/weights_min": 0.3683288097381592, + "theoretical_loss": 3.6871243462619425, + "tokens_seen": 973209600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007121649815438934, + "loss": 0.0768, + "theoretical_loss": 3.687077221295203, + "tokens_seen": 973340672 + }, + { + "epoch": 0.3, + "learning_rate": 0.000712084737602311, + "loss": 0.0725, + "theoretical_loss": 3.6869829957255496, + "tokens_seen": 973602816 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007120044936607286, + "loss": 0.0734, + "theoretical_loss": 3.68688880262438, + "tokens_seen": 973864960 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007119242497191462, + "loss": 0.0729, + "theoretical_loss": 3.6867946419717716, + "tokens_seen": 974127104 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007118440057775638, + "loss": 0.0726, + "theoretical_loss": 3.6867005137478177, + "tokens_seen": 974389248 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007117637618359814, + "loss": 0.0768, + "theoretical_loss": 3.686606417932631, + "tokens_seen": 974651392 + }, + { + "epoch": 0.3, + "learning_rate": 0.000711683517894399, + "loss": 0.0728, + "theoretical_loss": 3.6865123545063403, + "tokens_seen": 974913536 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007116032739528166, + "loss": 0.0768, + "theoretical_loss": 3.686418323449093, + "tokens_seen": 975175680 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007115230300112342, + "loss": 0.0765, + "theoretical_loss": 3.6863243247410526, + "tokens_seen": 975437824 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007114427860696517, + "loss": 0.0735, + "theoretical_loss": 3.686230358362401, + "tokens_seen": 975699968 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007113625421280694, + "loss": 0.0739, + "theoretical_loss": 3.686136424293338, + "tokens_seen": 975962112 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007112822981864869, + "loss": 0.074, + "theoretical_loss": 3.68604252251408, + "tokens_seen": 976224256 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.001195325399748981, + "objective/train/docs_used": 358882, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3014249801635742, + "objective/train/original_loss": 1.3014247417449951, + "objective/train/theoretical_loss": 3.6859486530048615, + "objective/train/tokens_used": 996946400, + "objective/train/value_avg": -0.00737762451171875, + "objective/train/value_loss": 0.00010647853196132928, + "objective/train/value_max": -0.00010889768600463867, + "objective/train/value_min": -0.2327880859375, + "objective/train/value_reward_corr": 0.708244856375411, + "objective/train/value_std": 0.009979248046875, + "objective/train/weight_avg": 1.0012476444244385, + "objective/train/weighted_lm_loss": 1.3023656606674194, + "objective/train/weights_max": 1.2456159591674805, + "objective/train/weights_min": 0.611983060836792, + "theoretical_loss": 3.6859486530048615, + "tokens_seen": 976486400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007112020542449044, + "loss": 0.0734, + "theoretical_loss": 3.6859486530048615, + "tokens_seen": 976486400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007111218103033222, + "loss": 0.0736, + "theoretical_loss": 3.685854815745933, + "tokens_seen": 976748544 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007110415663617397, + "loss": 0.0745, + "theoretical_loss": 3.6857610107175645, + "tokens_seen": 977010688 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007109613224201573, + "loss": 0.0771, + "theoretical_loss": 3.6856672379000415, + "tokens_seen": 977272832 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007108810784785749, + "loss": 0.0763, + "theoretical_loss": 3.6855734972736682, + "tokens_seen": 977534976 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007108008345369925, + "loss": 0.0721, + "theoretical_loss": 3.685479788818766, + "tokens_seen": 977797120 + }, + { + "epoch": 0.3, + "learning_rate": 0.00071072059059541, + "loss": 0.0757, + "theoretical_loss": 3.6853861125156717, + "tokens_seen": 978059264 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007106403466538276, + "loss": 0.0743, + "theoretical_loss": 3.6852924683447412, + "tokens_seen": 978321408 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007105601027122452, + "loss": 0.0725, + "theoretical_loss": 3.6851988562863482, + "tokens_seen": 978583552 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007104798587706628, + "loss": 0.0749, + "theoretical_loss": 3.6851052763208823, + "tokens_seen": 978845696 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007103996148290805, + "loss": 0.0719, + "theoretical_loss": 3.6850117284287505, + "tokens_seen": 979107840 + }, + { + "epoch": 0.3, + "learning_rate": 0.000710319370887498, + "loss": 0.0708, + "theoretical_loss": 3.6849182125903774, + "tokens_seen": 979369984 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007102391269459157, + "loss": 0.0766, + "theoretical_loss": 3.6848247287862046, + "tokens_seen": 979632128 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0012934327824041247, + "objective/train/docs_used": 360128, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5772310495376587, + "objective/train/original_loss": 1.5772309303283691, + "objective/train/theoretical_loss": 3.6847779988908362, + "objective/train/tokens_used": 1000223200, + "objective/train/value_avg": -0.00775909423828125, + "objective/train/value_loss": 0.0002463175624143332, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.6826171875, + "objective/train/value_reward_corr": 0.5791152585073471, + "objective/train/value_std": 0.01091766357421875, + "objective/train/weight_avg": 1.0014005899429321, + "objective/train/weighted_lm_loss": 1.5800875425338745, + "objective/train/weights_max": 1.209107518196106, + "objective/train/weights_min": 0.36841312050819397, + "theoretical_loss": 3.6847779988908362, + "tokens_seen": 979763200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007101588830043332, + "loss": 0.0759, + "theoretical_loss": 3.684731276996691, + "tokens_seen": 979894272 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007100786390627508, + "loss": 0.0749, + "theoretical_loss": 3.684637857202312, + "tokens_seen": 980156416 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007099983951211684, + "loss": 0.0742, + "theoretical_loss": 3.684544469383562, + "tokens_seen": 980418560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007099181511795859, + "loss": 0.0726, + "theoretical_loss": 3.6844511135209497, + "tokens_seen": 980680704 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007098379072380035, + "loss": 0.0733, + "theoretical_loss": 3.684357789595003, + "tokens_seen": 980942848 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007097576632964211, + "loss": 0.0756, + "theoretical_loss": 3.684264497586266, + "tokens_seen": 981204992 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007096774193548388, + "loss": 0.0751, + "theoretical_loss": 3.684171237475301, + "tokens_seen": 981467136 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007095971754132563, + "loss": 0.0755, + "theoretical_loss": 3.6840780092426852, + "tokens_seen": 981729280 + }, + { + "epoch": 0.3, + "learning_rate": 0.000709516931471674, + "loss": 0.0738, + "theoretical_loss": 3.6839848128690145, + "tokens_seen": 981991424 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007094366875300915, + "loss": 0.0762, + "theoretical_loss": 3.683891648334901, + "tokens_seen": 982253568 + }, + { + "epoch": 0.3, + "learning_rate": 0.000709356443588509, + "loss": 0.0751, + "theoretical_loss": 3.6837985156209743, + "tokens_seen": 982515712 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007092761996469267, + "loss": 0.0774, + "theoretical_loss": 3.683705414707881, + "tokens_seen": 982777856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7488293247094114, + "debugging/entropy-1-grams": 5.349564433264487, + "debugging/length": 462.46153846153845, + "debugging/num_segments": 13, + "debugging/raw_token_scores_avg": 0.00979544036090374, + "debugging/raw_token_scores_std": 0.022818773984909058, + "debugging/score": 0.0073770168052465215, + "debugging/score_std": 0.0054551366535427, + "epoch": 0.3, + "objective/train/advantage_avg": 0.0004159576492384076, + "objective/train/docs_used": 361383, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.380717158317566, + "objective/train/original_loss": 1.3807172775268555, + "objective/train/theoretical_loss": 3.6836123455762837, + "objective/train/tokens_used": 1003500000, + "objective/train/value_avg": -0.0102081298828125, + "objective/train/value_loss": 0.00025552461738698184, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.74853515625, + "objective/train/value_reward_corr": 0.7139572076837593, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.000531554222107, + "objective/train/weighted_lm_loss": 1.3806174993515015, + "objective/train/weights_max": 1.1966832876205444, + "objective/train/weights_min": 0.3853108584880829, + "theoretical_loss": 3.6836123455762837, + "tokens_seen": 983040000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007091959557053442, + "loss": 0.0735, + "theoretical_loss": 3.6836123455762837, + "tokens_seen": 983040000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007091157117637619, + "loss": 0.075, + "theoretical_loss": 3.683519308206863, + "tokens_seen": 983302144 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007090354678221794, + "loss": 0.0749, + "theoretical_loss": 3.683426302580316, + "tokens_seen": 983564288 + }, + { + "epoch": 0.3, + "learning_rate": 0.000708955223880597, + "loss": 0.0761, + "theoretical_loss": 3.683333328677356, + "tokens_seen": 983826432 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007088749799390147, + "loss": 0.0707, + "theoretical_loss": 3.6832403864787144, + "tokens_seen": 984088576 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007087947359974322, + "loss": 0.0749, + "theoretical_loss": 3.683147475965139, + "tokens_seen": 984350720 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007087144920558498, + "loss": 0.076, + "theoretical_loss": 3.683054597117393, + "tokens_seen": 984612864 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007086342481142674, + "loss": 0.0756, + "theoretical_loss": 3.6829617499162595, + "tokens_seen": 984875008 + }, + { + "epoch": 0.3, + "learning_rate": 0.000708554004172685, + "loss": 0.0751, + "theoretical_loss": 3.6828689343425345, + "tokens_seen": 985137152 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007084737602311025, + "loss": 0.0741, + "theoretical_loss": 3.682776150377034, + "tokens_seen": 985399296 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007083935162895202, + "loss": 0.0765, + "theoretical_loss": 3.682683398000589, + "tokens_seen": 985661440 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007083132723479377, + "loss": 0.0773, + "theoretical_loss": 3.6825906771940478, + "tokens_seen": 985923584 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007082330284063552, + "loss": 0.0738, + "theoretical_loss": 3.682497987938275, + "tokens_seen": 986185728 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.002384291496127844, + "objective/train/docs_used": 362627, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.497067928314209, + "objective/train/original_loss": 1.4970680475234985, + "objective/train/theoretical_loss": 3.682451655135952, + "objective/train/tokens_used": 1006776800, + "objective/train/value_avg": -0.007381439208984375, + "objective/train/value_loss": 0.00016254279762506485, + "objective/train/value_max": -0.0001442432403564453, + "objective/train/value_min": -0.274169921875, + "objective/train/value_reward_corr": 0.5673249772676263, + "objective/train/value_std": 0.008941650390625, + "objective/train/weight_avg": 1.00246000289917, + "objective/train/weighted_lm_loss": 1.5009002685546875, + "objective/train/weights_max": 1.1678251028060913, + "objective/train/weights_min": 0.3721363842487335, + "theoretical_loss": 3.682451655135952, + "tokens_seen": 986316800 + }, + { + "epoch": 0.3, + "learning_rate": 0.000708152784464773, + "loss": 0.0731, + "theoretical_loss": 3.682405330214153, + "tokens_seen": 986447872 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007080725405231905, + "loss": 0.0717, + "theoretical_loss": 3.682312704002579, + "tokens_seen": 986710016 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007079922965816082, + "loss": 0.0762, + "theoretical_loss": 3.6822201092844686, + "tokens_seen": 986972160 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007079120526400257, + "loss": 0.0735, + "theoretical_loss": 3.682127546040753, + "tokens_seen": 987234304 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007078318086984433, + "loss": 0.0753, + "theoretical_loss": 3.6820350142523806, + "tokens_seen": 987496448 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007077515647568609, + "loss": 0.0749, + "theoretical_loss": 3.6819425139003155, + "tokens_seen": 987758592 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007076713208152784, + "loss": 0.0763, + "theoretical_loss": 3.6818500449655396, + "tokens_seen": 988020736 + }, + { + "epoch": 0.3, + "learning_rate": 0.000707591076873696, + "loss": 0.0724, + "theoretical_loss": 3.6817576074290503, + "tokens_seen": 988282880 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007075108329321136, + "loss": 0.0724, + "theoretical_loss": 3.681665201271862, + "tokens_seen": 988545024 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007074305889905313, + "loss": 0.0774, + "theoretical_loss": 3.681572826475006, + "tokens_seen": 988807168 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007073503450489488, + "loss": 0.0752, + "theoretical_loss": 3.681480483019529, + "tokens_seen": 989069312 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007072701011073665, + "loss": 0.0755, + "theoretical_loss": 3.6813881708864953, + "tokens_seen": 989331456 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.000814878090750426, + "objective/train/docs_used": 363941, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4229824542999268, + "objective/train/original_loss": 1.4229824542999268, + "objective/train/theoretical_loss": 3.681295890056985, + "objective/train/tokens_used": 1010053600, + "objective/train/value_avg": -0.007781982421875, + "objective/train/value_loss": 0.00012201992649352178, + "objective/train/value_max": -0.0001080632209777832, + "objective/train/value_min": -0.1939697265625, + "objective/train/value_reward_corr": 0.701736020129122, + "objective/train/value_std": 0.01049041748046875, + "objective/train/weight_avg": 1.000870704650879, + "objective/train/weighted_lm_loss": 1.4249759912490845, + "objective/train/weights_max": 1.2072614431381226, + "objective/train/weights_min": 0.3682592809200287, + "theoretical_loss": 3.681295890056985, + "tokens_seen": 989593600 + }, + { + "epoch": 0.3, + "learning_rate": 0.000707189857165784, + "loss": 0.0741, + "theoretical_loss": 3.681295890056985, + "tokens_seen": 989593600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007071096132242016, + "loss": 0.0718, + "theoretical_loss": 3.681203640512095, + "tokens_seen": 989855744 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007070293692826192, + "loss": 0.076, + "theoretical_loss": 3.681111422232937, + "tokens_seen": 990117888 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007069491253410367, + "loss": 0.0765, + "theoretical_loss": 3.681019235200643, + "tokens_seen": 990380032 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007068688813994543, + "loss": 0.0747, + "theoretical_loss": 3.680927079396357, + "tokens_seen": 990642176 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007067886374578719, + "loss": 0.0736, + "theoretical_loss": 3.680834954801242, + "tokens_seen": 990904320 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007067083935162896, + "loss": 0.0737, + "theoretical_loss": 3.6807428613964763, + "tokens_seen": 991166464 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007066281495747072, + "loss": 0.0778, + "theoretical_loss": 3.6806507991632555, + "tokens_seen": 991428608 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007065479056331248, + "loss": 0.0746, + "theoretical_loss": 3.68055876808279, + "tokens_seen": 991690752 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007064676616915423, + "loss": 0.0791, + "theoretical_loss": 3.680466768136308, + "tokens_seen": 991952896 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007063874177499599, + "loss": 0.0758, + "theoretical_loss": 3.680374799305053, + "tokens_seen": 992215040 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007063071738083775, + "loss": 0.0744, + "theoretical_loss": 3.6802828615702845, + "tokens_seen": 992477184 + }, + { + "epoch": 0.3, + "learning_rate": 0.000706226929866795, + "loss": 0.0765, + "theoretical_loss": 3.6801909549132796, + "tokens_seen": 992739328 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.001581284566782415, + "objective/train/docs_used": 365105, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4962949752807617, + "objective/train/original_loss": 1.4962949752807617, + "objective/train/theoretical_loss": 3.6801450132330915, + "objective/train/tokens_used": 1013330400, + "objective/train/value_avg": -0.00997161865234375, + "objective/train/value_loss": 0.00031275878427550197, + "objective/train/value_max": -7.486343383789062e-05, + "objective/train/value_min": -0.4453125, + "objective/train/value_reward_corr": 0.7353213483699139, + "objective/train/value_std": 0.020538330078125, + "objective/train/weight_avg": 1.0017253160476685, + "objective/train/weighted_lm_loss": 1.498146414756775, + "objective/train/weights_max": 1.244823932647705, + "objective/train/weights_min": 0.4185446798801422, + "theoretical_loss": 3.6801450132330915, + "tokens_seen": 992870400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007061466859252127, + "loss": 0.0758, + "theoretical_loss": 3.6800990793153305, + "tokens_seen": 993001472 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007060664419836302, + "loss": 0.075, + "theoretical_loss": 3.6800072347577455, + "tokens_seen": 993263616 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007059861980420478, + "loss": 0.0745, + "theoretical_loss": 3.6799154212218506, + "tokens_seen": 993525760 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007059059541004655, + "loss": 0.0726, + "theoretical_loss": 3.679823638688985, + "tokens_seen": 993787904 + }, + { + "epoch": 0.3, + "learning_rate": 0.000705825710158883, + "loss": 0.0742, + "theoretical_loss": 3.679731887140508, + "tokens_seen": 994050048 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007057454662173006, + "loss": 0.0712, + "theoretical_loss": 3.6796401665577916, + "tokens_seen": 994312192 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007056652222757182, + "loss": 0.0768, + "theoretical_loss": 3.679548476922225, + "tokens_seen": 994574336 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007055849783341358, + "loss": 0.0749, + "theoretical_loss": 3.6794568182152143, + "tokens_seen": 994836480 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007055047343925533, + "loss": 0.0756, + "theoretical_loss": 3.6793651904181806, + "tokens_seen": 995098624 + }, + { + "epoch": 0.3, + "learning_rate": 0.000705424490450971, + "loss": 0.0749, + "theoretical_loss": 3.679273593512563, + "tokens_seen": 995360768 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007053442465093885, + "loss": 0.0767, + "theoretical_loss": 3.679182027479812, + "tokens_seen": 995622912 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007052640025678061, + "loss": 0.0765, + "theoretical_loss": 3.6790904923014005, + "tokens_seen": 995885056 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0009565390646457672, + "objective/train/docs_used": 366329, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5922820568084717, + "objective/train/original_loss": 1.5922820568084717, + "objective/train/theoretical_loss": 3.6789989879588125, + "objective/train/tokens_used": 1016607200, + "objective/train/value_avg": -0.010162353515625, + "objective/train/value_loss": 0.0004036914324387908, + "objective/train/value_max": -8.028745651245117e-05, + "objective/train/value_min": -0.489990234375, + "objective/train/value_reward_corr": 0.6579596576861813, + "objective/train/value_std": 0.017059326171875, + "objective/train/weight_avg": 1.0011204481124878, + "objective/train/weighted_lm_loss": 1.594104528427124, + "objective/train/weights_max": 1.1680172681808472, + "objective/train/weights_min": 0.2262655794620514, + "theoretical_loss": 3.6789989879588125, + "tokens_seen": 996147200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007051837586262238, + "loss": 0.074, + "theoretical_loss": 3.6789989879588125, + "tokens_seen": 996147200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007051035146846413, + "loss": 0.0744, + "theoretical_loss": 3.6789075144335497, + "tokens_seen": 996409344 + }, + { + "epoch": 0.3, + "learning_rate": 0.000705023270743059, + "loss": 0.0729, + "theoretical_loss": 3.6788160717071303, + "tokens_seen": 996671488 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007049430268014765, + "loss": 0.0771, + "theoretical_loss": 3.678724659761087, + "tokens_seen": 996933632 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007048627828598941, + "loss": 0.0735, + "theoretical_loss": 3.6786332785769695, + "tokens_seen": 997195776 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007047825389183117, + "loss": 0.0736, + "theoretical_loss": 3.678541928136344, + "tokens_seen": 997457920 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007047022949767292, + "loss": 0.0755, + "theoretical_loss": 3.6784506084207904, + "tokens_seen": 997720064 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007046220510351468, + "loss": 0.0779, + "theoretical_loss": 3.6783593194119066, + "tokens_seen": 997982208 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007045418070935644, + "loss": 0.0761, + "theoretical_loss": 3.6782680610913054, + "tokens_seen": 998244352 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007044615631519821, + "loss": 0.079, + "theoretical_loss": 3.6781768334406157, + "tokens_seen": 998506496 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007043813192103996, + "loss": 0.0736, + "theoretical_loss": 3.678085636441482, + "tokens_seen": 998768640 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007043010752688173, + "loss": 0.0783, + "theoretical_loss": 3.677994470075565, + "tokens_seen": 999030784 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007042208313272348, + "loss": 0.0749, + "theoretical_loss": 3.6779033343245406, + "tokens_seen": 999292928 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0017308670794591308, + "objective/train/docs_used": 367579, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6493194103240967, + "objective/train/original_loss": 1.6493196487426758, + "objective/train/theoretical_loss": 3.677857777923891, + "objective/train/tokens_used": 1019884000, + "objective/train/value_avg": -0.006999969482421875, + "objective/train/value_loss": 0.00011768531840061769, + "objective/train/value_max": -0.00017130374908447266, + "objective/train/value_min": -0.298828125, + "objective/train/value_reward_corr": 0.617122584170849, + "objective/train/value_std": 0.00861358642578125, + "objective/train/weight_avg": 1.0017890930175781, + "objective/train/weighted_lm_loss": 1.6527791023254395, + "objective/train/weights_max": 1.1782879829406738, + "objective/train/weights_min": 0.7371587753295898, + "theoretical_loss": 3.677857777923891, + "tokens_seen": 999424000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007041405873856524, + "loss": 0.0756, + "theoretical_loss": 3.677812229170101, + "tokens_seen": 999555072 + }, + { + "epoch": 0.3, + "learning_rate": 0.00070406034344407, + "loss": 0.0732, + "theoretical_loss": 3.677721154593953, + "tokens_seen": 999817216 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007039800995024875, + "loss": 0.0756, + "theoretical_loss": 3.6776301105778213, + "tokens_seen": 1000079360 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007038998555609052, + "loss": 0.076, + "theoretical_loss": 3.6775390971034447, + "tokens_seen": 1000341504 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007038196116193227, + "loss": 0.0742, + "theoretical_loss": 3.6774481141525777, + "tokens_seen": 1000603648 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007037393676777404, + "loss": 0.075, + "theoretical_loss": 3.6773571617069907, + "tokens_seen": 1000865792 + }, + { + "epoch": 0.3, + "learning_rate": 0.000703659123736158, + "loss": 0.0731, + "theoretical_loss": 3.6772662397484703, + "tokens_seen": 1001127936 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007035788797945756, + "loss": 0.0704, + "theoretical_loss": 3.6771753482588183, + "tokens_seen": 1001390080 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007034986358529931, + "loss": 0.0741, + "theoretical_loss": 3.6770844872198523, + "tokens_seen": 1001652224 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007034183919114107, + "loss": 0.0733, + "theoretical_loss": 3.6769936566134045, + "tokens_seen": 1001914368 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007033381479698283, + "loss": 0.0755, + "theoretical_loss": 3.676902856421324, + "tokens_seen": 1002176512 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007032579040282458, + "loss": 0.076, + "theoretical_loss": 3.6768120866254757, + "tokens_seen": 1002438656 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": -0.0004565780109260231, + "objective/train/docs_used": 368716, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.602346420288086, + "objective/train/original_loss": 1.6023463010787964, + "objective/train/theoretical_loss": 3.6767213472077387, + "objective/train/tokens_used": 1023160800, + "objective/train/value_avg": -0.0089874267578125, + "objective/train/value_loss": 0.0004929814604111016, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.70947265625, + "objective/train/value_reward_corr": 0.6152421290672049, + "objective/train/value_std": 0.01763916015625, + "objective/train/weight_avg": 0.9997656345367432, + "objective/train/weighted_lm_loss": 1.6013120412826538, + "objective/train/weights_max": 1.700226902961731, + "objective/train/weights_min": 0.3712657690048218, + "theoretical_loss": 3.6767213472077387, + "tokens_seen": 1002700800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007031776600866635, + "loss": 0.0745, + "theoretical_loss": 3.6767213472077387, + "tokens_seen": 1002700800 + }, + { + "epoch": 0.3, + "learning_rate": 0.000703097416145081, + "loss": 0.073, + "theoretical_loss": 3.676630638150008, + "tokens_seen": 1002962944 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007030171722034986, + "loss": 0.0722, + "theoretical_loss": 3.6765399594341943, + "tokens_seen": 1003225088 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007029369282619163, + "loss": 0.0758, + "theoretical_loss": 3.676449311042225, + "tokens_seen": 1003487232 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007028566843203338, + "loss": 0.0789, + "theoretical_loss": 3.6763586929560415, + "tokens_seen": 1003749376 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007027764403787515, + "loss": 0.0758, + "theoretical_loss": 3.6762681051576003, + "tokens_seen": 1004011520 + }, + { + "epoch": 0.3, + "learning_rate": 0.000702696196437169, + "loss": 0.0738, + "theoretical_loss": 3.6761775476288747, + "tokens_seen": 1004273664 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007026159524955866, + "loss": 0.075, + "theoretical_loss": 3.6760870203518525, + "tokens_seen": 1004535808 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007025357085540042, + "loss": 0.0715, + "theoretical_loss": 3.6759965233085383, + "tokens_seen": 1004797952 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007024554646124218, + "loss": 0.0748, + "theoretical_loss": 3.6759060564809496, + "tokens_seen": 1005060096 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007023752206708393, + "loss": 0.0748, + "theoretical_loss": 3.6758156198511216, + "tokens_seen": 1005322240 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007022949767292569, + "loss": 0.076, + "theoretical_loss": 3.675725213401104, + "tokens_seen": 1005584384 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007022147327876746, + "loss": 0.0757, + "theoretical_loss": 3.6756348371129617, + "tokens_seen": 1005846528 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0010321858571842313, + "objective/train/docs_used": 369909, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.614028811454773, + "objective/train/original_loss": 1.6140289306640625, + "objective/train/theoretical_loss": 3.6755896602739933, + "objective/train/tokens_used": 1026437600, + "objective/train/value_avg": -0.00745391845703125, + "objective/train/value_loss": 0.00014535474474541843, + "objective/train/value_max": -8.153915405273438e-05, + "objective/train/value_min": -0.311767578125, + "objective/train/value_reward_corr": 0.76924190262928, + "objective/train/value_std": 0.01385498046875, + "objective/train/weight_avg": 1.001103401184082, + "objective/train/weighted_lm_loss": 1.6157536506652832, + "objective/train/weights_max": 1.166409969329834, + "objective/train/weights_min": 0.6640326976776123, + "theoretical_loss": 3.6755896602739933, + "tokens_seen": 1005977600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007021344888460921, + "loss": 0.0776, + "theoretical_loss": 3.6755444909687744, + "tokens_seen": 1006108672 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007020542449045098, + "loss": 0.0745, + "theoretical_loss": 3.675454174950639, + "tokens_seen": 1006370816 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007019740009629273, + "loss": 0.0734, + "theoretical_loss": 3.675363889040666, + "tokens_seen": 1006632960 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007018937570213449, + "loss": 0.0748, + "theoretical_loss": 3.675273633220981, + "tokens_seen": 1006895104 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007018135130797625, + "loss": 0.0746, + "theoretical_loss": 3.6751834074737264, + "tokens_seen": 1007157248 + }, + { + "epoch": 0.31, + "learning_rate": 0.00070173326913818, + "loss": 0.0762, + "theoretical_loss": 3.675093211781059, + "tokens_seen": 1007419392 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007016530251965977, + "loss": 0.0773, + "theoretical_loss": 3.67500304612515, + "tokens_seen": 1007681536 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007015727812550152, + "loss": 0.0756, + "theoretical_loss": 3.674912910488187, + "tokens_seen": 1007943680 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007014925373134329, + "loss": 0.0759, + "theoretical_loss": 3.6748228048523726, + "tokens_seen": 1008205824 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007014122933718505, + "loss": 0.0783, + "theoretical_loss": 3.674732729199924, + "tokens_seen": 1008467968 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007013320494302681, + "loss": 0.0742, + "theoretical_loss": 3.674642683513074, + "tokens_seen": 1008730112 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007012518054886856, + "loss": 0.0765, + "theoretical_loss": 3.674552667774071, + "tokens_seen": 1008992256 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0011902566766366363, + "objective/train/docs_used": 370943, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5907751321792603, + "objective/train/original_loss": 1.5907747745513916, + "objective/train/theoretical_loss": 3.6744626819651773, + "objective/train/tokens_used": 1029714400, + "objective/train/value_avg": -0.00921630859375, + "objective/train/value_loss": 0.0002929775801021606, + "objective/train/value_max": -0.00011324882507324219, + "objective/train/value_min": -0.263427734375, + "objective/train/value_reward_corr": 0.6733462873979555, + "objective/train/value_std": 0.0144805908203125, + "objective/train/weight_avg": 1.0013235807418823, + "objective/train/weighted_lm_loss": 1.5915753841400146, + "objective/train/weights_max": 1.3013832569122314, + "objective/train/weights_min": 0.39872050285339355, + "theoretical_loss": 3.6744626819651773, + "tokens_seen": 1009254400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007011715615471032, + "loss": 0.0732, + "theoretical_loss": 3.6744626819651773, + "tokens_seen": 1009254400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007010913176055208, + "loss": 0.0748, + "theoretical_loss": 3.674372726068671, + "tokens_seen": 1009516544 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007010110736639383, + "loss": 0.0738, + "theoretical_loss": 3.6742828000668464, + "tokens_seen": 1009778688 + }, + { + "epoch": 0.31, + "learning_rate": 0.000700930829722356, + "loss": 0.0746, + "theoretical_loss": 3.6741929039420103, + "tokens_seen": 1010040832 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007008505857807735, + "loss": 0.0738, + "theoretical_loss": 3.6741030376764865, + "tokens_seen": 1010302976 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007007703418391912, + "loss": 0.0764, + "theoretical_loss": 3.674013201252614, + "tokens_seen": 1010565120 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007006900978976088, + "loss": 0.0751, + "theoretical_loss": 3.6739233946527454, + "tokens_seen": 1010827264 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007006098539560264, + "loss": 0.0728, + "theoretical_loss": 3.6738336178592492, + "tokens_seen": 1011089408 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007005296100144439, + "loss": 0.0791, + "theoretical_loss": 3.6737438708545094, + "tokens_seen": 1011351552 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007004493660728615, + "loss": 0.0751, + "theoretical_loss": 3.673654153620924, + "tokens_seen": 1011613696 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007003691221312791, + "loss": 0.0784, + "theoretical_loss": 3.673564466140906, + "tokens_seen": 1011875840 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007002888781896967, + "loss": 0.0747, + "theoretical_loss": 3.6734748083968842, + "tokens_seen": 1012137984 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007002086342481143, + "loss": 0.0759, + "theoretical_loss": 3.6733851803713016, + "tokens_seen": 1012400128 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.00044222682481631637, + "objective/train/docs_used": 372166, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5056043863296509, + "objective/train/original_loss": 1.5056045055389404, + "objective/train/theoretical_loss": 3.6733403774974427, + "objective/train/tokens_used": 1032991200, + "objective/train/value_avg": -0.01678466796875, + "objective/train/value_loss": 0.00021295166516210884, + "objective/train/value_max": -0.00011146068572998047, + "objective/train/value_min": -0.6337890625, + "objective/train/value_reward_corr": 0.9449292467966458, + "objective/train/value_std": 0.041168212890625, + "objective/train/weight_avg": 0.9996622800827026, + "objective/train/weighted_lm_loss": 1.5041799545288086, + "objective/train/weights_max": 1.2211940288543701, + "objective/train/weights_min": 0.6597907543182373, + "theoretical_loss": 3.6733403774974427, + "tokens_seen": 1012531200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007001283903065318, + "loss": 0.076, + "theoretical_loss": 3.673295582046616, + "tokens_seen": 1012662272 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007000481463649496, + "loss": 0.0755, + "theoretical_loss": 3.6732060134053013, + "tokens_seen": 1012924416 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006999679024233671, + "loss": 0.0761, + "theoretical_loss": 3.673116474429844, + "tokens_seen": 1013186560 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006998876584817846, + "loss": 0.0775, + "theoretical_loss": 3.673026965102748, + "tokens_seen": 1013448704 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006998074145402023, + "loss": 0.0739, + "theoretical_loss": 3.67293748540653, + "tokens_seen": 1013710848 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006997271705986198, + "loss": 0.0738, + "theoretical_loss": 3.672848035323723, + "tokens_seen": 1013972992 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006996469266570374, + "loss": 0.0746, + "theoretical_loss": 3.6727586148368743, + "tokens_seen": 1014235136 + }, + { + "epoch": 0.31, + "learning_rate": 0.000699566682715455, + "loss": 0.0755, + "theoretical_loss": 3.672669223928545, + "tokens_seen": 1014497280 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006994864387738726, + "loss": 0.0745, + "theoretical_loss": 3.672579862581313, + "tokens_seen": 1014759424 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006994061948322901, + "loss": 0.0746, + "theoretical_loss": 3.672490530777769, + "tokens_seen": 1015021568 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006993259508907077, + "loss": 0.0752, + "theoretical_loss": 3.6724012285005196, + "tokens_seen": 1015283712 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006992457069491254, + "loss": 0.0762, + "theoretical_loss": 3.6723119557321864, + "tokens_seen": 1015545856 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.000864784000441432, + "objective/train/docs_used": 372984, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4644211530685425, + "objective/train/original_loss": 1.4644211530685425, + "objective/train/theoretical_loss": 3.6722227124554045, + "objective/train/tokens_used": 1036268000, + "objective/train/value_avg": -0.00815582275390625, + "objective/train/value_loss": 0.0006307647563517094, + "objective/train/value_max": -0.00012433528900146484, + "objective/train/value_min": -0.7392578125, + "objective/train/value_reward_corr": 0.6358148568839063, + "objective/train/value_std": 0.01560211181640625, + "objective/train/weight_avg": 0.999402642250061, + "objective/train/weighted_lm_loss": 1.4642397165298462, + "objective/train/weights_max": 2.048352003097534, + "objective/train/weights_min": 0.39648786187171936, + "theoretical_loss": 3.6722227124554045, + "tokens_seen": 1015808000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006991654630075429, + "loss": 0.0757, + "theoretical_loss": 3.6722227124554045, + "tokens_seen": 1015808000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006990852190659606, + "loss": 0.0728, + "theoretical_loss": 3.6721334986528236, + "tokens_seen": 1016070144 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006990049751243781, + "loss": 0.0767, + "theoretical_loss": 3.6720443143071106, + "tokens_seen": 1016332288 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006989247311827958, + "loss": 0.0744, + "theoretical_loss": 3.671955159400943, + "tokens_seen": 1016594432 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006988444872412133, + "loss": 0.0764, + "theoretical_loss": 3.6718660339170173, + "tokens_seen": 1016856576 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006987642432996308, + "loss": 0.076, + "theoretical_loss": 3.6717769378380414, + "tokens_seen": 1017118720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006986839993580485, + "loss": 0.075, + "theoretical_loss": 3.671687871146739, + "tokens_seen": 1017380864 + }, + { + "epoch": 0.31, + "learning_rate": 0.000698603755416466, + "loss": 0.0732, + "theoretical_loss": 3.6715988338258487, + "tokens_seen": 1017643008 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006985235114748837, + "loss": 0.0745, + "theoretical_loss": 3.6715098258581236, + "tokens_seen": 1017905152 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006984432675333013, + "loss": 0.0762, + "theoretical_loss": 3.6714208472263303, + "tokens_seen": 1018167296 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006983630235917189, + "loss": 0.0739, + "theoretical_loss": 3.6713318979132517, + "tokens_seen": 1018429440 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006982827796501364, + "loss": 0.0755, + "theoretical_loss": 3.671242977901683, + "tokens_seen": 1018691584 + }, + { + "epoch": 0.31, + "learning_rate": 0.000698202535708554, + "loss": 0.0761, + "theoretical_loss": 3.671154087174436, + "tokens_seen": 1018953728 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0015296280616894364, + "objective/train/docs_used": 374190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4388104677200317, + "objective/train/original_loss": 1.4388103485107422, + "objective/train/theoretical_loss": 3.6711096527870657, + "objective/train/tokens_used": 1039544800, + "objective/train/value_avg": -0.005893707275390625, + "objective/train/value_loss": 0.00029690880910493433, + "objective/train/value_max": -4.297494888305664e-05, + "objective/train/value_min": -0.744140625, + "objective/train/value_reward_corr": 0.5134253858490913, + "objective/train/value_std": 0.0106964111328125, + "objective/train/weight_avg": 1.0016546249389648, + "objective/train/weighted_lm_loss": 1.4419684410095215, + "objective/train/weights_max": 1.183031678199768, + "objective/train/weights_min": 0.3685206472873688, + "theoretical_loss": 3.6711096527870657, + "tokens_seen": 1019084800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006981222917669716, + "loss": 0.0761, + "theoretical_loss": 3.6710652257143366, + "tokens_seen": 1019215872 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006980420478253891, + "loss": 0.0756, + "theoretical_loss": 3.6709763935042243, + "tokens_seen": 1019478016 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006979618038838068, + "loss": 0.0759, + "theoretical_loss": 3.670887590526953, + "tokens_seen": 1019740160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006978815599422243, + "loss": 0.0755, + "theoretical_loss": 3.6707988167653927, + "tokens_seen": 1020002304 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006978013160006421, + "loss": 0.075, + "theoretical_loss": 3.670710072202426, + "tokens_seen": 1020264448 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006977210720590596, + "loss": 0.0763, + "theoretical_loss": 3.670621356820951, + "tokens_seen": 1020526592 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006976408281174772, + "loss": 0.0737, + "theoretical_loss": 3.67053267060388, + "tokens_seen": 1020788736 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006975605841758948, + "loss": 0.0761, + "theoretical_loss": 3.6704440135341394, + "tokens_seen": 1021050880 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006974803402343123, + "loss": 0.0755, + "theoretical_loss": 3.6703553855946702, + "tokens_seen": 1021313024 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006974000962927299, + "loss": 0.0734, + "theoretical_loss": 3.6702667867684275, + "tokens_seen": 1021575168 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006973198523511475, + "loss": 0.0727, + "theoretical_loss": 3.670178217038381, + "tokens_seen": 1021837312 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006972396084095651, + "loss": 0.0739, + "theoretical_loss": 3.670089676387515, + "tokens_seen": 1022099456 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0019567436538636684, + "objective/train/docs_used": 375283, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4512615203857422, + "objective/train/original_loss": 1.451261281967163, + "objective/train/theoretical_loss": 3.6700011647988275, + "objective/train/tokens_used": 1042821600, + "objective/train/value_avg": -0.01641845703125, + "objective/train/value_loss": 0.00019754536333493888, + "objective/train/value_max": -9.5367431640625e-05, + "objective/train/value_min": -0.307861328125, + "objective/train/value_reward_corr": 0.9487556089623391, + "objective/train/value_std": 0.040863037109375, + "objective/train/weight_avg": 1.0020544528961182, + "objective/train/weighted_lm_loss": 1.4532920122146606, + "objective/train/weights_max": 1.1703647375106812, + "objective/train/weights_min": 0.7569119334220886, + "theoretical_loss": 3.6700011647988275, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006971593644679826, + "loss": 0.0772, + "theoretical_loss": 3.6700011647988275, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006970791205264004, + "loss": 0.0726, + "theoretical_loss": 3.6699126822553314, + "tokens_seen": 1022623744 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006969988765848179, + "loss": 0.0735, + "theoretical_loss": 3.669824228740053, + "tokens_seen": 1022885888 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006969186326432354, + "loss": 0.0772, + "theoretical_loss": 3.6697358042360344, + "tokens_seen": 1023148032 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006968383887016531, + "loss": 0.0756, + "theoretical_loss": 3.6696474087263296, + "tokens_seen": 1023410176 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006967581447600706, + "loss": 0.0774, + "theoretical_loss": 3.6695590421940096, + "tokens_seen": 1023672320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006966779008184882, + "loss": 0.0762, + "theoretical_loss": 3.6694707046221575, + "tokens_seen": 1023934464 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006965976568769058, + "loss": 0.0742, + "theoretical_loss": 3.669382395993871, + "tokens_seen": 1024196608 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006965174129353234, + "loss": 0.0754, + "theoretical_loss": 3.669294116292263, + "tokens_seen": 1024458752 + }, + { + "epoch": 0.31, + "learning_rate": 0.000696437168993741, + "loss": 0.075, + "theoretical_loss": 3.6692058655004605, + "tokens_seen": 1024720896 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006963569250521585, + "loss": 0.0766, + "theoretical_loss": 3.669117643601602, + "tokens_seen": 1024983040 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006962766811105762, + "loss": 0.0718, + "theoretical_loss": 3.6690294505788446, + "tokens_seen": 1025245184 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006961964371689938, + "loss": 0.0738, + "theoretical_loss": 3.668941286415355, + "tokens_seen": 1025507328 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.001169724389910698, + "objective/train/docs_used": 376516, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2908331155776978, + "objective/train/original_loss": 1.2908332347869873, + "objective/train/theoretical_loss": 3.6688972151505803, + "objective/train/tokens_used": 1046098400, + "objective/train/value_avg": -0.006412506103515625, + "objective/train/value_loss": 0.00014659865701105446, + "objective/train/value_max": -6.204843521118164e-05, + "objective/train/value_min": -0.332275390625, + "objective/train/value_reward_corr": 0.7111525298676997, + "objective/train/value_std": 0.012237548828125, + "objective/train/weight_avg": 1.0012383460998535, + "objective/train/weighted_lm_loss": 1.2924283742904663, + "objective/train/weights_max": 1.180111289024353, + "objective/train/weights_min": 0.3988421857357025, + "theoretical_loss": 3.6688972151505803, + "tokens_seen": 1025638400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006961161932274114, + "loss": 0.0731, + "theoretical_loss": 3.668853151094318, + "tokens_seen": 1025769472 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006960359492858289, + "loss": 0.0728, + "theoretical_loss": 3.6687650445989295, + "tokens_seen": 1026031616 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006959557053442466, + "loss": 0.0732, + "theoretical_loss": 3.6686769669124004, + "tokens_seen": 1026293760 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006958754614026641, + "loss": 0.0744, + "theoretical_loss": 3.6685889180179565, + "tokens_seen": 1026555904 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006957952174610816, + "loss": 0.0741, + "theoretical_loss": 3.668500897898837, + "tokens_seen": 1026818048 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006957149735194993, + "loss": 0.0754, + "theoretical_loss": 3.668412906538295, + "tokens_seen": 1027080192 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006956347295779168, + "loss": 0.0759, + "theoretical_loss": 3.6683249439195977, + "tokens_seen": 1027342336 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006955544856363345, + "loss": 0.0751, + "theoretical_loss": 3.668237010026026, + "tokens_seen": 1027604480 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006954742416947521, + "loss": 0.0754, + "theoretical_loss": 3.668149104840876, + "tokens_seen": 1027866624 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006953939977531697, + "loss": 0.0749, + "theoretical_loss": 3.6680612283474567, + "tokens_seen": 1028128768 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006953137538115872, + "loss": 0.0743, + "theoretical_loss": 3.667973380529091, + "tokens_seen": 1028390912 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006952335098700048, + "loss": 0.0741, + "theoretical_loss": 3.6678855613691157, + "tokens_seen": 1028653056 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0010213935747742653, + "objective/train/docs_used": 377682, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4129959344863892, + "objective/train/original_loss": 1.4129958152770996, + "objective/train/theoretical_loss": 3.667797770850883, + "objective/train/tokens_used": 1049375200, + "objective/train/value_avg": -0.007488250732421875, + "objective/train/value_loss": 0.00019148027058690786, + "objective/train/value_max": -0.0001055598258972168, + "objective/train/value_min": -0.326904296875, + "objective/train/value_reward_corr": 0.5655283244055973, + "objective/train/value_std": 0.01061248779296875, + "objective/train/weight_avg": 1.0011082887649536, + "objective/train/weighted_lm_loss": 1.4145474433898926, + "objective/train/weights_max": 1.0799387693405151, + "objective/train/weights_min": 0.3737297058105469, + "theoretical_loss": 3.667797770850883, + "tokens_seen": 1028915200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006951532659284224, + "loss": 0.0734, + "theoretical_loss": 3.667797770850883, + "tokens_seen": 1028915200 + }, + { + "epoch": 0.31, + "learning_rate": 0.00069507302198684, + "loss": 0.0748, + "theoretical_loss": 3.667710008957756, + "tokens_seen": 1029177344 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006949927780452576, + "loss": 0.0776, + "theoretical_loss": 3.667622275673115, + "tokens_seen": 1029439488 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006949125341036751, + "loss": 0.0747, + "theoretical_loss": 3.667534570980353, + "tokens_seen": 1029701632 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006948322901620929, + "loss": 0.076, + "theoretical_loss": 3.667446894862876, + "tokens_seen": 1029963776 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006947520462205104, + "loss": 0.0731, + "theoretical_loss": 3.667359247304104, + "tokens_seen": 1030225920 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006946718022789279, + "loss": 0.074, + "theoretical_loss": 3.667271628287472, + "tokens_seen": 1030488064 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006945915583373456, + "loss": 0.0754, + "theoretical_loss": 3.6671840377964275, + "tokens_seen": 1030750208 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006945113143957631, + "loss": 0.0756, + "theoretical_loss": 3.667096475814433, + "tokens_seen": 1031012352 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006944310704541807, + "loss": 0.0751, + "theoretical_loss": 3.6670089423249643, + "tokens_seen": 1031274496 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006943508265125983, + "loss": 0.0785, + "theoretical_loss": 3.6669214373115104, + "tokens_seen": 1031536640 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006942705825710159, + "loss": 0.0746, + "theoretical_loss": 3.6668339607575744, + "tokens_seen": 1031798784 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006941903386294334, + "loss": 0.0726, + "theoretical_loss": 3.6667465126466743, + "tokens_seen": 1032060928 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0018603794742375612, + "objective/train/docs_used": 378946, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4675285816192627, + "objective/train/original_loss": 1.4675285816192627, + "objective/train/theoretical_loss": 3.6667027992522145, + "objective/train/tokens_used": 1052652000, + "objective/train/value_avg": -0.00775909423828125, + "objective/train/value_loss": 0.00012807230814360082, + "objective/train/value_max": -0.0001150369644165039, + "objective/train/value_min": -0.2120361328125, + "objective/train/value_reward_corr": 0.680010560022861, + "objective/train/value_std": 0.01026153564453125, + "objective/train/weight_avg": 1.0019227266311646, + "objective/train/weighted_lm_loss": 1.4709603786468506, + "objective/train/weights_max": 1.1424355506896973, + "objective/train/weights_min": 0.5569349527359009, + "theoretical_loss": 3.6667027992522145, + "tokens_seen": 1032192000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006941100946878512, + "loss": 0.0707, + "theoretical_loss": 3.6666590929623393, + "tokens_seen": 1032323072 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006940298507462687, + "loss": 0.0717, + "theoretical_loss": 3.666571701688115, + "tokens_seen": 1032585216 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006939496068046863, + "loss": 0.0769, + "theoretical_loss": 3.6664843388075594, + "tokens_seen": 1032847360 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006938693628631039, + "loss": 0.0707, + "theoretical_loss": 3.6663970043042435, + "tokens_seen": 1033109504 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006937891189215214, + "loss": 0.0716, + "theoretical_loss": 3.6663096981617533, + "tokens_seen": 1033371648 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006937088749799391, + "loss": 0.0717, + "theoretical_loss": 3.6662224203636886, + "tokens_seen": 1033633792 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006936286310383566, + "loss": 0.0744, + "theoretical_loss": 3.6661351708936616, + "tokens_seen": 1033895936 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006935483870967742, + "loss": 0.0694, + "theoretical_loss": 3.6660479497352982, + "tokens_seen": 1034158080 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006934681431551918, + "loss": 0.0754, + "theoretical_loss": 3.665960756872239, + "tokens_seen": 1034420224 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006933878992136093, + "loss": 0.0745, + "theoretical_loss": 3.6658735922881376, + "tokens_seen": 1034682368 + }, + { + "epoch": 0.31, + "learning_rate": 0.000693307655272027, + "loss": 0.0745, + "theoretical_loss": 3.665786455966661, + "tokens_seen": 1034944512 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006932274113304446, + "loss": 0.0747, + "theoretical_loss": 3.6656993478914903, + "tokens_seen": 1035206656 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.00034525172668509185, + "objective/train/docs_used": 379908, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.507829189300537, + "objective/train/original_loss": 1.507829189300537, + "objective/train/theoretical_loss": 3.6656122680463197, + "objective/train/tokens_used": 1055928800, + "objective/train/value_avg": -0.00595855712890625, + "objective/train/value_loss": 9.688719001132995e-05, + "objective/train/value_max": -0.00011682510375976562, + "objective/train/value_min": -0.22119140625, + "objective/train/value_reward_corr": 0.7160243405342503, + "objective/train/value_std": 0.00960540771484375, + "objective/train/weight_avg": 1.000393033027649, + "objective/train/weighted_lm_loss": 1.5080686807632446, + "objective/train/weights_max": 1.0766479969024658, + "objective/train/weights_min": 0.820656955242157, + "theoretical_loss": 3.6656122680463197, + "tokens_seen": 1035468800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006931471673888622, + "loss": 0.0755, + "theoretical_loss": 3.6656122680463197, + "tokens_seen": 1035468800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006930669234472797, + "loss": 0.0731, + "theoretical_loss": 3.6655252164148564, + "tokens_seen": 1035730944 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006929866795056974, + "loss": 0.0761, + "theoretical_loss": 3.6654381929808233, + "tokens_seen": 1035993088 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006929064355641149, + "loss": 0.0735, + "theoretical_loss": 3.6653511977279534, + "tokens_seen": 1036255232 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006928261916225324, + "loss": 0.0746, + "theoretical_loss": 3.6652642306399965, + "tokens_seen": 1036517376 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006927459476809501, + "loss": 0.075, + "theoretical_loss": 3.6651772917007137, + "tokens_seen": 1036779520 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006926657037393676, + "loss": 0.0756, + "theoretical_loss": 3.6650903808938806, + "tokens_seen": 1037041664 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006925854597977854, + "loss": 0.0752, + "theoretical_loss": 3.6650034982032857, + "tokens_seen": 1037303808 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006925052158562029, + "loss": 0.0735, + "theoretical_loss": 3.664916643612732, + "tokens_seen": 1037565952 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006924249719146205, + "loss": 0.0729, + "theoretical_loss": 3.6648298171060345, + "tokens_seen": 1037828096 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006923447279730381, + "loss": 0.0729, + "theoretical_loss": 3.6647430186670222, + "tokens_seen": 1038090240 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006922644840314556, + "loss": 0.0733, + "theoretical_loss": 3.6646562482795373, + "tokens_seen": 1038352384 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006921842400898732, + "loss": 0.0722, + "theoretical_loss": 3.664569505927436, + "tokens_seen": 1038614528 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.00035626679891720414, + "objective/train/docs_used": 381080, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3043032884597778, + "objective/train/original_loss": 1.3043031692504883, + "objective/train/theoretical_loss": 3.6645261452596136, + "objective/train/tokens_used": 1059205600, + "objective/train/value_avg": -0.00853729248046875, + "objective/train/value_loss": 0.00025545063544996083, + "objective/train/value_max": -7.724761962890625e-05, + "objective/train/value_min": -0.296142578125, + "objective/train/value_reward_corr": 0.7234297240140802, + "objective/train/value_std": 0.01416015625, + "objective/train/weight_avg": 0.9997609257698059, + "objective/train/weighted_lm_loss": 1.3044672012329102, + "objective/train/weights_max": 1.3083919286727905, + "objective/train/weights_min": 0.3739407956600189, + "theoretical_loss": 3.6645261452596136, + "tokens_seen": 1038745600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006921039961482908, + "loss": 0.0737, + "theoretical_loss": 3.664482791594588, + "tokens_seen": 1038876672 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006920237522067084, + "loss": 0.0738, + "theoretical_loss": 3.664396105264875, + "tokens_seen": 1039138816 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006919435082651259, + "loss": 0.0736, + "theoretical_loss": 3.6643094469221933, + "tokens_seen": 1039400960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006918632643235437, + "loss": 0.0734, + "theoretical_loss": 3.664222816550452, + "tokens_seen": 1039663104 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006917830203819612, + "loss": 0.0722, + "theoretical_loss": 3.6641362141335727, + "tokens_seen": 1039925248 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006917027764403787, + "loss": 0.0733, + "theoretical_loss": 3.6640496396554925, + "tokens_seen": 1040187392 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006916225324987964, + "loss": 0.0753, + "theoretical_loss": 3.6639630931001594, + "tokens_seen": 1040449536 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006915422885572139, + "loss": 0.0737, + "theoretical_loss": 3.6638765744515367, + "tokens_seen": 1040711680 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006914620446156316, + "loss": 0.0761, + "theoretical_loss": 3.6637900836935993, + "tokens_seen": 1040973824 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006913818006740491, + "loss": 0.0736, + "theoretical_loss": 3.6637036208103364, + "tokens_seen": 1041235968 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006913015567324667, + "loss": 0.0712, + "theoretical_loss": 3.663617185785749, + "tokens_seen": 1041498112 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006912213127908843, + "loss": 0.0725, + "theoretical_loss": 3.6635307786038536, + "tokens_seen": 1041760256 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0007461439236067235, + "objective/train/docs_used": 382133, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4432910680770874, + "objective/train/original_loss": 1.4432913064956665, + "objective/train/theoretical_loss": 3.663444399248678, + "objective/train/tokens_used": 1062482400, + "objective/train/value_avg": -0.006084442138671875, + "objective/train/value_loss": 0.00022422554320655763, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.318603515625, + "objective/train/value_reward_corr": 0.5543261672765586, + "objective/train/value_std": 0.00926971435546875, + "objective/train/weight_avg": 1.0008444786071777, + "objective/train/weighted_lm_loss": 1.443787693977356, + "objective/train/weights_max": 1.2419401407241821, + "objective/train/weights_min": 0.3867599070072174, + "theoretical_loss": 3.663444399248678, + "tokens_seen": 1042022400 + }, + { + "epoch": 0.32, + "learning_rate": 0.000691141068849302, + "loss": 0.072, + "theoretical_loss": 3.663444399248678, + "tokens_seen": 1042022400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006910608249077195, + "loss": 0.0754, + "theoretical_loss": 3.6633580477042633, + "tokens_seen": 1042284544 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006909805809661371, + "loss": 0.0734, + "theoretical_loss": 3.663271723954665, + "tokens_seen": 1042546688 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006909003370245547, + "loss": 0.074, + "theoretical_loss": 3.6631854279839513, + "tokens_seen": 1042808832 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006908200930829722, + "loss": 0.0703, + "theoretical_loss": 3.6630991597762024, + "tokens_seen": 1043070976 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006907398491413899, + "loss": 0.0744, + "theoretical_loss": 3.6630129193155128, + "tokens_seen": 1043333120 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006906596051998074, + "loss": 0.0735, + "theoretical_loss": 3.6629267065859894, + "tokens_seen": 1043595264 + }, + { + "epoch": 0.32, + "learning_rate": 0.000690579361258225, + "loss": 0.0756, + "theoretical_loss": 3.662840521571753, + "tokens_seen": 1043857408 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006904991173166426, + "loss": 0.0741, + "theoretical_loss": 3.662754364256937, + "tokens_seen": 1044119552 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006904188733750601, + "loss": 0.0773, + "theoretical_loss": 3.662668234625688, + "tokens_seen": 1044381696 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006903386294334778, + "loss": 0.072, + "theoretical_loss": 3.6625821326621653, + "tokens_seen": 1044643840 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006902583854918954, + "loss": 0.0761, + "theoretical_loss": 3.6624960583505404, + "tokens_seen": 1044905984 + }, + { + "epoch": 0.32, + "learning_rate": 0.000690178141550313, + "loss": 0.0717, + "theoretical_loss": 3.662410011675001, + "tokens_seen": 1045168128 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0007527482812292874, + "objective/train/docs_used": 383342, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4821735620498657, + "objective/train/original_loss": 1.4821735620498657, + "objective/train/theoretical_loss": 3.6623669986958247, + "objective/train/tokens_used": 1065759200, + "objective/train/value_avg": -0.008392333984375, + "objective/train/value_loss": 0.00048586874618195, + "objective/train/value_max": -0.00010722875595092773, + "objective/train/value_min": -0.705078125, + "objective/train/value_reward_corr": 0.709452779874576, + "objective/train/value_std": 0.0186614990234375, + "objective/train/weight_avg": 1.000962257385254, + "objective/train/weighted_lm_loss": 1.482825517654419, + "objective/train/weights_max": 1.404705286026001, + "objective/train/weights_min": 0.36978310346603394, + "theoretical_loss": 3.6623669986958247, + "tokens_seen": 1045299200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006900978976087306, + "loss": 0.0748, + "theoretical_loss": 3.6623239926197444, + "tokens_seen": 1045430272 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006900176536671482, + "loss": 0.0741, + "theoretical_loss": 3.6622380011689826, + "tokens_seen": 1045692416 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006899374097255657, + "loss": 0.0736, + "theoretical_loss": 3.66215203730694, + "tokens_seen": 1045954560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006898571657839833, + "loss": 0.0728, + "theoretical_loss": 3.6620661010178543, + "tokens_seen": 1046216704 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006897769218424009, + "loss": 0.0738, + "theoretical_loss": 3.6619801922859763, + "tokens_seen": 1046478848 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006896966779008184, + "loss": 0.0728, + "theoretical_loss": 3.661894311095568, + "tokens_seen": 1046740992 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006896164339592362, + "loss": 0.0752, + "theoretical_loss": 3.6618084574309075, + "tokens_seen": 1047003136 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006895361900176537, + "loss": 0.0727, + "theoretical_loss": 3.6617226312762834, + "tokens_seen": 1047265280 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006894559460760713, + "loss": 0.0724, + "theoretical_loss": 3.6616368326159976, + "tokens_seen": 1047527424 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006893757021344889, + "loss": 0.0748, + "theoretical_loss": 3.6615510614343654, + "tokens_seen": 1047789568 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006892954581929064, + "loss": 0.0714, + "theoretical_loss": 3.661465317715715, + "tokens_seen": 1048051712 + }, + { + "epoch": 0.32, + "learning_rate": 0.000689215214251324, + "loss": 0.073, + "theoretical_loss": 3.6613796014443865, + "tokens_seen": 1048313856 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0009373921202495694, + "objective/train/docs_used": 384505, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5765043497085571, + "objective/train/original_loss": 1.5765044689178467, + "objective/train/theoretical_loss": 3.661293912604734, + "objective/train/tokens_used": 1069036000, + "objective/train/value_avg": -0.007293701171875, + "objective/train/value_loss": 0.00017708793166093528, + "objective/train/value_max": -5.561113357543945e-05, + "objective/train/value_min": -0.2237548828125, + "objective/train/value_reward_corr": 0.6360596224023429, + "objective/train/value_std": 0.0101165771484375, + "objective/train/weight_avg": 1.001020908355713, + "objective/train/weighted_lm_loss": 1.5784999132156372, + "objective/train/weights_max": 1.112996220588684, + "objective/train/weights_min": 0.3697379231452942, + "theoretical_loss": 3.661293912604734, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006891349703097416, + "loss": 0.0727, + "theoretical_loss": 3.661293912604734, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006890547263681592, + "loss": 0.0737, + "theoretical_loss": 3.661208251181124, + "tokens_seen": 1048838144 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006889744824265767, + "loss": 0.0719, + "theoretical_loss": 3.6611226171579356, + "tokens_seen": 1049100288 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006888942384849945, + "loss": 0.0739, + "theoretical_loss": 3.6610370105195607, + "tokens_seen": 1049362432 + }, + { + "epoch": 0.32, + "learning_rate": 0.000688813994543412, + "loss": 0.0753, + "theoretical_loss": 3.660951431250405, + "tokens_seen": 1049624576 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006887337506018296, + "loss": 0.0712, + "theoretical_loss": 3.6608658793348847, + "tokens_seen": 1049886720 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006886535066602472, + "loss": 0.074, + "theoretical_loss": 3.6607803547574314, + "tokens_seen": 1050148864 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006885732627186647, + "loss": 0.0755, + "theoretical_loss": 3.660694857502487, + "tokens_seen": 1050411008 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006884930187770824, + "loss": 0.073, + "theoretical_loss": 3.660609387554509, + "tokens_seen": 1050673152 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006884127748354999, + "loss": 0.0763, + "theoretical_loss": 3.6605239448979647, + "tokens_seen": 1050935296 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006883325308939175, + "loss": 0.0751, + "theoretical_loss": 3.660438529517336, + "tokens_seen": 1051197440 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006882522869523351, + "loss": 0.0762, + "theoretical_loss": 3.660353141397116, + "tokens_seen": 1051459584 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006881720430107528, + "loss": 0.0766, + "theoretical_loss": 3.6602677805218127, + "tokens_seen": 1051721728 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.00259937415830791, + "objective/train/docs_used": 385698, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4460582733154297, + "objective/train/original_loss": 1.4460582733154297, + "objective/train/theoretical_loss": 3.660225110296166, + "objective/train/tokens_used": 1072312800, + "objective/train/value_avg": -0.0083160400390625, + "objective/train/value_loss": 0.0003496213466860354, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.708984375, + "objective/train/value_reward_corr": 0.6992441106554197, + "objective/train/value_std": 0.0217742919921875, + "objective/train/weight_avg": 1.0027629137039185, + "objective/train/weighted_lm_loss": 1.450356125831604, + "objective/train/weights_max": 1.4854079484939575, + "objective/train/weights_min": 0.3811357617378235, + "theoretical_loss": 3.660225110296166, + "tokens_seen": 1051852800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006880917990691703, + "loss": 0.0753, + "theoretical_loss": 3.660182446875944, + "tokens_seen": 1051983872 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006880115551275879, + "loss": 0.0699, + "theoretical_loss": 3.6600971404440434, + "tokens_seen": 1052246016 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006879313111860055, + "loss": 0.0714, + "theoretical_loss": 3.660011861210654, + "tokens_seen": 1052508160 + }, + { + "epoch": 0.32, + "learning_rate": 0.000687851067244423, + "loss": 0.0713, + "theoretical_loss": 3.659926609160334, + "tokens_seen": 1052770304 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006877708233028407, + "loss": 0.0699, + "theoretical_loss": 3.6598413842776534, + "tokens_seen": 1053032448 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006876905793612582, + "loss": 0.073, + "theoretical_loss": 3.6597561865471935, + "tokens_seen": 1053294592 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006876103354196759, + "loss": 0.0773, + "theoretical_loss": 3.6596710159535504, + "tokens_seen": 1053556736 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006875300914780934, + "loss": 0.0752, + "theoretical_loss": 3.659585872481331, + "tokens_seen": 1053818880 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006874498475365109, + "loss": 0.075, + "theoretical_loss": 3.659500756115156, + "tokens_seen": 1054081024 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006873696035949287, + "loss": 0.0736, + "theoretical_loss": 3.659415666839658, + "tokens_seen": 1054343168 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006872893596533462, + "loss": 0.0723, + "theoretical_loss": 3.6593306046394813, + "tokens_seen": 1054605312 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006872091157117638, + "loss": 0.0748, + "theoretical_loss": 3.6592455694992854, + "tokens_seen": 1054867456 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0018493332900106907, + "objective/train/docs_used": 386814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4244455099105835, + "objective/train/original_loss": 1.424445390701294, + "objective/train/theoretical_loss": 3.659160561403739, + "objective/train/tokens_used": 1075589600, + "objective/train/value_avg": -0.00870513916015625, + "objective/train/value_loss": 0.00016267823230009526, + "objective/train/value_max": -0.00012636184692382812, + "objective/train/value_min": -0.32421875, + "objective/train/value_reward_corr": 0.6824163394532532, + "objective/train/value_std": 0.01215362548828125, + "objective/train/weight_avg": 1.0019264221191406, + "objective/train/weighted_lm_loss": 1.426944375038147, + "objective/train/weights_max": 1.2696858644485474, + "objective/train/weights_min": 0.3791867196559906, + "theoretical_loss": 3.659160561403739, + "tokens_seen": 1055129600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006871288717701814, + "loss": 0.0698, + "theoretical_loss": 3.659160561403739, + "tokens_seen": 1055129600 + }, + { + "epoch": 0.32, + "learning_rate": 0.000687048627828599, + "loss": 0.0717, + "theoretical_loss": 3.6590755803375252, + "tokens_seen": 1055391744 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006869683838870165, + "loss": 0.0734, + "theoretical_loss": 3.65899062628534, + "tokens_seen": 1055653888 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006868881399454341, + "loss": 0.0726, + "theoretical_loss": 3.65890569923189, + "tokens_seen": 1055916032 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006868078960038517, + "loss": 0.0742, + "theoretical_loss": 3.658820799161896, + "tokens_seen": 1056178176 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006867276520622692, + "loss": 0.0748, + "theoretical_loss": 3.65873592606009, + "tokens_seen": 1056440320 + }, + { + "epoch": 0.32, + "learning_rate": 0.000686647408120687, + "loss": 0.0707, + "theoretical_loss": 3.658651079911218, + "tokens_seen": 1056702464 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006865671641791045, + "loss": 0.0736, + "theoretical_loss": 3.658566260700036, + "tokens_seen": 1056964608 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006864869202375221, + "loss": 0.0734, + "theoretical_loss": 3.658481468411315, + "tokens_seen": 1057226752 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006864066762959397, + "loss": 0.0714, + "theoretical_loss": 3.6583967030298368, + "tokens_seen": 1057488896 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006863264323543572, + "loss": 0.0745, + "theoretical_loss": 3.6583119645403954, + "tokens_seen": 1057751040 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006862461884127749, + "loss": 0.071, + "theoretical_loss": 3.658227252927799, + "tokens_seen": 1058013184 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006861659444711924, + "loss": 0.0704, + "theoretical_loss": 3.6581425681768653, + "tokens_seen": 1058275328 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0014182066079229116, + "objective/train/docs_used": 388023, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3999180793762207, + "objective/train/original_loss": 1.3999180793762207, + "objective/train/theoretical_loss": 3.6581002358697816, + "objective/train/tokens_used": 1078866400, + "objective/train/value_avg": -0.0078582763671875, + "objective/train/value_loss": 0.00017249694792553782, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.32958984375, + "objective/train/value_reward_corr": 0.6715197510150517, + "objective/train/value_std": 0.01299285888671875, + "objective/train/weight_avg": 1.001496434211731, + "objective/train/weighted_lm_loss": 1.4018974304199219, + "objective/train/weights_max": 1.1797270774841309, + "objective/train/weights_min": 0.3719121813774109, + "theoretical_loss": 3.6581002358697816, + "tokens_seen": 1058406400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00068608570052961, + "loss": 0.0719, + "theoretical_loss": 3.6580579102724267, + "tokens_seen": 1058537472 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006860054565880276, + "loss": 0.0734, + "theoretical_loss": 3.657973279199327, + "tokens_seen": 1058799616 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006859252126464453, + "loss": 0.0735, + "theoretical_loss": 3.6578886749424226, + "tokens_seen": 1059061760 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006858449687048628, + "loss": 0.077, + "theoretical_loss": 3.657804097486581, + "tokens_seen": 1059323904 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006857647247632804, + "loss": 0.0704, + "theoretical_loss": 3.657719546816685, + "tokens_seen": 1059586048 + }, + { + "epoch": 0.32, + "learning_rate": 0.000685684480821698, + "loss": 0.071, + "theoretical_loss": 3.657635022917626, + "tokens_seen": 1059848192 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006856042368801155, + "loss": 0.0714, + "theoretical_loss": 3.65755052577431, + "tokens_seen": 1060110336 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006855239929385332, + "loss": 0.071, + "theoretical_loss": 3.657466055371654, + "tokens_seen": 1060372480 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006854437489969507, + "loss": 0.0722, + "theoretical_loss": 3.657381611694588, + "tokens_seen": 1060634624 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006853635050553683, + "loss": 0.073, + "theoretical_loss": 3.6572971947280544, + "tokens_seen": 1060896768 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006852832611137859, + "loss": 0.0763, + "theoretical_loss": 3.6572128044570067, + "tokens_seen": 1061158912 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006852030171722034, + "loss": 0.0725, + "theoretical_loss": 3.657128440866412, + "tokens_seen": 1061421056 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0011737276799976826, + "objective/train/docs_used": 389244, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5271493196487427, + "objective/train/original_loss": 1.5271493196487427, + "objective/train/theoretical_loss": 3.6570441039412485, + "objective/train/tokens_used": 1082143200, + "objective/train/value_avg": -0.00862884521484375, + "objective/train/value_loss": 0.0003618821792770177, + "objective/train/value_max": -0.00017261505126953125, + "objective/train/value_min": -0.403076171875, + "objective/train/value_reward_corr": 0.6260921546392633, + "objective/train/value_std": 0.01381683349609375, + "objective/train/weight_avg": 1.0013278722763062, + "objective/train/weighted_lm_loss": 1.529421329498291, + "objective/train/weights_max": 1.1920899152755737, + "objective/train/weights_min": 0.37519246339797974, + "theoretical_loss": 3.6570441039412485, + "tokens_seen": 1061683200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006851227732306211, + "loss": 0.0731, + "theoretical_loss": 3.6570441039412485, + "tokens_seen": 1061683200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006850425292890387, + "loss": 0.0734, + "theoretical_loss": 3.6569597936665064, + "tokens_seen": 1061945344 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006849622853474563, + "loss": 0.0754, + "theoretical_loss": 3.6568755100271897, + "tokens_seen": 1062207488 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006848820414058739, + "loss": 0.0727, + "theoretical_loss": 3.656791253008313, + "tokens_seen": 1062469632 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006848017974642915, + "loss": 0.0737, + "theoretical_loss": 3.6567070225949028, + "tokens_seen": 1062731776 + }, + { + "epoch": 0.32, + "learning_rate": 0.000684721553522709, + "loss": 0.0727, + "theoretical_loss": 3.656622818771999, + "tokens_seen": 1062993920 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006846413095811267, + "loss": 0.0711, + "theoretical_loss": 3.6565386415246524, + "tokens_seen": 1063256064 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006845610656395442, + "loss": 0.0719, + "theoretical_loss": 3.6564544908379273, + "tokens_seen": 1063518208 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006844808216979617, + "loss": 0.0724, + "theoretical_loss": 3.6563703666968985, + "tokens_seen": 1063780352 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006844005777563795, + "loss": 0.0709, + "theoretical_loss": 3.656286269086653, + "tokens_seen": 1064042496 + }, + { + "epoch": 0.32, + "learning_rate": 0.000684320333814797, + "loss": 0.0733, + "theoretical_loss": 3.6562021979922923, + "tokens_seen": 1064304640 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006842400898732146, + "loss": 0.0755, + "theoretical_loss": 3.6561181533989267, + "tokens_seen": 1064566784 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006841598459316322, + "loss": 0.0731, + "theoretical_loss": 3.6560341352916796, + "tokens_seen": 1064828928 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.001128622330725193, + "objective/train/docs_used": 390462, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3764036893844604, + "objective/train/original_loss": 1.376403570175171, + "objective/train/theoretical_loss": 3.6559921361657057, + "objective/train/tokens_used": 1085420000, + "objective/train/value_avg": -0.01018524169921875, + "objective/train/value_loss": 0.00021231910795904696, + "objective/train/value_max": -6.35385513305664e-05, + "objective/train/value_min": -0.358642578125, + "objective/train/value_reward_corr": 0.7759159261494163, + "objective/train/value_std": 0.0171661376953125, + "objective/train/weight_avg": 1.0012286901474, + "objective/train/weighted_lm_loss": 1.3785223960876465, + "objective/train/weights_max": 1.2096216678619385, + "objective/train/weights_min": 0.37203988432884216, + "theoretical_loss": 3.6559921361657057, + "tokens_seen": 1064960000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006840796019900498, + "loss": 0.073, + "theoretical_loss": 3.655950143655688, + "tokens_seen": 1065091072 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006839993580484673, + "loss": 0.0734, + "theoretical_loss": 3.655866178476098, + "tokens_seen": 1065353216 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006839191141068849, + "loss": 0.0728, + "theoretical_loss": 3.65578223973807, + "tokens_seen": 1065615360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006838388701653025, + "loss": 0.077, + "theoretical_loss": 3.6556983274267765, + "tokens_seen": 1065877504 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006837586262237201, + "loss": 0.0731, + "theoretical_loss": 3.6556144415273994, + "tokens_seen": 1066139648 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006836783822821378, + "loss": 0.0734, + "theoretical_loss": 3.655530582025136, + "tokens_seen": 1066401792 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006835981383405553, + "loss": 0.0718, + "theoretical_loss": 3.6554467489051925, + "tokens_seen": 1066663936 + }, + { + "epoch": 0.32, + "learning_rate": 0.000683517894398973, + "loss": 0.0745, + "theoretical_loss": 3.6553629421527885, + "tokens_seen": 1066926080 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006834376504573905, + "loss": 0.0743, + "theoretical_loss": 3.655279161753156, + "tokens_seen": 1067188224 + }, + { + "epoch": 0.32, + "learning_rate": 0.000683357406515808, + "loss": 0.0712, + "theoretical_loss": 3.6551954076915374, + "tokens_seen": 1067450368 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006832771625742257, + "loss": 0.0727, + "theoretical_loss": 3.655111679953188, + "tokens_seen": 1067712512 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006831969186326432, + "loss": 0.0737, + "theoretical_loss": 3.6550279785233757, + "tokens_seen": 1067974656 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0003532489645294845, + "objective/train/docs_used": 391774, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5375131368637085, + "objective/train/original_loss": 1.5375131368637085, + "objective/train/theoretical_loss": 3.654944303387378, + "objective/train/tokens_used": 1088696800, + "objective/train/value_avg": -0.00994873046875, + "objective/train/value_loss": 0.00020713380945380777, + "objective/train/value_max": -7.724761962890625e-05, + "objective/train/value_min": -0.334228515625, + "objective/train/value_reward_corr": 0.7750881540426269, + "objective/train/value_std": 0.0160369873046875, + "objective/train/weight_avg": 1.000451683998108, + "objective/train/weighted_lm_loss": 1.5384892225265503, + "objective/train/weights_max": 1.286349892616272, + "objective/train/weights_min": 0.3698126971721649, + "theoretical_loss": 3.654944303387378, + "tokens_seen": 1068236800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006831166746910608, + "loss": 0.0735, + "theoretical_loss": 3.654944303387378, + "tokens_seen": 1068236800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006830364307494784, + "loss": 0.073, + "theoretical_loss": 3.654860654530486, + "tokens_seen": 1068498944 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006829561868078961, + "loss": 0.0742, + "theoretical_loss": 3.6547770319380026, + "tokens_seen": 1068761088 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006828759428663136, + "loss": 0.0712, + "theoretical_loss": 3.6546934355952425, + "tokens_seen": 1069023232 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006827956989247312, + "loss": 0.0743, + "theoretical_loss": 3.6546098654875303, + "tokens_seen": 1069285376 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006827154549831488, + "loss": 0.0743, + "theoretical_loss": 3.654526321600205, + "tokens_seen": 1069547520 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006826352110415663, + "loss": 0.0728, + "theoretical_loss": 3.6544428039186165, + "tokens_seen": 1069809664 + }, + { + "epoch": 0.32, + "learning_rate": 0.000682554967099984, + "loss": 0.0716, + "theoretical_loss": 3.6543593124281264, + "tokens_seen": 1070071808 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006824747231584015, + "loss": 0.074, + "theoretical_loss": 3.654275847114107, + "tokens_seen": 1070333952 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006823944792168192, + "loss": 0.0762, + "theoretical_loss": 3.6541924079619443, + "tokens_seen": 1070596096 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006823142352752367, + "loss": 0.0741, + "theoretical_loss": 3.654108994957034, + "tokens_seen": 1070858240 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006822339913336542, + "loss": 0.0751, + "theoretical_loss": 3.654025608084786, + "tokens_seen": 1071120384 + }, + { + "epoch": 0.32, + "learning_rate": 0.000682153747392072, + "loss": 0.0744, + "theoretical_loss": 3.653942247330619, + "tokens_seen": 1071382528 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0008207072969526052, + "objective/train/docs_used": 393037, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5273302793502808, + "objective/train/original_loss": 1.5273303985595703, + "objective/train/theoretical_loss": 3.6539005767432635, + "objective/train/tokens_used": 1091973600, + "objective/train/value_avg": -0.0079193115234375, + "objective/train/value_loss": 0.00018726506095845252, + "objective/train/value_max": -4.792213439941406e-05, + "objective/train/value_min": -0.328125, + "objective/train/value_reward_corr": 0.6683278593111599, + "objective/train/value_std": 0.0133209228515625, + "objective/train/weight_avg": 1.0009064674377441, + "objective/train/weighted_lm_loss": 1.5285539627075195, + "objective/train/weights_max": 1.301619052886963, + "objective/train/weights_min": 0.3697463870048523, + "theoretical_loss": 3.6539005767432635, + "tokens_seen": 1071513600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006820735034504895, + "loss": 0.0727, + "theoretical_loss": 3.653858912679966, + "tokens_seen": 1071644672 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006819932595089071, + "loss": 0.0737, + "theoretical_loss": 3.6537756041182696, + "tokens_seen": 1071906816 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006819130155673247, + "loss": 0.0739, + "theoretical_loss": 3.6536923216309862, + "tokens_seen": 1072168960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006818327716257423, + "loss": 0.0744, + "theoretical_loss": 3.653609065203582, + "tokens_seen": 1072431104 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006817525276841598, + "loss": 0.0724, + "theoretical_loss": 3.6535258348215356, + "tokens_seen": 1072693248 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006816722837425775, + "loss": 0.0748, + "theoretical_loss": 3.653442630470337, + "tokens_seen": 1072955392 + }, + { + "epoch": 0.33, + "learning_rate": 0.000681592039800995, + "loss": 0.074, + "theoretical_loss": 3.653359452135488, + "tokens_seen": 1073217536 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006815117958594125, + "loss": 0.0737, + "theoretical_loss": 3.653276299802503, + "tokens_seen": 1073479680 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006814315519178303, + "loss": 0.0735, + "theoretical_loss": 3.6531931734569056, + "tokens_seen": 1073741824 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006813513079762478, + "loss": 0.0724, + "theoretical_loss": 3.6531100730842336, + "tokens_seen": 1074003968 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006812710640346655, + "loss": 0.0747, + "theoretical_loss": 3.653026998670035, + "tokens_seen": 1074266112 + }, + { + "epoch": 0.33, + "learning_rate": 0.000681190820093083, + "loss": 0.0734, + "theoretical_loss": 3.652943950199869, + "tokens_seen": 1074528256 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0013173749903216958, + "objective/train/docs_used": 394219, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3774545192718506, + "objective/train/original_loss": 1.3774546384811401, + "objective/train/theoretical_loss": 3.652860927659307, + "objective/train/tokens_used": 1095250400, + "objective/train/value_avg": -0.009002685546875, + "objective/train/value_loss": 0.000231547121074982, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.359130859375, + "objective/train/value_reward_corr": 0.6883898461921806, + "objective/train/value_std": 0.01419830322265625, + "objective/train/weight_avg": 1.0014238357543945, + "objective/train/weighted_lm_loss": 1.3791875839233398, + "objective/train/weights_max": 1.1214268207550049, + "objective/train/weights_min": 0.3686865568161011, + "theoretical_loss": 3.652860927659307, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006811105761515006, + "loss": 0.0735, + "theoretical_loss": 3.652860927659307, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006810303322099182, + "loss": 0.0728, + "theoretical_loss": 3.6527779310339326, + "tokens_seen": 1075052544 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006809500882683357, + "loss": 0.0741, + "theoretical_loss": 3.652694960309339, + "tokens_seen": 1075314688 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006808698443267533, + "loss": 0.075, + "theoretical_loss": 3.6526120154711332, + "tokens_seen": 1075576832 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006807896003851709, + "loss": 0.0721, + "theoretical_loss": 3.6525290965049324, + "tokens_seen": 1075838976 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006807093564435886, + "loss": 0.0738, + "theoretical_loss": 3.652446203396365, + "tokens_seen": 1076101120 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006806291125020061, + "loss": 0.0731, + "theoretical_loss": 3.6523633361310717, + "tokens_seen": 1076363264 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006805488685604238, + "loss": 0.0737, + "theoretical_loss": 3.6522804946947045, + "tokens_seen": 1076625408 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006804686246188413, + "loss": 0.0729, + "theoretical_loss": 3.6521976790729265, + "tokens_seen": 1076887552 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006803883806772588, + "loss": 0.0727, + "theoretical_loss": 3.652114889251412, + "tokens_seen": 1077149696 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006803081367356765, + "loss": 0.0764, + "theoretical_loss": 3.6520321252158485, + "tokens_seen": 1077411840 + }, + { + "epoch": 0.33, + "learning_rate": 0.000680227892794094, + "loss": 0.0723, + "theoretical_loss": 3.651949386951933, + "tokens_seen": 1077673984 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006801476488525116, + "loss": 0.073, + "theoretical_loss": 3.6518666744453734, + "tokens_seen": 1077936128 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0019057122990489006, + "objective/train/docs_used": 395298, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3716213703155518, + "objective/train/original_loss": 1.3716213703155518, + "objective/train/theoretical_loss": 3.6518253278466397, + "objective/train/tokens_used": 1098527200, + "objective/train/value_avg": -0.007762908935546875, + "objective/train/value_loss": 0.00023904572299215943, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.199462890625, + "objective/train/value_reward_corr": 0.600056819574208, + "objective/train/value_std": 0.0146636962890625, + "objective/train/weight_avg": 1.0020171403884888, + "objective/train/weighted_lm_loss": 1.3752607107162476, + "objective/train/weights_max": 1.1991831064224243, + "objective/train/weights_min": 0.3681375980377197, + "theoretical_loss": 3.6518253278466397, + "tokens_seen": 1078067200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006800674049109292, + "loss": 0.0731, + "theoretical_loss": 3.651783987681892, + "tokens_seen": 1078198272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006799871609693469, + "loss": 0.0735, + "theoretical_loss": 3.6517013266472187, + "tokens_seen": 1078460416 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006799069170277645, + "loss": 0.0735, + "theoretical_loss": 3.651618691327098, + "tokens_seen": 1078722560 + }, + { + "epoch": 0.33, + "learning_rate": 0.000679826673086182, + "loss": 0.075, + "theoretical_loss": 3.651536081707284, + "tokens_seen": 1078984704 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006797464291445996, + "loss": 0.0759, + "theoretical_loss": 3.651453497773543, + "tokens_seen": 1079246848 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006796661852030172, + "loss": 0.0745, + "theoretical_loss": 3.6513709395116516, + "tokens_seen": 1079508992 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006795859412614348, + "loss": 0.0716, + "theoretical_loss": 3.651288406907399, + "tokens_seen": 1079771136 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006795056973198523, + "loss": 0.0738, + "theoretical_loss": 3.6512058999465844, + "tokens_seen": 1080033280 + }, + { + "epoch": 0.33, + "learning_rate": 0.00067942545337827, + "loss": 0.0732, + "theoretical_loss": 3.6511234186150197, + "tokens_seen": 1080295424 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006793452094366875, + "loss": 0.0744, + "theoretical_loss": 3.6510409628985263, + "tokens_seen": 1080557568 + }, + { + "epoch": 0.33, + "learning_rate": 0.000679264965495105, + "loss": 0.0734, + "theoretical_loss": 3.6509585327829392, + "tokens_seen": 1080819712 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006791847215535228, + "loss": 0.0729, + "theoretical_loss": 3.6508761282541027, + "tokens_seen": 1081081856 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0018546562641859055, + "objective/train/docs_used": 396489, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.54283607006073, + "objective/train/original_loss": 1.5428359508514404, + "objective/train/theoretical_loss": 3.6507937492978733, + "objective/train/tokens_used": 1101804000, + "objective/train/value_avg": -0.007076263427734375, + "objective/train/value_loss": 0.0001660433190409094, + "objective/train/value_max": -0.00012242794036865234, + "objective/train/value_min": -0.2880859375, + "objective/train/value_reward_corr": 0.6533642274011282, + "objective/train/value_std": 0.01088714599609375, + "objective/train/weight_avg": 1.0019302368164062, + "objective/train/weighted_lm_loss": 1.5454530715942383, + "objective/train/weights_max": 1.150571584701538, + "objective/train/weights_min": 0.37533560395240784, + "theoretical_loss": 3.6507937492978733, + "tokens_seen": 1081344000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006791044776119403, + "loss": 0.0739, + "theoretical_loss": 3.6507937492978733, + "tokens_seen": 1081344000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006790242336703579, + "loss": 0.0713, + "theoretical_loss": 3.6507113959001183, + "tokens_seen": 1081606144 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006789439897287755, + "loss": 0.0771, + "theoretical_loss": 3.6506290680467166, + "tokens_seen": 1081868288 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006788637457871931, + "loss": 0.0739, + "theoretical_loss": 3.650546765723558, + "tokens_seen": 1082130432 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006787835018456106, + "loss": 0.0729, + "theoretical_loss": 3.650464488916544, + "tokens_seen": 1082392576 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006787032579040283, + "loss": 0.0746, + "theoretical_loss": 3.650382237611587, + "tokens_seen": 1082654720 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006786230139624458, + "loss": 0.0764, + "theoretical_loss": 3.65030001179461, + "tokens_seen": 1082916864 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006785427700208634, + "loss": 0.0735, + "theoretical_loss": 3.650217811451548, + "tokens_seen": 1083179008 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006784625260792811, + "loss": 0.0728, + "theoretical_loss": 3.650135636568347, + "tokens_seen": 1083441152 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006783822821376986, + "loss": 0.0734, + "theoretical_loss": 3.6500534871309642, + "tokens_seen": 1083703296 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006783020381961163, + "loss": 0.0737, + "theoretical_loss": 3.649971363125368, + "tokens_seen": 1083965440 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006782217942545338, + "loss": 0.074, + "theoretical_loss": 3.6498892645375367, + "tokens_seen": 1084227584 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006781415503129514, + "loss": 0.0727, + "theoretical_loss": 3.649807191353462, + "tokens_seen": 1084489728 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.000662712671328336, + "objective/train/docs_used": 397685, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3404814004898071, + "objective/train/original_loss": 1.3404817581176758, + "objective/train/theoretical_loss": 3.649766164283458, + "objective/train/tokens_used": 1105080800, + "objective/train/value_avg": -0.0102081298828125, + "objective/train/value_loss": 0.00020532849885057658, + "objective/train/value_max": -0.00010472536087036133, + "objective/train/value_min": -0.33837890625, + "objective/train/value_reward_corr": 0.7515684867138178, + "objective/train/value_std": 0.01517486572265625, + "objective/train/weight_avg": 1.0007604360580444, + "objective/train/weighted_lm_loss": 1.3414093255996704, + "objective/train/weights_max": 1.1722419261932373, + "objective/train/weights_min": 0.4255511462688446, + "theoretical_loss": 3.649766164283458, + "tokens_seen": 1084620800 + }, + { + "epoch": 0.33, + "learning_rate": 0.000678061306371369, + "loss": 0.0743, + "theoretical_loss": 3.6497251435591442, + "tokens_seen": 1084751872 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006779810624297865, + "loss": 0.0775, + "theoretical_loss": 3.6496431211405973, + "tokens_seen": 1085014016 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006779008184882041, + "loss": 0.0714, + "theoretical_loss": 3.649561124083844, + "tokens_seen": 1085276160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006778205745466217, + "loss": 0.0752, + "theoretical_loss": 3.6494791523749193, + "tokens_seen": 1085538304 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006777403306050394, + "loss": 0.0739, + "theoretical_loss": 3.6493972059998696, + "tokens_seen": 1085800448 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006776600866634569, + "loss": 0.0735, + "theoretical_loss": 3.649315284944751, + "tokens_seen": 1086062592 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006775798427218746, + "loss": 0.072, + "theoretical_loss": 3.649233389195632, + "tokens_seen": 1086324736 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006774995987802921, + "loss": 0.0736, + "theoretical_loss": 3.6491515187385914, + "tokens_seen": 1086586880 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006774193548387097, + "loss": 0.071, + "theoretical_loss": 3.649069673559719, + "tokens_seen": 1086849024 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006773391108971273, + "loss": 0.0729, + "theoretical_loss": 3.648987853645116, + "tokens_seen": 1087111168 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006772588669555448, + "loss": 0.0715, + "theoretical_loss": 3.648906058980894, + "tokens_seen": 1087373312 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006771786230139625, + "loss": 0.0733, + "theoretical_loss": 3.6488242895531764, + "tokens_seen": 1087635456 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0011196710402145982, + "objective/train/docs_used": 398862, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.379757046699524, + "objective/train/original_loss": 1.3797568082809448, + "objective/train/theoretical_loss": 3.6487425453480973, + "objective/train/tokens_used": 1108357600, + "objective/train/value_avg": -0.0118865966796875, + "objective/train/value_loss": 0.0003890149819198996, + "objective/train/value_max": -0.00010073184967041016, + "objective/train/value_min": -0.65087890625, + "objective/train/value_reward_corr": 0.7589761731426594, + "objective/train/value_std": 0.0198974609375, + "objective/train/weight_avg": 0.9990611672401428, + "objective/train/weighted_lm_loss": 1.3779809474945068, + "objective/train/weights_max": 1.7288053035736084, + "objective/train/weights_min": 0.3692087233066559, + "theoretical_loss": 3.6487425453480973, + "tokens_seen": 1087897600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00067709837907238, + "loss": 0.0722, + "theoretical_loss": 3.6487425453480973, + "tokens_seen": 1087897600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006770181351307977, + "loss": 0.0712, + "theoretical_loss": 3.648660826351801, + "tokens_seen": 1088159744 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006769378911892153, + "loss": 0.0735, + "theoretical_loss": 3.6485791325504437, + "tokens_seen": 1088421888 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006768576472476328, + "loss": 0.073, + "theoretical_loss": 3.648497463930192, + "tokens_seen": 1088684032 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006767774033060504, + "loss": 0.0729, + "theoretical_loss": 3.6484158204772235, + "tokens_seen": 1088946176 + }, + { + "epoch": 0.33, + "learning_rate": 0.000676697159364468, + "loss": 0.0749, + "theoretical_loss": 3.648334202177727, + "tokens_seen": 1089208320 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006766169154228856, + "loss": 0.0764, + "theoretical_loss": 3.648252609017902, + "tokens_seen": 1089470464 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006765366714813031, + "loss": 0.0741, + "theoretical_loss": 3.648171040983959, + "tokens_seen": 1089732608 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006764564275397208, + "loss": 0.0763, + "theoretical_loss": 3.648089498062119, + "tokens_seen": 1089994752 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006763761835981383, + "loss": 0.0753, + "theoretical_loss": 3.648007980238614, + "tokens_seen": 1090256896 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006762959396565558, + "loss": 0.0723, + "theoretical_loss": 3.6479264874996877, + "tokens_seen": 1090519040 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006762156957149736, + "loss": 0.0727, + "theoretical_loss": 3.6478450198315926, + "tokens_seen": 1090781184 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006761354517733911, + "loss": 0.0754, + "theoretical_loss": 3.6477635772205947, + "tokens_seen": 1091043328 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0015444932505488396, + "objective/train/docs_used": 400125, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.572818636894226, + "objective/train/original_loss": 1.572818636894226, + "objective/train/theoretical_loss": 3.647722865307218, + "objective/train/tokens_used": 1111634400, + "objective/train/value_avg": -0.007625579833984375, + "objective/train/value_loss": 0.00013872672570869327, + "objective/train/value_max": -0.00011146068572998047, + "objective/train/value_min": -0.3896484375, + "objective/train/value_reward_corr": 0.6624823334319214, + "objective/train/value_std": 0.0112152099609375, + "objective/train/weight_avg": 1.0016119480133057, + "objective/train/weighted_lm_loss": 1.574569821357727, + "objective/train/weights_max": 1.1813123226165771, + "objective/train/weights_min": 0.6096436381340027, + "theoretical_loss": 3.647722865307218, + "tokens_seen": 1091174400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006760552078318088, + "loss": 0.075, + "theoretical_loss": 3.647682159652969, + "tokens_seen": 1091305472 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006759749638902263, + "loss": 0.074, + "theoretical_loss": 3.647600767115002, + "tokens_seen": 1091567616 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006758947199486439, + "loss": 0.0756, + "theoretical_loss": 3.6475193995929907, + "tokens_seen": 1091829760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006758144760070615, + "loss": 0.0735, + "theoretical_loss": 3.6474380570732423, + "tokens_seen": 1092091904 + }, + { + "epoch": 0.33, + "learning_rate": 0.000675734232065479, + "loss": 0.075, + "theoretical_loss": 3.6473567395420767, + "tokens_seen": 1092354048 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006756539881238966, + "loss": 0.0729, + "theoretical_loss": 3.647275446985822, + "tokens_seen": 1092616192 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006755737441823142, + "loss": 0.074, + "theoretical_loss": 3.64719417939082, + "tokens_seen": 1092878336 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006754935002407319, + "loss": 0.0757, + "theoretical_loss": 3.6471129367434205, + "tokens_seen": 1093140480 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006754132562991494, + "loss": 0.0715, + "theoretical_loss": 3.647031719029985, + "tokens_seen": 1093402624 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006753330123575671, + "loss": 0.0731, + "theoretical_loss": 3.646950526236887, + "tokens_seen": 1093664768 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006752527684159846, + "loss": 0.0741, + "theoretical_loss": 3.6468693583505085, + "tokens_seen": 1093926912 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006751725244744022, + "loss": 0.0759, + "theoretical_loss": 3.646788215357244, + "tokens_seen": 1094189056 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0005579335847869515, + "objective/train/docs_used": 401268, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.393877625465393, + "objective/train/original_loss": 1.3938775062561035, + "objective/train/theoretical_loss": 3.646707097243498, + "objective/train/tokens_used": 1114911200, + "objective/train/value_avg": -0.007781982421875, + "objective/train/value_loss": 0.0001916129986057058, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.6875, + "objective/train/value_reward_corr": 0.7125330836958561, + "objective/train/value_std": 0.01245880126953125, + "objective/train/weight_avg": 1.000646948814392, + "objective/train/weighted_lm_loss": 1.3944873809814453, + "objective/train/weights_max": 1.2134755849838257, + "objective/train/weights_min": 0.3718355596065521, + "theoretical_loss": 3.646707097243498, + "tokens_seen": 1094451200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006750922805328198, + "loss": 0.0733, + "theoretical_loss": 3.646707097243498, + "tokens_seen": 1094451200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006750120365912373, + "loss": 0.0727, + "theoretical_loss": 3.646626003995685, + "tokens_seen": 1094713344 + }, + { + "epoch": 0.33, + "learning_rate": 0.000674931792649655, + "loss": 0.0757, + "theoretical_loss": 3.6465449356002315, + "tokens_seen": 1094975488 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006748515487080725, + "loss": 0.0713, + "theoretical_loss": 3.646463892043574, + "tokens_seen": 1095237632 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006747713047664902, + "loss": 0.0736, + "theoretical_loss": 3.6463828733121586, + "tokens_seen": 1095499776 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006746910608249078, + "loss": 0.0747, + "theoretical_loss": 3.6463018793924453, + "tokens_seen": 1095761920 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006746108168833254, + "loss": 0.0713, + "theoretical_loss": 3.6462209102709, + "tokens_seen": 1096024064 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006745305729417429, + "loss": 0.0722, + "theoretical_loss": 3.6461399659340037, + "tokens_seen": 1096286208 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006744503290001605, + "loss": 0.0738, + "theoretical_loss": 3.6460590463682454, + "tokens_seen": 1096548352 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006743700850585781, + "loss": 0.0727, + "theoretical_loss": 3.6459781515601244, + "tokens_seen": 1096810496 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006742898411169956, + "loss": 0.0748, + "theoretical_loss": 3.6458972814961528, + "tokens_seen": 1097072640 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006742095971754133, + "loss": 0.0723, + "theoretical_loss": 3.6458164361628516, + "tokens_seen": 1097334784 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006741293532338308, + "loss": 0.0723, + "theoretical_loss": 3.645735615546752, + "tokens_seen": 1097596928 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0027343458496034145, + "objective/train/docs_used": 402429, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3593459129333496, + "objective/train/original_loss": 1.35934579372406, + "objective/train/theoretical_loss": 3.645695214503448, + "objective/train/tokens_used": 1118188000, + "objective/train/value_avg": -0.00693511962890625, + "objective/train/value_loss": 0.00017142666911240667, + "objective/train/value_max": -9.316205978393555e-05, + "objective/train/value_min": -0.6669921875, + "objective/train/value_reward_corr": 0.6122719969080082, + "objective/train/value_std": 0.012725830078125, + "objective/train/weight_avg": 1.0028126239776611, + "objective/train/weighted_lm_loss": 1.363199234008789, + "objective/train/weights_max": 1.3812626600265503, + "objective/train/weights_min": 0.38981711864471436, + "theoretical_loss": 3.645695214503448, + "tokens_seen": 1097728000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006740491092922485, + "loss": 0.0753, + "theoretical_loss": 3.645654819634397, + "tokens_seen": 1097859072 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006739688653506661, + "loss": 0.0735, + "theoretical_loss": 3.6455740484123407, + "tokens_seen": 1098121216 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006738886214090836, + "loss": 0.0713, + "theoretical_loss": 3.645493301867145, + "tokens_seen": 1098383360 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006738083774675012, + "loss": 0.0749, + "theoretical_loss": 3.6454125799853854, + "tokens_seen": 1098645504 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006737281335259188, + "loss": 0.0735, + "theoretical_loss": 3.645331882753645, + "tokens_seen": 1098907648 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006736478895843364, + "loss": 0.0733, + "theoretical_loss": 3.6452512101585195, + "tokens_seen": 1099169792 + }, + { + "epoch": 0.33, + "learning_rate": 0.000673567645642754, + "loss": 0.0701, + "theoretical_loss": 3.645170562186615, + "tokens_seen": 1099431936 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006734874017011716, + "loss": 0.0742, + "theoretical_loss": 3.6450899388245466, + "tokens_seen": 1099694080 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006734071577595891, + "loss": 0.0747, + "theoretical_loss": 3.645009340058941, + "tokens_seen": 1099956224 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006733269138180067, + "loss": 0.0734, + "theoretical_loss": 3.644928765876436, + "tokens_seen": 1100218368 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006732466698764244, + "loss": 0.0722, + "theoretical_loss": 3.644848216263678, + "tokens_seen": 1100480512 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006731664259348419, + "loss": 0.0749, + "theoretical_loss": 3.6447676912073255, + "tokens_seen": 1100742656 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0021009258925914764, + "objective/train/docs_used": 403530, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.643606185913086, + "objective/train/original_loss": 1.6436063051223755, + "objective/train/theoretical_loss": 3.6446871906940466, + "objective/train/tokens_used": 1121464800, + "objective/train/value_avg": -0.01171875, + "objective/train/value_loss": 0.00041994385537691414, + "objective/train/value_max": -0.00010311603546142578, + "objective/train/value_min": -0.2127685546875, + "objective/train/value_reward_corr": 0.8979253185065612, + "objective/train/value_std": 0.0230712890625, + "objective/train/weight_avg": 0.998100221157074, + "objective/train/weighted_lm_loss": 1.6399933099746704, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.3681386411190033, + "theoretical_loss": 3.6446871906940466, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006730861819932596, + "loss": 0.0756, + "theoretical_loss": 3.6446871906940466, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006730059380516771, + "loss": 0.072, + "theoretical_loss": 3.6446067147105197, + "tokens_seen": 1101266944 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006729256941100947, + "loss": 0.0712, + "theoretical_loss": 3.644526263243433, + "tokens_seen": 1101529088 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006728454501685123, + "loss": 0.0749, + "theoretical_loss": 3.644445836279488, + "tokens_seen": 1101791232 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006727652062269298, + "loss": 0.0697, + "theoretical_loss": 3.644365433805393, + "tokens_seen": 1102053376 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006726849622853474, + "loss": 0.0719, + "theoretical_loss": 3.6442850558078685, + "tokens_seen": 1102315520 + }, + { + "epoch": 0.33, + "learning_rate": 0.000672604718343765, + "loss": 0.0766, + "theoretical_loss": 3.6442047022736452, + "tokens_seen": 1102577664 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006725244744021827, + "loss": 0.0758, + "theoretical_loss": 3.644124373189464, + "tokens_seen": 1102839808 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006724442304606002, + "loss": 0.0745, + "theoretical_loss": 3.644044068542076, + "tokens_seen": 1103101952 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006723639865190179, + "loss": 0.0728, + "theoretical_loss": 3.643963788318242, + "tokens_seen": 1103364096 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006722837425774354, + "loss": 0.0724, + "theoretical_loss": 3.6438835325047356, + "tokens_seen": 1103626240 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006722034986358531, + "loss": 0.073, + "theoretical_loss": 3.6438033010883375, + "tokens_seen": 1103888384 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006721232546942706, + "loss": 0.0745, + "theoretical_loss": 3.643723094055841, + "tokens_seen": 1104150528 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0009628982516005635, + "objective/train/docs_used": 404699, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5864622592926025, + "objective/train/original_loss": 1.5864622592926025, + "objective/train/theoretical_loss": 3.643682999679431, + "objective/train/tokens_used": 1124741600, + "objective/train/value_avg": -0.00875091552734375, + "objective/train/value_loss": 0.0005373280146159232, + "objective/train/value_max": -6.502866744995117e-05, + "objective/train/value_min": -0.462158203125, + "objective/train/value_reward_corr": 0.6116719535059587, + "objective/train/value_std": 0.014617919921875, + "objective/train/weight_avg": 0.9992680549621582, + "objective/train/weighted_lm_loss": 1.5853101015090942, + "objective/train/weights_max": 1.250488042831421, + "objective/train/weights_min": 0.3689074218273163, + "theoretical_loss": 3.643682999679431, + "tokens_seen": 1104281600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006720430107526881, + "loss": 0.0769, + "theoretical_loss": 3.643642911394048, + "tokens_seen": 1104412672 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006719627668111058, + "loss": 0.0735, + "theoretical_loss": 3.643562753089772, + "tokens_seen": 1104674816 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006718825228695233, + "loss": 0.0733, + "theoretical_loss": 3.6434826191298364, + "tokens_seen": 1104936960 + }, + { + "epoch": 0.33, + "learning_rate": 0.000671802278927941, + "loss": 0.0728, + "theoretical_loss": 3.6434025095010747, + "tokens_seen": 1105199104 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006717220349863586, + "loss": 0.0764, + "theoretical_loss": 3.6433224241903304, + "tokens_seen": 1105461248 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006716417910447762, + "loss": 0.0749, + "theoretical_loss": 3.643242363184458, + "tokens_seen": 1105723392 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006715615471031937, + "loss": 0.074, + "theoretical_loss": 3.6431623264703212, + "tokens_seen": 1105985536 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006714813031616113, + "loss": 0.071, + "theoretical_loss": 3.6430823140347943, + "tokens_seen": 1106247680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006714010592200289, + "loss": 0.0727, + "theoretical_loss": 3.643002325864763, + "tokens_seen": 1106509824 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006713208152784464, + "loss": 0.0727, + "theoretical_loss": 3.6429223619471207, + "tokens_seen": 1106771968 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006712405713368641, + "loss": 0.0749, + "theoretical_loss": 3.6428424222687736, + "tokens_seen": 1107034112 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006711603273952816, + "loss": 0.0739, + "theoretical_loss": 3.642762506816636, + "tokens_seen": 1107296256 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0005870533641427755, + "objective/train/docs_used": 405946, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4835734367370605, + "objective/train/original_loss": 1.4835731983184814, + "objective/train/theoretical_loss": 3.642682615577634, + "objective/train/tokens_used": 1128018400, + "objective/train/value_avg": -0.007633209228515625, + "objective/train/value_loss": 0.00033086538314819336, + "objective/train/value_max": -3.916025161743164e-05, + "objective/train/value_min": -0.341796875, + "objective/train/value_reward_corr": 0.5667257952888192, + "objective/train/value_std": 0.01132965087890625, + "objective/train/weight_avg": 0.999566912651062, + "objective/train/weighted_lm_loss": 1.4834892749786377, + "objective/train/weights_max": 1.2339105606079102, + "objective/train/weights_min": 0.6143267154693604, + "theoretical_loss": 3.642682615577634, + "tokens_seen": 1107558400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006710800834536994, + "loss": 0.0717, + "theoretical_loss": 3.642682615577634, + "tokens_seen": 1107558400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006709998395121169, + "loss": 0.0757, + "theoretical_loss": 3.6426027485387023, + "tokens_seen": 1107820544 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006709195955705344, + "loss": 0.0737, + "theoretical_loss": 3.6425229056867865, + "tokens_seen": 1108082688 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006708393516289521, + "loss": 0.0723, + "theoretical_loss": 3.642443087008844, + "tokens_seen": 1108344832 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006707591076873696, + "loss": 0.0718, + "theoretical_loss": 3.6423632924918383, + "tokens_seen": 1108606976 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006706788637457872, + "loss": 0.0728, + "theoretical_loss": 3.6422835221227468, + "tokens_seen": 1108869120 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006705986198042048, + "loss": 0.0729, + "theoretical_loss": 3.6422037758885555, + "tokens_seen": 1109131264 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006705183758626224, + "loss": 0.0746, + "theoretical_loss": 3.6421240537762607, + "tokens_seen": 1109393408 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006704381319210399, + "loss": 0.0774, + "theoretical_loss": 3.6420443557728674, + "tokens_seen": 1109655552 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006703578879794575, + "loss": 0.0742, + "theoretical_loss": 3.6419646818653932, + "tokens_seen": 1109917696 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006702776440378752, + "loss": 0.076, + "theoretical_loss": 3.641885032040864, + "tokens_seen": 1110179840 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006701974000962927, + "loss": 0.0751, + "theoretical_loss": 3.6418054062863163, + "tokens_seen": 1110441984 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006701171561547104, + "loss": 0.0731, + "theoretical_loss": 3.6417258045887966, + "tokens_seen": 1110704128 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0009894539834931493, + "objective/train/docs_used": 406993, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.475312352180481, + "objective/train/original_loss": 1.4753124713897705, + "objective/train/theoretical_loss": 3.6416860127573765, + "objective/train/tokens_used": 1131295200, + "objective/train/value_avg": -0.00717926025390625, + "objective/train/value_loss": 0.00043393525993451476, + "objective/train/value_max": -3.069639205932617e-05, + "objective/train/value_min": -0.8212890625, + "objective/train/value_reward_corr": 0.6151101633973011, + "objective/train/value_std": 0.01456451416015625, + "objective/train/weight_avg": 0.999183714389801, + "objective/train/weighted_lm_loss": 1.4731035232543945, + "objective/train/weights_max": 1.11464262008667, + "objective/train/weights_min": 0.22452190518379211, + "theoretical_loss": 3.6416860127573765, + "tokens_seen": 1110835200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006700369122131279, + "loss": 0.0746, + "theoretical_loss": 3.641646226935361, + "tokens_seen": 1110966272 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006699566682715455, + "loss": 0.076, + "theoretical_loss": 3.641566673313076, + "tokens_seen": 1111228416 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006698764243299631, + "loss": 0.0776, + "theoretical_loss": 3.6414871437090186, + "tokens_seen": 1111490560 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006697961803883806, + "loss": 0.0752, + "theoretical_loss": 3.641407638110275, + "tokens_seen": 1111752704 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006697159364467983, + "loss": 0.0755, + "theoretical_loss": 3.641328156503942, + "tokens_seen": 1112014848 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006696356925052158, + "loss": 0.0757, + "theoretical_loss": 3.6412486988771255, + "tokens_seen": 1112276992 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006695554485636335, + "loss": 0.0727, + "theoretical_loss": 3.6411692652169423, + "tokens_seen": 1112539136 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006694752046220511, + "loss": 0.0749, + "theoretical_loss": 3.641089855510518, + "tokens_seen": 1112801280 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006693949606804687, + "loss": 0.0726, + "theoretical_loss": 3.64101046974499, + "tokens_seen": 1113063424 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006693147167388862, + "loss": 0.074, + "theoretical_loss": 3.640931107907504, + "tokens_seen": 1113325568 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006692344727973039, + "loss": 0.0768, + "theoretical_loss": 3.6408517699852165, + "tokens_seen": 1113587712 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006691542288557214, + "loss": 0.078, + "theoretical_loss": 3.640772455965293, + "tokens_seen": 1113849856 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0012157823657616973, + "objective/train/docs_used": 408181, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.420784592628479, + "objective/train/original_loss": 1.4207844734191895, + "objective/train/theoretical_loss": 3.64069316583491, + "objective/train/tokens_used": 1134572000, + "objective/train/value_avg": -0.0094451904296875, + "objective/train/value_loss": 0.00029014694155193865, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.5224609375, + "objective/train/value_reward_corr": 0.6501879473840528, + "objective/train/value_std": 0.016510009765625, + "objective/train/weight_avg": 1.0013483762741089, + "objective/train/weighted_lm_loss": 1.4223439693450928, + "objective/train/weights_max": 1.2760461568832397, + "objective/train/weights_min": 0.36923545598983765, + "theoretical_loss": 3.64069316583491, + "tokens_seen": 1114112000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006690739849141389, + "loss": 0.0726, + "theoretical_loss": 3.64069316583491, + "tokens_seen": 1114112000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006689937409725566, + "loss": 0.0752, + "theoretical_loss": 3.640613899581253, + "tokens_seen": 1114374144 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006689134970309741, + "loss": 0.0777, + "theoretical_loss": 3.6405346571915187, + "tokens_seen": 1114636288 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006688332530893918, + "loss": 0.0756, + "theoretical_loss": 3.6404554386529115, + "tokens_seen": 1114898432 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006687530091478094, + "loss": 0.072, + "theoretical_loss": 3.640376243952648, + "tokens_seen": 1115160576 + }, + { + "epoch": 0.34, + "learning_rate": 0.000668672765206227, + "loss": 0.072, + "theoretical_loss": 3.640297073077953, + "tokens_seen": 1115422720 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006685925212646445, + "loss": 0.0779, + "theoretical_loss": 3.640217926016061, + "tokens_seen": 1115684864 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006685122773230621, + "loss": 0.0741, + "theoretical_loss": 3.6401388027542185, + "tokens_seen": 1115947008 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006684320333814797, + "loss": 0.0708, + "theoretical_loss": 3.6400597032796798, + "tokens_seen": 1116209152 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006683517894398973, + "loss": 0.0753, + "theoretical_loss": 3.6399806275797095, + "tokens_seen": 1116471296 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006682715454983149, + "loss": 0.0761, + "theoretical_loss": 3.639901575641582, + "tokens_seen": 1116733440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006681913015567324, + "loss": 0.0741, + "theoretical_loss": 3.6398225474525816, + "tokens_seen": 1116995584 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006681110576151502, + "loss": 0.0726, + "theoretical_loss": 3.639743543000003, + "tokens_seen": 1117257728 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.000741979107260704, + "objective/train/docs_used": 409409, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5466253757476807, + "objective/train/original_loss": 1.5466253757476807, + "objective/train/theoretical_loss": 3.639704049670904, + "objective/train/tokens_used": 1137848800, + "objective/train/value_avg": -0.00763702392578125, + "objective/train/value_loss": 0.0003765631699934602, + "objective/train/value_max": -5.8770179748535156e-05, + "objective/train/value_min": -0.3564453125, + "objective/train/value_reward_corr": 0.5197531699733359, + "objective/train/value_std": 0.01153564453125, + "objective/train/weight_avg": 1.0008995532989502, + "objective/train/weighted_lm_loss": 1.5469353199005127, + "objective/train/weights_max": 1.3829063177108765, + "objective/train/weights_min": 0.22673387825489044, + "theoretical_loss": 3.639704049670904, + "tokens_seen": 1117388800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006680308136735677, + "loss": 0.073, + "theoretical_loss": 3.6396645622711494, + "tokens_seen": 1117519872 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006679505697319852, + "loss": 0.0764, + "theoretical_loss": 3.639585605253335, + "tokens_seen": 1117782016 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006678703257904029, + "loss": 0.0738, + "theoretical_loss": 3.639506671933882, + "tokens_seen": 1118044160 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006677900818488204, + "loss": 0.0739, + "theoretical_loss": 3.639427762300125, + "tokens_seen": 1118306304 + }, + { + "epoch": 0.34, + "learning_rate": 0.000667709837907238, + "loss": 0.0748, + "theoretical_loss": 3.6393488763394064, + "tokens_seen": 1118568448 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006676295939656556, + "loss": 0.0738, + "theoretical_loss": 3.639270014039078, + "tokens_seen": 1118830592 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006675493500240732, + "loss": 0.074, + "theoretical_loss": 3.6391911753865034, + "tokens_seen": 1119092736 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006674691060824907, + "loss": 0.0754, + "theoretical_loss": 3.639112360369054, + "tokens_seen": 1119354880 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006673888621409083, + "loss": 0.0763, + "theoretical_loss": 3.6390335689741113, + "tokens_seen": 1119617024 + }, + { + "epoch": 0.34, + "learning_rate": 0.000667308618199326, + "loss": 0.075, + "theoretical_loss": 3.638954801189067, + "tokens_seen": 1119879168 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006672283742577436, + "loss": 0.076, + "theoretical_loss": 3.6388760570013226, + "tokens_seen": 1120141312 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006671481303161612, + "loss": 0.0732, + "theoretical_loss": 3.6387973363982877, + "tokens_seen": 1120403456 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0013283737935125828, + "objective/train/docs_used": 410712, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4400638341903687, + "objective/train/original_loss": 1.440063714981079, + "objective/train/theoretical_loss": 3.638718639367384, + "objective/train/tokens_used": 1141125600, + "objective/train/value_avg": -0.007556915283203125, + "objective/train/value_loss": 0.0001608040911378339, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.611328125, + "objective/train/value_reward_corr": 0.7982278408915517, + "objective/train/value_std": 0.01517486572265625, + "objective/train/weight_avg": 1.0014057159423828, + "objective/train/weighted_lm_loss": 1.4431976079940796, + "objective/train/weights_max": 1.3930864334106445, + "objective/train/weights_min": 0.5440833568572998, + "theoretical_loss": 3.638718639367384, + "tokens_seen": 1120665600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006670678863745787, + "loss": 0.0728, + "theoretical_loss": 3.638718639367384, + "tokens_seen": 1120665600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006669876424329964, + "loss": 0.0737, + "theoretical_loss": 3.638639965896041, + "tokens_seen": 1120927744 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006669073984914139, + "loss": 0.0766, + "theoretical_loss": 3.638561315971698, + "tokens_seen": 1121189888 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006668271545498314, + "loss": 0.0733, + "theoretical_loss": 3.638482689581805, + "tokens_seen": 1121452032 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006667469106082491, + "loss": 0.0724, + "theoretical_loss": 3.6384040867138214, + "tokens_seen": 1121714176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006666666666666666, + "loss": 0.0765, + "theoretical_loss": 3.6383255073552148, + "tokens_seen": 1121976320 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006665864227250843, + "loss": 0.0754, + "theoretical_loss": 3.638246951493463, + "tokens_seen": 1122238464 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006665061787835019, + "loss": 0.0741, + "theoretical_loss": 3.6381684191160555, + "tokens_seen": 1122500608 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006664259348419195, + "loss": 0.0726, + "theoretical_loss": 3.638089910210488, + "tokens_seen": 1122762752 + }, + { + "epoch": 0.34, + "learning_rate": 0.000666345690900337, + "loss": 0.0762, + "theoretical_loss": 3.638011424764269, + "tokens_seen": 1123024896 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006662654469587546, + "loss": 0.0711, + "theoretical_loss": 3.6379329627649137, + "tokens_seen": 1123287040 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006661852030171722, + "loss": 0.0732, + "theoretical_loss": 3.6378545241999487, + "tokens_seen": 1123549184 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006661049590755897, + "loss": 0.0745, + "theoretical_loss": 3.637776109056909, + "tokens_seen": 1123811328 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0026691153179854155, + "objective/train/docs_used": 411904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2471657991409302, + "objective/train/original_loss": 1.2471659183502197, + "objective/train/theoretical_loss": 3.637736910264719, + "objective/train/tokens_used": 1144402400, + "objective/train/value_avg": -0.00782012939453125, + "objective/train/value_loss": 0.0007978877983987331, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.71435546875, + "objective/train/value_reward_corr": 0.6231384912903344, + "objective/train/value_std": 0.0173797607421875, + "objective/train/weight_avg": 0.9976327419281006, + "objective/train/weighted_lm_loss": 1.243854284286499, + "objective/train/weights_max": 2.0428695678710938, + "objective/train/weights_min": 0.05494469031691551, + "theoretical_loss": 3.637736910264719, + "tokens_seen": 1123942400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006660247151340074, + "loss": 0.0738, + "theoretical_loss": 3.6376977173233405, + "tokens_seen": 1124073472 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006659444711924249, + "loss": 0.073, + "theoretical_loss": 3.6376193489867976, + "tokens_seen": 1124335616 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006658642272508427, + "loss": 0.0757, + "theoretical_loss": 3.6375410040348446, + "tokens_seen": 1124597760 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006657839833092602, + "loss": 0.0712, + "theoretical_loss": 3.637462682455055, + "tokens_seen": 1124859904 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006657037393676778, + "loss": 0.0735, + "theoretical_loss": 3.6373843842350118, + "tokens_seen": 1125122048 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006656234954260954, + "loss": 0.0752, + "theoretical_loss": 3.637306109362308, + "tokens_seen": 1125384192 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006655432514845129, + "loss": 0.0738, + "theoretical_loss": 3.6372278578245454, + "tokens_seen": 1125646336 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006654630075429305, + "loss": 0.0734, + "theoretical_loss": 3.6371496296093357, + "tokens_seen": 1125908480 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006653827636013481, + "loss": 0.0748, + "theoretical_loss": 3.6370714247043003, + "tokens_seen": 1126170624 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006653025196597657, + "loss": 0.0738, + "theoretical_loss": 3.6369932430970695, + "tokens_seen": 1126432768 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006652222757181832, + "loss": 0.0772, + "theoretical_loss": 3.6369150847752834, + "tokens_seen": 1126694912 + }, + { + "epoch": 0.34, + "learning_rate": 0.000665142031776601, + "loss": 0.0748, + "theoretical_loss": 3.6368369497265913, + "tokens_seen": 1126957056 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.004586232826113701, + "objective/train/docs_used": 413086, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5389773845672607, + "objective/train/original_loss": 1.5389773845672607, + "objective/train/theoretical_loss": 3.6367588379386513, + "objective/train/tokens_used": 1147679200, + "objective/train/value_avg": -0.01276397705078125, + "objective/train/value_loss": 0.001206457382068038, + "objective/train/value_max": -8.285045623779297e-05, + "objective/train/value_min": -0.6474609375, + "objective/train/value_reward_corr": 0.7707702012782308, + "objective/train/value_std": 0.0240020751953125, + "objective/train/weight_avg": 0.9959734678268433, + "objective/train/weighted_lm_loss": 1.5339165925979614, + "objective/train/weights_max": 1.6746848821640015, + "objective/train/weights_min": 0.36978310346603394, + "theoretical_loss": 3.6367588379386513, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006650617878350185, + "loss": 0.0734, + "theoretical_loss": 3.6367588379386513, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.34, + "learning_rate": 0.000664981543893436, + "loss": 0.0756, + "theoretical_loss": 3.636680749399133, + "tokens_seen": 1127481344 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006649012999518537, + "loss": 0.0745, + "theoretical_loss": 3.6366026840957133, + "tokens_seen": 1127743488 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006648210560102712, + "loss": 0.076, + "theoretical_loss": 3.636524642016079, + "tokens_seen": 1128005632 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006647408120686889, + "loss": 0.0745, + "theoretical_loss": 3.636446623147927, + "tokens_seen": 1128267776 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006646605681271064, + "loss": 0.0763, + "theoretical_loss": 3.6363686274789626, + "tokens_seen": 1128529920 + }, + { + "epoch": 0.34, + "learning_rate": 0.000664580324185524, + "loss": 0.0731, + "theoretical_loss": 3.6362906549969014, + "tokens_seen": 1128792064 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006645000802439416, + "loss": 0.0743, + "theoretical_loss": 3.6362127056894673, + "tokens_seen": 1129054208 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006644198363023591, + "loss": 0.0753, + "theoretical_loss": 3.6361347795443955, + "tokens_seen": 1129316352 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006643395923607768, + "loss": 0.0739, + "theoretical_loss": 3.636056876549427, + "tokens_seen": 1129578496 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006642593484191944, + "loss": 0.074, + "theoretical_loss": 3.6359789966923164, + "tokens_seen": 1129840640 + }, + { + "epoch": 0.34, + "learning_rate": 0.000664179104477612, + "loss": 0.0773, + "theoretical_loss": 3.6359011399608243, + "tokens_seen": 1130102784 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006640988605360295, + "loss": 0.0749, + "theoretical_loss": 3.6358233063427225, + "tokens_seen": 1130364928 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0018270047148689628, + "objective/train/docs_used": 414307, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.627175211906433, + "objective/train/original_loss": 1.6271753311157227, + "objective/train/theoretical_loss": 3.635784398197374, + "objective/train/tokens_used": 1150956000, + "objective/train/value_avg": -0.00772857666015625, + "objective/train/value_loss": 0.0002652402617968619, + "objective/train/value_max": -0.00010472536087036133, + "objective/train/value_min": -0.96875, + "objective/train/value_reward_corr": 0.6389814979319877, + "objective/train/value_std": 0.01456451416015625, + "objective/train/weight_avg": 1.0019452571868896, + "objective/train/weighted_lm_loss": 1.630281686782837, + "objective/train/weights_max": 1.5185024738311768, + "objective/train/weights_min": 0.3883684575557709, + "theoretical_loss": 3.635784398197374, + "tokens_seen": 1130496000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006640186165944472, + "loss": 0.0716, + "theoretical_loss": 3.635745495825791, + "tokens_seen": 1130627072 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006639383726528647, + "loss": 0.0721, + "theoretical_loss": 3.63566770839782, + "tokens_seen": 1130889216 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006638581287112822, + "loss": 0.0743, + "theoretical_loss": 3.6355899440466075, + "tokens_seen": 1131151360 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006637778847696999, + "loss": 0.0731, + "theoretical_loss": 3.635512202759964, + "tokens_seen": 1131413504 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006636976408281174, + "loss": 0.0738, + "theoretical_loss": 3.635434484525704, + "tokens_seen": 1131675648 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006636173968865351, + "loss": 0.076, + "theoretical_loss": 3.6353567893316567, + "tokens_seen": 1131937792 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006635371529449527, + "loss": 0.0738, + "theoretical_loss": 3.6352791171656573, + "tokens_seen": 1132199936 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006634569090033703, + "loss": 0.0743, + "theoretical_loss": 3.635201468015551, + "tokens_seen": 1132462080 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006633766650617879, + "loss": 0.074, + "theoretical_loss": 3.635123841869193, + "tokens_seen": 1132724224 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006632964211202054, + "loss": 0.0752, + "theoretical_loss": 3.6350462387144464, + "tokens_seen": 1132986368 + }, + { + "epoch": 0.34, + "learning_rate": 0.000663216177178623, + "loss": 0.0748, + "theoretical_loss": 3.634968658539184, + "tokens_seen": 1133248512 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006631359332370406, + "loss": 0.0768, + "theoretical_loss": 3.6348911013312883, + "tokens_seen": 1133510656 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0008693314739502966, + "objective/train/docs_used": 415442, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.477584958076477, + "objective/train/original_loss": 1.477584958076477, + "objective/train/theoretical_loss": 3.6348135670786506, + "objective/train/tokens_used": 1154232800, + "objective/train/value_avg": -0.007232666015625, + "objective/train/value_loss": 0.00015491771046072245, + "objective/train/value_max": -9.685754776000977e-05, + "objective/train/value_min": -0.66357421875, + "objective/train/value_reward_corr": 0.5982827764776568, + "objective/train/value_std": 0.01332855224609375, + "objective/train/weight_avg": 1.0009486675262451, + "objective/train/weighted_lm_loss": 1.47885262966156, + "objective/train/weights_max": 1.8655112981796265, + "objective/train/weights_min": 0.6109683513641357, + "theoretical_loss": 3.6348135670786506, + "tokens_seen": 1133772800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006630556892954582, + "loss": 0.0736, + "theoretical_loss": 3.6348135670786506, + "tokens_seen": 1133772800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006629754453538757, + "loss": 0.0746, + "theoretical_loss": 3.6347360557691712, + "tokens_seen": 1134034944 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006628952014122935, + "loss": 0.0739, + "theoretical_loss": 3.63465856739076, + "tokens_seen": 1134297088 + }, + { + "epoch": 0.34, + "learning_rate": 0.000662814957470711, + "loss": 0.0747, + "theoretical_loss": 3.634581101931336, + "tokens_seen": 1134559232 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006627347135291286, + "loss": 0.0753, + "theoretical_loss": 3.6345036593788276, + "tokens_seen": 1134821376 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006626544695875462, + "loss": 0.0758, + "theoretical_loss": 3.6344262397211704, + "tokens_seen": 1135083520 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006625742256459637, + "loss": 0.0754, + "theoretical_loss": 3.6343488429463124, + "tokens_seen": 1135345664 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006624939817043813, + "loss": 0.076, + "theoretical_loss": 3.634271469042208, + "tokens_seen": 1135607808 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006624137377627989, + "loss": 0.0728, + "theoretical_loss": 3.634194117996822, + "tokens_seen": 1135869952 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006623334938212165, + "loss": 0.0736, + "theoretical_loss": 3.634116789798129, + "tokens_seen": 1136132096 + }, + { + "epoch": 0.34, + "learning_rate": 0.000662253249879634, + "loss": 0.077, + "theoretical_loss": 3.6340394844341097, + "tokens_seen": 1136394240 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006621730059380518, + "loss": 0.0771, + "theoretical_loss": 3.6339622018927575, + "tokens_seen": 1136656384 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006620927619964693, + "loss": 0.0741, + "theoretical_loss": 3.633884942162073, + "tokens_seen": 1136918528 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0013180155074223876, + "objective/train/docs_used": 416597, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.555419683456421, + "objective/train/original_loss": 1.555419683456421, + "objective/train/theoretical_loss": 3.633846320846984, + "objective/train/tokens_used": 1157509600, + "objective/train/value_avg": -0.006999969482421875, + "objective/train/value_loss": 0.00019526074174791574, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.3037109375, + "objective/train/value_reward_corr": 0.5462273072896175, + "objective/train/value_std": 0.009796142578125, + "objective/train/weight_avg": 1.0014064311981201, + "objective/train/weighted_lm_loss": 1.5573372840881348, + "objective/train/weights_max": 1.306922197341919, + "objective/train/weights_min": 0.36874493956565857, + "theoretical_loss": 3.633846320846984, + "tokens_seen": 1137049600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006620125180548869, + "loss": 0.0735, + "theoretical_loss": 3.6338077052300664, + "tokens_seen": 1137180672 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006619322741133045, + "loss": 0.0725, + "theoretical_loss": 3.633730491084756, + "tokens_seen": 1137442816 + }, + { + "epoch": 0.34, + "learning_rate": 0.000661852030171722, + "loss": 0.0747, + "theoretical_loss": 3.6336532997141706, + "tokens_seen": 1137704960 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006617717862301397, + "loss": 0.0781, + "theoretical_loss": 3.6335761311063473, + "tokens_seen": 1137967104 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006616915422885572, + "loss": 0.0764, + "theoretical_loss": 3.633498985249332, + "tokens_seen": 1138229248 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006616112983469748, + "loss": 0.074, + "theoretical_loss": 3.63342186213118, + "tokens_seen": 1138491392 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006615310544053924, + "loss": 0.0737, + "theoretical_loss": 3.6333447617399557, + "tokens_seen": 1138753536 + }, + { + "epoch": 0.35, + "learning_rate": 0.00066145081046381, + "loss": 0.0752, + "theoretical_loss": 3.6332676840637324, + "tokens_seen": 1139015680 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006613705665222276, + "loss": 0.076, + "theoretical_loss": 3.633190629090592, + "tokens_seen": 1139277824 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006612903225806452, + "loss": 0.0751, + "theoretical_loss": 3.6331135968086263, + "tokens_seen": 1139539968 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006612100786390628, + "loss": 0.0739, + "theoretical_loss": 3.633036587205935, + "tokens_seen": 1139802112 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006611298346974803, + "loss": 0.0747, + "theoretical_loss": 3.6329596002706275, + "tokens_seen": 1140064256 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0006165788508951664, + "objective/train/docs_used": 417664, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4508864879608154, + "objective/train/original_loss": 1.4508862495422363, + "objective/train/theoretical_loss": 3.632882635990822, + "objective/train/tokens_used": 1160786400, + "objective/train/value_avg": -0.006664276123046875, + "objective/train/value_loss": 0.00015237460320349783, + "objective/train/value_max": -8.285045623779297e-05, + "objective/train/value_min": -0.4951171875, + "objective/train/value_reward_corr": 0.7813708563816653, + "objective/train/value_std": 0.0169219970703125, + "objective/train/weight_avg": 1.000691533088684, + "objective/train/weighted_lm_loss": 1.4525622129440308, + "objective/train/weights_max": 1.1556388139724731, + "objective/train/weights_min": 0.6773775815963745, + "theoretical_loss": 3.632882635990822, + "tokens_seen": 1140326400 + }, + { + "epoch": 0.35, + "learning_rate": 0.000661049590755898, + "loss": 0.0745, + "theoretical_loss": 3.632882635990822, + "tokens_seen": 1140326400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006609693468143155, + "loss": 0.0764, + "theoretical_loss": 3.632805694354646, + "tokens_seen": 1140588544 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006608891028727331, + "loss": 0.0752, + "theoretical_loss": 3.6327287753502358, + "tokens_seen": 1140850688 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006608088589311507, + "loss": 0.0753, + "theoretical_loss": 3.632651878965735, + "tokens_seen": 1141112832 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006607286149895682, + "loss": 0.0734, + "theoretical_loss": 3.632575005189299, + "tokens_seen": 1141374976 + }, + { + "epoch": 0.35, + "learning_rate": 0.000660648371047986, + "loss": 0.0765, + "theoretical_loss": 3.6324981540090895, + "tokens_seen": 1141637120 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006605681271064035, + "loss": 0.074, + "theoretical_loss": 3.6324213254132793, + "tokens_seen": 1141899264 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006604878831648211, + "loss": 0.0758, + "theoretical_loss": 3.632344519390049, + "tokens_seen": 1142161408 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006604076392232387, + "loss": 0.0726, + "theoretical_loss": 3.632267735927588, + "tokens_seen": 1142423552 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006603273952816562, + "loss": 0.0747, + "theoretical_loss": 3.632190975014094, + "tokens_seen": 1142685696 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006602471513400738, + "loss": 0.0723, + "theoretical_loss": 3.6321142366377757, + "tokens_seen": 1142947840 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006601669073984914, + "loss": 0.072, + "theoretical_loss": 3.6320375207868483, + "tokens_seen": 1143209984 + }, + { + "epoch": 0.35, + "learning_rate": 0.000660086663456909, + "loss": 0.0732, + "theoretical_loss": 3.6319608274495376, + "tokens_seen": 1143472128 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.002307515824213624, + "objective/train/docs_used": 418760, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4072736501693726, + "objective/train/original_loss": 1.407273769378662, + "objective/train/theoretical_loss": 3.6319224892198108, + "objective/train/tokens_used": 1164063200, + "objective/train/value_avg": -0.00730133056640625, + "objective/train/value_loss": 0.00011779867054428905, + "objective/train/value_max": -0.00014889240264892578, + "objective/train/value_min": -0.4365234375, + "objective/train/value_reward_corr": 0.7994153780614749, + "objective/train/value_std": 0.01413726806640625, + "objective/train/weight_avg": 1.0023618936538696, + "objective/train/weighted_lm_loss": 1.4107744693756104, + "objective/train/weights_max": 1.1094887256622314, + "objective/train/weights_min": 0.37357577681541443, + "theoretical_loss": 3.6319224892198108, + "tokens_seen": 1143603200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006600064195153265, + "loss": 0.0745, + "theoretical_loss": 3.6318841566140767, + "tokens_seen": 1143734272 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006599261755737443, + "loss": 0.0724, + "theoretical_loss": 3.63180750826871, + "tokens_seen": 1143996416 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006598459316321618, + "loss": 0.0701, + "theoretical_loss": 3.6317308824016874, + "tokens_seen": 1144258560 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006597656876905794, + "loss": 0.0707, + "theoretical_loss": 3.6316542790012702, + "tokens_seen": 1144520704 + }, + { + "epoch": 0.35, + "learning_rate": 0.000659685443748997, + "loss": 0.0718, + "theoretical_loss": 3.631577698055727, + "tokens_seen": 1144782848 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006596051998074145, + "loss": 0.0708, + "theoretical_loss": 3.631501139553337, + "tokens_seen": 1145044992 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006595249558658322, + "loss": 0.0739, + "theoretical_loss": 3.6314246034823867, + "tokens_seen": 1145307136 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006594447119242497, + "loss": 0.0751, + "theoretical_loss": 3.631348089831171, + "tokens_seen": 1145569280 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006593644679826673, + "loss": 0.0737, + "theoretical_loss": 3.631271598587995, + "tokens_seen": 1145831424 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006592842240410849, + "loss": 0.0741, + "theoretical_loss": 3.631195129741172, + "tokens_seen": 1146093568 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006592039800995026, + "loss": 0.0749, + "theoretical_loss": 3.631118683279024, + "tokens_seen": 1146355712 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006591237361579201, + "loss": 0.0735, + "theoretical_loss": 3.6310422591898814, + "tokens_seen": 1146617856 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.001571335014887154, + "objective/train/docs_used": 419954, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4320921897888184, + "objective/train/original_loss": 1.4320919513702393, + "objective/train/theoretical_loss": 3.630965857462084, + "objective/train/tokens_used": 1167340000, + "objective/train/value_avg": -0.00972747802734375, + "objective/train/value_loss": 0.00031021423637866974, + "objective/train/value_max": -0.00012934207916259766, + "objective/train/value_min": -0.625, + "objective/train/value_reward_corr": 0.6921582137917927, + "objective/train/value_std": 0.0173187255859375, + "objective/train/weight_avg": 1.0017169713974, + "objective/train/weighted_lm_loss": 1.4336016178131104, + "objective/train/weights_max": 1.7058966159820557, + "objective/train/weights_min": 0.5872719287872314, + "theoretical_loss": 3.630965857462084, + "tokens_seen": 1146880000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006590434922163377, + "loss": 0.0716, + "theoretical_loss": 3.630965857462084, + "tokens_seen": 1146880000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006589632482747553, + "loss": 0.0744, + "theoretical_loss": 3.6308894780839798, + "tokens_seen": 1147142144 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006588830043331728, + "loss": 0.0716, + "theoretical_loss": 3.630813121043926, + "tokens_seen": 1147404288 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006588027603915905, + "loss": 0.0737, + "theoretical_loss": 3.630736786330288, + "tokens_seen": 1147666432 + }, + { + "epoch": 0.35, + "learning_rate": 0.000658722516450008, + "loss": 0.0746, + "theoretical_loss": 3.630660473931441, + "tokens_seen": 1147928576 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006586422725084256, + "loss": 0.0756, + "theoretical_loss": 3.6305841838357673, + "tokens_seen": 1148190720 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006585620285668432, + "loss": 0.0706, + "theoretical_loss": 3.630507916031659, + "tokens_seen": 1148452864 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006584817846252607, + "loss": 0.0745, + "theoretical_loss": 3.630431670507517, + "tokens_seen": 1148715008 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006584015406836785, + "loss": 0.0741, + "theoretical_loss": 3.6303554472517496, + "tokens_seen": 1148977152 + }, + { + "epoch": 0.35, + "learning_rate": 0.000658321296742096, + "loss": 0.0716, + "theoretical_loss": 3.6302792462527758, + "tokens_seen": 1149239296 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006582410528005136, + "loss": 0.071, + "theoretical_loss": 3.6302030674990213, + "tokens_seen": 1149501440 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006581608088589312, + "loss": 0.075, + "theoretical_loss": 3.6301269109789214, + "tokens_seen": 1149763584 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006580805649173488, + "loss": 0.0719, + "theoretical_loss": 3.63005077668092, + "tokens_seen": 1150025728 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0009393309592269361, + "objective/train/docs_used": 421268, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4447449445724487, + "objective/train/original_loss": 1.4447450637817383, + "objective/train/theoretical_loss": 3.630012717861597, + "objective/train/tokens_used": 1170616800, + "objective/train/value_avg": -0.00754547119140625, + "objective/train/value_loss": 0.0002241473994217813, + "objective/train/value_max": -0.0001767873764038086, + "objective/train/value_min": -0.6748046875, + "objective/train/value_reward_corr": 0.743822312179771, + "objective/train/value_std": 0.0171661376953125, + "objective/train/weight_avg": 1.0010449886322021, + "objective/train/weighted_lm_loss": 1.4458165168762207, + "objective/train/weights_max": 1.4855008125305176, + "objective/train/weights_min": 0.5288990139961243, + "theoretical_loss": 3.630012717861597, + "tokens_seen": 1150156800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006580003209757663, + "loss": 0.0723, + "theoretical_loss": 3.62997466459347, + "tokens_seen": 1150287872 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006579200770341839, + "loss": 0.0705, + "theoretical_loss": 3.629898574705031, + "tokens_seen": 1150550016 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006578398330926015, + "loss": 0.0735, + "theoretical_loss": 3.629822507004075, + "tokens_seen": 1150812160 + }, + { + "epoch": 0.35, + "learning_rate": 0.000657759589151019, + "loss": 0.0723, + "theoretical_loss": 3.629746461479079, + "tokens_seen": 1151074304 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006576793452094368, + "loss": 0.0714, + "theoretical_loss": 3.62967043811853, + "tokens_seen": 1151336448 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006575991012678543, + "loss": 0.0736, + "theoretical_loss": 3.629594436910924, + "tokens_seen": 1151598592 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006575188573262719, + "loss": 0.0728, + "theoretical_loss": 3.6295184578447643, + "tokens_seen": 1151860736 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006574386133846895, + "loss": 0.0699, + "theoretical_loss": 3.6294425009085645, + "tokens_seen": 1152122880 + }, + { + "epoch": 0.35, + "learning_rate": 0.000657358369443107, + "loss": 0.073, + "theoretical_loss": 3.6293665660908454, + "tokens_seen": 1152385024 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006572781255015246, + "loss": 0.0718, + "theoretical_loss": 3.6292906533801372, + "tokens_seen": 1152647168 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006571978815599422, + "loss": 0.0742, + "theoretical_loss": 3.6292147627649776, + "tokens_seen": 1152909312 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006571176376183598, + "loss": 0.0752, + "theoretical_loss": 3.6291388942339147, + "tokens_seen": 1153171456 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.00047070140135474503, + "objective/train/docs_used": 422447, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6008491516113281, + "objective/train/original_loss": 1.6008491516113281, + "objective/train/theoretical_loss": 3.6290630477755026, + "objective/train/tokens_used": 1173893600, + "objective/train/value_avg": -0.00966644287109375, + "objective/train/value_loss": 0.0003931581450160593, + "objective/train/value_max": -9.608268737792969e-05, + "objective/train/value_min": -0.392333984375, + "objective/train/value_reward_corr": 0.647847413214932, + "objective/train/value_std": 0.015899658203125, + "objective/train/weight_avg": 1.0006462335586548, + "objective/train/weighted_lm_loss": 1.601322054862976, + "objective/train/weights_max": 1.4302529096603394, + "objective/train/weights_min": 0.3789495527744293, + "theoretical_loss": 3.6290630477755026, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006570373936767774, + "loss": 0.0714, + "theoretical_loss": 3.6290630477755026, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006569571497351951, + "loss": 0.0717, + "theoretical_loss": 3.6289872233783065, + "tokens_seen": 1153695744 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006568769057936126, + "loss": 0.0705, + "theoretical_loss": 3.6289114210308977, + "tokens_seen": 1153957888 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006567966618520302, + "loss": 0.0728, + "theoretical_loss": 3.628835640721859, + "tokens_seen": 1154220032 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006567164179104478, + "loss": 0.0725, + "theoretical_loss": 3.6287598824397787, + "tokens_seen": 1154482176 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006566361739688653, + "loss": 0.0747, + "theoretical_loss": 3.6286841461732546, + "tokens_seen": 1154744320 + }, + { + "epoch": 0.35, + "learning_rate": 0.000656555930027283, + "loss": 0.0713, + "theoretical_loss": 3.6286084319108944, + "tokens_seen": 1155006464 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006564756860857005, + "loss": 0.0701, + "theoretical_loss": 3.628532739641312, + "tokens_seen": 1155268608 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006563954421441181, + "loss": 0.0699, + "theoretical_loss": 3.6284570693531317, + "tokens_seen": 1155530752 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006563151982025357, + "loss": 0.0717, + "theoretical_loss": 3.6283814210349847, + "tokens_seen": 1155792896 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006562349542609534, + "loss": 0.0709, + "theoretical_loss": 3.628305794675512, + "tokens_seen": 1156055040 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006561547103193709, + "loss": 0.0711, + "theoretical_loss": 3.6282301902633627, + "tokens_seen": 1156317184 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006560744663777885, + "loss": 0.0692, + "theoretical_loss": 3.628154607787194, + "tokens_seen": 1156579328 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.000545949034858495, + "objective/train/docs_used": 423626, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2599838972091675, + "objective/train/original_loss": 1.2599838972091675, + "objective/train/theoretical_loss": 3.62811682477156, + "objective/train/tokens_used": 1177170400, + "objective/train/value_avg": -0.0085296630859375, + "objective/train/value_loss": 0.00013630140165332705, + "objective/train/value_max": -8.094310760498047e-05, + "objective/train/value_min": -0.234619140625, + "objective/train/value_reward_corr": 0.6883133819120735, + "objective/train/value_std": 0.01348876953125, + "objective/train/weight_avg": 1.000613808631897, + "objective/train/weighted_lm_loss": 1.2607276439666748, + "objective/train/weights_max": 1.1398074626922607, + "objective/train/weights_min": 0.8252780437469482, + "theoretical_loss": 3.62811682477156, + "tokens_seen": 1156710400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006559942224362061, + "loss": 0.0703, + "theoretical_loss": 3.6280790472356705, + "tokens_seen": 1156841472 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006559139784946236, + "loss": 0.07, + "theoretical_loss": 3.628003508597468, + "tokens_seen": 1157103616 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006558337345530413, + "loss": 0.0734, + "theoretical_loss": 3.6279279918612675, + "tokens_seen": 1157365760 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006557534906114588, + "loss": 0.0705, + "theoretical_loss": 3.6278524970157613, + "tokens_seen": 1157627904 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006556732466698765, + "loss": 0.0729, + "theoretical_loss": 3.6277770240496476, + "tokens_seen": 1157890048 + }, + { + "epoch": 0.35, + "learning_rate": 0.000655593002728294, + "loss": 0.0695, + "theoretical_loss": 3.6277015729516355, + "tokens_seen": 1158152192 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006555127587867115, + "loss": 0.0709, + "theoretical_loss": 3.6276261437104402, + "tokens_seen": 1158414336 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006554325148451293, + "loss": 0.0741, + "theoretical_loss": 3.6275507363147868, + "tokens_seen": 1158676480 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006553522709035468, + "loss": 0.0715, + "theoretical_loss": 3.6274753507534077, + "tokens_seen": 1158938624 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006552720269619644, + "loss": 0.0747, + "theoretical_loss": 3.6273999870150444, + "tokens_seen": 1159200768 + }, + { + "epoch": 0.35, + "learning_rate": 0.000655191783020382, + "loss": 0.0684, + "theoretical_loss": 3.627324645088446, + "tokens_seen": 1159462912 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006551115390787996, + "loss": 0.0722, + "theoretical_loss": 3.627249324962371, + "tokens_seen": 1159725056 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0010166645515710115, + "objective/train/docs_used": 424758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4893251657485962, + "objective/train/original_loss": 1.4893251657485962, + "objective/train/theoretical_loss": 3.6271740266255854, + "objective/train/tokens_used": 1180447200, + "objective/train/value_avg": -0.00568389892578125, + "objective/train/value_loss": 0.00012459652498364449, + "objective/train/value_max": -9.101629257202148e-05, + "objective/train/value_min": -0.2247314453125, + "objective/train/value_reward_corr": 0.6477667845280287, + "objective/train/value_std": 0.009185791015625, + "objective/train/weight_avg": 1.0010737180709839, + "objective/train/weighted_lm_loss": 1.4913969039916992, + "objective/train/weights_max": 1.1035935878753662, + "objective/train/weights_min": 0.36922982335090637, + "theoretical_loss": 3.6271740266255854, + "tokens_seen": 1159987200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006550312951372171, + "loss": 0.0719, + "theoretical_loss": 3.6271740266255854, + "tokens_seen": 1159987200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006549510511956347, + "loss": 0.0727, + "theoretical_loss": 3.6270987500668648, + "tokens_seen": 1160249344 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006548708072540523, + "loss": 0.0727, + "theoretical_loss": 3.6270234952749902, + "tokens_seen": 1160511488 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006547905633124698, + "loss": 0.0712, + "theoretical_loss": 3.6269482622387548, + "tokens_seen": 1160773632 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006547103193708876, + "loss": 0.0725, + "theoretical_loss": 3.6268730509469567, + "tokens_seen": 1161035776 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006546300754293051, + "loss": 0.0699, + "theoretical_loss": 3.626797861388404, + "tokens_seen": 1161297920 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006545498314877228, + "loss": 0.073, + "theoretical_loss": 3.6267226935519132, + "tokens_seen": 1161560064 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006544695875461403, + "loss": 0.0723, + "theoretical_loss": 3.626647547426309, + "tokens_seen": 1161822208 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006543893436045578, + "loss": 0.0728, + "theoretical_loss": 3.6265724230004226, + "tokens_seen": 1162084352 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006543090996629755, + "loss": 0.0713, + "theoretical_loss": 3.6264973202630966, + "tokens_seen": 1162346496 + }, + { + "epoch": 0.35, + "learning_rate": 0.000654228855721393, + "loss": 0.0697, + "theoretical_loss": 3.6264222392031797, + "tokens_seen": 1162608640 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006541486117798106, + "loss": 0.0696, + "theoretical_loss": 3.6263471798095286, + "tokens_seen": 1162870784 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006540683678382282, + "loss": 0.0749, + "theoretical_loss": 3.6262721420710093, + "tokens_seen": 1163132928 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0016251426422968507, + "objective/train/docs_used": 425912, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4275366067886353, + "objective/train/original_loss": 1.4275367259979248, + "objective/train/theoretical_loss": 3.6262346313189475, + "objective/train/tokens_used": 1183724000, + "objective/train/value_avg": -0.008636474609375, + "objective/train/value_loss": 0.00021610024850815535, + "objective/train/value_max": -0.0001366138458251953, + "objective/train/value_min": -0.300537109375, + "objective/train/value_reward_corr": 0.6914367282185478, + "objective/train/value_std": 0.01494598388671875, + "objective/train/weight_avg": 1.0017211437225342, + "objective/train/weighted_lm_loss": 1.4297378063201904, + "objective/train/weights_max": 1.1434952020645142, + "objective/train/weights_min": 0.37196043133735657, + "theoretical_loss": 3.6262346313189475, + "tokens_seen": 1163264000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006539881238966459, + "loss": 0.0709, + "theoretical_loss": 3.6261971259764962, + "tokens_seen": 1163395072 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006539078799550634, + "loss": 0.0696, + "theoretical_loss": 3.626122131514871, + "tokens_seen": 1163657216 + }, + { + "epoch": 0.35, + "learning_rate": 0.000653827636013481, + "loss": 0.0724, + "theoretical_loss": 3.626047158675024, + "tokens_seen": 1163919360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006537473920718986, + "loss": 0.0702, + "theoretical_loss": 3.625972207445854, + "tokens_seen": 1164181504 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006536671481303161, + "loss": 0.0686, + "theoretical_loss": 3.625897277816267, + "tokens_seen": 1164443648 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006535869041887338, + "loss": 0.0709, + "theoretical_loss": 3.625822369775179, + "tokens_seen": 1164705792 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006535066602471513, + "loss": 0.0731, + "theoretical_loss": 3.6257474833115113, + "tokens_seen": 1164967936 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006534264163055689, + "loss": 0.0726, + "theoretical_loss": 3.625672618414198, + "tokens_seen": 1165230080 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006533461723639865, + "loss": 0.0715, + "theoretical_loss": 3.6255977750721753, + "tokens_seen": 1165492224 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006532659284224042, + "loss": 0.0746, + "theoretical_loss": 3.6255229532743933, + "tokens_seen": 1165754368 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006531856844808218, + "loss": 0.0713, + "theoretical_loss": 3.625448153009807, + "tokens_seen": 1166016512 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006531054405392393, + "loss": 0.0692, + "theoretical_loss": 3.6253733742673795, + "tokens_seen": 1166278656 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0018858092371374369, + "objective/train/docs_used": 427016, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.495458960533142, + "objective/train/original_loss": 1.4954588413238525, + "objective/train/theoretical_loss": 3.625298617036084, + "objective/train/tokens_used": 1187000800, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00015289847215171903, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.302001953125, + "objective/train/value_reward_corr": 0.7871390335182029, + "objective/train/value_std": 0.01666259765625, + "objective/train/weight_avg": 1.001957893371582, + "objective/train/weighted_lm_loss": 1.4991862773895264, + "objective/train/weights_max": 1.1765505075454712, + "objective/train/weights_min": 0.373849481344223, + "theoretical_loss": 3.625298617036084, + "tokens_seen": 1166540800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006530251965976569, + "loss": 0.0746, + "theoretical_loss": 3.625298617036084, + "tokens_seen": 1166540800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006529449526560745, + "loss": 0.0728, + "theoretical_loss": 3.6252238813049, + "tokens_seen": 1166802944 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006528647087144921, + "loss": 0.0732, + "theoretical_loss": 3.6251491670628155, + "tokens_seen": 1167065088 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006527844647729096, + "loss": 0.071, + "theoretical_loss": 3.6250744742988275, + "tokens_seen": 1167327232 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006527042208313273, + "loss": 0.0729, + "theoretical_loss": 3.6249998030019404, + "tokens_seen": 1167589376 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006526239768897448, + "loss": 0.0708, + "theoretical_loss": 3.6249251531611666, + "tokens_seen": 1167851520 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006525437329481623, + "loss": 0.0716, + "theoretical_loss": 3.6248505247655265, + "tokens_seen": 1168113664 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006524634890065801, + "loss": 0.073, + "theoretical_loss": 3.6247759178040493, + "tokens_seen": 1168375808 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006523832450649976, + "loss": 0.0705, + "theoretical_loss": 3.624701332265772, + "tokens_seen": 1168637952 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006523030011234152, + "loss": 0.0703, + "theoretical_loss": 3.6246267681397386, + "tokens_seen": 1168900096 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006522227571818328, + "loss": 0.0713, + "theoretical_loss": 3.624552225415003, + "tokens_seen": 1169162240 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006521425132402504, + "loss": 0.0692, + "theoretical_loss": 3.6244777040806255, + "tokens_seen": 1169424384 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006520622692986679, + "loss": 0.0723, + "theoretical_loss": 3.624403204125676, + "tokens_seen": 1169686528 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.00018043404270429164, + "objective/train/docs_used": 428177, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4640066623687744, + "objective/train/original_loss": 1.4640066623687744, + "objective/train/theoretical_loss": 3.6243659621620727, + "objective/train/tokens_used": 1190277600, + "objective/train/value_avg": -0.010498046875, + "objective/train/value_loss": 0.00026088516460731626, + "objective/train/value_max": -5.4776668548583984e-05, + "objective/train/value_min": -0.31982421875, + "objective/train/value_reward_corr": 0.7486174859686674, + "objective/train/value_std": 0.017181396484375, + "objective/train/weight_avg": 1.000304102897644, + "objective/train/weighted_lm_loss": 1.4634703397750854, + "objective/train/weights_max": 1.2624222040176392, + "objective/train/weights_min": 0.3704579770565033, + "theoretical_loss": 3.6243659621620727, + "tokens_seen": 1169817600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006519820253570855, + "loss": 0.0749, + "theoretical_loss": 3.6243287255392307, + "tokens_seen": 1169948672 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006519017814155031, + "loss": 0.0723, + "theoretical_loss": 3.624254268310375, + "tokens_seen": 1170210816 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006518215374739207, + "loss": 0.0697, + "theoretical_loss": 3.6241798324282017, + "tokens_seen": 1170472960 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006517412935323384, + "loss": 0.0731, + "theoretical_loss": 3.624105417881813, + "tokens_seen": 1170735104 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006516610495907559, + "loss": 0.0731, + "theoretical_loss": 3.624031024660317, + "tokens_seen": 1170997248 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006515808056491736, + "loss": 0.0734, + "theoretical_loss": 3.6239566527528306, + "tokens_seen": 1171259392 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006515005617075911, + "loss": 0.0715, + "theoretical_loss": 3.6238823021484796, + "tokens_seen": 1171521536 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006514203177660086, + "loss": 0.0729, + "theoretical_loss": 3.6238079728363974, + "tokens_seen": 1171783680 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006513400738244263, + "loss": 0.0711, + "theoretical_loss": 3.6237336648057243, + "tokens_seen": 1172045824 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006512598298828438, + "loss": 0.0692, + "theoretical_loss": 3.62365937804561, + "tokens_seen": 1172307968 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006511795859412614, + "loss": 0.0686, + "theoretical_loss": 3.6235851125452108, + "tokens_seen": 1172570112 + }, + { + "epoch": 0.36, + "learning_rate": 0.000651099341999679, + "loss": 0.0722, + "theoretical_loss": 3.6235108682936916, + "tokens_seen": 1172832256 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0007615335052832961, + "objective/train/docs_used": 429267, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.439132809638977, + "objective/train/original_loss": 1.4391329288482666, + "objective/train/theoretical_loss": 3.623436645280226, + "objective/train/tokens_used": 1193554400, + "objective/train/value_avg": -0.005706787109375, + "objective/train/value_loss": 8.419386722380295e-05, + "objective/train/value_max": -5.692243576049805e-05, + "objective/train/value_min": -0.278564453125, + "objective/train/value_reward_corr": 0.6225248522847999, + "objective/train/value_std": 0.007747650146484375, + "objective/train/weight_avg": 1.0008031129837036, + "objective/train/weighted_lm_loss": 1.4398229122161865, + "objective/train/weights_max": 1.1341171264648438, + "objective/train/weights_min": 0.7353724837303162, + "theoretical_loss": 3.623436645280226, + "tokens_seen": 1173094400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006510190980580967, + "loss": 0.0723, + "theoretical_loss": 3.623436645280226, + "tokens_seen": 1173094400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006509388541165142, + "loss": 0.0725, + "theoretical_loss": 3.6233624434939946, + "tokens_seen": 1173356544 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006508586101749318, + "loss": 0.07, + "theoretical_loss": 3.623288262924186, + "tokens_seen": 1173618688 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006507783662333494, + "loss": 0.0739, + "theoretical_loss": 3.623214103559997, + "tokens_seen": 1173880832 + }, + { + "epoch": 0.36, + "learning_rate": 0.000650698122291767, + "loss": 0.072, + "theoretical_loss": 3.6231399653906315, + "tokens_seen": 1174142976 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006506178783501846, + "loss": 0.0686, + "theoretical_loss": 3.623065848405303, + "tokens_seen": 1174405120 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006505376344086021, + "loss": 0.0717, + "theoretical_loss": 3.622991752593231, + "tokens_seen": 1174667264 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006504573904670198, + "loss": 0.0724, + "theoretical_loss": 3.6229176779436445, + "tokens_seen": 1174929408 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006503771465254373, + "loss": 0.0706, + "theoretical_loss": 3.6228436244457796, + "tokens_seen": 1175191552 + }, + { + "epoch": 0.36, + "learning_rate": 0.000650296902583855, + "loss": 0.072, + "theoretical_loss": 3.6227695920888796, + "tokens_seen": 1175453696 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006502166586422726, + "loss": 0.0705, + "theoretical_loss": 3.622695580862197, + "tokens_seen": 1175715840 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006501364147006901, + "loss": 0.0702, + "theoretical_loss": 3.6226215907549912, + "tokens_seen": 1175977984 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006500561707591077, + "loss": 0.0719, + "theoretical_loss": 3.6225476217565307, + "tokens_seen": 1176240128 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 2.649899761308916e-05, + "objective/train/docs_used": 430645, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.540775179862976, + "objective/train/original_loss": 1.5407750606536865, + "objective/train/theoretical_loss": 3.6225106451697275, + "objective/train/tokens_used": 1196831200, + "objective/train/value_avg": -0.00846099853515625, + "objective/train/value_loss": 0.00018133767298422754, + "objective/train/value_max": -3.0219554901123047e-05, + "objective/train/value_min": -0.51318359375, + "objective/train/value_reward_corr": 0.7236088657344646, + "objective/train/value_std": 0.01409912109375, + "objective/train/weight_avg": 1.0001122951507568, + "objective/train/weighted_lm_loss": 1.5408101081848145, + "objective/train/weights_max": 1.1531023979187012, + "objective/train/weights_min": 0.3940271735191345, + "theoretical_loss": 3.6225106451697275, + "tokens_seen": 1176371200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006499759268175253, + "loss": 0.0713, + "theoretical_loss": 3.6224736738560894, + "tokens_seen": 1176502272 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006498956828759429, + "loss": 0.0755, + "theoretical_loss": 3.6223997470429516, + "tokens_seen": 1176764416 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006498154389343604, + "loss": 0.0713, + "theoretical_loss": 3.6223258413064086, + "tokens_seen": 1177026560 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006497351949927781, + "loss": 0.0734, + "theoretical_loss": 3.6222519566357585, + "tokens_seen": 1177288704 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006496549510511956, + "loss": 0.0711, + "theoretical_loss": 3.6221780930203096, + "tokens_seen": 1177550848 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006495747071096131, + "loss": 0.0729, + "theoretical_loss": 3.6221042504493743, + "tokens_seen": 1177812992 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006494944631680309, + "loss": 0.0738, + "theoretical_loss": 3.622030428912276, + "tokens_seen": 1178075136 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006494142192264484, + "loss": 0.0699, + "theoretical_loss": 3.6219566283983458, + "tokens_seen": 1178337280 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006493339752848661, + "loss": 0.0747, + "theoretical_loss": 3.6218828488969197, + "tokens_seen": 1178599424 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006492537313432836, + "loss": 0.0706, + "theoretical_loss": 3.6218090903973446, + "tokens_seen": 1178861568 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006491734874017012, + "loss": 0.0764, + "theoretical_loss": 3.621735352888974, + "tokens_seen": 1179123712 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006490932434601188, + "loss": 0.0787, + "theoretical_loss": 3.621661636361169, + "tokens_seen": 1179385856 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -0.00019712685025297105, + "objective/train/docs_used": 431781, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4777541160583496, + "objective/train/original_loss": 1.4777541160583496, + "objective/train/theoretical_loss": 3.6215879408032987, + "objective/train/tokens_used": 1200108000, + "objective/train/value_avg": -0.006439208984375, + "objective/train/value_loss": 0.000245880801230669, + "objective/train/value_max": -6.920099258422852e-05, + "objective/train/value_min": -0.25927734375, + "objective/train/value_reward_corr": 0.6481294948793725, + "objective/train/value_std": 0.011993408203125, + "objective/train/weight_avg": 0.999910295009613, + "objective/train/weighted_lm_loss": 1.476799488067627, + "objective/train/weights_max": 1.2023646831512451, + "objective/train/weights_min": 0.37629902362823486, + "theoretical_loss": 3.6215879408032987, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006490129995185363, + "loss": 0.0742, + "theoretical_loss": 3.6215879408032987, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006489327555769539, + "loss": 0.074, + "theoretical_loss": 3.6215142662047395, + "tokens_seen": 1179910144 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006488525116353715, + "loss": 0.0742, + "theoretical_loss": 3.621440612554876, + "tokens_seen": 1180172288 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006487722676937892, + "loss": 0.0741, + "theoretical_loss": 3.6213669798431005, + "tokens_seen": 1180434432 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006486920237522067, + "loss": 0.0724, + "theoretical_loss": 3.621293368058813, + "tokens_seen": 1180696576 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006486117798106244, + "loss": 0.0739, + "theoretical_loss": 3.621219777191421, + "tokens_seen": 1180958720 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006485315358690419, + "loss": 0.0746, + "theoretical_loss": 3.62114620723034, + "tokens_seen": 1181220864 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006484512919274594, + "loss": 0.0738, + "theoretical_loss": 3.621072658164993, + "tokens_seen": 1181483008 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006483710479858771, + "loss": 0.0735, + "theoretical_loss": 3.6209991299848108, + "tokens_seen": 1181745152 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006482908040442946, + "loss": 0.0751, + "theoretical_loss": 3.620925622679232, + "tokens_seen": 1182007296 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006482105601027123, + "loss": 0.0738, + "theoretical_loss": 3.620852136237702, + "tokens_seen": 1182269440 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006481303161611298, + "loss": 0.0741, + "theoretical_loss": 3.620778670649676, + "tokens_seen": 1182531584 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006480500722195475, + "loss": 0.0709, + "theoretical_loss": 3.620705225904614, + "tokens_seen": 1182793728 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0012156600132584572, + "objective/train/docs_used": 433029, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4418714046478271, + "objective/train/original_loss": 1.4418714046478271, + "objective/train/theoretical_loss": 3.6206685113449044, + "objective/train/tokens_used": 1203384800, + "objective/train/value_avg": -0.007598876953125, + "objective/train/value_loss": 0.0002602523018140346, + "objective/train/value_max": -0.00014889240264892578, + "objective/train/value_min": -0.91015625, + "objective/train/value_reward_corr": 0.7045290549833653, + "objective/train/value_std": 0.014434814453125, + "objective/train/weight_avg": 1.0013296604156494, + "objective/train/weighted_lm_loss": 1.4433629512786865, + "objective/train/weights_max": 1.1642762422561646, + "objective/train/weights_min": 0.37042829394340515, + "theoretical_loss": 3.6206685113449044, + "tokens_seen": 1182924800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006479698282779651, + "loss": 0.0734, + "theoretical_loss": 3.620631801991987, + "tokens_seen": 1183055872 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006478895843363826, + "loss": 0.0752, + "theoretical_loss": 3.6205583989012697, + "tokens_seen": 1183318016 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006478093403948002, + "loss": 0.075, + "theoretical_loss": 3.6204850166219478, + "tokens_seen": 1183580160 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006477290964532178, + "loss": 0.07, + "theoretical_loss": 3.6204116551435126, + "tokens_seen": 1183842304 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006476488525116354, + "loss": 0.0746, + "theoretical_loss": 3.620338314455465, + "tokens_seen": 1184104448 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006475686085700529, + "loss": 0.0741, + "theoretical_loss": 3.6202649945473113, + "tokens_seen": 1184366592 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006474883646284706, + "loss": 0.0724, + "theoretical_loss": 3.6201916954085664, + "tokens_seen": 1184628736 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006474081206868881, + "loss": 0.0724, + "theoretical_loss": 3.620118417028754, + "tokens_seen": 1184890880 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006473278767453056, + "loss": 0.0735, + "theoretical_loss": 3.620045159397403, + "tokens_seen": 1185153024 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006472476328037234, + "loss": 0.0741, + "theoretical_loss": 3.619971922504052, + "tokens_seen": 1185415168 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006471673888621409, + "loss": 0.0712, + "theoretical_loss": 3.6198987063382457, + "tokens_seen": 1185677312 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006470871449205585, + "loss": 0.0757, + "theoretical_loss": 3.6198255108895374, + "tokens_seen": 1185939456 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.001317921094596386, + "objective/train/docs_used": 434243, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5565327405929565, + "objective/train/original_loss": 1.556532859802246, + "objective/train/theoretical_loss": 3.6197523361474877, + "objective/train/tokens_used": 1206661600, + "objective/train/value_avg": -0.0074920654296875, + "objective/train/value_loss": 0.00021194624423515052, + "objective/train/value_max": -0.00010389089584350586, + "objective/train/value_min": -0.22802734375, + "objective/train/value_reward_corr": 0.585920330003381, + "objective/train/value_std": 0.01111602783203125, + "objective/train/weight_avg": 1.0014089345932007, + "objective/train/weighted_lm_loss": 1.5591508150100708, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.3731912076473236, + "theoretical_loss": 3.6197523361474877, + "tokens_seen": 1186201600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006470069009789761, + "loss": 0.0754, + "theoretical_loss": 3.6197523361474877, + "tokens_seen": 1186201600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006469266570373937, + "loss": 0.0723, + "theoretical_loss": 3.619679182101664, + "tokens_seen": 1186463744 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006468464130958113, + "loss": 0.0766, + "theoretical_loss": 3.619606048741643, + "tokens_seen": 1186725888 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006467661691542289, + "loss": 0.0751, + "theoretical_loss": 3.6195329360570065, + "tokens_seen": 1186988032 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006466859252126464, + "loss": 0.0743, + "theoretical_loss": 3.6194598440373467, + "tokens_seen": 1187250176 + }, + { + "epoch": 0.36, + "learning_rate": 0.000646605681271064, + "loss": 0.0721, + "theoretical_loss": 3.619386772672261, + "tokens_seen": 1187512320 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006465254373294817, + "loss": 0.0767, + "theoretical_loss": 3.6193137219513556, + "tokens_seen": 1187774464 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006464451933878992, + "loss": 0.0759, + "theoretical_loss": 3.619240691864243, + "tokens_seen": 1188036608 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006463649494463169, + "loss": 0.071, + "theoretical_loss": 3.619167682400545, + "tokens_seen": 1188298752 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006462847055047344, + "loss": 0.0769, + "theoretical_loss": 3.619094693549889, + "tokens_seen": 1188560896 + }, + { + "epoch": 0.36, + "learning_rate": 0.000646204461563152, + "loss": 0.0729, + "theoretical_loss": 3.6190217253019124, + "tokens_seen": 1188823040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006461242176215696, + "loss": 0.0745, + "theoretical_loss": 3.6189487776462568, + "tokens_seen": 1189085184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006460439736799871, + "loss": 0.0707, + "theoretical_loss": 3.6188758505725738, + "tokens_seen": 1189347328 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0009901868179440498, + "objective/train/docs_used": 435462, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4739112854003906, + "objective/train/original_loss": 1.4739114046096802, + "objective/train/theoretical_loss": 3.61883939475074, + "objective/train/tokens_used": 1209938400, + "objective/train/value_avg": -0.00931549072265625, + "objective/train/value_loss": 0.0008738775504752994, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.97705078125, + "objective/train/value_reward_corr": 0.5927413814353182, + "objective/train/value_std": 0.02313232421875, + "objective/train/weight_avg": 1.0013524293899536, + "objective/train/weighted_lm_loss": 1.4759513139724731, + "objective/train/weights_max": 2.4594109058380127, + "objective/train/weights_min": 0.22472499310970306, + "theoretical_loss": 3.61883939475074, + "tokens_seen": 1189478400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006459637297384047, + "loss": 0.0769, + "theoretical_loss": 3.618802944070522, + "tokens_seen": 1189609472 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006458834857968223, + "loss": 0.0741, + "theoretical_loss": 3.618730058129766, + "tokens_seen": 1189871616 + }, + { + "epoch": 0.36, + "learning_rate": 0.00064580324185524, + "loss": 0.0746, + "theoretical_loss": 3.61865719273998, + "tokens_seen": 1190133760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006457229979136575, + "loss": 0.0738, + "theoretical_loss": 3.6185843478908453, + "tokens_seen": 1190395904 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006456427539720752, + "loss": 0.0736, + "theoretical_loss": 3.618511523572049, + "tokens_seen": 1190658048 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006455625100304927, + "loss": 0.0728, + "theoretical_loss": 3.6184387197732875, + "tokens_seen": 1190920192 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006454822660889103, + "loss": 0.0773, + "theoretical_loss": 3.6183659364842624, + "tokens_seen": 1191182336 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006454020221473279, + "loss": 0.0743, + "theoretical_loss": 3.6182931736946857, + "tokens_seen": 1191444480 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006453217782057454, + "loss": 0.0722, + "theoretical_loss": 3.618220431394274, + "tokens_seen": 1191706624 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006452415342641631, + "loss": 0.0704, + "theoretical_loss": 3.618147709572754, + "tokens_seen": 1191968768 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006451612903225806, + "loss": 0.0739, + "theoretical_loss": 3.618075008219858, + "tokens_seen": 1192230912 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006450810463809983, + "loss": 0.0716, + "theoretical_loss": 3.6180023273253252, + "tokens_seen": 1192493056 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0011386601254343987, + "objective/train/docs_used": 436657, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5473337173461914, + "objective/train/original_loss": 1.5473337173461914, + "objective/train/theoretical_loss": 3.6179296668789043, + "objective/train/tokens_used": 1213215200, + "objective/train/value_avg": -0.01073455810546875, + "objective/train/value_loss": 0.00023775658337399364, + "objective/train/value_max": -0.00012934207916259766, + "objective/train/value_min": -0.73193359375, + "objective/train/value_reward_corr": 0.7040417032080557, + "objective/train/value_std": 0.0167999267578125, + "objective/train/weight_avg": 1.0012502670288086, + "objective/train/weighted_lm_loss": 1.5486918687820435, + "objective/train/weights_max": 1.341107964515686, + "objective/train/weights_min": 0.3696603775024414, + "theoretical_loss": 3.6179296668789043, + "tokens_seen": 1192755200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006450008024394159, + "loss": 0.0754, + "theoretical_loss": 3.6179296668789043, + "tokens_seen": 1192755200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006449205584978334, + "loss": 0.0737, + "theoretical_loss": 3.6178570268703494, + "tokens_seen": 1193017344 + }, + { + "epoch": 0.36, + "learning_rate": 0.000644840314556251, + "loss": 0.0734, + "theoretical_loss": 3.617784407289424, + "tokens_seen": 1193279488 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006447600706146686, + "loss": 0.0747, + "theoretical_loss": 3.617711808125896, + "tokens_seen": 1193541632 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006446798266730862, + "loss": 0.0753, + "theoretical_loss": 3.6176392293695434, + "tokens_seen": 1193803776 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006445995827315037, + "loss": 0.0752, + "theoretical_loss": 3.6175666710101506, + "tokens_seen": 1194065920 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006445193387899214, + "loss": 0.0732, + "theoretical_loss": 3.6174941330375097, + "tokens_seen": 1194328064 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006444390948483389, + "loss": 0.0752, + "theoretical_loss": 3.617421615441419, + "tokens_seen": 1194590208 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006443588509067564, + "loss": 0.0778, + "theoretical_loss": 3.617349118211685, + "tokens_seen": 1194852352 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006442786069651742, + "loss": 0.0732, + "theoretical_loss": 3.6172766413381225, + "tokens_seen": 1195114496 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006441983630235917, + "loss": 0.0702, + "theoretical_loss": 3.617204184810552, + "tokens_seen": 1195376640 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006441181190820094, + "loss": 0.074, + "theoretical_loss": 3.6171317486188013, + "tokens_seen": 1195638784 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006440378751404269, + "loss": 0.0744, + "theoretical_loss": 3.6170593327527074, + "tokens_seen": 1195900928 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0009150534751825035, + "objective/train/docs_used": 437781, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5300925970077515, + "objective/train/original_loss": 1.530092477798462, + "objective/train/theoretical_loss": 3.617023132438607, + "objective/train/tokens_used": 1216492000, + "objective/train/value_avg": -0.006450653076171875, + "objective/train/value_loss": 0.00013045086234342307, + "objective/train/value_max": -0.00013446807861328125, + "objective/train/value_min": -0.371826171875, + "objective/train/value_reward_corr": 0.6219127786332249, + "objective/train/value_std": 0.009521484375, + "objective/train/weight_avg": 1.000975251197815, + "objective/train/weighted_lm_loss": 1.5323002338409424, + "objective/train/weights_max": 1.1521174907684326, + "objective/train/weights_min": 0.3824407160282135, + "theoretical_loss": 3.617023132438607, + "tokens_seen": 1196032000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006439576311988445, + "loss": 0.0727, + "theoretical_loss": 3.616986937202112, + "tokens_seen": 1196163072 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006438773872572621, + "loss": 0.0745, + "theoretical_loss": 3.616914561956867, + "tokens_seen": 1196425216 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006437971433156797, + "loss": 0.0743, + "theoretical_loss": 3.6168422070068287, + "tokens_seen": 1196687360 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006437168993740972, + "loss": 0.0731, + "theoretical_loss": 3.6167698723418624, + "tokens_seen": 1196949504 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006436366554325149, + "loss": 0.0761, + "theoretical_loss": 3.6166975579518406, + "tokens_seen": 1197211648 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006435564114909325, + "loss": 0.0729, + "theoretical_loss": 3.6166252638266423, + "tokens_seen": 1197473792 + }, + { + "epoch": 0.36, + "learning_rate": 0.00064347616754935, + "loss": 0.0746, + "theoretical_loss": 3.616552989956155, + "tokens_seen": 1197735936 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006433959236077677, + "loss": 0.0745, + "theoretical_loss": 3.616480736330272, + "tokens_seen": 1197998080 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006433156796661852, + "loss": 0.0711, + "theoretical_loss": 3.616408502938895, + "tokens_seen": 1198260224 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006432354357246028, + "loss": 0.0701, + "theoretical_loss": 3.616336289771932, + "tokens_seen": 1198522368 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006431551917830204, + "loss": 0.0711, + "theoretical_loss": 3.6162640968192994, + "tokens_seen": 1198784512 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006430749478414379, + "loss": 0.0728, + "theoretical_loss": 3.61619192407092, + "tokens_seen": 1199046656 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0014068173477426171, + "objective/train/docs_used": 438488, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5579599142074585, + "objective/train/original_loss": 1.5579596757888794, + "objective/train/theoretical_loss": 3.6161197715167237, + "objective/train/tokens_used": 1219768800, + "objective/train/value_avg": -0.00788116455078125, + "objective/train/value_loss": 0.0003655260952655226, + "objective/train/value_max": -9.840726852416992e-05, + "objective/train/value_min": -0.95556640625, + "objective/train/value_reward_corr": 0.7506662225236421, + "objective/train/value_std": 0.0210113525390625, + "objective/train/weight_avg": 1.0015637874603271, + "objective/train/weighted_lm_loss": 1.5613462924957275, + "objective/train/weights_max": 1.6007517576217651, + "objective/train/weights_min": 0.22900784015655518, + "theoretical_loss": 3.6161197715167237, + "tokens_seen": 1199308800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006429947038998556, + "loss": 0.0722, + "theoretical_loss": 3.6161197715167237, + "tokens_seen": 1199308800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006429144599582731, + "loss": 0.0719, + "theoretical_loss": 3.616047639146648, + "tokens_seen": 1199570944 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006428342160166908, + "loss": 0.0717, + "theoretical_loss": 3.6159755269506375, + "tokens_seen": 1199833088 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006427539720751084, + "loss": 0.0737, + "theoretical_loss": 3.6159034349186445, + "tokens_seen": 1200095232 + }, + { + "epoch": 0.36, + "learning_rate": 0.000642673728133526, + "loss": 0.0759, + "theoretical_loss": 3.615831363040628, + "tokens_seen": 1200357376 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006425934841919435, + "loss": 0.071, + "theoretical_loss": 3.615759311306553, + "tokens_seen": 1200619520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006425132402503611, + "loss": 0.0715, + "theoretical_loss": 3.6156872797063944, + "tokens_seen": 1200881664 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006424329963087787, + "loss": 0.0745, + "theoretical_loss": 3.6156152682301324, + "tokens_seen": 1201143808 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006423527523671962, + "loss": 0.0753, + "theoretical_loss": 3.6155432768677542, + "tokens_seen": 1201405952 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006422725084256139, + "loss": 0.0726, + "theoretical_loss": 3.615471305609255, + "tokens_seen": 1201668096 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006421922644840314, + "loss": 0.0735, + "theoretical_loss": 3.6153993544446372, + "tokens_seen": 1201930240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006421120205424491, + "loss": 0.0729, + "theoretical_loss": 3.61532742336391, + "tokens_seen": 1202192384 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006420317766008667, + "loss": 0.071, + "theoretical_loss": 3.6152555123570895, + "tokens_seen": 1202454528 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.00142679491546005, + "objective/train/docs_used": 439824, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5132286548614502, + "objective/train/original_loss": 1.5132288932800293, + "objective/train/theoretical_loss": 3.6152195643782763, + "objective/train/tokens_used": 1223045600, + "objective/train/value_avg": -0.00974273681640625, + "objective/train/value_loss": 0.00020919894450344145, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.3330078125, + "objective/train/value_reward_corr": 0.7210474728015025, + "objective/train/value_std": 0.01556396484375, + "objective/train/weight_avg": 1.0015263557434082, + "objective/train/weighted_lm_loss": 1.5158843994140625, + "objective/train/weights_max": 1.3555892705917358, + "objective/train/weights_min": 0.3693855106830597, + "theoretical_loss": 3.6152195643782763, + "tokens_seen": 1202585600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006419515326592842, + "loss": 0.0687, + "theoretical_loss": 3.6151836214141992, + "tokens_seen": 1202716672 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006418712887177018, + "loss": 0.0738, + "theoretical_loss": 3.61511175052527, + "tokens_seen": 1202978816 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006417910447761194, + "loss": 0.0712, + "theoretical_loss": 3.6150398996803395, + "tokens_seen": 1203240960 + }, + { + "epoch": 0.36, + "learning_rate": 0.000641710800834537, + "loss": 0.0735, + "theoretical_loss": 3.6149680688694525, + "tokens_seen": 1203503104 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006416305568929546, + "loss": 0.0736, + "theoretical_loss": 3.614896258082661, + "tokens_seen": 1203765248 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006415503129513722, + "loss": 0.071, + "theoretical_loss": 3.614824467310025, + "tokens_seen": 1204027392 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006414700690097897, + "loss": 0.0744, + "theoretical_loss": 3.614752696541609, + "tokens_seen": 1204289536 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006413898250682074, + "loss": 0.0733, + "theoretical_loss": 3.614680945767488, + "tokens_seen": 1204551680 + }, + { + "epoch": 0.37, + "learning_rate": 0.000641309581126625, + "loss": 0.0727, + "theoretical_loss": 3.614609214977741, + "tokens_seen": 1204813824 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006412293371850425, + "loss": 0.075, + "theoretical_loss": 3.614537504162457, + "tokens_seen": 1205075968 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006411490932434602, + "loss": 0.0737, + "theoretical_loss": 3.614465813311729, + "tokens_seen": 1205338112 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006410688493018777, + "loss": 0.0735, + "theoretical_loss": 3.6143941424156596, + "tokens_seen": 1205600256 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.00039125708281062543, + "objective/train/docs_used": 441116, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4485985040664673, + "objective/train/original_loss": 1.4485986232757568, + "objective/train/theoretical_loss": 3.614322491464357, + "objective/train/tokens_used": 1226322400, + "objective/train/value_avg": -0.01201629638671875, + "objective/train/value_loss": 0.0005621611489914358, + "objective/train/value_max": -0.00015115737915039062, + "objective/train/value_min": -0.95751953125, + "objective/train/value_reward_corr": 0.7828427996520164, + "objective/train/value_std": 0.02960205078125, + "objective/train/weight_avg": 1.0006436109542847, + "objective/train/weighted_lm_loss": 1.449430227279663, + "objective/train/weights_max": 1.638288974761963, + "objective/train/weights_min": 0.3702460527420044, + "theoretical_loss": 3.614322491464357, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006409886053602953, + "loss": 0.0726, + "theoretical_loss": 3.614322491464357, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006409083614187129, + "loss": 0.0761, + "theoretical_loss": 3.614250860447936, + "tokens_seen": 1206124544 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006408281174771304, + "loss": 0.0747, + "theoretical_loss": 3.6141792493565212, + "tokens_seen": 1206386688 + }, + { + "epoch": 0.37, + "learning_rate": 0.000640747873535548, + "loss": 0.0711, + "theoretical_loss": 3.6141076581802416, + "tokens_seen": 1206648832 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006406676295939656, + "loss": 0.0718, + "theoretical_loss": 3.614036086909234, + "tokens_seen": 1206910976 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006405873856523833, + "loss": 0.075, + "theoretical_loss": 3.6139645355336425, + "tokens_seen": 1207173120 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006405071417108009, + "loss": 0.0761, + "theoretical_loss": 3.613893004043617, + "tokens_seen": 1207435264 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006404268977692185, + "loss": 0.0724, + "theoretical_loss": 3.6138214924293166, + "tokens_seen": 1207697408 + }, + { + "epoch": 0.37, + "learning_rate": 0.000640346653827636, + "loss": 0.077, + "theoretical_loss": 3.6137500006809056, + "tokens_seen": 1207959552 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006402664098860537, + "loss": 0.0739, + "theoretical_loss": 3.6136785287885553, + "tokens_seen": 1208221696 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006401861659444712, + "loss": 0.0738, + "theoretical_loss": 3.6136070767424457, + "tokens_seen": 1208483840 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006401059220028887, + "loss": 0.0745, + "theoretical_loss": 3.6135356445327624, + "tokens_seen": 1208745984 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006400256780613064, + "loss": 0.0691, + "theoretical_loss": 3.6134642321496977, + "tokens_seen": 1209008128 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0001867167593445629, + "objective/train/docs_used": 442331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2952028512954712, + "objective/train/original_loss": 1.2952029705047607, + "objective/train/theoretical_loss": 3.613428533390085, + "objective/train/tokens_used": 1229599200, + "objective/train/value_avg": -0.00867462158203125, + "objective/train/value_loss": 0.00022595007612835616, + "objective/train/value_max": -4.756450653076172e-05, + "objective/train/value_min": -0.47412109375, + "objective/train/value_reward_corr": 0.820079703282189, + "objective/train/value_std": 0.019287109375, + "objective/train/weight_avg": 1.0002939701080322, + "objective/train/weighted_lm_loss": 1.2954621315002441, + "objective/train/weights_max": 1.459554672241211, + "objective/train/weights_min": 0.3717447817325592, + "theoretical_loss": 3.613428533390085, + "tokens_seen": 1209139200 + }, + { + "epoch": 0.37, + "learning_rate": 0.000639945434119724, + "loss": 0.0705, + "theoretical_loss": 3.613392839583452, + "tokens_seen": 1209270272 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006398651901781416, + "loss": 0.0742, + "theoretical_loss": 3.6133214668242317, + "tokens_seen": 1209532416 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006397849462365592, + "loss": 0.0707, + "theoretical_loss": 3.613250113862251, + "tokens_seen": 1209794560 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006397047022949768, + "loss": 0.0724, + "theoretical_loss": 3.61317878068773, + "tokens_seen": 1210056704 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006396244583533943, + "loss": 0.0716, + "theoretical_loss": 3.6131074672908965, + "tokens_seen": 1210318848 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006395442144118119, + "loss": 0.0714, + "theoretical_loss": 3.6130361736619854, + "tokens_seen": 1210580992 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006394639704702295, + "loss": 0.0739, + "theoretical_loss": 3.6129648997912382, + "tokens_seen": 1210843136 + }, + { + "epoch": 0.37, + "learning_rate": 0.000639383726528647, + "loss": 0.0724, + "theoretical_loss": 3.6128936456689034, + "tokens_seen": 1211105280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006393034825870647, + "loss": 0.073, + "theoretical_loss": 3.612822411285236, + "tokens_seen": 1211367424 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006392232386454822, + "loss": 0.0755, + "theoretical_loss": 3.612751196630499, + "tokens_seen": 1211629568 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006391429947039, + "loss": 0.0725, + "theoretical_loss": 3.61268000169496, + "tokens_seen": 1211891712 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006390627507623175, + "loss": 0.0744, + "theoretical_loss": 3.612608826468897, + "tokens_seen": 1212153856 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.0002951657515950501, + "objective/train/docs_used": 443456, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.476730465888977, + "objective/train/original_loss": 1.476730465888977, + "objective/train/theoretical_loss": 3.6125376709425923, + "objective/train/tokens_used": 1232876000, + "objective/train/value_avg": -0.0062255859375, + "objective/train/value_loss": 0.00031703378772363067, + "objective/train/value_max": -0.00010073184967041016, + "objective/train/value_min": -0.1983642578125, + "objective/train/value_reward_corr": 0.5360043017511746, + "objective/train/value_std": 0.00960540771484375, + "objective/train/weight_avg": 0.999844491481781, + "objective/train/weighted_lm_loss": 1.4760133028030396, + "objective/train/weights_max": 1.2021102905273438, + "objective/train/weights_min": 0.3719916045665741, + "theoretical_loss": 3.6125376709425923, + "tokens_seen": 1212416000 + }, + { + "epoch": 0.37, + "learning_rate": 0.000638982506820735, + "loss": 0.0738, + "theoretical_loss": 3.6125376709425923, + "tokens_seen": 1212416000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006389022628791527, + "loss": 0.0683, + "theoretical_loss": 3.6124665351063356, + "tokens_seen": 1212678144 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006388220189375702, + "loss": 0.0737, + "theoretical_loss": 3.612395418950424, + "tokens_seen": 1212940288 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006387417749959878, + "loss": 0.0743, + "theoretical_loss": 3.6123243224651604, + "tokens_seen": 1213202432 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006386615310544054, + "loss": 0.0719, + "theoretical_loss": 3.612253245640856, + "tokens_seen": 1213464576 + }, + { + "epoch": 0.37, + "learning_rate": 0.000638581287112823, + "loss": 0.0716, + "theoretical_loss": 3.6121821884678287, + "tokens_seen": 1213726720 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006385010431712405, + "loss": 0.0743, + "theoretical_loss": 3.6121111509364017, + "tokens_seen": 1213988864 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006384207992296582, + "loss": 0.075, + "theoretical_loss": 3.6120401330369067, + "tokens_seen": 1214251008 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006383405552880758, + "loss": 0.0722, + "theoretical_loss": 3.6119691347596814, + "tokens_seen": 1214513152 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006382603113464933, + "loss": 0.074, + "theoretical_loss": 3.611898156095071, + "tokens_seen": 1214775296 + }, + { + "epoch": 0.37, + "learning_rate": 0.000638180067404911, + "loss": 0.0736, + "theoretical_loss": 3.6118271970334264, + "tokens_seen": 1215037440 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006380998234633285, + "loss": 0.0722, + "theoretical_loss": 3.6117562575651068, + "tokens_seen": 1215299584 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006380195795217462, + "loss": 0.0728, + "theoretical_loss": 3.611685337680477, + "tokens_seen": 1215561728 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0022401802707463503, + "objective/train/docs_used": 444688, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3715991973876953, + "objective/train/original_loss": 1.3715991973876953, + "objective/train/theoretical_loss": 3.6116498850790366, + "objective/train/tokens_used": 1236152800, + "objective/train/value_avg": -0.005950927734375, + "objective/train/value_loss": 0.00014855283370707184, + "objective/train/value_max": -0.0001233816146850586, + "objective/train/value_min": -0.921875, + "objective/train/value_reward_corr": 0.452322266230042, + "objective/train/value_std": 0.01007080078125, + "objective/train/weight_avg": 1.0023151636123657, + "objective/train/weighted_lm_loss": 1.3756544589996338, + "objective/train/weights_max": 2.5139997005462646, + "objective/train/weights_min": 0.3757654130458832, + "theoretical_loss": 3.6116498850790366, + "tokens_seen": 1215692800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006379393355801637, + "loss": 0.072, + "theoretical_loss": 3.6116144373699086, + "tokens_seen": 1215823872 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006378590916385812, + "loss": 0.0728, + "theoretical_loss": 3.611543556623782, + "tokens_seen": 1216086016 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006377788476969989, + "loss": 0.0728, + "theoretical_loss": 3.6114726954324814, + "tokens_seen": 1216348160 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006376986037554164, + "loss": 0.0724, + "theoretical_loss": 3.6114018537864, + "tokens_seen": 1216610304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006376183598138341, + "loss": 0.0725, + "theoretical_loss": 3.6113310316759373, + "tokens_seen": 1216872448 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006375381158722517, + "loss": 0.0692, + "theoretical_loss": 3.6112602290914984, + "tokens_seen": 1217134592 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006374578719306693, + "loss": 0.0715, + "theoretical_loss": 3.611189446023497, + "tokens_seen": 1217396736 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006373776279890868, + "loss": 0.0725, + "theoretical_loss": 3.6111186824623527, + "tokens_seen": 1217658880 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006372973840475045, + "loss": 0.0736, + "theoretical_loss": 3.6110479383984915, + "tokens_seen": 1217921024 + }, + { + "epoch": 0.37, + "learning_rate": 0.000637217140105922, + "loss": 0.073, + "theoretical_loss": 3.6109772138223466, + "tokens_seen": 1218183168 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006371368961643395, + "loss": 0.0736, + "theoretical_loss": 3.6109065087243577, + "tokens_seen": 1218445312 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006370566522227572, + "loss": 0.0714, + "theoretical_loss": 3.610835823094972, + "tokens_seen": 1218707456 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0002287530805915594, + "objective/train/docs_used": 445814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5659476518630981, + "objective/train/original_loss": 1.5659475326538086, + "objective/train/theoretical_loss": 3.6107651569246424, + "objective/train/tokens_used": 1239429600, + "objective/train/value_avg": -0.01335906982421875, + "objective/train/value_loss": 0.0003773129137698561, + "objective/train/value_max": -0.00010150671005249023, + "objective/train/value_min": -0.476318359375, + "objective/train/value_reward_corr": 0.8288847136796991, + "objective/train/value_std": 0.0251617431640625, + "objective/train/weight_avg": 1.0004096031188965, + "objective/train/weighted_lm_loss": 1.5659234523773193, + "objective/train/weights_max": 1.2777711153030396, + "objective/train/weights_min": 0.5512197613716125, + "theoretical_loss": 3.6107651569246424, + "tokens_seen": 1218969600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006369764082811747, + "loss": 0.073, + "theoretical_loss": 3.6107651569246424, + "tokens_seen": 1218969600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006368961643395924, + "loss": 0.0712, + "theoretical_loss": 3.610694510203829, + "tokens_seen": 1219231744 + }, + { + "epoch": 0.37, + "learning_rate": 0.00063681592039801, + "loss": 0.0717, + "theoretical_loss": 3.610623882922999, + "tokens_seen": 1219493888 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006367356764564276, + "loss": 0.0716, + "theoretical_loss": 3.6105532750726255, + "tokens_seen": 1219756032 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006366554325148452, + "loss": 0.0718, + "theoretical_loss": 3.6104826866431887, + "tokens_seen": 1220018176 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006365751885732627, + "loss": 0.0716, + "theoretical_loss": 3.6104121176251764, + "tokens_seen": 1220280320 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006364949446316803, + "loss": 0.0723, + "theoretical_loss": 3.6103415680090816, + "tokens_seen": 1220542464 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006364147006900979, + "loss": 0.0739, + "theoretical_loss": 3.6102710377854046, + "tokens_seen": 1220804608 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006363344567485155, + "loss": 0.0705, + "theoretical_loss": 3.610200526944652, + "tokens_seen": 1221066752 + }, + { + "epoch": 0.37, + "learning_rate": 0.000636254212806933, + "loss": 0.0723, + "theoretical_loss": 3.610130035477339, + "tokens_seen": 1221328896 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006361739688653508, + "loss": 0.0753, + "theoretical_loss": 3.6100595633739854, + "tokens_seen": 1221591040 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006360937249237683, + "loss": 0.073, + "theoretical_loss": 3.6099891106251176, + "tokens_seen": 1221853184 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006360134809821858, + "loss": 0.0748, + "theoretical_loss": 3.6099186772212697, + "tokens_seen": 1222115328 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0006381941493600607, + "objective/train/docs_used": 446976, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.419157862663269, + "objective/train/original_loss": 1.419157862663269, + "objective/train/theoretical_loss": 3.6098834677707723, + "objective/train/tokens_used": 1242706400, + "objective/train/value_avg": -0.0130615234375, + "objective/train/value_loss": 0.0005473028286360204, + "objective/train/value_max": -0.00012242794036865234, + "objective/train/value_min": -0.96533203125, + "objective/train/value_reward_corr": 0.8783592914327736, + "objective/train/value_std": 0.036163330078125, + "objective/train/weight_avg": 1.0008801221847534, + "objective/train/weighted_lm_loss": 1.4213128089904785, + "objective/train/weights_max": 1.5615370273590088, + "objective/train/weights_min": 0.368775874376297, + "theoretical_loss": 3.6098834677707723, + "tokens_seen": 1222246400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006359332370406035, + "loss": 0.0709, + "theoretical_loss": 3.6098482631529825, + "tokens_seen": 1222377472 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635852993099021, + "loss": 0.0727, + "theoretical_loss": 3.6097778684108026, + "tokens_seen": 1222639616 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006357727491574386, + "loss": 0.0718, + "theoretical_loss": 3.609707492985284, + "tokens_seen": 1222901760 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006356925052158562, + "loss": 0.0713, + "theoretical_loss": 3.609637136866987, + "tokens_seen": 1223163904 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006356122612742738, + "loss": 0.0696, + "theoretical_loss": 3.609566800046478, + "tokens_seen": 1223426048 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006355320173326913, + "loss": 0.0711, + "theoretical_loss": 3.609496482514332, + "tokens_seen": 1223688192 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635451773391109, + "loss": 0.0733, + "theoretical_loss": 3.609426184261128, + "tokens_seen": 1223950336 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006353715294495266, + "loss": 0.0735, + "theoretical_loss": 3.6093559052774538, + "tokens_seen": 1224212480 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006352912855079442, + "loss": 0.0716, + "theoretical_loss": 3.6092856455539017, + "tokens_seen": 1224474624 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006352110415663618, + "loss": 0.0746, + "theoretical_loss": 3.6092154050810725, + "tokens_seen": 1224736768 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006351307976247793, + "loss": 0.073, + "theoretical_loss": 3.6091451838495727, + "tokens_seen": 1224998912 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635050553683197, + "loss": 0.0714, + "theoretical_loss": 3.609074981850016, + "tokens_seen": 1225261056 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0011464846320450306, + "objective/train/docs_used": 448161, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4441373348236084, + "objective/train/original_loss": 1.4441375732421875, + "objective/train/theoretical_loss": 3.6090047990730216, + "objective/train/tokens_used": 1245983200, + "objective/train/value_avg": -0.0064849853515625, + "objective/train/value_loss": 0.00032763619674369693, + "objective/train/value_max": -0.00011235475540161133, + "objective/train/value_min": -0.94091796875, + "objective/train/value_reward_corr": 0.7932949888128629, + "objective/train/value_std": 0.0202789306640625, + "objective/train/weight_avg": 1.0012918710708618, + "objective/train/weighted_lm_loss": 1.4470592737197876, + "objective/train/weights_max": 1.4689112901687622, + "objective/train/weights_min": 0.4429895877838135, + "theoretical_loss": 3.6090047990730216, + "tokens_seen": 1225523200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006349703097416145, + "loss": 0.0747, + "theoretical_loss": 3.6090047990730216, + "tokens_seen": 1225523200 + }, + { + "epoch": 0.37, + "learning_rate": 0.000634890065800032, + "loss": 0.0712, + "theoretical_loss": 3.6089346355092164, + "tokens_seen": 1225785344 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006348098218584497, + "loss": 0.0737, + "theoretical_loss": 3.6088644911492334, + "tokens_seen": 1226047488 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006347295779168672, + "loss": 0.0731, + "theoretical_loss": 3.6087943659837114, + "tokens_seen": 1226309632 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006346493339752849, + "loss": 0.0708, + "theoretical_loss": 3.6087242600032976, + "tokens_seen": 1226571776 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006345690900337025, + "loss": 0.0721, + "theoretical_loss": 3.6086541731986443, + "tokens_seen": 1226833920 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006344888460921201, + "loss": 0.0688, + "theoretical_loss": 3.6085841055604106, + "tokens_seen": 1227096064 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006344086021505376, + "loss": 0.0729, + "theoretical_loss": 3.608514057079262, + "tokens_seen": 1227358208 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006343283582089553, + "loss": 0.0711, + "theoretical_loss": 3.6084440277458714, + "tokens_seen": 1227620352 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006342481142673728, + "loss": 0.0703, + "theoretical_loss": 3.6083740175509176, + "tokens_seen": 1227882496 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006341678703257904, + "loss": 0.0749, + "theoretical_loss": 3.6083040264850856, + "tokens_seen": 1228144640 + }, + { + "epoch": 0.37, + "learning_rate": 0.000634087626384208, + "loss": 0.0713, + "theoretical_loss": 3.6082340545390674, + "tokens_seen": 1228406784 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006340073824426255, + "loss": 0.0733, + "theoretical_loss": 3.6081641017035615, + "tokens_seen": 1228668928 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -6.965985812712461e-05, + "objective/train/docs_used": 449484, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5495264530181885, + "objective/train/original_loss": 1.5495264530181885, + "objective/train/theoretical_loss": 3.6081291324493456, + "objective/train/tokens_used": 1249260000, + "objective/train/value_avg": -0.00957489013671875, + "objective/train/value_loss": 0.0002603928733151406, + "objective/train/value_max": -9.03010368347168e-05, + "objective/train/value_min": -0.61083984375, + "objective/train/value_reward_corr": 0.7232506307301012, + "objective/train/value_std": 0.01611328125, + "objective/train/weight_avg": 1.000057578086853, + "objective/train/weighted_lm_loss": 1.5498452186584473, + "objective/train/weights_max": 1.781590461730957, + "objective/train/weights_min": 0.5239829421043396, + "theoretical_loss": 3.6081291324493456, + "tokens_seen": 1228800000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006339271385010433, + "loss": 0.0749, + "theoretical_loss": 3.608094167969273, + "tokens_seen": 1228931072 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006338468945594608, + "loss": 0.0717, + "theoretical_loss": 3.6080242533269136, + "tokens_seen": 1229193216 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006337666506178784, + "loss": 0.0702, + "theoretical_loss": 3.6079543577672, + "tokens_seen": 1229455360 + }, + { + "epoch": 0.37, + "learning_rate": 0.000633686406676296, + "loss": 0.074, + "theoretical_loss": 3.6078844812808577, + "tokens_seen": 1229717504 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006336061627347135, + "loss": 0.0757, + "theoretical_loss": 3.6078146238586175, + "tokens_seen": 1229979648 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006335259187931311, + "loss": 0.0737, + "theoretical_loss": 3.6077447854912164, + "tokens_seen": 1230241792 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006334456748515487, + "loss": 0.0721, + "theoretical_loss": 3.6076749661693985, + "tokens_seen": 1230503936 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006333654309099663, + "loss": 0.0743, + "theoretical_loss": 3.607605165883914, + "tokens_seen": 1230766080 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006332851869683838, + "loss": 0.0729, + "theoretical_loss": 3.60753538462552, + "tokens_seen": 1231028224 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006332049430268016, + "loss": 0.0719, + "theoretical_loss": 3.607465622384979, + "tokens_seen": 1231290368 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006331246990852191, + "loss": 0.0732, + "theoretical_loss": 3.6073958791530614, + "tokens_seen": 1231552512 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006330444551436366, + "loss": 0.0727, + "theoretical_loss": 3.607326154920543, + "tokens_seen": 1231814656 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.001238655881024897, + "objective/train/docs_used": 450737, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.530470371246338, + "objective/train/original_loss": 1.530470371246338, + "objective/train/theoretical_loss": 3.6072564496782062, + "objective/train/tokens_used": 1252536800, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.00031318055698648095, + "objective/train/value_max": -0.00013136863708496094, + "objective/train/value_min": -0.65771484375, + "objective/train/value_reward_corr": 0.6789382313438648, + "objective/train/value_std": 0.01499176025390625, + "objective/train/weight_avg": 1.0013794898986816, + "objective/train/weighted_lm_loss": 1.5326346158981323, + "objective/train/weights_max": 1.1837249994277954, + "objective/train/weights_min": 0.37208810448646545, + "theoretical_loss": 3.6072564496782062, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006329642112020543, + "loss": 0.0717, + "theoretical_loss": 3.6072564496782062, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006328839672604718, + "loss": 0.0703, + "theoretical_loss": 3.607186763416841, + "tokens_seen": 1232338944 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006328037233188895, + "loss": 0.073, + "theoretical_loss": 3.607117096127242, + "tokens_seen": 1232601088 + }, + { + "epoch": 0.37, + "learning_rate": 0.000632723479377307, + "loss": 0.0743, + "theoretical_loss": 3.60704744780021, + "tokens_seen": 1232863232 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006326432354357246, + "loss": 0.072, + "theoretical_loss": 3.606977818426555, + "tokens_seen": 1233125376 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006325629914941422, + "loss": 0.0745, + "theoretical_loss": 3.6069082079970913, + "tokens_seen": 1233387520 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006324827475525598, + "loss": 0.0741, + "theoretical_loss": 3.60683861650264, + "tokens_seen": 1233649664 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006324025036109774, + "loss": 0.0708, + "theoretical_loss": 3.606769043934027, + "tokens_seen": 1233911808 + }, + { + "epoch": 0.37, + "learning_rate": 0.000632322259669395, + "loss": 0.0736, + "theoretical_loss": 3.606699490282088, + "tokens_seen": 1234173952 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006322420157278126, + "loss": 0.07, + "theoretical_loss": 3.606629955537663, + "tokens_seen": 1234436096 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006321617717862301, + "loss": 0.0712, + "theoretical_loss": 3.606560439691598, + "tokens_seen": 1234698240 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006320815278446478, + "loss": 0.0723, + "theoretical_loss": 3.6064909427347462, + "tokens_seen": 1234960384 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006320012839030653, + "loss": 0.0733, + "theoretical_loss": 3.6064214646579673, + "tokens_seen": 1235222528 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.0005033229826949537, + "objective/train/docs_used": 451987, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4716780185699463, + "objective/train/original_loss": 1.4716781377792358, + "objective/train/theoretical_loss": 3.60638673269675, + "objective/train/tokens_used": 1255813600, + "objective/train/value_avg": -0.0082550048828125, + "objective/train/value_loss": 0.00027764757396653295, + "objective/train/value_max": -5.9664249420166016e-05, + "objective/train/value_min": -0.393798828125, + "objective/train/value_reward_corr": 0.6338732515401595, + "objective/train/value_std": 0.01284027099609375, + "objective/train/weight_avg": 0.9996262192726135, + "objective/train/weighted_lm_loss": 1.4702317714691162, + "objective/train/weights_max": 1.3029727935791016, + "objective/train/weights_min": 0.39436399936676025, + "theoretical_loss": 3.60638673269675, + "tokens_seen": 1235353600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006319210399614828, + "loss": 0.0742, + "theoretical_loss": 3.606352005452126, + "tokens_seen": 1235484672 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006318407960199005, + "loss": 0.0729, + "theoretical_loss": 3.6062825651080956, + "tokens_seen": 1235746816 + }, + { + "epoch": 0.37, + "learning_rate": 0.000631760552078318, + "loss": 0.0737, + "theoretical_loss": 3.6062131436167544, + "tokens_seen": 1236008960 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006316803081367358, + "loss": 0.0716, + "theoretical_loss": 3.606143740968986, + "tokens_seen": 1236271104 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006316000641951533, + "loss": 0.0749, + "theoretical_loss": 3.6060743571556833, + "tokens_seen": 1236533248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006315198202535709, + "loss": 0.0741, + "theoretical_loss": 3.606004992167742, + "tokens_seen": 1236795392 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006314395763119885, + "loss": 0.0716, + "theoretical_loss": 3.605935645996067, + "tokens_seen": 1237057536 + }, + { + "epoch": 0.37, + "learning_rate": 0.000631359332370406, + "loss": 0.0697, + "theoretical_loss": 3.605866318631568, + "tokens_seen": 1237319680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006312790884288236, + "loss": 0.0713, + "theoretical_loss": 3.605797010065161, + "tokens_seen": 1237581824 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006311988444872412, + "loss": 0.0715, + "theoretical_loss": 3.6057277202877698, + "tokens_seen": 1237843968 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006311186005456588, + "loss": 0.072, + "theoretical_loss": 3.6056584492903223, + "tokens_seen": 1238106112 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006310383566040763, + "loss": 0.0715, + "theoretical_loss": 3.6055891970637544, + "tokens_seen": 1238368256 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0013174168998375535, + "objective/train/docs_used": 453261, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5999850034713745, + "objective/train/original_loss": 1.5999850034713745, + "objective/train/theoretical_loss": 3.605519963599008, + "objective/train/tokens_used": 1259090400, + "objective/train/value_avg": -0.0081024169921875, + "objective/train/value_loss": 0.00033738784259185195, + "objective/train/value_max": -8.028745651245117e-05, + "objective/train/value_min": -0.69580078125, + "objective/train/value_reward_corr": 0.677158113803972, + "objective/train/value_std": 0.01568603515625, + "objective/train/weight_avg": 1.0014675855636597, + "objective/train/weighted_lm_loss": 1.6025694608688354, + "objective/train/weights_max": 1.330289602279663, + "objective/train/weights_min": 0.3702276945114136, + "theoretical_loss": 3.605519963599008, + "tokens_seen": 1238630400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006309581126624941, + "loss": 0.0749, + "theoretical_loss": 3.605519963599008, + "tokens_seen": 1238630400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006308778687209116, + "loss": 0.0724, + "theoretical_loss": 3.60545074888703, + "tokens_seen": 1238892544 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006307976247793292, + "loss": 0.0715, + "theoretical_loss": 3.6053815529187756, + "tokens_seen": 1239154688 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006307173808377468, + "loss": 0.0739, + "theoretical_loss": 3.6053123756852052, + "tokens_seen": 1239416832 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006306371368961643, + "loss": 0.0709, + "theoretical_loss": 3.605243217177285, + "tokens_seen": 1239678976 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006305568929545819, + "loss": 0.0711, + "theoretical_loss": 3.6051740773859877, + "tokens_seen": 1239941120 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006304766490129995, + "loss": 0.0729, + "theoretical_loss": 3.605104956302293, + "tokens_seen": 1240203264 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006303964050714171, + "loss": 0.0729, + "theoretical_loss": 3.605035853917187, + "tokens_seen": 1240465408 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006303161611298347, + "loss": 0.0747, + "theoretical_loss": 3.604966770221661, + "tokens_seen": 1240727552 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006302359171882524, + "loss": 0.0697, + "theoretical_loss": 3.604897705206713, + "tokens_seen": 1240989696 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006301556732466699, + "loss": 0.0722, + "theoretical_loss": 3.6048286588633465, + "tokens_seen": 1241251840 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006300754293050875, + "loss": 0.0749, + "theoretical_loss": 3.6047596311825725, + "tokens_seen": 1241513984 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006299951853635051, + "loss": 0.0745, + "theoretical_loss": 3.6046906221554087, + "tokens_seen": 1241776128 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0009522174368612468, + "objective/train/docs_used": 454384, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.473624348640442, + "objective/train/original_loss": 1.4736244678497314, + "objective/train/theoretical_loss": 3.604656124634124, + "objective/train/tokens_used": 1262367200, + "objective/train/value_avg": -0.00734710693359375, + "objective/train/value_loss": 0.00023657429846934974, + "objective/train/value_max": -9.995698928833008e-05, + "objective/train/value_min": -0.265869140625, + "objective/train/value_reward_corr": 0.6545632899431301, + "objective/train/value_std": 0.01197052001953125, + "objective/train/weight_avg": 1.0010582208633423, + "objective/train/weighted_lm_loss": 1.4750497341156006, + "objective/train/weights_max": 1.304564356803894, + "objective/train/weights_min": 0.39513498544692993, + "theoretical_loss": 3.604656124634124, + "tokens_seen": 1241907200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006299149414219226, + "loss": 0.0737, + "theoretical_loss": 3.6046216317728765, + "tokens_seen": 1242038272 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006298346974803403, + "loss": 0.0723, + "theoretical_loss": 3.6045526600260054, + "tokens_seen": 1242300416 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006297544535387578, + "loss": 0.0716, + "theoretical_loss": 3.6044837069058318, + "tokens_seen": 1242562560 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006296742095971754, + "loss": 0.0751, + "theoretical_loss": 3.6044147724033957, + "tokens_seen": 1242824704 + }, + { + "epoch": 0.38, + "learning_rate": 0.000629593965655593, + "loss": 0.0752, + "theoretical_loss": 3.6043458565097453, + "tokens_seen": 1243086848 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006295137217140106, + "loss": 0.0701, + "theoretical_loss": 3.6042769592159356, + "tokens_seen": 1243348992 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006294334777724282, + "loss": 0.0725, + "theoretical_loss": 3.6042080805130254, + "tokens_seen": 1243611136 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006293532338308458, + "loss": 0.0749, + "theoretical_loss": 3.6041392203920815, + "tokens_seen": 1243873280 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006292729898892634, + "loss": 0.0747, + "theoretical_loss": 3.6040703788441757, + "tokens_seen": 1244135424 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006291927459476809, + "loss": 0.0745, + "theoretical_loss": 3.6040015558603877, + "tokens_seen": 1244397568 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006291125020060986, + "loss": 0.0731, + "theoretical_loss": 3.603932751431802, + "tokens_seen": 1244659712 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006290322580645161, + "loss": 0.0719, + "theoretical_loss": 3.6038639655495093, + "tokens_seen": 1244921856 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.0012195850722491741, + "objective/train/docs_used": 455531, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5041210651397705, + "objective/train/original_loss": 1.5041210651397705, + "objective/train/theoretical_loss": 3.6037951982046064, + "objective/train/tokens_used": 1265644000, + "objective/train/value_avg": -0.01177215576171875, + "objective/train/value_loss": 0.0004948603454977274, + "objective/train/value_max": -0.0001366138458251953, + "objective/train/value_min": -0.5390625, + "objective/train/value_reward_corr": 0.73076830896824, + "objective/train/value_std": 0.0195465087890625, + "objective/train/weight_avg": 0.9990051984786987, + "objective/train/weighted_lm_loss": 1.5013059377670288, + "objective/train/weights_max": 1.3366363048553467, + "objective/train/weights_min": 0.3697971701622009, + "theoretical_loss": 3.6037951982046064, + "tokens_seen": 1245184000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006289520141229337, + "loss": 0.0726, + "theoretical_loss": 3.6037951982046064, + "tokens_seen": 1245184000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006288717701813513, + "loss": 0.0729, + "theoretical_loss": 3.6037264493881973, + "tokens_seen": 1245446144 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006287915262397688, + "loss": 0.075, + "theoretical_loss": 3.603657719091391, + "tokens_seen": 1245708288 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006287112822981866, + "loss": 0.0732, + "theoretical_loss": 3.603589007305303, + "tokens_seen": 1245970432 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006286310383566041, + "loss": 0.0749, + "theoretical_loss": 3.603520314021055, + "tokens_seen": 1246232576 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006285507944150217, + "loss": 0.0719, + "theoretical_loss": 3.6034516392297746, + "tokens_seen": 1246494720 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006284705504734393, + "loss": 0.0759, + "theoretical_loss": 3.6033829829225965, + "tokens_seen": 1246756864 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006283903065318568, + "loss": 0.0749, + "theoretical_loss": 3.6033143450906593, + "tokens_seen": 1247019008 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006283100625902744, + "loss": 0.0733, + "theoretical_loss": 3.6032457257251105, + "tokens_seen": 1247281152 + }, + { + "epoch": 0.38, + "learning_rate": 0.000628229818648692, + "loss": 0.0701, + "theoretical_loss": 3.603177124817101, + "tokens_seen": 1247543296 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006281495747071096, + "loss": 0.0725, + "theoretical_loss": 3.603108542357791, + "tokens_seen": 1247805440 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006280693307655271, + "loss": 0.0743, + "theoretical_loss": 3.603039978338343, + "tokens_seen": 1248067584 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006279890868239449, + "loss": 0.0714, + "theoretical_loss": 3.6029714327499285, + "tokens_seen": 1248329728 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.00015995489957276732, + "objective/train/docs_used": 456712, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4048641920089722, + "objective/train/original_loss": 1.4048643112182617, + "objective/train/theoretical_loss": 3.602937166864601, + "objective/train/tokens_used": 1268920800, + "objective/train/value_avg": -0.00669097900390625, + "objective/train/value_loss": 0.00019214232452213764, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.24560546875, + "objective/train/value_reward_corr": 0.6105372464341148, + "objective/train/value_std": 0.00984954833984375, + "objective/train/weight_avg": 1.0002514123916626, + "objective/train/weighted_lm_loss": 1.405979871749878, + "objective/train/weights_max": 1.278395175933838, + "objective/train/weights_min": 0.4344214200973511, + "theoretical_loss": 3.602937166864601, + "tokens_seen": 1248460800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006279088428823624, + "loss": 0.0752, + "theoretical_loss": 3.602902905583724, + "tokens_seen": 1248591872 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006278285989407801, + "loss": 0.0717, + "theoretical_loss": 3.602834396830912, + "tokens_seen": 1248854016 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006277483549991976, + "loss": 0.0726, + "theoretical_loss": 3.6027659064826816, + "tokens_seen": 1249116160 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006276681110576151, + "loss": 0.0744, + "theoretical_loss": 3.602697434530227, + "tokens_seen": 1249378304 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006275878671160328, + "loss": 0.0744, + "theoretical_loss": 3.6026289809647487, + "tokens_seen": 1249640448 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006275076231744503, + "loss": 0.0741, + "theoretical_loss": 3.6025605457774548, + "tokens_seen": 1249902592 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006274273792328679, + "loss": 0.0721, + "theoretical_loss": 3.602492128959558, + "tokens_seen": 1250164736 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006273471352912855, + "loss": 0.0732, + "theoretical_loss": 3.6024237305022764, + "tokens_seen": 1250426880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006272668913497032, + "loss": 0.0741, + "theoretical_loss": 3.602355350396836, + "tokens_seen": 1250689024 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006271866474081207, + "loss": 0.072, + "theoretical_loss": 3.602286988634467, + "tokens_seen": 1250951168 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006271064034665383, + "loss": 0.0719, + "theoretical_loss": 3.6022186452064076, + "tokens_seen": 1251213312 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006270261595249559, + "loss": 0.0716, + "theoretical_loss": 3.6021503201039, + "tokens_seen": 1251475456 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0013131473679095507, + "objective/train/docs_used": 458046, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4418436288833618, + "objective/train/original_loss": 1.4418433904647827, + "objective/train/theoretical_loss": 3.6020820133181934, + "objective/train/tokens_used": 1272197600, + "objective/train/value_avg": -0.0069580078125, + "objective/train/value_loss": 0.00013802826288156211, + "objective/train/value_max": -7.486343383789062e-05, + "objective/train/value_min": -0.235595703125, + "objective/train/value_reward_corr": 0.722366365333374, + "objective/train/value_std": 0.0119781494140625, + "objective/train/weight_avg": 1.0013810396194458, + "objective/train/weighted_lm_loss": 1.4443731307983398, + "objective/train/weights_max": 1.2093919515609741, + "objective/train/weights_min": 0.6394911408424377, + "theoretical_loss": 3.6020820133181934, + "tokens_seen": 1251737600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006269459155833734, + "loss": 0.071, + "theoretical_loss": 3.6020820133181934, + "tokens_seen": 1251737600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006268656716417911, + "loss": 0.0744, + "theoretical_loss": 3.6020137248405435, + "tokens_seen": 1251999744 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006267854277002086, + "loss": 0.0733, + "theoretical_loss": 3.6019454546622107, + "tokens_seen": 1252261888 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006267051837586262, + "loss": 0.0711, + "theoretical_loss": 3.601877202774463, + "tokens_seen": 1252524032 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006266249398170438, + "loss": 0.0729, + "theoretical_loss": 3.601808969168573, + "tokens_seen": 1252786176 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006265446958754613, + "loss": 0.0719, + "theoretical_loss": 3.60174075383582, + "tokens_seen": 1253048320 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006264644519338791, + "loss": 0.0752, + "theoretical_loss": 3.6016725567674883, + "tokens_seen": 1253310464 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006263842079922966, + "loss": 0.0728, + "theoretical_loss": 3.60160437795487, + "tokens_seen": 1253572608 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006263039640507142, + "loss": 0.073, + "theoretical_loss": 3.6015362173892624, + "tokens_seen": 1253834752 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006262237201091318, + "loss": 0.0745, + "theoretical_loss": 3.6014680750619674, + "tokens_seen": 1254096896 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006261434761675494, + "loss": 0.0725, + "theoretical_loss": 3.6013999509642947, + "tokens_seen": 1254359040 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006260632322259669, + "loss": 0.0724, + "theoretical_loss": 3.601331845087559, + "tokens_seen": 1254621184 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006259829882843845, + "loss": 0.073, + "theoretical_loss": 3.601263757423082, + "tokens_seen": 1254883328 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0014171048533171415, + "objective/train/docs_used": 459153, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5080225467681885, + "objective/train/original_loss": 1.508022427558899, + "objective/train/theoretical_loss": 3.601229720417729, + "objective/train/tokens_used": 1275474400, + "objective/train/value_avg": -0.00691986083984375, + "objective/train/value_loss": 0.00018681980145629495, + "objective/train/value_max": -0.00014090538024902344, + "objective/train/value_min": -0.630859375, + "objective/train/value_reward_corr": 0.6682120767318891, + "objective/train/value_std": 0.0105743408203125, + "objective/train/weight_avg": 1.001495361328125, + "objective/train/weighted_lm_loss": 1.5094605684280396, + "objective/train/weights_max": 1.1394519805908203, + "objective/train/weights_min": 0.22579480707645416, + "theoretical_loss": 3.601229720417729, + "tokens_seen": 1255014400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006259027443428021, + "loss": 0.0728, + "theoretical_loss": 3.601195687962189, + "tokens_seen": 1255145472 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006258225004012196, + "loss": 0.0718, + "theoretical_loss": 3.601127636696214, + "tokens_seen": 1255407616 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006257422564596374, + "loss": 0.0727, + "theoretical_loss": 3.6010596036164957, + "tokens_seen": 1255669760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006256620125180549, + "loss": 0.0725, + "theoretical_loss": 3.600991588714378, + "tokens_seen": 1255931904 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006255817685764725, + "loss": 0.0705, + "theoretical_loss": 3.6009235919812124, + "tokens_seen": 1256194048 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006255015246348901, + "loss": 0.0716, + "theoretical_loss": 3.6008556134083545, + "tokens_seen": 1256456192 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006254212806933076, + "loss": 0.0732, + "theoretical_loss": 3.600787652987167, + "tokens_seen": 1256718336 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006253410367517252, + "loss": 0.0716, + "theoretical_loss": 3.6007197107090185, + "tokens_seen": 1256980480 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006252607928101428, + "loss": 0.0735, + "theoretical_loss": 3.600651786565283, + "tokens_seen": 1257242624 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006251805488685604, + "loss": 0.0719, + "theoretical_loss": 3.6005838805473402, + "tokens_seen": 1257504768 + }, + { + "epoch": 0.38, + "learning_rate": 0.000625100304926978, + "loss": 0.0717, + "theoretical_loss": 3.600515992646577, + "tokens_seen": 1257766912 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006250200609853957, + "loss": 0.072, + "theoretical_loss": 3.6004481228543854, + "tokens_seen": 1258029056 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0018183041829615831, + "objective/train/docs_used": 460354, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.566368579864502, + "objective/train/original_loss": 1.5663683414459229, + "objective/train/theoretical_loss": 3.600380271162162, + "objective/train/tokens_used": 1278751200, + "objective/train/value_avg": -0.01047515869140625, + "objective/train/value_loss": 0.0005354974418878555, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.8798828125, + "objective/train/value_reward_corr": 0.6700587084228133, + "objective/train/value_std": 0.019775390625, + "objective/train/weight_avg": 1.0020546913146973, + "objective/train/weighted_lm_loss": 1.5695098638534546, + "objective/train/weights_max": 1.7010573148727417, + "objective/train/weights_min": 0.3695588707923889, + "theoretical_loss": 3.600380271162162, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006249398170438132, + "loss": 0.071, + "theoretical_loss": 3.600380271162162, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006248595731022309, + "loss": 0.0693, + "theoretical_loss": 3.6003124375613114, + "tokens_seen": 1258553344 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006247793291606484, + "loss": 0.0747, + "theoretical_loss": 3.6002446220432427, + "tokens_seen": 1258815488 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006246990852190659, + "loss": 0.074, + "theoretical_loss": 3.6001768245993713, + "tokens_seen": 1259077632 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006246188412774836, + "loss": 0.0731, + "theoretical_loss": 3.600109045221119, + "tokens_seen": 1259339776 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006245385973359011, + "loss": 0.0748, + "theoretical_loss": 3.600041283899912, + "tokens_seen": 1259601920 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006244583533943187, + "loss": 0.0714, + "theoretical_loss": 3.599973540627184, + "tokens_seen": 1259864064 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006243781094527363, + "loss": 0.0741, + "theoretical_loss": 3.599905815394374, + "tokens_seen": 1260126208 + }, + { + "epoch": 0.38, + "learning_rate": 0.000624297865511154, + "loss": 0.0736, + "theoretical_loss": 3.599838108192926, + "tokens_seen": 1260388352 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006242176215695715, + "loss": 0.0724, + "theoretical_loss": 3.5997704190142907, + "tokens_seen": 1260650496 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006241373776279891, + "loss": 0.0741, + "theoretical_loss": 3.599702747849925, + "tokens_seen": 1260912640 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006240571336864067, + "loss": 0.073, + "theoretical_loss": 3.5996350946912896, + "tokens_seen": 1261174784 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006239768897448243, + "loss": 0.0715, + "theoretical_loss": 3.5995674595298537, + "tokens_seen": 1261436928 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0015806846786290407, + "objective/train/docs_used": 461530, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3945951461791992, + "objective/train/original_loss": 1.3945950269699097, + "objective/train/theoretical_loss": 3.599533648695421, + "objective/train/tokens_used": 1282028000, + "objective/train/value_avg": -0.006893157958984375, + "objective/train/value_loss": 0.00015965731290634722, + "objective/train/value_max": -7.31348991394043e-05, + "objective/train/value_min": -0.7705078125, + "objective/train/value_reward_corr": 0.6894917108257843, + "objective/train/value_std": 0.0124969482421875, + "objective/train/weight_avg": 1.0016539096832275, + "objective/train/weighted_lm_loss": 1.39777410030365, + "objective/train/weights_max": 1.2735666036605835, + "objective/train/weights_min": 0.36959269642829895, + "theoretical_loss": 3.599533648695421, + "tokens_seen": 1261568000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006238966458032419, + "loss": 0.072, + "theoretical_loss": 3.5994998423570914, + "tokens_seen": 1261699072 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006238164018616594, + "loss": 0.0713, + "theoretical_loss": 3.599432243164481, + "tokens_seen": 1261961216 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006237361579200771, + "loss": 0.0714, + "theoretical_loss": 3.599364661943509, + "tokens_seen": 1262223360 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006236559139784946, + "loss": 0.0726, + "theoretical_loss": 3.599297098685666, + "tokens_seen": 1262485504 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006235756700369121, + "loss": 0.071, + "theoretical_loss": 3.599229553382449, + "tokens_seen": 1262747648 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006234954260953299, + "loss": 0.0725, + "theoretical_loss": 3.599162026025361, + "tokens_seen": 1263009792 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006234151821537474, + "loss": 0.0706, + "theoretical_loss": 3.599094516605911, + "tokens_seen": 1263271936 + }, + { + "epoch": 0.38, + "learning_rate": 0.000623334938212165, + "loss": 0.0737, + "theoretical_loss": 3.5990270251156122, + "tokens_seen": 1263534080 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006232546942705826, + "loss": 0.0748, + "theoretical_loss": 3.5989595515459856, + "tokens_seen": 1263796224 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006231744503290002, + "loss": 0.0743, + "theoretical_loss": 3.598892095888557, + "tokens_seen": 1264058368 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006230942063874177, + "loss": 0.0716, + "theoretical_loss": 3.5988246581348573, + "tokens_seen": 1264320512 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006230139624458353, + "loss": 0.073, + "theoretical_loss": 3.5987572382764252, + "tokens_seen": 1264582656 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 7.375935092568398e-05, + "objective/train/docs_used": 462829, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4854201078414917, + "objective/train/original_loss": 1.4854199886322021, + "objective/train/theoretical_loss": 3.5986898363048025, + "objective/train/tokens_used": 1285304800, + "objective/train/value_avg": -0.0058441162109375, + "objective/train/value_loss": 0.00019366215565241873, + "objective/train/value_max": -0.00012242794036865234, + "objective/train/value_min": -0.383544921875, + "objective/train/value_reward_corr": 0.5646258964894881, + "objective/train/value_std": 0.00856781005859375, + "objective/train/weight_avg": 1.0001623630523682, + "objective/train/weighted_lm_loss": 1.4864187240600586, + "objective/train/weights_max": 1.2285127639770508, + "objective/train/weights_min": 0.39554521441459656, + "theoretical_loss": 3.5986898363048025, + "tokens_seen": 1264844800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006229337185042529, + "loss": 0.0724, + "theoretical_loss": 3.5986898363048025, + "tokens_seen": 1264844800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006228534745626704, + "loss": 0.0713, + "theoretical_loss": 3.5986224522115395, + "tokens_seen": 1265106944 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006227732306210882, + "loss": 0.0704, + "theoretical_loss": 3.59855508598819, + "tokens_seen": 1265369088 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006226929866795057, + "loss": 0.0737, + "theoretical_loss": 3.5984877376263142, + "tokens_seen": 1265631232 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006226127427379234, + "loss": 0.07, + "theoretical_loss": 3.5984204071174783, + "tokens_seen": 1265893376 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006225324987963409, + "loss": 0.0763, + "theoretical_loss": 3.598353094453255, + "tokens_seen": 1266155520 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006224522548547584, + "loss": 0.0724, + "theoretical_loss": 3.598285799625221, + "tokens_seen": 1266417664 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006223720109131761, + "loss": 0.0721, + "theoretical_loss": 3.5982185226249594, + "tokens_seen": 1266679808 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006222917669715936, + "loss": 0.0718, + "theoretical_loss": 3.5981512634440604, + "tokens_seen": 1266941952 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006222115230300112, + "loss": 0.0725, + "theoretical_loss": 3.5980840220741177, + "tokens_seen": 1267204096 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006221312790884289, + "loss": 0.0748, + "theoretical_loss": 3.5980167985067317, + "tokens_seen": 1267466240 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006220510351468465, + "loss": 0.0739, + "theoretical_loss": 3.5979495927335092, + "tokens_seen": 1267728384 + }, + { + "epoch": 0.38, + "learning_rate": 0.000621970791205264, + "loss": 0.0726, + "theoretical_loss": 3.5978824047460614, + "tokens_seen": 1267990528 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0012868504272773862, + "objective/train/docs_used": 464032, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5202511548995972, + "objective/train/original_loss": 1.5202511548995972, + "objective/train/theoretical_loss": 3.5978488174193832, + "objective/train/tokens_used": 1288581600, + "objective/train/value_avg": -0.00853729248046875, + "objective/train/value_loss": 0.0002907871676143259, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.3349609375, + "objective/train/value_reward_corr": 0.5771171767033785, + "objective/train/value_std": 0.013885498046875, + "objective/train/weight_avg": 1.0014193058013916, + "objective/train/weighted_lm_loss": 1.5222471952438354, + "objective/train/weights_max": 1.397885799407959, + "objective/train/weights_min": 0.37770265340805054, + "theoretical_loss": 3.5978488174193832, + "tokens_seen": 1268121600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006218905472636816, + "loss": 0.0733, + "theoretical_loss": 3.5978152345360055, + "tokens_seen": 1268252672 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006218103033220992, + "loss": 0.0724, + "theoretical_loss": 3.5977480820949657, + "tokens_seen": 1268514816 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006217300593805167, + "loss": 0.0741, + "theoretical_loss": 3.5976809474145703, + "tokens_seen": 1268776960 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006216498154389344, + "loss": 0.0731, + "theoretical_loss": 3.597613830486454, + "tokens_seen": 1269039104 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006215695714973519, + "loss": 0.0718, + "theoretical_loss": 3.5975467313022564, + "tokens_seen": 1269301248 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006214893275557696, + "loss": 0.0735, + "theoretical_loss": 3.5974796498536237, + "tokens_seen": 1269563392 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006214090836141871, + "loss": 0.0708, + "theoretical_loss": 3.597412586132208, + "tokens_seen": 1269825536 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006213288396726048, + "loss": 0.0728, + "theoretical_loss": 3.5973455401296652, + "tokens_seen": 1270087680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006212485957310224, + "loss": 0.07, + "theoretical_loss": 3.597278511837659, + "tokens_seen": 1270349824 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006211683517894399, + "loss": 0.0716, + "theoretical_loss": 3.597211501247858, + "tokens_seen": 1270611968 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006210881078478575, + "loss": 0.0703, + "theoretical_loss": 3.5971445083519358, + "tokens_seen": 1270874112 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006210078639062751, + "loss": 0.0733, + "theoretical_loss": 3.5970775331415723, + "tokens_seen": 1271136256 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0005056922673247755, + "objective/train/docs_used": 465264, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4634767770767212, + "objective/train/original_loss": 1.4634767770767212, + "objective/train/theoretical_loss": 3.5970105756084525, + "objective/train/tokens_used": 1291858400, + "objective/train/value_avg": -0.007724761962890625, + "objective/train/value_loss": 0.00021731469314545393, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.34912109375, + "objective/train/value_reward_corr": 0.6817886298040029, + "objective/train/value_std": 0.016876220703125, + "objective/train/weight_avg": 1.0006133317947388, + "objective/train/weighted_lm_loss": 1.4629930257797241, + "objective/train/weights_max": 1.2334792613983154, + "objective/train/weights_min": 0.729193389415741, + "theoretical_loss": 3.5970105756084525, + "tokens_seen": 1271398400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006209276199646927, + "loss": 0.0702, + "theoretical_loss": 3.5970105756084525, + "tokens_seen": 1271398400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006208473760231102, + "loss": 0.0727, + "theoretical_loss": 3.5969436357442683, + "tokens_seen": 1271660544 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006207671320815279, + "loss": 0.0721, + "theoretical_loss": 3.5968767135407154, + "tokens_seen": 1271922688 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006206868881399454, + "loss": 0.0712, + "theoretical_loss": 3.596809808989496, + "tokens_seen": 1272184832 + }, + { + "epoch": 0.39, + "learning_rate": 0.000620606644198363, + "loss": 0.0728, + "theoretical_loss": 3.596742922082319, + "tokens_seen": 1272446976 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006205264002567807, + "loss": 0.0705, + "theoretical_loss": 3.5966760528108965, + "tokens_seen": 1272709120 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006204461563151982, + "loss": 0.0726, + "theoretical_loss": 3.596609201166948, + "tokens_seen": 1272971264 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006203659123736158, + "loss": 0.0726, + "theoretical_loss": 3.596542367142198, + "tokens_seen": 1273233408 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006202856684320334, + "loss": 0.0727, + "theoretical_loss": 3.5964755507283774, + "tokens_seen": 1273495552 + }, + { + "epoch": 0.39, + "learning_rate": 0.000620205424490451, + "loss": 0.0736, + "theoretical_loss": 3.5964087519172203, + "tokens_seen": 1273757696 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006201251805488686, + "loss": 0.0745, + "theoretical_loss": 3.59634197070047, + "tokens_seen": 1274019840 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006200449366072861, + "loss": 0.0751, + "theoretical_loss": 3.5962752070698727, + "tokens_seen": 1274281984 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006199646926657037, + "loss": 0.0694, + "theoretical_loss": 3.59620846101718, + "tokens_seen": 1274544128 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0023682687897235155, + "objective/train/docs_used": 466434, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4091410636901855, + "objective/train/original_loss": 1.4091408252716064, + "objective/train/theoretical_loss": 3.5961750945799724, + "objective/train/tokens_used": 1295135200, + "objective/train/value_avg": -0.012420654296875, + "objective/train/value_loss": 0.00021746121637988836, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.407958984375, + "objective/train/value_reward_corr": 0.8011572186934655, + "objective/train/value_std": 0.0207366943359375, + "objective/train/weight_avg": 1.002469778060913, + "objective/train/weighted_lm_loss": 1.4116214513778687, + "objective/train/weights_max": 1.1912201642990112, + "objective/train/weights_min": 0.3865475058555603, + "theoretical_loss": 3.5961750945799724, + "tokens_seen": 1274675200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006198844487241214, + "loss": 0.0727, + "theoretical_loss": 3.596141732534151, + "tokens_seen": 1274806272 + }, + { + "epoch": 0.39, + "learning_rate": 0.000619804204782539, + "loss": 0.0749, + "theoretical_loss": 3.596075021612549, + "tokens_seen": 1275068416 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006197239608409565, + "loss": 0.0739, + "theoretical_loss": 3.5960083282441433, + "tokens_seen": 1275330560 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006196437168993742, + "loss": 0.0741, + "theoretical_loss": 3.5959416524207084, + "tokens_seen": 1275592704 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006195634729577917, + "loss": 0.0732, + "theoretical_loss": 3.5958749941340242, + "tokens_seen": 1275854848 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006194832290162092, + "loss": 0.0735, + "theoretical_loss": 3.595808353375877, + "tokens_seen": 1276116992 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006194029850746269, + "loss": 0.0724, + "theoretical_loss": 3.5957417301380588, + "tokens_seen": 1276379136 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006193227411330444, + "loss": 0.0735, + "theoretical_loss": 3.595675124412365, + "tokens_seen": 1276641280 + }, + { + "epoch": 0.39, + "learning_rate": 0.000619242497191462, + "loss": 0.0762, + "theoretical_loss": 3.595608536190599, + "tokens_seen": 1276903424 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006191622532498797, + "loss": 0.0723, + "theoretical_loss": 3.595541965464568, + "tokens_seen": 1277165568 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006190820093082973, + "loss": 0.0702, + "theoretical_loss": 3.5954754122260866, + "tokens_seen": 1277427712 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006190017653667148, + "loss": 0.074, + "theoretical_loss": 3.595408876466972, + "tokens_seen": 1277689856 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.0013968882849439979, + "objective/train/docs_used": 467639, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.402967095375061, + "objective/train/original_loss": 1.4029669761657715, + "objective/train/theoretical_loss": 3.59534235817905, + "objective/train/tokens_used": 1298412000, + "objective/train/value_avg": -0.01238250732421875, + "objective/train/value_loss": 0.0012696925550699234, + "objective/train/value_max": -0.00010150671005249023, + "objective/train/value_min": -0.794921875, + "objective/train/value_reward_corr": 0.703285806535258, + "objective/train/value_std": 0.029815673828125, + "objective/train/weight_avg": 0.9991534948348999, + "objective/train/weighted_lm_loss": 1.3983840942382812, + "objective/train/weights_max": 2.1239025592803955, + "objective/train/weights_min": 0.3922275900840759, + "theoretical_loss": 3.59534235817905, + "tokens_seen": 1277952000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006189215214251324, + "loss": 0.0727, + "theoretical_loss": 3.59534235817905, + "tokens_seen": 1277952000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00061884127748355, + "loss": 0.0732, + "theoretical_loss": 3.5952758573541503, + "tokens_seen": 1278214144 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006187610335419676, + "loss": 0.0722, + "theoretical_loss": 3.5952093739841073, + "tokens_seen": 1278476288 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006186807896003852, + "loss": 0.0743, + "theoretical_loss": 3.5951429080607635, + "tokens_seen": 1278738432 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006186005456588027, + "loss": 0.0718, + "theoretical_loss": 3.5950764595759646, + "tokens_seen": 1279000576 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006185203017172204, + "loss": 0.0726, + "theoretical_loss": 3.5950100285215623, + "tokens_seen": 1279262720 + }, + { + "epoch": 0.39, + "learning_rate": 0.000618440057775638, + "loss": 0.0705, + "theoretical_loss": 3.594943614889414, + "tokens_seen": 1279524864 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006183598138340556, + "loss": 0.0746, + "theoretical_loss": 3.5948772186713827, + "tokens_seen": 1279787008 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006182795698924732, + "loss": 0.0726, + "theoretical_loss": 3.5948108398593357, + "tokens_seen": 1280049152 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006181993259508907, + "loss": 0.0735, + "theoretical_loss": 3.5947444784451488, + "tokens_seen": 1280311296 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006181190820093083, + "loss": 0.0716, + "theoretical_loss": 3.5946781344206995, + "tokens_seen": 1280573440 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006180388380677259, + "loss": 0.0745, + "theoretical_loss": 3.5946118077778735, + "tokens_seen": 1280835584 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006179585941261435, + "loss": 0.0758, + "theoretical_loss": 3.59454549850856, + "tokens_seen": 1281097728 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0009923300240188837, + "objective/train/docs_used": 468763, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6140085458755493, + "objective/train/original_loss": 1.6140084266662598, + "objective/train/theoretical_loss": 3.594512350386438, + "objective/train/tokens_used": 1301688800, + "objective/train/value_avg": -0.007724761962890625, + "objective/train/value_loss": 0.00035045703407377005, + "objective/train/value_max": -8.684396743774414e-05, + "objective/train/value_min": -0.94189453125, + "objective/train/value_reward_corr": 0.6209565433413602, + "objective/train/value_std": 0.01407623291015625, + "objective/train/weight_avg": 1.0011414289474487, + "objective/train/weighted_lm_loss": 1.615778923034668, + "objective/train/weights_max": 1.344037413597107, + "objective/train/weights_min": 0.36872875690460205, + "theoretical_loss": 3.594512350386438, + "tokens_seen": 1281228800 + }, + { + "epoch": 0.39, + "learning_rate": 0.000617878350184561, + "loss": 0.0737, + "theoretical_loss": 3.594479206604655, + "tokens_seen": 1281359872 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006177981062429787, + "loss": 0.0747, + "theoretical_loss": 3.59441293205806, + "tokens_seen": 1281622016 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006177178623013962, + "loss": 0.0724, + "theoretical_loss": 3.59434667486068, + "tokens_seen": 1281884160 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006176376183598139, + "loss": 0.0726, + "theoretical_loss": 3.5942804350044284, + "tokens_seen": 1282146304 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006175573744182315, + "loss": 0.0724, + "theoretical_loss": 3.594214212481222, + "tokens_seen": 1282408448 + }, + { + "epoch": 0.39, + "learning_rate": 0.000617477130476649, + "loss": 0.0702, + "theoretical_loss": 3.594148007282983, + "tokens_seen": 1282670592 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006173968865350667, + "loss": 0.0671, + "theoretical_loss": 3.59408181940164, + "tokens_seen": 1282932736 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006173166425934842, + "loss": 0.0711, + "theoretical_loss": 3.5940156488291266, + "tokens_seen": 1283194880 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006172363986519018, + "loss": 0.0703, + "theoretical_loss": 3.593949495557381, + "tokens_seen": 1283457024 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006171561547103194, + "loss": 0.072, + "theoretical_loss": 3.5938833595783484, + "tokens_seen": 1283719168 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006170759107687369, + "loss": 0.0722, + "theoretical_loss": 3.593817240883978, + "tokens_seen": 1283981312 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006169956668271545, + "loss": 0.072, + "theoretical_loss": 3.5937511394662254, + "tokens_seen": 1284243456 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0013025725493207574, + "objective/train/docs_used": 470107, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4686199426651, + "objective/train/original_loss": 1.4686200618743896, + "objective/train/theoretical_loss": 3.5936850553170503, + "objective/train/tokens_used": 1304965600, + "objective/train/value_avg": -0.005847930908203125, + "objective/train/value_loss": 9.238809434464201e-05, + "objective/train/value_max": -8.958578109741211e-05, + "objective/train/value_min": -0.5205078125, + "objective/train/value_reward_corr": 0.705613709892375, + "objective/train/value_std": 0.01006317138671875, + "objective/train/weight_avg": 1.0013480186462402, + "objective/train/weighted_lm_loss": 1.4713242053985596, + "objective/train/weights_max": 1.1862255334854126, + "objective/train/weights_min": 0.6386135220527649, + "theoretical_loss": 3.5936850553170503, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006169154228855722, + "loss": 0.0716, + "theoretical_loss": 3.5936850553170503, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006168351789439898, + "loss": 0.0736, + "theoretical_loss": 3.593618988428419, + "tokens_seen": 1284767744 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006167549350024073, + "loss": 0.0701, + "theoretical_loss": 3.593552938792303, + "tokens_seen": 1285029888 + }, + { + "epoch": 0.39, + "learning_rate": 0.000616674691060825, + "loss": 0.0717, + "theoretical_loss": 3.5934869064006785, + "tokens_seen": 1285292032 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006165944471192425, + "loss": 0.0734, + "theoretical_loss": 3.593420891245527, + "tokens_seen": 1285554176 + }, + { + "epoch": 0.39, + "learning_rate": 0.00061651420317766, + "loss": 0.0748, + "theoretical_loss": 3.5933548933188377, + "tokens_seen": 1285816320 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006164339592360777, + "loss": 0.0712, + "theoretical_loss": 3.5932889126126017, + "tokens_seen": 1286078464 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006163537152944952, + "loss": 0.0739, + "theoretical_loss": 3.5932229491188172, + "tokens_seen": 1286340608 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006162734713529129, + "loss": 0.0709, + "theoretical_loss": 3.5931570028294884, + "tokens_seen": 1286602752 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006161932274113304, + "loss": 0.0721, + "theoretical_loss": 3.5930910737366233, + "tokens_seen": 1286864896 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006161129834697481, + "loss": 0.0729, + "theoretical_loss": 3.5930251618322364, + "tokens_seen": 1287127040 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006160327395281657, + "loss": 0.0722, + "theoretical_loss": 3.5929592671083466, + "tokens_seen": 1287389184 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006159524955865832, + "loss": 0.0717, + "theoretical_loss": 3.5928933895569792, + "tokens_seen": 1287651328 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.0021324241533875465, + "objective/train/docs_used": 471101, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3588120937347412, + "objective/train/original_loss": 1.358811855316162, + "objective/train/theoretical_loss": 3.592860457218501, + "objective/train/tokens_used": 1308242400, + "objective/train/value_avg": -0.01183319091796875, + "objective/train/value_loss": 0.0003357592795509845, + "objective/train/value_max": -0.0001596212387084961, + "objective/train/value_min": -0.321044921875, + "objective/train/value_reward_corr": 0.8440921060828028, + "objective/train/value_std": 0.02423095703125, + "objective/train/weight_avg": 0.9980318546295166, + "objective/train/weighted_lm_loss": 1.3537436723709106, + "objective/train/weights_max": 1.2598665952682495, + "objective/train/weights_min": 0.7315114736557007, + "theoretical_loss": 3.592860457218501, + "tokens_seen": 1287782400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006158722516450008, + "loss": 0.07, + "theoretical_loss": 3.5928275291701643, + "tokens_seen": 1287913472 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006157920077034184, + "loss": 0.0711, + "theoretical_loss": 3.592761685939937, + "tokens_seen": 1288175616 + }, + { + "epoch": 0.39, + "learning_rate": 0.000615711763761836, + "loss": 0.0717, + "theoretical_loss": 3.5926958598583383, + "tokens_seen": 1288437760 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006156315198202535, + "loss": 0.0692, + "theoretical_loss": 3.5926300509174136, + "tokens_seen": 1288699904 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006155512758786712, + "loss": 0.0711, + "theoretical_loss": 3.5925642591092153, + "tokens_seen": 1288962048 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006154710319370887, + "loss": 0.0696, + "theoretical_loss": 3.592498484425799, + "tokens_seen": 1289224192 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006153907879955064, + "loss": 0.0737, + "theoretical_loss": 3.5924327268592267, + "tokens_seen": 1289486336 + }, + { + "epoch": 0.39, + "learning_rate": 0.000615310544053924, + "loss": 0.0698, + "theoretical_loss": 3.5923669864015664, + "tokens_seen": 1289748480 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006152303001123415, + "loss": 0.0699, + "theoretical_loss": 3.59230126304489, + "tokens_seen": 1290010624 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006151500561707592, + "loss": 0.0733, + "theoretical_loss": 3.592235556781276, + "tokens_seen": 1290272768 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006150698122291767, + "loss": 0.0705, + "theoretical_loss": 3.5921698676028058, + "tokens_seen": 1290534912 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006149895682875943, + "loss": 0.0683, + "theoretical_loss": 3.59210419550157, + "tokens_seen": 1290797056 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0010878384346142411, + "objective/train/docs_used": 472355, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5228557586669922, + "objective/train/original_loss": 1.5228557586669922, + "objective/train/theoretical_loss": 3.5920385404696606, + "objective/train/tokens_used": 1311519200, + "objective/train/value_avg": -0.0082855224609375, + "objective/train/value_loss": 0.00018637104949448258, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.97314453125, + "objective/train/value_reward_corr": 0.8474394273861514, + "objective/train/value_std": 0.019683837890625, + "objective/train/weight_avg": 1.0011759996414185, + "objective/train/weighted_lm_loss": 1.5245059728622437, + "objective/train/weights_max": 1.336806058883667, + "objective/train/weights_min": 0.5558143258094788, + "theoretical_loss": 3.5920385404696606, + "tokens_seen": 1291059200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006149093243460119, + "loss": 0.0722, + "theoretical_loss": 3.5920385404696606, + "tokens_seen": 1291059200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006148290804044295, + "loss": 0.0714, + "theoretical_loss": 3.591972902499177, + "tokens_seen": 1291321344 + }, + { + "epoch": 0.39, + "learning_rate": 0.000614748836462847, + "loss": 0.0713, + "theoretical_loss": 3.5919072815822233, + "tokens_seen": 1291583488 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006146685925212647, + "loss": 0.0713, + "theoretical_loss": 3.5918416777109092, + "tokens_seen": 1291845632 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006145883485796823, + "loss": 0.0697, + "theoretical_loss": 3.591776090877349, + "tokens_seen": 1292107776 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006145081046380998, + "loss": 0.0752, + "theoretical_loss": 3.5917105210736624, + "tokens_seen": 1292369920 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006144278606965175, + "loss": 0.072, + "theoretical_loss": 3.591644968291975, + "tokens_seen": 1292632064 + }, + { + "epoch": 0.39, + "learning_rate": 0.000614347616754935, + "loss": 0.0707, + "theoretical_loss": 3.5915794325244175, + "tokens_seen": 1292894208 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006142673728133526, + "loss": 0.0723, + "theoretical_loss": 3.5915139137631242, + "tokens_seen": 1293156352 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006141871288717702, + "loss": 0.0701, + "theoretical_loss": 3.5914484120002372, + "tokens_seen": 1293418496 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006141068849301877, + "loss": 0.0718, + "theoretical_loss": 3.591382927227902, + "tokens_seen": 1293680640 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006140266409886053, + "loss": 0.0746, + "theoretical_loss": 3.5913174594382693, + "tokens_seen": 1293942784 + }, + { + "epoch": 0.39, + "learning_rate": 0.000613946397047023, + "loss": 0.0725, + "theoretical_loss": 3.5912520086234974, + "tokens_seen": 1294204928 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0006294844206422567, + "objective/train/docs_used": 473604, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4397389888763428, + "objective/train/original_loss": 1.4397391080856323, + "objective/train/theoretical_loss": 3.591219289579233, + "objective/train/tokens_used": 1314796000, + "objective/train/value_avg": -0.0067596435546875, + "objective/train/value_loss": 0.00012036753469146788, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.262939453125, + "objective/train/value_reward_corr": 0.6750268958365275, + "objective/train/value_std": 0.00977325439453125, + "objective/train/weight_avg": 1.0006887912750244, + "objective/train/weighted_lm_loss": 1.4400802850723267, + "objective/train/weights_max": 1.1841590404510498, + "objective/train/weights_min": 0.8148074746131897, + "theoretical_loss": 3.591219289579233, + "tokens_seen": 1294336000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006138661531054406, + "loss": 0.0735, + "theoretical_loss": 3.5911865747757457, + "tokens_seen": 1294467072 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006137859091638582, + "loss": 0.0715, + "theoretical_loss": 3.5911211578871827, + "tokens_seen": 1294729216 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006137056652222758, + "loss": 0.0701, + "theoretical_loss": 3.591055757949981, + "tokens_seen": 1294991360 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006136254212806933, + "loss": 0.074, + "theoretical_loss": 3.590990374956316, + "tokens_seen": 1295253504 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006135451773391109, + "loss": 0.074, + "theoretical_loss": 3.5909250088983713, + "tokens_seen": 1295515648 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006134649333975285, + "loss": 0.0741, + "theoretical_loss": 3.5908596597683347, + "tokens_seen": 1295777792 + }, + { + "epoch": 0.39, + "learning_rate": 0.000613384689455946, + "loss": 0.0714, + "theoretical_loss": 3.590794327558399, + "tokens_seen": 1296039936 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006133044455143637, + "loss": 0.0697, + "theoretical_loss": 3.590729012260762, + "tokens_seen": 1296302080 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006132242015727812, + "loss": 0.0739, + "theoretical_loss": 3.5906637138676265, + "tokens_seen": 1296564224 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006131439576311989, + "loss": 0.0708, + "theoretical_loss": 3.590598432371202, + "tokens_seen": 1296826368 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006130637136896165, + "loss": 0.0704, + "theoretical_loss": 3.5905331677637013, + "tokens_seen": 1297088512 + }, + { + "epoch": 0.39, + "learning_rate": 0.000612983469748034, + "loss": 0.0712, + "theoretical_loss": 3.5904679200373435, + "tokens_seen": 1297350656 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0006142263882793486, + "objective/train/docs_used": 474727, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.378791332244873, + "objective/train/original_loss": 1.378791332244873, + "objective/train/theoretical_loss": 3.5904026891843523, + "objective/train/tokens_used": 1318072800, + "objective/train/value_avg": -0.005767822265625, + "objective/train/value_loss": 0.000269250973360613, + "objective/train/value_max": -6.0617923736572266e-05, + "objective/train/value_min": -0.232666015625, + "objective/train/value_reward_corr": 0.4479749164985532, + "objective/train/value_std": 0.00815582275390625, + "objective/train/weight_avg": 1.0007208585739136, + "objective/train/weighted_lm_loss": 1.379752278327942, + "objective/train/weights_max": 1.1730239391326904, + "objective/train/weights_min": 0.22749631106853485, + "theoretical_loss": 3.5904026891843523, + "tokens_seen": 1297612800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006129032258064516, + "loss": 0.0701, + "theoretical_loss": 3.5904026891843523, + "tokens_seen": 1297612800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006128229818648692, + "loss": 0.0684, + "theoretical_loss": 3.5903374751969563, + "tokens_seen": 1297874944 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006127427379232868, + "loss": 0.0687, + "theoretical_loss": 3.59027227806739, + "tokens_seen": 1298137088 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006126624939817043, + "loss": 0.0709, + "theoretical_loss": 3.5902070977878937, + "tokens_seen": 1298399232 + }, + { + "epoch": 0.39, + "learning_rate": 0.000612582250040122, + "loss": 0.07, + "theoretical_loss": 3.5901419343507106, + "tokens_seen": 1298661376 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006125020060985395, + "loss": 0.0703, + "theoretical_loss": 3.5900767877480906, + "tokens_seen": 1298923520 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006124217621569572, + "loss": 0.0696, + "theoretical_loss": 3.5900116579722883, + "tokens_seen": 1299185664 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006123415182153748, + "loss": 0.0716, + "theoretical_loss": 3.5899465450155637, + "tokens_seen": 1299447808 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006122612742737923, + "loss": 0.0727, + "theoretical_loss": 3.589881448870182, + "tokens_seen": 1299709952 + }, + { + "epoch": 0.39, + "learning_rate": 0.00061218103033221, + "loss": 0.0723, + "theoretical_loss": 3.589816369528413, + "tokens_seen": 1299972096 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006121007863906275, + "loss": 0.0702, + "theoretical_loss": 3.5897513069825324, + "tokens_seen": 1300234240 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006120205424490451, + "loss": 0.0721, + "theoretical_loss": 3.589686261224819, + "tokens_seen": 1300496384 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006119402985074627, + "loss": 0.0718, + "theoretical_loss": 3.5896212322475605, + "tokens_seen": 1300758528 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.001577448914758861, + "objective/train/docs_used": 475730, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3788347244262695, + "objective/train/original_loss": 1.3788347244262695, + "objective/train/theoretical_loss": 3.5895887240491917, + "objective/train/tokens_used": 1321349600, + "objective/train/value_avg": -0.00556182861328125, + "objective/train/value_loss": 0.0001541156234452501, + "objective/train/value_max": -0.00013768672943115234, + "objective/train/value_min": -0.29736328125, + "objective/train/value_reward_corr": 0.6665854299912869, + "objective/train/value_std": 0.009857177734375, + "objective/train/weight_avg": 1.0016462802886963, + "objective/train/weighted_lm_loss": 1.3818347454071045, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.3779159486293793, + "theoretical_loss": 3.5895887240491917, + "tokens_seen": 1300889600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006118600545658803, + "loss": 0.0712, + "theoretical_loss": 3.589556220043046, + "tokens_seen": 1301020672 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006117798106242978, + "loss": 0.0713, + "theoretical_loss": 3.589491224603571, + "tokens_seen": 1301282816 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006116995666827155, + "loss": 0.0715, + "theoretical_loss": 3.5894262459214366, + "tokens_seen": 1301544960 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006116193227411331, + "loss": 0.0698, + "theoretical_loss": 3.589361283988948, + "tokens_seen": 1301807104 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006115390787995506, + "loss": 0.0711, + "theoretical_loss": 3.589296338798418, + "tokens_seen": 1302069248 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006114588348579683, + "loss": 0.0724, + "theoretical_loss": 3.5892314103421596, + "tokens_seen": 1302331392 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006113785909163858, + "loss": 0.0712, + "theoretical_loss": 3.589166498612496, + "tokens_seen": 1302593536 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006112983469748035, + "loss": 0.0719, + "theoretical_loss": 3.589101603601752, + "tokens_seen": 1302855680 + }, + { + "epoch": 0.39, + "learning_rate": 0.000611218103033221, + "loss": 0.0733, + "theoretical_loss": 3.58903672530226, + "tokens_seen": 1303117824 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006111378590916385, + "loss": 0.0721, + "theoretical_loss": 3.5889718637063552, + "tokens_seen": 1303379968 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006110576151500562, + "loss": 0.0703, + "theoretical_loss": 3.5889070188063794, + "tokens_seen": 1303642112 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006109773712084738, + "loss": 0.0733, + "theoretical_loss": 3.5888421905946783, + "tokens_seen": 1303904256 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0006969193345867097, + "objective/train/docs_used": 476908, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3389830589294434, + "objective/train/original_loss": 1.3389828205108643, + "objective/train/theoretical_loss": 3.588777379063604, + "objective/train/tokens_used": 1324626400, + "objective/train/value_avg": -0.00836181640625, + "objective/train/value_loss": 0.0003036280977539718, + "objective/train/value_max": -3.24249267578125e-05, + "objective/train/value_min": -0.76318359375, + "objective/train/value_reward_corr": 0.5790845446495612, + "objective/train/value_std": 0.01256561279296875, + "objective/train/weight_avg": 1.0008283853530884, + "objective/train/weighted_lm_loss": 1.3403379917144775, + "objective/train/weights_max": 1.2200902700424194, + "objective/train/weights_min": 0.36939677596092224, + "theoretical_loss": 3.588777379063604, + "tokens_seen": 1304166400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006108971272668914, + "loss": 0.0701, + "theoretical_loss": 3.588777379063604, + "tokens_seen": 1304166400 + }, + { + "epoch": 0.4, + "learning_rate": 0.000610816883325309, + "loss": 0.074, + "theoretical_loss": 3.5887125842055116, + "tokens_seen": 1304428544 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006107366393837266, + "loss": 0.07, + "theoretical_loss": 3.588647806012765, + "tokens_seen": 1304690688 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006106563954421441, + "loss": 0.0712, + "theoretical_loss": 3.588583044477728, + "tokens_seen": 1304952832 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006105761515005617, + "loss": 0.0742, + "theoretical_loss": 3.5885182995927734, + "tokens_seen": 1305214976 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006104959075589793, + "loss": 0.0712, + "theoretical_loss": 3.5884535713502776, + "tokens_seen": 1305477120 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006104156636173968, + "loss": 0.0702, + "theoretical_loss": 3.588388859742622, + "tokens_seen": 1305739264 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006103354196758145, + "loss": 0.0678, + "theoretical_loss": 3.5883241647621933, + "tokens_seen": 1306001408 + }, + { + "epoch": 0.4, + "learning_rate": 0.000610255175734232, + "loss": 0.0701, + "theoretical_loss": 3.588259486401383, + "tokens_seen": 1306263552 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006101749317926497, + "loss": 0.0711, + "theoretical_loss": 3.5881948246525877, + "tokens_seen": 1306525696 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006100946878510673, + "loss": 0.071, + "theoretical_loss": 3.588130179508209, + "tokens_seen": 1306787840 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006100144439094848, + "loss": 0.0717, + "theoretical_loss": 3.5880655509606534, + "tokens_seen": 1307049984 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006099341999679025, + "loss": 0.0734, + "theoretical_loss": 3.5880009390023324, + "tokens_seen": 1307312128 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -7.018262112978846e-05, + "objective/train/docs_used": 478148, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4914653301239014, + "objective/train/original_loss": 1.4914653301239014, + "objective/train/theoretical_loss": 3.587968639241765, + "objective/train/tokens_used": 1327903200, + "objective/train/value_avg": -0.006618499755859375, + "objective/train/value_loss": 0.00026124040596187115, + "objective/train/value_max": -6.920099258422852e-05, + "objective/train/value_min": -0.21337890625, + "objective/train/value_reward_corr": 0.6777787710933085, + "objective/train/value_std": 0.01326751708984375, + "objective/train/weight_avg": 1.0000460147857666, + "objective/train/weighted_lm_loss": 1.4916276931762695, + "objective/train/weights_max": 1.1570359468460083, + "objective/train/weights_min": 0.36851781606674194, + "theoretical_loss": 3.587968639241765, + "tokens_seen": 1307443200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00060985395602632, + "loss": 0.0694, + "theoretical_loss": 3.5879363436256626, + "tokens_seen": 1307574272 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006097737120847376, + "loss": 0.0723, + "theoretical_loss": 3.587871764823066, + "tokens_seen": 1307836416 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006096934681431552, + "loss": 0.0729, + "theoretical_loss": 3.5878072025869683, + "tokens_seen": 1308098560 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006096132242015728, + "loss": 0.0698, + "theoretical_loss": 3.5877426569098017, + "tokens_seen": 1308360704 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006095329802599903, + "loss": 0.069, + "theoretical_loss": 3.5876781277840024, + "tokens_seen": 1308622848 + }, + { + "epoch": 0.4, + "learning_rate": 0.000609452736318408, + "loss": 0.0712, + "theoretical_loss": 3.5876136152020117, + "tokens_seen": 1308884992 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006093724923768256, + "loss": 0.0714, + "theoretical_loss": 3.587549119156276, + "tokens_seen": 1309147136 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006092922484352431, + "loss": 0.0748, + "theoretical_loss": 3.5874846396392472, + "tokens_seen": 1309409280 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006092120044936608, + "loss": 0.0719, + "theoretical_loss": 3.5874201766433815, + "tokens_seen": 1309671424 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006091317605520783, + "loss": 0.0728, + "theoretical_loss": 3.58735573016114, + "tokens_seen": 1309933568 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006090515166104959, + "loss": 0.0724, + "theoretical_loss": 3.5872913001849884, + "tokens_seen": 1310195712 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006089712726689135, + "loss": 0.0693, + "theoretical_loss": 3.5872268867073993, + "tokens_seen": 1310457856 + }, + { + "debugging/Compilability": 0.9, + "debugging/distinct-1-grams": 0.7732995475821178, + "debugging/entropy-1-grams": 5.431960387820077, + "debugging/length": 501.4, + "debugging/num_segments": 10, + "debugging/raw_token_scores_avg": 0.009144442155957222, + "debugging/raw_token_scores_std": 0.03945710137486458, + "debugging/score": 0.007191963134632398, + "debugging/score_std": 0.006508080924945895, + "epoch": 0.4, + "objective/train/advantage_avg": 0.0012943374458700418, + "objective/train/docs_used": 479375, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4710427522659302, + "objective/train/original_loss": 1.4710428714752197, + "objective/train/theoretical_loss": 3.587162489720847, + "objective/train/tokens_used": 1331180000, + "objective/train/value_avg": -0.01043701171875, + "objective/train/value_loss": 0.0005823342362418771, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.89501953125, + "objective/train/value_reward_corr": 0.7931646191164867, + "objective/train/value_std": 0.029541015625, + "objective/train/weight_avg": 1.0015616416931152, + "objective/train/weighted_lm_loss": 1.4743027687072754, + "objective/train/weights_max": 2.00335693359375, + "objective/train/weights_min": 0.40475231409072876, + "theoretical_loss": 3.587162489720847, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006088910287273311, + "loss": 0.0695, + "theoretical_loss": 3.587162489720847, + "tokens_seen": 1310720000 + } + ], + "max_steps": 12588, + "num_train_epochs": 9223372036854775807, + "total_flos": 6.6891364171776e+17, + "trial_name": null, + "trial_params": null +}