| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 400, |
| "global_step": 23658, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00507227998985544, |
| "grad_norm": 9.971283912658691, |
| "learning_rate": 5.6338028169014084e-06, |
| "loss": 9.5562, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01014455997971088, |
| "grad_norm": 7.090546607971191, |
| "learning_rate": 1.1267605633802817e-05, |
| "loss": 5.3074, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01521683996956632, |
| "grad_norm": 3.03216290473938, |
| "learning_rate": 1.6901408450704224e-05, |
| "loss": 4.6246, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.02028911995942176, |
| "grad_norm": 2.908621311187744, |
| "learning_rate": 2.2535211267605634e-05, |
| "loss": 4.4839, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0253613999492772, |
| "grad_norm": 4.422246932983398, |
| "learning_rate": 2.8169014084507046e-05, |
| "loss": 4.4611, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03043367993913264, |
| "grad_norm": 5.368454933166504, |
| "learning_rate": 3.380281690140845e-05, |
| "loss": 4.4124, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03550595992898808, |
| "grad_norm": 6.5281853675842285, |
| "learning_rate": 3.943661971830986e-05, |
| "loss": 4.369, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.04057823991884352, |
| "grad_norm": 3.8368284702301025, |
| "learning_rate": 4.507042253521127e-05, |
| "loss": 4.3359, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.04565051990869896, |
| "grad_norm": 3.0560672283172607, |
| "learning_rate": 5.070422535211268e-05, |
| "loss": 4.3045, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0507227998985544, |
| "grad_norm": 3.674811363220215, |
| "learning_rate": 5.633802816901409e-05, |
| "loss": 4.2641, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0507227998985544, |
| "eval_action_accuracy": 0.12628173828125, |
| "eval_loss": 4.223208427429199, |
| "eval_runtime": 35.638, |
| "eval_samples_per_second": 57.467, |
| "eval_steps_per_second": 3.592, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05579507988840984, |
| "grad_norm": 3.5248770713806152, |
| "learning_rate": 6.197183098591549e-05, |
| "loss": 4.1921, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.06086735987826528, |
| "grad_norm": 3.289992570877075, |
| "learning_rate": 6.76056338028169e-05, |
| "loss": 4.114, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.06593963986812072, |
| "grad_norm": 2.6550910472869873, |
| "learning_rate": 7.323943661971832e-05, |
| "loss": 4.072, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.07101191985797616, |
| "grad_norm": 2.488555669784546, |
| "learning_rate": 7.887323943661972e-05, |
| "loss": 4.0293, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0760841998478316, |
| "grad_norm": 2.4804906845092773, |
| "learning_rate": 8.450704225352113e-05, |
| "loss": 3.9972, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08115647983768703, |
| "grad_norm": 2.8795552253723145, |
| "learning_rate": 9.014084507042254e-05, |
| "loss": 3.9798, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.08622875982754248, |
| "grad_norm": 2.2428362369537354, |
| "learning_rate": 9.577464788732394e-05, |
| "loss": 3.9605, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.09130103981739791, |
| "grad_norm": 1.8302778005599976, |
| "learning_rate": 9.999995314564068e-05, |
| "loss": 3.9435, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.09637331980725336, |
| "grad_norm": 2.1241495609283447, |
| "learning_rate": 9.999882864540755e-05, |
| "loss": 3.931, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.1014455997971088, |
| "grad_norm": 1.8298007249832153, |
| "learning_rate": 9.999620484431368e-05, |
| "loss": 3.9123, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1014455997971088, |
| "eval_action_accuracy": 0.13214111328125, |
| "eval_loss": 3.9098541736602783, |
| "eval_runtime": 30.3817, |
| "eval_samples_per_second": 67.409, |
| "eval_steps_per_second": 4.213, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10651787978696424, |
| "grad_norm": 1.7416859865188599, |
| "learning_rate": 9.999208182103823e-05, |
| "loss": 3.9008, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.11159015977681969, |
| "grad_norm": 1.6751515865325928, |
| "learning_rate": 9.998645969921714e-05, |
| "loss": 3.8869, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.11666243976667512, |
| "grad_norm": 1.5095834732055664, |
| "learning_rate": 9.997933864743942e-05, |
| "loss": 3.8741, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.12173471975653057, |
| "grad_norm": 1.510201334953308, |
| "learning_rate": 9.997071887924204e-05, |
| "loss": 3.8671, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.126806999746386, |
| "grad_norm": 1.5421382188796997, |
| "learning_rate": 9.996060065310357e-05, |
| "loss": 3.8587, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.13187927973624144, |
| "grad_norm": 1.5371688604354858, |
| "learning_rate": 9.994898427243645e-05, |
| "loss": 3.8549, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.13695155972609688, |
| "grad_norm": 1.5408425331115723, |
| "learning_rate": 9.99358700855778e-05, |
| "loss": 3.8422, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.1420238397159523, |
| "grad_norm": 1.7944141626358032, |
| "learning_rate": 9.99212584857791e-05, |
| "loss": 3.838, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.14709611970580777, |
| "grad_norm": 1.500591516494751, |
| "learning_rate": 9.990514991119424e-05, |
| "loss": 3.8274, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.1521683996956632, |
| "grad_norm": 1.2871513366699219, |
| "learning_rate": 9.988754484486657e-05, |
| "loss": 3.8198, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1521683996956632, |
| "eval_action_accuracy": 0.133544921875, |
| "eval_loss": 3.82753849029541, |
| "eval_runtime": 252.3214, |
| "eval_samples_per_second": 8.117, |
| "eval_steps_per_second": 0.507, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.15724067968551864, |
| "grad_norm": 1.2817726135253906, |
| "learning_rate": 9.98684438147143e-05, |
| "loss": 3.8133, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.16231295967537407, |
| "grad_norm": 1.4230026006698608, |
| "learning_rate": 9.984784739351463e-05, |
| "loss": 3.8104, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.16738523966522953, |
| "grad_norm": 1.4561232328414917, |
| "learning_rate": 9.982575619888667e-05, |
| "loss": 3.8052, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.17245751965508496, |
| "grad_norm": 1.2932708263397217, |
| "learning_rate": 9.980217089327291e-05, |
| "loss": 3.8026, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.1775297996449404, |
| "grad_norm": 1.2332886457443237, |
| "learning_rate": 9.977709218391926e-05, |
| "loss": 3.795, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.18260207963479583, |
| "grad_norm": 1.2658277750015259, |
| "learning_rate": 9.975052082285399e-05, |
| "loss": 3.7948, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.1876743596246513, |
| "grad_norm": 1.2154490947723389, |
| "learning_rate": 9.972245760686504e-05, |
| "loss": 3.7817, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.19274663961450672, |
| "grad_norm": 1.2812718152999878, |
| "learning_rate": 9.969290337747618e-05, |
| "loss": 3.7757, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.19781891960436215, |
| "grad_norm": 1.3050552606582642, |
| "learning_rate": 9.966185902092184e-05, |
| "loss": 3.778, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.2028911995942176, |
| "grad_norm": 1.2343411445617676, |
| "learning_rate": 9.962932546812042e-05, |
| "loss": 3.7715, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2028911995942176, |
| "eval_action_accuracy": 0.134033203125, |
| "eval_loss": 3.7644243240356445, |
| "eval_runtime": 201.2869, |
| "eval_samples_per_second": 10.175, |
| "eval_steps_per_second": 0.636, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.20796347958407305, |
| "grad_norm": 1.263569712638855, |
| "learning_rate": 9.959530369464645e-05, |
| "loss": 3.7654, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.21303575957392848, |
| "grad_norm": 1.182700276374817, |
| "learning_rate": 9.955979472070133e-05, |
| "loss": 3.7597, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.2181080395637839, |
| "grad_norm": 1.1188846826553345, |
| "learning_rate": 9.952279961108268e-05, |
| "loss": 3.7569, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.22318031955363937, |
| "grad_norm": 1.126511573791504, |
| "learning_rate": 9.94843194751525e-05, |
| "loss": 3.7503, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.2282525995434948, |
| "grad_norm": 1.1715574264526367, |
| "learning_rate": 9.944435546680384e-05, |
| "loss": 3.7482, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.23332487953335024, |
| "grad_norm": 1.2102463245391846, |
| "learning_rate": 9.940290878442622e-05, |
| "loss": 3.7484, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.23839715952320567, |
| "grad_norm": 1.0691642761230469, |
| "learning_rate": 9.935998067086967e-05, |
| "loss": 3.7423, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.24346943951306113, |
| "grad_norm": 1.204828143119812, |
| "learning_rate": 9.931557241340752e-05, |
| "loss": 3.7366, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.24854171950291656, |
| "grad_norm": 1.1264965534210205, |
| "learning_rate": 9.926968534369774e-05, |
| "loss": 3.7337, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.253613999492772, |
| "grad_norm": 1.0742850303649902, |
| "learning_rate": 9.922232083774302e-05, |
| "loss": 3.7288, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.253613999492772, |
| "eval_action_accuracy": 0.1356201171875, |
| "eval_loss": 3.7233614921569824, |
| "eval_runtime": 192.5766, |
| "eval_samples_per_second": 10.635, |
| "eval_steps_per_second": 0.665, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.25868627948262746, |
| "grad_norm": 1.2803360223770142, |
| "learning_rate": 9.917348031584952e-05, |
| "loss": 3.7233, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.2637585594724829, |
| "grad_norm": 1.0941964387893677, |
| "learning_rate": 9.91231652425843e-05, |
| "loss": 3.7238, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.2688308394623383, |
| "grad_norm": 1.1122888326644897, |
| "learning_rate": 9.907137712673133e-05, |
| "loss": 3.7228, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.27390311945219376, |
| "grad_norm": 1.1875488758087158, |
| "learning_rate": 9.901811752124635e-05, |
| "loss": 3.7163, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.2789753994420492, |
| "grad_norm": 0.9829818606376648, |
| "learning_rate": 9.896338802321021e-05, |
| "loss": 3.7091, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.2840476794319046, |
| "grad_norm": 1.1234384775161743, |
| "learning_rate": 9.890719027378103e-05, |
| "loss": 3.7129, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.2891199594217601, |
| "grad_norm": 0.9604063630104065, |
| "learning_rate": 9.884952595814497e-05, |
| "loss": 3.7119, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.29419223941161554, |
| "grad_norm": 1.0036152601242065, |
| "learning_rate": 9.87903968054657e-05, |
| "loss": 3.7038, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.299264519401471, |
| "grad_norm": 0.9695063829421997, |
| "learning_rate": 9.872980458883257e-05, |
| "loss": 3.7012, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.3043367993913264, |
| "grad_norm": 1.0221703052520752, |
| "learning_rate": 9.866775112520734e-05, |
| "loss": 3.6981, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3043367993913264, |
| "eval_action_accuracy": 0.1356201171875, |
| "eval_loss": 3.699704647064209, |
| "eval_runtime": 94.9575, |
| "eval_samples_per_second": 21.568, |
| "eval_steps_per_second": 1.348, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.30940907938118184, |
| "grad_norm": 0.992612361907959, |
| "learning_rate": 9.86042382753699e-05, |
| "loss": 3.6976, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.3144813593710373, |
| "grad_norm": 1.0093075037002563, |
| "learning_rate": 9.853926794386223e-05, |
| "loss": 3.6934, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.3195536393608927, |
| "grad_norm": 1.016074299812317, |
| "learning_rate": 9.847284207893149e-05, |
| "loss": 3.6916, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.32462591935074814, |
| "grad_norm": 0.9795224070549011, |
| "learning_rate": 9.840496267247142e-05, |
| "loss": 3.6842, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.3296981993406036, |
| "grad_norm": 0.9658825397491455, |
| "learning_rate": 9.83356317599628e-05, |
| "loss": 3.6824, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.33477047933045906, |
| "grad_norm": 0.9373340010643005, |
| "learning_rate": 9.826485142041225e-05, |
| "loss": 3.6843, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.3398427593203145, |
| "grad_norm": 0.9244083166122437, |
| "learning_rate": 9.819262377628999e-05, |
| "loss": 3.6786, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.3449150393101699, |
| "grad_norm": 0.9753099083900452, |
| "learning_rate": 9.811895099346613e-05, |
| "loss": 3.6793, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.34998731930002536, |
| "grad_norm": 0.91774582862854, |
| "learning_rate": 9.804383528114575e-05, |
| "loss": 3.6759, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.3550595992898808, |
| "grad_norm": 0.9600816965103149, |
| "learning_rate": 9.796727889180268e-05, |
| "loss": 3.6719, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3550595992898808, |
| "eval_action_accuracy": 0.136962890625, |
| "eval_loss": 3.670527458190918, |
| "eval_runtime": 255.8314, |
| "eval_samples_per_second": 8.005, |
| "eval_steps_per_second": 0.5, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3601318792797362, |
| "grad_norm": 0.9501272439956665, |
| "learning_rate": 9.788928412111189e-05, |
| "loss": 3.6677, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.36520415926959165, |
| "grad_norm": 0.9030894041061401, |
| "learning_rate": 9.780985330788073e-05, |
| "loss": 3.6643, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.37027643925944714, |
| "grad_norm": 0.9130657911300659, |
| "learning_rate": 9.772898883397871e-05, |
| "loss": 3.6622, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.3753487192493026, |
| "grad_norm": 0.9452864527702332, |
| "learning_rate": 9.764669312426617e-05, |
| "loss": 3.6627, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.380420999239158, |
| "grad_norm": 0.8997258543968201, |
| "learning_rate": 9.756296864652149e-05, |
| "loss": 3.6584, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.38549327922901344, |
| "grad_norm": 0.9096789956092834, |
| "learning_rate": 9.74778179113671e-05, |
| "loss": 3.6542, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.3905655592188689, |
| "grad_norm": 0.8591508269309998, |
| "learning_rate": 9.739124347219422e-05, |
| "loss": 3.6526, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.3956378392087243, |
| "grad_norm": 0.9069055318832397, |
| "learning_rate": 9.73032479250863e-05, |
| "loss": 3.6522, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.40071011919857974, |
| "grad_norm": 0.8761900067329407, |
| "learning_rate": 9.721383390874117e-05, |
| "loss": 3.6511, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.4057823991884352, |
| "grad_norm": 0.8452698588371277, |
| "learning_rate": 9.712300410439186e-05, |
| "loss": 3.6458, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4057823991884352, |
| "eval_action_accuracy": 0.1363525390625, |
| "eval_loss": 3.6467831134796143, |
| "eval_runtime": 225.4966, |
| "eval_samples_per_second": 9.082, |
| "eval_steps_per_second": 0.568, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.41085467917829066, |
| "grad_norm": 0.9788098931312561, |
| "learning_rate": 9.703076123572625e-05, |
| "loss": 3.6447, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.4159269591681461, |
| "grad_norm": 0.8087634444236755, |
| "learning_rate": 9.69371080688054e-05, |
| "loss": 3.6406, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.4209992391580015, |
| "grad_norm": 0.8617214560508728, |
| "learning_rate": 9.684204741198056e-05, |
| "loss": 3.64, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.42607151914785696, |
| "grad_norm": 0.8623685240745544, |
| "learning_rate": 9.674558211580902e-05, |
| "loss": 3.6414, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.4311437991377124, |
| "grad_norm": 0.8432904481887817, |
| "learning_rate": 9.664771507296857e-05, |
| "loss": 3.6386, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.4362160791275678, |
| "grad_norm": 0.8696951270103455, |
| "learning_rate": 9.654844921817078e-05, |
| "loss": 3.6364, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.44128835911742326, |
| "grad_norm": 0.9080593585968018, |
| "learning_rate": 9.644778752807306e-05, |
| "loss": 3.6295, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.44636063910727874, |
| "grad_norm": 0.8657037019729614, |
| "learning_rate": 9.634573302118925e-05, |
| "loss": 3.6337, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.4514329190971342, |
| "grad_norm": 0.8938254117965698, |
| "learning_rate": 9.624228875779928e-05, |
| "loss": 3.6296, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.4565051990869896, |
| "grad_norm": 0.9272467494010925, |
| "learning_rate": 9.613745783985724e-05, |
| "loss": 3.6274, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.4565051990869896, |
| "eval_action_accuracy": 0.13897705078125, |
| "eval_loss": 3.62253475189209, |
| "eval_runtime": 233.2421, |
| "eval_samples_per_second": 8.781, |
| "eval_steps_per_second": 0.549, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.46157747907684504, |
| "grad_norm": 0.9175124168395996, |
| "learning_rate": 9.603124341089854e-05, |
| "loss": 3.625, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.4666497590667005, |
| "grad_norm": 0.8574010133743286, |
| "learning_rate": 9.592364865594543e-05, |
| "loss": 3.6249, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.4717220390565559, |
| "grad_norm": 0.926366925239563, |
| "learning_rate": 9.58146768014117e-05, |
| "loss": 3.6219, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.47679431904641134, |
| "grad_norm": 0.9614307284355164, |
| "learning_rate": 9.570433111500582e-05, |
| "loss": 3.6162, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.48186659903626683, |
| "grad_norm": 0.8699825406074524, |
| "learning_rate": 9.559261490563294e-05, |
| "loss": 3.6203, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.48693887902612226, |
| "grad_norm": 0.9095637202262878, |
| "learning_rate": 9.547953152329573e-05, |
| "loss": 3.6145, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.4920111590159777, |
| "grad_norm": 0.8285691142082214, |
| "learning_rate": 9.536508435899388e-05, |
| "loss": 3.6128, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.4970834390058331, |
| "grad_norm": 0.8655169010162354, |
| "learning_rate": 9.524927684462242e-05, |
| "loss": 3.6122, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.5021557189956886, |
| "grad_norm": 0.7997918128967285, |
| "learning_rate": 9.513211245286883e-05, |
| "loss": 3.6087, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.507227998985544, |
| "grad_norm": 0.8405264616012573, |
| "learning_rate": 9.501359469710889e-05, |
| "loss": 3.6071, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.507227998985544, |
| "eval_action_accuracy": 0.13909912109375, |
| "eval_loss": 3.599052667617798, |
| "eval_runtime": 150.3774, |
| "eval_samples_per_second": 13.619, |
| "eval_steps_per_second": 0.851, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5123002789753994, |
| "grad_norm": 0.8451080918312073, |
| "learning_rate": 9.489372713130131e-05, |
| "loss": 3.6066, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.5173725589652549, |
| "grad_norm": 0.8817796111106873, |
| "learning_rate": 9.477251334988122e-05, |
| "loss": 3.6002, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.5224448389551103, |
| "grad_norm": 0.8508093953132629, |
| "learning_rate": 9.464995698765227e-05, |
| "loss": 3.6005, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.5275171189449658, |
| "grad_norm": 0.8357366919517517, |
| "learning_rate": 9.452606171967777e-05, |
| "loss": 3.6006, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.5325893989348212, |
| "grad_norm": 0.8193793296813965, |
| "learning_rate": 9.440083126117039e-05, |
| "loss": 3.5962, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5376616789246766, |
| "grad_norm": 0.8299962878227234, |
| "learning_rate": 9.427426936738078e-05, |
| "loss": 3.5972, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.5427339589145321, |
| "grad_norm": 0.8073312640190125, |
| "learning_rate": 9.414637983348498e-05, |
| "loss": 3.5979, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.5478062389043875, |
| "grad_norm": 0.8564472794532776, |
| "learning_rate": 9.401716649447059e-05, |
| "loss": 3.597, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.552878518894243, |
| "grad_norm": 0.8052465319633484, |
| "learning_rate": 9.388663322502182e-05, |
| "loss": 3.5952, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.5579507988840984, |
| "grad_norm": 0.8336448669433594, |
| "learning_rate": 9.37547839394032e-05, |
| "loss": 3.5849, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.5579507988840984, |
| "eval_action_accuracy": 0.13922119140625, |
| "eval_loss": 3.5810041427612305, |
| "eval_runtime": 198.5321, |
| "eval_samples_per_second": 10.316, |
| "eval_steps_per_second": 0.645, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.5630230788739539, |
| "grad_norm": 0.7685551047325134, |
| "learning_rate": 9.362162259134232e-05, |
| "loss": 3.5918, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.5680953588638092, |
| "grad_norm": 0.8058731555938721, |
| "learning_rate": 9.348715317391121e-05, |
| "loss": 3.5858, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.5731676388536647, |
| "grad_norm": 0.8100183010101318, |
| "learning_rate": 9.335137971940659e-05, |
| "loss": 3.5839, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.5782399188435202, |
| "grad_norm": 0.781609833240509, |
| "learning_rate": 9.321430629922897e-05, |
| "loss": 3.5818, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.5833121988333756, |
| "grad_norm": 0.8029278516769409, |
| "learning_rate": 9.307593702376061e-05, |
| "loss": 3.5768, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.5883844788232311, |
| "grad_norm": 0.7732046246528625, |
| "learning_rate": 9.293627604224217e-05, |
| "loss": 3.5787, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.5934567588130865, |
| "grad_norm": 0.8962281942367554, |
| "learning_rate": 9.279532754264837e-05, |
| "loss": 3.5776, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.598529038802942, |
| "grad_norm": 0.7350103855133057, |
| "learning_rate": 9.265309575156235e-05, |
| "loss": 3.5764, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.6036013187927973, |
| "grad_norm": 0.7271069288253784, |
| "learning_rate": 9.250958493404897e-05, |
| "loss": 3.5736, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.6086735987826528, |
| "grad_norm": 0.7939120531082153, |
| "learning_rate": 9.236479939352692e-05, |
| "loss": 3.5729, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6086735987826528, |
| "eval_action_accuracy": 0.138916015625, |
| "eval_loss": 3.5635457038879395, |
| "eval_runtime": 240.0733, |
| "eval_samples_per_second": 8.531, |
| "eval_steps_per_second": 0.533, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6137458787725082, |
| "grad_norm": 0.756108820438385, |
| "learning_rate": 9.221874347163956e-05, |
| "loss": 3.573, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.6188181587623637, |
| "grad_norm": 0.7624183893203735, |
| "learning_rate": 9.207142154812496e-05, |
| "loss": 3.5688, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.6238904387522192, |
| "grad_norm": 0.7523402571678162, |
| "learning_rate": 9.192283804068427e-05, |
| "loss": 3.5684, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.6289627187420745, |
| "grad_norm": 0.780206024646759, |
| "learning_rate": 9.177299740484952e-05, |
| "loss": 3.5652, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.63403499873193, |
| "grad_norm": 0.7238260507583618, |
| "learning_rate": 9.162190413384988e-05, |
| "loss": 3.5691, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6391072787217854, |
| "grad_norm": 0.8539979457855225, |
| "learning_rate": 9.146956275847689e-05, |
| "loss": 3.5625, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.6441795587116409, |
| "grad_norm": 0.8123564720153809, |
| "learning_rate": 9.131597784694868e-05, |
| "loss": 3.5631, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.6492518387014963, |
| "grad_norm": 0.7440022230148315, |
| "learning_rate": 9.116115400477294e-05, |
| "loss": 3.5569, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.6543241186913518, |
| "grad_norm": 0.7737935185432434, |
| "learning_rate": 9.100509587460883e-05, |
| "loss": 3.5575, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.6593963986812073, |
| "grad_norm": 0.7868227958679199, |
| "learning_rate": 9.084780813612779e-05, |
| "loss": 3.5549, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.6593963986812073, |
| "eval_action_accuracy": 0.138671875, |
| "eval_loss": 3.54689621925354, |
| "eval_runtime": 275.3848, |
| "eval_samples_per_second": 7.437, |
| "eval_steps_per_second": 0.465, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.6644686786710626, |
| "grad_norm": 0.7422760128974915, |
| "learning_rate": 9.06892955058731e-05, |
| "loss": 3.5545, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.6695409586609181, |
| "grad_norm": 0.7535783648490906, |
| "learning_rate": 9.052956273711861e-05, |
| "loss": 3.5478, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.6746132386507735, |
| "grad_norm": 0.7200169563293457, |
| "learning_rate": 9.036861461972607e-05, |
| "loss": 3.5501, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.679685518640629, |
| "grad_norm": 0.8011751770973206, |
| "learning_rate": 9.020645598000158e-05, |
| "loss": 3.5506, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.6847577986304844, |
| "grad_norm": 0.7450407147407532, |
| "learning_rate": 9.004309168055081e-05, |
| "loss": 3.5499, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.6898300786203398, |
| "grad_norm": 0.7414825558662415, |
| "learning_rate": 8.987852662013321e-05, |
| "loss": 3.5465, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.6949023586101953, |
| "grad_norm": 0.7712804079055786, |
| "learning_rate": 8.971276573351513e-05, |
| "loss": 3.5432, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.6999746386000507, |
| "grad_norm": 0.745634913444519, |
| "learning_rate": 8.954581399132183e-05, |
| "loss": 3.544, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.7050469185899062, |
| "grad_norm": 0.7983254790306091, |
| "learning_rate": 8.937767639988839e-05, |
| "loss": 3.5411, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.7101191985797616, |
| "grad_norm": 0.7511922717094421, |
| "learning_rate": 8.920835800110964e-05, |
| "loss": 3.539, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7101191985797616, |
| "eval_action_accuracy": 0.1375732421875, |
| "eval_loss": 3.526184558868408, |
| "eval_runtime": 188.9796, |
| "eval_samples_per_second": 10.837, |
| "eval_steps_per_second": 0.677, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7151914785696171, |
| "grad_norm": 0.74385005235672, |
| "learning_rate": 8.903786387228895e-05, |
| "loss": 3.5383, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.7202637585594724, |
| "grad_norm": 0.7223657369613647, |
| "learning_rate": 8.886619912598599e-05, |
| "loss": 3.5411, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.7253360385493279, |
| "grad_norm": 0.7494250535964966, |
| "learning_rate": 8.869336890986338e-05, |
| "loss": 3.5366, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.7304083185391833, |
| "grad_norm": 0.752515971660614, |
| "learning_rate": 8.851937840653234e-05, |
| "loss": 3.5378, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.7354805985290388, |
| "grad_norm": 0.783234715461731, |
| "learning_rate": 8.83442328333974e-05, |
| "loss": 3.5354, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.7405528785188943, |
| "grad_norm": 0.8204740881919861, |
| "learning_rate": 8.816793744249971e-05, |
| "loss": 3.5301, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.7456251585087497, |
| "grad_norm": 0.7523532509803772, |
| "learning_rate": 8.799049752035975e-05, |
| "loss": 3.5323, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.7506974384986052, |
| "grad_norm": 0.7469347715377808, |
| "learning_rate": 8.781191838781876e-05, |
| "loss": 3.5265, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.7557697184884605, |
| "grad_norm": 0.760491669178009, |
| "learning_rate": 8.76322053998791e-05, |
| "loss": 3.525, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.760841998478316, |
| "grad_norm": 0.739592969417572, |
| "learning_rate": 8.745136394554381e-05, |
| "loss": 3.5243, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.760841998478316, |
| "eval_action_accuracy": 0.140625, |
| "eval_loss": 3.5108320713043213, |
| "eval_runtime": 219.3428, |
| "eval_samples_per_second": 9.337, |
| "eval_steps_per_second": 0.584, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7659142784681714, |
| "grad_norm": 0.7475005388259888, |
| "learning_rate": 8.726939944765485e-05, |
| "loss": 3.5238, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.7709865584580269, |
| "grad_norm": 0.800801157951355, |
| "learning_rate": 8.708631736273066e-05, |
| "loss": 3.519, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.7760588384478824, |
| "grad_norm": 0.7466565370559692, |
| "learning_rate": 8.690212318080235e-05, |
| "loss": 3.5242, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.7811311184377377, |
| "grad_norm": 0.6981168389320374, |
| "learning_rate": 8.671682242524928e-05, |
| "loss": 3.5167, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.7862033984275932, |
| "grad_norm": 0.7748053669929504, |
| "learning_rate": 8.653042065263326e-05, |
| "loss": 3.515, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.7912756784174486, |
| "grad_norm": 0.7502596974372864, |
| "learning_rate": 8.634292345253198e-05, |
| "loss": 3.517, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.7963479584073041, |
| "grad_norm": 0.7047079801559448, |
| "learning_rate": 8.615433644737143e-05, |
| "loss": 3.516, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.8014202383971595, |
| "grad_norm": 0.7337208986282349, |
| "learning_rate": 8.59646652922573e-05, |
| "loss": 3.5121, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.806492518387015, |
| "grad_norm": 0.7593693733215332, |
| "learning_rate": 8.577391567480533e-05, |
| "loss": 3.5129, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.8115647983768705, |
| "grad_norm": 0.7421781420707703, |
| "learning_rate": 8.558209331497084e-05, |
| "loss": 3.5113, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8115647983768705, |
| "eval_action_accuracy": 0.1082763671875, |
| "eval_loss": 3.5035910606384277, |
| "eval_runtime": 197.1129, |
| "eval_samples_per_second": 10.39, |
| "eval_steps_per_second": 0.649, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8166370783667258, |
| "grad_norm": 0.748630166053772, |
| "learning_rate": 8.538920396487719e-05, |
| "loss": 3.5104, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.8217093583565813, |
| "grad_norm": 0.7239616513252258, |
| "learning_rate": 8.519525340864324e-05, |
| "loss": 3.5074, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.8267816383464367, |
| "grad_norm": 0.7293316721916199, |
| "learning_rate": 8.500024746220996e-05, |
| "loss": 3.5049, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.8318539183362922, |
| "grad_norm": 0.7635191082954407, |
| "learning_rate": 8.4804191973166e-05, |
| "loss": 3.5058, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.8369261983261476, |
| "grad_norm": 0.7280616164207458, |
| "learning_rate": 8.46070928205724e-05, |
| "loss": 3.5009, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.841998478316003, |
| "grad_norm": 0.7468705177307129, |
| "learning_rate": 8.440895591478614e-05, |
| "loss": 3.5002, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.8470707583058584, |
| "grad_norm": 0.7745243310928345, |
| "learning_rate": 8.420978719728311e-05, |
| "loss": 3.4971, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.8521430382957139, |
| "grad_norm": 0.7581838369369507, |
| "learning_rate": 8.400959264047985e-05, |
| "loss": 3.5001, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.8572153182855694, |
| "grad_norm": 0.7228087186813354, |
| "learning_rate": 8.380837824755439e-05, |
| "loss": 3.5033, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.8622875982754248, |
| "grad_norm": 9.996931076049805, |
| "learning_rate": 8.360615005226632e-05, |
| "loss": 5.4383, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.8622875982754248, |
| "eval_action_accuracy": 0.00238037109375, |
| "eval_loss": 6.149868011474609, |
| "eval_runtime": 135.6492, |
| "eval_samples_per_second": 15.098, |
| "eval_steps_per_second": 0.944, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.8673598782652803, |
| "grad_norm": 2.9023468494415283, |
| "learning_rate": 8.340291411877589e-05, |
| "loss": 5.3437, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.8724321582551356, |
| "grad_norm": 337.53240966796875, |
| "learning_rate": 8.319867654146204e-05, |
| "loss": 4.8335, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.8775044382449911, |
| "grad_norm": 0.6138980984687805, |
| "learning_rate": 8.299344344473979e-05, |
| "loss": 4.7753, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.8825767182348465, |
| "grad_norm": 0.6990567445755005, |
| "learning_rate": 8.278722098287644e-05, |
| "loss": 4.4881, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.887648998224702, |
| "grad_norm": 0.48511803150177, |
| "learning_rate": 8.25800153398072e-05, |
| "loss": 4.4721, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.8927212782145575, |
| "grad_norm": 2.293332815170288, |
| "learning_rate": 8.23718327289496e-05, |
| "loss": 4.4603, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.8977935582044129, |
| "grad_norm": 2.738636016845703, |
| "learning_rate": 8.216267939301723e-05, |
| "loss": 4.4459, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.9028658381942684, |
| "grad_norm": 4.165946960449219, |
| "learning_rate": 8.195256160383256e-05, |
| "loss": 4.4312, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.9079381181841237, |
| "grad_norm": 2.03474760055542, |
| "learning_rate": 8.174148566213883e-05, |
| "loss": 4.4189, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.9130103981739792, |
| "grad_norm": 2.4315898418426514, |
| "learning_rate": 8.152945789741115e-05, |
| "loss": 4.4123, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.9130103981739792, |
| "eval_action_accuracy": 0.07720947265625, |
| "eval_loss": 4.408840179443359, |
| "eval_runtime": 225.9743, |
| "eval_samples_per_second": 9.063, |
| "eval_steps_per_second": 0.566, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.9180826781638346, |
| "grad_norm": 3.4418368339538574, |
| "learning_rate": 8.13164846676667e-05, |
| "loss": 4.4018, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.9231549581536901, |
| "grad_norm": 2.912829637527466, |
| "learning_rate": 8.110257235927399e-05, |
| "loss": 4.4006, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.9282272381435456, |
| "grad_norm": 3.1742701530456543, |
| "learning_rate": 8.088772738676147e-05, |
| "loss": 4.3849, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.933299518133401, |
| "grad_norm": 2.608149528503418, |
| "learning_rate": 8.06719561926251e-05, |
| "loss": 4.3848, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.9383717981232564, |
| "grad_norm": 3.916184425354004, |
| "learning_rate": 8.045526524713522e-05, |
| "loss": 4.3698, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.9434440781131118, |
| "grad_norm": 1.0830720663070679, |
| "learning_rate": 8.023766104814249e-05, |
| "loss": 4.3521, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.9485163581029673, |
| "grad_norm": 4.330826282501221, |
| "learning_rate": 8.0019150120883e-05, |
| "loss": 4.3412, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.9535886380928227, |
| "grad_norm": 5.002548694610596, |
| "learning_rate": 7.97997390177827e-05, |
| "loss": 4.33, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.9586609180826782, |
| "grad_norm": 2.605839729309082, |
| "learning_rate": 7.957943431826084e-05, |
| "loss": 4.3083, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.9637331980725337, |
| "grad_norm": 2.4096832275390625, |
| "learning_rate": 7.93582426285327e-05, |
| "loss": 4.2866, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.9637331980725337, |
| "eval_action_accuracy": 0.12493896484375, |
| "eval_loss": 4.285426139831543, |
| "eval_runtime": 231.2686, |
| "eval_samples_per_second": 8.856, |
| "eval_steps_per_second": 0.553, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.968805478062389, |
| "grad_norm": 4.2472243309021, |
| "learning_rate": 7.913617058141147e-05, |
| "loss": 4.2785, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.9738777580522445, |
| "grad_norm": 2.99232816696167, |
| "learning_rate": 7.89132248361094e-05, |
| "loss": 4.2632, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.9789500380420999, |
| "grad_norm": 6.356353282928467, |
| "learning_rate": 7.868941207803807e-05, |
| "loss": 4.2352, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.9840223180319554, |
| "grad_norm": 4.926124095916748, |
| "learning_rate": 7.846473901860789e-05, |
| "loss": 4.212, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.9890945980218108, |
| "grad_norm": 4.553915023803711, |
| "learning_rate": 7.823921239502695e-05, |
| "loss": 4.1994, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.9941668780116663, |
| "grad_norm": 3.6262214183807373, |
| "learning_rate": 7.80128389700989e-05, |
| "loss": 4.1624, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.9992391580015216, |
| "grad_norm": 3.167363166809082, |
| "learning_rate": 7.778562553202017e-05, |
| "loss": 4.1414, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.0043114379913771, |
| "grad_norm": 4.281521797180176, |
| "learning_rate": 7.755757889417648e-05, |
| "loss": 4.1162, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.0093837179812326, |
| "grad_norm": 2.3562097549438477, |
| "learning_rate": 7.73287058949384e-05, |
| "loss": 4.0933, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.014455997971088, |
| "grad_norm": 4.595114231109619, |
| "learning_rate": 7.709901339745642e-05, |
| "loss": 4.0731, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.014455997971088, |
| "eval_action_accuracy": 0.12646484375, |
| "eval_loss": 4.067330360412598, |
| "eval_runtime": 32.7296, |
| "eval_samples_per_second": 62.573, |
| "eval_steps_per_second": 3.911, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.0195282779609434, |
| "grad_norm": 2.8510732650756836, |
| "learning_rate": 7.68685082894551e-05, |
| "loss": 4.0461, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.0246005579507989, |
| "grad_norm": 3.8514857292175293, |
| "learning_rate": 7.663719748302649e-05, |
| "loss": 4.0303, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.0296728379406543, |
| "grad_norm": 6.056658744812012, |
| "learning_rate": 7.640508791442292e-05, |
| "loss": 4.0205, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.0347451179305098, |
| "grad_norm": 5.616896629333496, |
| "learning_rate": 7.617218654384895e-05, |
| "loss": 3.9963, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.0398173979203653, |
| "grad_norm": 4.593634128570557, |
| "learning_rate": 7.59385003552527e-05, |
| "loss": 3.9845, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.0448896779102206, |
| "grad_norm": 2.184180974960327, |
| "learning_rate": 7.570403635611645e-05, |
| "loss": 3.9796, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.049961957900076, |
| "grad_norm": 3.9647679328918457, |
| "learning_rate": 7.546880157724637e-05, |
| "loss": 3.9604, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.0550342378899316, |
| "grad_norm": 3.0800178050994873, |
| "learning_rate": 7.523280307256189e-05, |
| "loss": 3.9499, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.060106517879787, |
| "grad_norm": 1.8450560569763184, |
| "learning_rate": 7.499604791888399e-05, |
| "loss": 3.9388, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.0651787978696423, |
| "grad_norm": 2.2816243171691895, |
| "learning_rate": 7.475854321572311e-05, |
| "loss": 3.9263, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.0651787978696423, |
| "eval_action_accuracy": 0.13165283203125, |
| "eval_loss": 3.927708864212036, |
| "eval_runtime": 32.2116, |
| "eval_samples_per_second": 63.58, |
| "eval_steps_per_second": 3.974, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.0702510778594978, |
| "grad_norm": 3.747340679168701, |
| "learning_rate": 7.452029608506625e-05, |
| "loss": 3.9185, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.0753233578493533, |
| "grad_norm": 3.5790855884552, |
| "learning_rate": 7.42813136711633e-05, |
| "loss": 3.9228, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.0803956378392088, |
| "grad_norm": 2.9461708068847656, |
| "learning_rate": 7.404160314031293e-05, |
| "loss": 3.9258, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.0854679178290643, |
| "grad_norm": 2.2783358097076416, |
| "learning_rate": 7.380117168064765e-05, |
| "loss": 3.8958, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.0905401978189195, |
| "grad_norm": 2.0436387062072754, |
| "learning_rate": 7.356002650191826e-05, |
| "loss": 3.891, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.095612477808775, |
| "grad_norm": 3.3393843173980713, |
| "learning_rate": 7.331817483527761e-05, |
| "loss": 3.8801, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.1006847577986305, |
| "grad_norm": 1.5066816806793213, |
| "learning_rate": 7.307562393306388e-05, |
| "loss": 3.9537, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.105757037788486, |
| "grad_norm": 3.5615596771240234, |
| "learning_rate": 7.283238106858299e-05, |
| "loss": 3.8907, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.1108293177783413, |
| "grad_norm": 1.8178229331970215, |
| "learning_rate": 7.258845353589051e-05, |
| "loss": 3.8741, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.1159015977681968, |
| "grad_norm": 2.923635721206665, |
| "learning_rate": 7.234384864957303e-05, |
| "loss": 3.861, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.1159015977681968, |
| "eval_action_accuracy": 0.1334228515625, |
| "eval_loss": 3.8630127906799316, |
| "eval_runtime": 224.4597, |
| "eval_samples_per_second": 9.124, |
| "eval_steps_per_second": 0.57, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.1209738777580522, |
| "grad_norm": 2.2543115615844727, |
| "learning_rate": 7.209857374452868e-05, |
| "loss": 3.8702, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.1260461577479077, |
| "grad_norm": 4.547013282775879, |
| "learning_rate": 7.185263617574732e-05, |
| "loss": 3.8536, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.1311184377377632, |
| "grad_norm": 2.3391380310058594, |
| "learning_rate": 7.160604331808988e-05, |
| "loss": 3.8498, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.1361907177276185, |
| "grad_norm": 1.3658958673477173, |
| "learning_rate": 7.135880256606728e-05, |
| "loss": 3.8391, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.141262997717474, |
| "grad_norm": 1.8794150352478027, |
| "learning_rate": 7.111092133361864e-05, |
| "loss": 3.8323, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.1463352777073295, |
| "grad_norm": 2.647994041442871, |
| "learning_rate": 7.0862407053889e-05, |
| "loss": 3.8246, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.151407557697185, |
| "grad_norm": 1.9109671115875244, |
| "learning_rate": 7.061326717900643e-05, |
| "loss": 3.826, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.1564798376870402, |
| "grad_norm": 2.2575666904449463, |
| "learning_rate": 7.036350917985849e-05, |
| "loss": 3.8153, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.1615521176768957, |
| "grad_norm": 2.557769298553467, |
| "learning_rate": 7.011314054586834e-05, |
| "loss": 3.8129, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.1666243976667512, |
| "grad_norm": 1.6273854970932007, |
| "learning_rate": 6.986216878477e-05, |
| "loss": 3.805, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.1666243976667512, |
| "eval_action_accuracy": 0.134033203125, |
| "eval_loss": 3.803765296936035, |
| "eval_runtime": 214.3228, |
| "eval_samples_per_second": 9.556, |
| "eval_steps_per_second": 0.597, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.1716966776566067, |
| "grad_norm": 2.280379295349121, |
| "learning_rate": 6.961060142238336e-05, |
| "loss": 3.7966, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.1767689576464622, |
| "grad_norm": 1.739105463027954, |
| "learning_rate": 6.935844600238839e-05, |
| "loss": 3.7988, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.1818412376363174, |
| "grad_norm": 1.481899619102478, |
| "learning_rate": 6.910571008609898e-05, |
| "loss": 3.7919, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.186913517626173, |
| "grad_norm": 1.5556796789169312, |
| "learning_rate": 6.885240125223623e-05, |
| "loss": 3.7861, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.1919857976160284, |
| "grad_norm": 2.1016335487365723, |
| "learning_rate": 6.859852709670113e-05, |
| "loss": 3.7836, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.197058077605884, |
| "grad_norm": 1.5450862646102905, |
| "learning_rate": 6.834409523234685e-05, |
| "loss": 3.7768, |
| "step": 9440 |
| }, |
| { |
| "epoch": 1.2021303575957392, |
| "grad_norm": 1.5863109827041626, |
| "learning_rate": 6.808911328875039e-05, |
| "loss": 3.773, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.2072026375855947, |
| "grad_norm": 1.5427820682525635, |
| "learning_rate": 6.783358891198378e-05, |
| "loss": 3.7688, |
| "step": 9520 |
| }, |
| { |
| "epoch": 1.2122749175754501, |
| "grad_norm": 1.2848241329193115, |
| "learning_rate": 6.757752976438494e-05, |
| "loss": 3.7613, |
| "step": 9560 |
| }, |
| { |
| "epoch": 1.2173471975653056, |
| "grad_norm": 1.413462519645691, |
| "learning_rate": 6.732094352432775e-05, |
| "loss": 3.7541, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.2173471975653056, |
| "eval_action_accuracy": 0.13427734375, |
| "eval_loss": 3.7621703147888184, |
| "eval_runtime": 298.648, |
| "eval_samples_per_second": 6.858, |
| "eval_steps_per_second": 0.429, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.2224194775551611, |
| "grad_norm": 1.9794578552246094, |
| "learning_rate": 6.706383788599185e-05, |
| "loss": 3.7602, |
| "step": 9640 |
| }, |
| { |
| "epoch": 1.2274917575450166, |
| "grad_norm": 1.9551501274108887, |
| "learning_rate": 6.680622055913198e-05, |
| "loss": 3.7513, |
| "step": 9680 |
| }, |
| { |
| "epoch": 1.2325640375348719, |
| "grad_norm": 1.13520085811615, |
| "learning_rate": 6.654809926884667e-05, |
| "loss": 3.7555, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.2376363175247274, |
| "grad_norm": 2.2174594402313232, |
| "learning_rate": 6.628948175534677e-05, |
| "loss": 3.7478, |
| "step": 9760 |
| }, |
| { |
| "epoch": 1.2427085975145828, |
| "grad_norm": 1.2913436889648438, |
| "learning_rate": 6.603037577372314e-05, |
| "loss": 3.7403, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.2477808775044383, |
| "grad_norm": 1.5598777532577515, |
| "learning_rate": 6.577078909371421e-05, |
| "loss": 3.7358, |
| "step": 9840 |
| }, |
| { |
| "epoch": 1.2528531574942936, |
| "grad_norm": 1.298091173171997, |
| "learning_rate": 6.551072949947304e-05, |
| "loss": 3.7334, |
| "step": 9880 |
| }, |
| { |
| "epoch": 1.257925437484149, |
| "grad_norm": 1.5344069004058838, |
| "learning_rate": 6.525020478933375e-05, |
| "loss": 3.7295, |
| "step": 9920 |
| }, |
| { |
| "epoch": 1.2629977174740046, |
| "grad_norm": 1.6545060873031616, |
| "learning_rate": 6.498922277557782e-05, |
| "loss": 3.7278, |
| "step": 9960 |
| }, |
| { |
| "epoch": 1.26806999746386, |
| "grad_norm": 1.0720863342285156, |
| "learning_rate": 6.472779128419974e-05, |
| "loss": 3.7316, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.26806999746386, |
| "eval_action_accuracy": 0.13653564453125, |
| "eval_loss": 3.7235488891601562, |
| "eval_runtime": 333.1976, |
| "eval_samples_per_second": 6.147, |
| "eval_steps_per_second": 0.384, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.2731422774537156, |
| "grad_norm": 1.3172401189804077, |
| "learning_rate": 6.446591815467238e-05, |
| "loss": 3.721, |
| "step": 10040 |
| }, |
| { |
| "epoch": 1.2782145574435708, |
| "grad_norm": 1.9167495965957642, |
| "learning_rate": 6.420361123971186e-05, |
| "loss": 3.7172, |
| "step": 10080 |
| }, |
| { |
| "epoch": 1.2832868374334263, |
| "grad_norm": 0.8781843781471252, |
| "learning_rate": 6.394087840504213e-05, |
| "loss": 3.7177, |
| "step": 10120 |
| }, |
| { |
| "epoch": 1.2883591174232818, |
| "grad_norm": 1.2992794513702393, |
| "learning_rate": 6.367772752915906e-05, |
| "loss": 3.711, |
| "step": 10160 |
| }, |
| { |
| "epoch": 1.2934313974131373, |
| "grad_norm": 1.4007689952850342, |
| "learning_rate": 6.341416650309422e-05, |
| "loss": 3.7257, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.2985036774029926, |
| "grad_norm": 1.4746208190917969, |
| "learning_rate": 6.31502032301782e-05, |
| "loss": 3.7169, |
| "step": 10240 |
| }, |
| { |
| "epoch": 1.303575957392848, |
| "grad_norm": 1.7429252862930298, |
| "learning_rate": 6.28858456258037e-05, |
| "loss": 3.7152, |
| "step": 10280 |
| }, |
| { |
| "epoch": 1.3086482373827035, |
| "grad_norm": 1.0173007249832153, |
| "learning_rate": 6.262110161718812e-05, |
| "loss": 3.7171, |
| "step": 10320 |
| }, |
| { |
| "epoch": 1.313720517372559, |
| "grad_norm": 1.7349942922592163, |
| "learning_rate": 6.235597914313582e-05, |
| "loss": 3.6991, |
| "step": 10360 |
| }, |
| { |
| "epoch": 1.3187927973624145, |
| "grad_norm": 1.29923415184021, |
| "learning_rate": 6.209048615380012e-05, |
| "loss": 3.696, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.3187927973624145, |
| "eval_action_accuracy": 0.1380615234375, |
| "eval_loss": 3.6903038024902344, |
| "eval_runtime": 303.8751, |
| "eval_samples_per_second": 6.74, |
| "eval_steps_per_second": 0.421, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.3238650773522698, |
| "grad_norm": 1.0686568021774292, |
| "learning_rate": 6.18246306104449e-05, |
| "loss": 3.6912, |
| "step": 10440 |
| }, |
| { |
| "epoch": 1.3289373573421253, |
| "grad_norm": 1.2505710124969482, |
| "learning_rate": 6.155842048520579e-05, |
| "loss": 3.6875, |
| "step": 10480 |
| }, |
| { |
| "epoch": 1.3340096373319807, |
| "grad_norm": 1.1971619129180908, |
| "learning_rate": 6.129186376085124e-05, |
| "loss": 3.6826, |
| "step": 10520 |
| }, |
| { |
| "epoch": 1.3390819173218362, |
| "grad_norm": 1.4397510290145874, |
| "learning_rate": 6.1024968430543e-05, |
| "loss": 3.684, |
| "step": 10560 |
| }, |
| { |
| "epoch": 1.3441541973116915, |
| "grad_norm": 0.9133204817771912, |
| "learning_rate": 6.0757742497596536e-05, |
| "loss": 3.701, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.349226477301547, |
| "grad_norm": 1.0655523538589478, |
| "learning_rate": 6.0490193975241026e-05, |
| "loss": 3.6814, |
| "step": 10640 |
| }, |
| { |
| "epoch": 1.3542987572914025, |
| "grad_norm": 1.0900158882141113, |
| "learning_rate": 6.0222330886379006e-05, |
| "loss": 3.6751, |
| "step": 10680 |
| }, |
| { |
| "epoch": 1.359371037281258, |
| "grad_norm": 0.8660451769828796, |
| "learning_rate": 5.995416126334583e-05, |
| "loss": 3.6737, |
| "step": 10720 |
| }, |
| { |
| "epoch": 1.3644433172711135, |
| "grad_norm": 1.2826400995254517, |
| "learning_rate": 5.9685693147668855e-05, |
| "loss": 3.6704, |
| "step": 10760 |
| }, |
| { |
| "epoch": 1.369515597260969, |
| "grad_norm": 0.9602544903755188, |
| "learning_rate": 5.941693458982619e-05, |
| "loss": 3.6622, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.369515597260969, |
| "eval_action_accuracy": 0.137451171875, |
| "eval_loss": 3.6570138931274414, |
| "eval_runtime": 330.4329, |
| "eval_samples_per_second": 6.198, |
| "eval_steps_per_second": 0.387, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.3745878772508242, |
| "grad_norm": 0.9139319658279419, |
| "learning_rate": 5.914789364900537e-05, |
| "loss": 3.6607, |
| "step": 10840 |
| }, |
| { |
| "epoch": 1.3796601572406797, |
| "grad_norm": 0.9442414045333862, |
| "learning_rate": 5.8878578392861717e-05, |
| "loss": 3.6577, |
| "step": 10880 |
| }, |
| { |
| "epoch": 1.3847324372305352, |
| "grad_norm": 1.427770733833313, |
| "learning_rate": 5.8608996897276316e-05, |
| "loss": 3.6576, |
| "step": 10920 |
| }, |
| { |
| "epoch": 1.3898047172203905, |
| "grad_norm": 0.9186159372329712, |
| "learning_rate": 5.8339157246113894e-05, |
| "loss": 3.6547, |
| "step": 10960 |
| }, |
| { |
| "epoch": 1.394876997210246, |
| "grad_norm": 0.8701043128967285, |
| "learning_rate": 5.806906753098047e-05, |
| "loss": 3.649, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.3999492772001014, |
| "grad_norm": 0.9969470500946045, |
| "learning_rate": 5.7798735850980624e-05, |
| "loss": 3.65, |
| "step": 11040 |
| }, |
| { |
| "epoch": 1.405021557189957, |
| "grad_norm": 1.0402882099151611, |
| "learning_rate": 5.752817031247465e-05, |
| "loss": 3.6432, |
| "step": 11080 |
| }, |
| { |
| "epoch": 1.4100938371798124, |
| "grad_norm": 0.9199869632720947, |
| "learning_rate": 5.725737902883556e-05, |
| "loss": 3.6453, |
| "step": 11120 |
| }, |
| { |
| "epoch": 1.415166117169668, |
| "grad_norm": 1.2621873617172241, |
| "learning_rate": 5.6986370120205635e-05, |
| "loss": 3.642, |
| "step": 11160 |
| }, |
| { |
| "epoch": 1.4202383971595232, |
| "grad_norm": 1.1047650575637817, |
| "learning_rate": 5.671515171325309e-05, |
| "loss": 3.6424, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.4202383971595232, |
| "eval_action_accuracy": 0.1365966796875, |
| "eval_loss": 3.631456136703491, |
| "eval_runtime": 291.4897, |
| "eval_samples_per_second": 7.026, |
| "eval_steps_per_second": 0.439, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.4253106771493786, |
| "grad_norm": 1.544075846672058, |
| "learning_rate": 5.6443731940928245e-05, |
| "loss": 3.6375, |
| "step": 11240 |
| }, |
| { |
| "epoch": 1.4303829571392341, |
| "grad_norm": 1.037488579750061, |
| "learning_rate": 5.617211894221978e-05, |
| "loss": 3.6328, |
| "step": 11280 |
| }, |
| { |
| "epoch": 1.4354552371290894, |
| "grad_norm": 1.0984132289886475, |
| "learning_rate": 5.5900320861910524e-05, |
| "loss": 3.6335, |
| "step": 11320 |
| }, |
| { |
| "epoch": 1.440527517118945, |
| "grad_norm": 0.8929972052574158, |
| "learning_rate": 5.562834585033333e-05, |
| "loss": 3.6311, |
| "step": 11360 |
| }, |
| { |
| "epoch": 1.4455997971088004, |
| "grad_norm": 1.275006890296936, |
| "learning_rate": 5.535620206312667e-05, |
| "loss": 3.6271, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.4506720770986559, |
| "grad_norm": 0.8102837204933167, |
| "learning_rate": 5.508389766098999e-05, |
| "loss": 3.6243, |
| "step": 11440 |
| }, |
| { |
| "epoch": 1.4557443570885114, |
| "grad_norm": 0.8364322185516357, |
| "learning_rate": 5.4811440809439075e-05, |
| "loss": 3.6244, |
| "step": 11480 |
| }, |
| { |
| "epoch": 1.4608166370783668, |
| "grad_norm": 1.1363658905029297, |
| "learning_rate": 5.453883967856119e-05, |
| "loss": 3.6205, |
| "step": 11520 |
| }, |
| { |
| "epoch": 1.465888917068222, |
| "grad_norm": 0.9556430578231812, |
| "learning_rate": 5.426610244277002e-05, |
| "loss": 3.6202, |
| "step": 11560 |
| }, |
| { |
| "epoch": 1.4709611970580776, |
| "grad_norm": 1.451727271080017, |
| "learning_rate": 5.399323728056059e-05, |
| "loss": 3.6111, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.4709611970580776, |
| "eval_action_accuracy": 0.138916015625, |
| "eval_loss": 3.6043567657470703, |
| "eval_runtime": 213.2403, |
| "eval_samples_per_second": 9.604, |
| "eval_steps_per_second": 0.6, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.476033477047933, |
| "grad_norm": 0.8800786733627319, |
| "learning_rate": 5.372025237426409e-05, |
| "loss": 3.611, |
| "step": 11640 |
| }, |
| { |
| "epoch": 1.4811057570377884, |
| "grad_norm": 1.2481964826583862, |
| "learning_rate": 5.3447155909802374e-05, |
| "loss": 3.6118, |
| "step": 11680 |
| }, |
| { |
| "epoch": 1.4861780370276438, |
| "grad_norm": 0.8092613816261292, |
| "learning_rate": 5.3173956076442544e-05, |
| "loss": 3.6074, |
| "step": 11720 |
| }, |
| { |
| "epoch": 1.4912503170174993, |
| "grad_norm": 0.9781074523925781, |
| "learning_rate": 5.2900661066551473e-05, |
| "loss": 3.6103, |
| "step": 11760 |
| }, |
| { |
| "epoch": 1.4963225970073548, |
| "grad_norm": 1.2322132587432861, |
| "learning_rate": 5.262727907535001e-05, |
| "loss": 3.6052, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.5013948769972103, |
| "grad_norm": 0.9783710837364197, |
| "learning_rate": 5.2353818300667276e-05, |
| "loss": 3.597, |
| "step": 11840 |
| }, |
| { |
| "epoch": 1.5064671569870658, |
| "grad_norm": 0.7827417850494385, |
| "learning_rate": 5.208028694269491e-05, |
| "loss": 3.6, |
| "step": 11880 |
| }, |
| { |
| "epoch": 1.5115394369769213, |
| "grad_norm": 1.0520368814468384, |
| "learning_rate": 5.180669320374108e-05, |
| "loss": 3.5987, |
| "step": 11920 |
| }, |
| { |
| "epoch": 1.5166117169667765, |
| "grad_norm": 0.9360650777816772, |
| "learning_rate": 5.153304528798449e-05, |
| "loss": 3.6012, |
| "step": 11960 |
| }, |
| { |
| "epoch": 1.521683996956632, |
| "grad_norm": 0.6922670602798462, |
| "learning_rate": 5.1259351401228575e-05, |
| "loss": 3.5966, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.521683996956632, |
| "eval_action_accuracy": 0.14019775390625, |
| "eval_loss": 3.5850491523742676, |
| "eval_runtime": 210.2848, |
| "eval_samples_per_second": 9.739, |
| "eval_steps_per_second": 0.609, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.5267562769464873, |
| "grad_norm": 0.9070268273353577, |
| "learning_rate": 5.0985619750655154e-05, |
| "loss": 3.5986, |
| "step": 12040 |
| }, |
| { |
| "epoch": 1.5318285569363428, |
| "grad_norm": 0.7900585532188416, |
| "learning_rate": 5.071185854457852e-05, |
| "loss": 3.5976, |
| "step": 12080 |
| }, |
| { |
| "epoch": 1.5369008369261983, |
| "grad_norm": 0.9794080257415771, |
| "learning_rate": 5.043807599219923e-05, |
| "loss": 3.5902, |
| "step": 12120 |
| }, |
| { |
| "epoch": 1.5419731169160538, |
| "grad_norm": 0.9867932200431824, |
| "learning_rate": 5.016428030335796e-05, |
| "loss": 3.5825, |
| "step": 12160 |
| }, |
| { |
| "epoch": 1.5470453969059093, |
| "grad_norm": 0.8320935368537903, |
| "learning_rate": 4.989047968828926e-05, |
| "loss": 3.5852, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.5521176768957647, |
| "grad_norm": 1.1050447225570679, |
| "learning_rate": 4.961668235737548e-05, |
| "loss": 3.5841, |
| "step": 12240 |
| }, |
| { |
| "epoch": 1.5571899568856202, |
| "grad_norm": 1.1726595163345337, |
| "learning_rate": 4.934289652090038e-05, |
| "loss": 3.5749, |
| "step": 12280 |
| }, |
| { |
| "epoch": 1.5622622368754755, |
| "grad_norm": 1.187261700630188, |
| "learning_rate": 4.906913038880315e-05, |
| "loss": 3.575, |
| "step": 12320 |
| }, |
| { |
| "epoch": 1.567334516865331, |
| "grad_norm": 0.957165002822876, |
| "learning_rate": 4.879539217043203e-05, |
| "loss": 3.5713, |
| "step": 12360 |
| }, |
| { |
| "epoch": 1.5724067968551863, |
| "grad_norm": 1.2354923486709595, |
| "learning_rate": 4.852169007429829e-05, |
| "loss": 3.5709, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.5724067968551863, |
| "eval_action_accuracy": 0.138671875, |
| "eval_loss": 3.5651776790618896, |
| "eval_runtime": 35.8538, |
| "eval_samples_per_second": 57.121, |
| "eval_steps_per_second": 3.57, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.5774790768450417, |
| "grad_norm": 0.7800391912460327, |
| "learning_rate": 4.824803230782992e-05, |
| "loss": 3.5684, |
| "step": 12440 |
| }, |
| { |
| "epoch": 1.5825513568348972, |
| "grad_norm": 0.8663883209228516, |
| "learning_rate": 4.79744270771257e-05, |
| "loss": 3.564, |
| "step": 12480 |
| }, |
| { |
| "epoch": 1.5876236368247527, |
| "grad_norm": 0.9180625081062317, |
| "learning_rate": 4.770088258670897e-05, |
| "loss": 3.5626, |
| "step": 12520 |
| }, |
| { |
| "epoch": 1.5926959168146082, |
| "grad_norm": 1.0298022031784058, |
| "learning_rate": 4.742740703928172e-05, |
| "loss": 3.5648, |
| "step": 12560 |
| }, |
| { |
| "epoch": 1.5977681968044637, |
| "grad_norm": 1.177050232887268, |
| "learning_rate": 4.7154008635478525e-05, |
| "loss": 3.562, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.6028404767943192, |
| "grad_norm": 0.8682090044021606, |
| "learning_rate": 4.688069557362071e-05, |
| "loss": 3.5629, |
| "step": 12640 |
| }, |
| { |
| "epoch": 1.6079127567841744, |
| "grad_norm": 1.0756064653396606, |
| "learning_rate": 4.660747604947043e-05, |
| "loss": 3.5634, |
| "step": 12680 |
| }, |
| { |
| "epoch": 1.61298503677403, |
| "grad_norm": 0.8048375844955444, |
| "learning_rate": 4.6334358255984985e-05, |
| "loss": 3.555, |
| "step": 12720 |
| }, |
| { |
| "epoch": 1.6180573167638852, |
| "grad_norm": 0.8976615071296692, |
| "learning_rate": 4.606135038307109e-05, |
| "loss": 3.558, |
| "step": 12760 |
| }, |
| { |
| "epoch": 1.6231295967537407, |
| "grad_norm": 0.868787944316864, |
| "learning_rate": 4.578846061733934e-05, |
| "loss": 3.5558, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.6231295967537407, |
| "eval_action_accuracy": 0.1395263671875, |
| "eval_loss": 3.5358495712280273, |
| "eval_runtime": 211.9369, |
| "eval_samples_per_second": 9.663, |
| "eval_steps_per_second": 0.604, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.6282018767435962, |
| "grad_norm": 0.7598440647125244, |
| "learning_rate": 4.551569714185862e-05, |
| "loss": 3.5466, |
| "step": 12840 |
| }, |
| { |
| "epoch": 1.6332741567334517, |
| "grad_norm": 1.093045711517334, |
| "learning_rate": 4.524306813591085e-05, |
| "loss": 3.5519, |
| "step": 12880 |
| }, |
| { |
| "epoch": 1.6383464367233072, |
| "grad_norm": 0.7852573394775391, |
| "learning_rate": 4.497058177474558e-05, |
| "loss": 3.5472, |
| "step": 12920 |
| }, |
| { |
| "epoch": 1.6434187167131626, |
| "grad_norm": 0.7886530756950378, |
| "learning_rate": 4.469824622933498e-05, |
| "loss": 3.545, |
| "step": 12960 |
| }, |
| { |
| "epoch": 1.6484909967030181, |
| "grad_norm": 1.0125157833099365, |
| "learning_rate": 4.4426069666128705e-05, |
| "loss": 3.5428, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.6535632766928734, |
| "grad_norm": 1.0761477947235107, |
| "learning_rate": 4.415406024680908e-05, |
| "loss": 3.5373, |
| "step": 13040 |
| }, |
| { |
| "epoch": 1.6586355566827289, |
| "grad_norm": 1.1242908239364624, |
| "learning_rate": 4.38822261280463e-05, |
| "loss": 3.5381, |
| "step": 13080 |
| }, |
| { |
| "epoch": 1.6637078366725844, |
| "grad_norm": 0.8285259008407593, |
| "learning_rate": 4.361057546125393e-05, |
| "loss": 3.5401, |
| "step": 13120 |
| }, |
| { |
| "epoch": 1.6687801166624396, |
| "grad_norm": 0.9908625483512878, |
| "learning_rate": 4.333911639234435e-05, |
| "loss": 3.5299, |
| "step": 13160 |
| }, |
| { |
| "epoch": 1.6738523966522951, |
| "grad_norm": 0.7884112000465393, |
| "learning_rate": 4.3067857061484574e-05, |
| "loss": 3.5271, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.6738523966522951, |
| "eval_action_accuracy": 0.13958740234375, |
| "eval_loss": 3.521883487701416, |
| "eval_runtime": 277.2848, |
| "eval_samples_per_second": 7.386, |
| "eval_steps_per_second": 0.462, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.6789246766421506, |
| "grad_norm": 1.062119483947754, |
| "learning_rate": 4.279680560285212e-05, |
| "loss": 3.5305, |
| "step": 13240 |
| }, |
| { |
| "epoch": 1.683996956632006, |
| "grad_norm": 0.9488341212272644, |
| "learning_rate": 4.2525970144391156e-05, |
| "loss": 3.5266, |
| "step": 13280 |
| }, |
| { |
| "epoch": 1.6890692366218616, |
| "grad_norm": 0.7958804965019226, |
| "learning_rate": 4.225535880756858e-05, |
| "loss": 3.5247, |
| "step": 13320 |
| }, |
| { |
| "epoch": 1.694141516611717, |
| "grad_norm": 0.7736839056015015, |
| "learning_rate": 4.198497970713079e-05, |
| "loss": 3.5245, |
| "step": 13360 |
| }, |
| { |
| "epoch": 1.6992137966015726, |
| "grad_norm": 1.1035319566726685, |
| "learning_rate": 4.171484095086002e-05, |
| "loss": 3.5214, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.7042860765914278, |
| "grad_norm": 0.9785400629043579, |
| "learning_rate": 4.144495063933148e-05, |
| "loss": 3.5174, |
| "step": 13440 |
| }, |
| { |
| "epoch": 1.7093583565812833, |
| "grad_norm": 0.8356533646583557, |
| "learning_rate": 4.117531686567028e-05, |
| "loss": 3.514, |
| "step": 13480 |
| }, |
| { |
| "epoch": 1.7144306365711386, |
| "grad_norm": 0.9696844220161438, |
| "learning_rate": 4.090594771530882e-05, |
| "loss": 3.5175, |
| "step": 13520 |
| }, |
| { |
| "epoch": 1.719502916560994, |
| "grad_norm": 0.9695699214935303, |
| "learning_rate": 4.0636851265744305e-05, |
| "loss": 3.5108, |
| "step": 13560 |
| }, |
| { |
| "epoch": 1.7245751965508496, |
| "grad_norm": 0.9095252156257629, |
| "learning_rate": 4.036803558629656e-05, |
| "loss": 3.513, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.7245751965508496, |
| "eval_action_accuracy": 0.14178466796875, |
| "eval_loss": 3.5030040740966797, |
| "eval_runtime": 228.2505, |
| "eval_samples_per_second": 8.973, |
| "eval_steps_per_second": 0.561, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.729647476540705, |
| "grad_norm": 0.9833335876464844, |
| "learning_rate": 4.0099508737866006e-05, |
| "loss": 3.5113, |
| "step": 13640 |
| }, |
| { |
| "epoch": 1.7347197565305605, |
| "grad_norm": 0.8735532164573669, |
| "learning_rate": 3.983127877269199e-05, |
| "loss": 3.5082, |
| "step": 13680 |
| }, |
| { |
| "epoch": 1.739792036520416, |
| "grad_norm": 0.843338668346405, |
| "learning_rate": 3.9563353734111285e-05, |
| "loss": 3.5023, |
| "step": 13720 |
| }, |
| { |
| "epoch": 1.7448643165102715, |
| "grad_norm": 0.8809418678283691, |
| "learning_rate": 3.929574165631696e-05, |
| "loss": 3.5043, |
| "step": 13760 |
| }, |
| { |
| "epoch": 1.7499365965001268, |
| "grad_norm": 1.0490487813949585, |
| "learning_rate": 3.9028450564117354e-05, |
| "loss": 3.5026, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.7550088764899823, |
| "grad_norm": 0.9039780497550964, |
| "learning_rate": 3.8761488472695544e-05, |
| "loss": 3.5008, |
| "step": 13840 |
| }, |
| { |
| "epoch": 1.7600811564798375, |
| "grad_norm": 1.1890782117843628, |
| "learning_rate": 3.849486338736893e-05, |
| "loss": 3.502, |
| "step": 13880 |
| }, |
| { |
| "epoch": 1.765153436469693, |
| "grad_norm": 0.8219150900840759, |
| "learning_rate": 3.822858330334923e-05, |
| "loss": 3.4983, |
| "step": 13920 |
| }, |
| { |
| "epoch": 1.7702257164595485, |
| "grad_norm": 0.8659482002258301, |
| "learning_rate": 3.796265620550267e-05, |
| "loss": 3.4919, |
| "step": 13960 |
| }, |
| { |
| "epoch": 1.775297996449404, |
| "grad_norm": 0.8262388706207275, |
| "learning_rate": 3.769709006811064e-05, |
| "loss": 3.4927, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.775297996449404, |
| "eval_action_accuracy": 0.140380859375, |
| "eval_loss": 3.480764389038086, |
| "eval_runtime": 201.5749, |
| "eval_samples_per_second": 10.16, |
| "eval_steps_per_second": 0.635, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.7803702764392595, |
| "grad_norm": 0.8509693741798401, |
| "learning_rate": 3.7431892854630414e-05, |
| "loss": 3.4891, |
| "step": 14040 |
| }, |
| { |
| "epoch": 1.785442556429115, |
| "grad_norm": 0.9192864894866943, |
| "learning_rate": 3.7167072517456536e-05, |
| "loss": 3.4856, |
| "step": 14080 |
| }, |
| { |
| "epoch": 1.7905148364189705, |
| "grad_norm": 0.8431236147880554, |
| "learning_rate": 3.6902636997682216e-05, |
| "loss": 3.481, |
| "step": 14120 |
| }, |
| { |
| "epoch": 1.7955871164088257, |
| "grad_norm": 0.8478639721870422, |
| "learning_rate": 3.663859422486129e-05, |
| "loss": 3.4844, |
| "step": 14160 |
| }, |
| { |
| "epoch": 1.8006593963986812, |
| "grad_norm": 0.8935319781303406, |
| "learning_rate": 3.637495211677035e-05, |
| "loss": 3.4808, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.8057316763885365, |
| "grad_norm": 0.9666652679443359, |
| "learning_rate": 3.611171857917144e-05, |
| "loss": 3.4818, |
| "step": 14240 |
| }, |
| { |
| "epoch": 1.810803956378392, |
| "grad_norm": 0.7934905886650085, |
| "learning_rate": 3.5848901505574814e-05, |
| "loss": 3.4783, |
| "step": 14280 |
| }, |
| { |
| "epoch": 1.8158762363682475, |
| "grad_norm": 0.8093165755271912, |
| "learning_rate": 3.558650877700245e-05, |
| "loss": 3.4764, |
| "step": 14320 |
| }, |
| { |
| "epoch": 1.820948516358103, |
| "grad_norm": 0.9005354046821594, |
| "learning_rate": 3.532454826175151e-05, |
| "loss": 3.4735, |
| "step": 14360 |
| }, |
| { |
| "epoch": 1.8260207963479584, |
| "grad_norm": 0.9937548637390137, |
| "learning_rate": 3.506302781515859e-05, |
| "loss": 3.4693, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.8260207963479584, |
| "eval_action_accuracy": 0.14202880859375, |
| "eval_loss": 3.4609575271606445, |
| "eval_runtime": 282.6532, |
| "eval_samples_per_second": 7.246, |
| "eval_steps_per_second": 0.453, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.831093076337814, |
| "grad_norm": 1.0596122741699219, |
| "learning_rate": 3.4801955279363955e-05, |
| "loss": 3.4687, |
| "step": 14440 |
| }, |
| { |
| "epoch": 1.8361653563276694, |
| "grad_norm": 1.0068224668502808, |
| "learning_rate": 3.4541338483076644e-05, |
| "loss": 3.4676, |
| "step": 14480 |
| }, |
| { |
| "epoch": 1.8412376363175247, |
| "grad_norm": 1.050114393234253, |
| "learning_rate": 3.428118524133941e-05, |
| "loss": 3.4682, |
| "step": 14520 |
| }, |
| { |
| "epoch": 1.8463099163073802, |
| "grad_norm": 0.9435964226722717, |
| "learning_rate": 3.402150335529464e-05, |
| "loss": 3.4658, |
| "step": 14560 |
| }, |
| { |
| "epoch": 1.8513821962972357, |
| "grad_norm": 0.9514583945274353, |
| "learning_rate": 3.3762300611950236e-05, |
| "loss": 3.4611, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.856454476287091, |
| "grad_norm": 0.9340800642967224, |
| "learning_rate": 3.350358478394626e-05, |
| "loss": 3.4592, |
| "step": 14640 |
| }, |
| { |
| "epoch": 1.8615267562769464, |
| "grad_norm": 0.8389933109283447, |
| "learning_rate": 3.324536362932166e-05, |
| "loss": 3.4604, |
| "step": 14680 |
| }, |
| { |
| "epoch": 1.866599036266802, |
| "grad_norm": 0.856787919998169, |
| "learning_rate": 3.29876448912819e-05, |
| "loss": 3.4564, |
| "step": 14720 |
| }, |
| { |
| "epoch": 1.8716713162566574, |
| "grad_norm": 0.8933659791946411, |
| "learning_rate": 3.2730436297966485e-05, |
| "loss": 3.4554, |
| "step": 14760 |
| }, |
| { |
| "epoch": 1.8767435962465129, |
| "grad_norm": 0.8152870535850525, |
| "learning_rate": 3.247374556221745e-05, |
| "loss": 3.4543, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.8767435962465129, |
| "eval_action_accuracy": 0.1414794921875, |
| "eval_loss": 3.439603805541992, |
| "eval_runtime": 265.6804, |
| "eval_samples_per_second": 7.709, |
| "eval_steps_per_second": 0.482, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.8818158762363684, |
| "grad_norm": 0.8039724826812744, |
| "learning_rate": 3.2217580381347914e-05, |
| "loss": 3.4509, |
| "step": 14840 |
| }, |
| { |
| "epoch": 1.8868881562262236, |
| "grad_norm": 1.140610933303833, |
| "learning_rate": 3.1961948436911386e-05, |
| "loss": 3.4495, |
| "step": 14880 |
| }, |
| { |
| "epoch": 1.8919604362160791, |
| "grad_norm": 1.1068456172943115, |
| "learning_rate": 3.17068573944713e-05, |
| "loss": 3.4469, |
| "step": 14920 |
| }, |
| { |
| "epoch": 1.8970327162059346, |
| "grad_norm": 0.905805766582489, |
| "learning_rate": 3.1452314903371286e-05, |
| "loss": 3.4476, |
| "step": 14960 |
| }, |
| { |
| "epoch": 1.9021049961957899, |
| "grad_norm": 0.8861454725265503, |
| "learning_rate": 3.1198328596505646e-05, |
| "loss": 3.4424, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.9071772761856454, |
| "grad_norm": 0.916279137134552, |
| "learning_rate": 3.09449060900906e-05, |
| "loss": 3.4418, |
| "step": 15040 |
| }, |
| { |
| "epoch": 1.9122495561755009, |
| "grad_norm": 0.9068135619163513, |
| "learning_rate": 3.069205498343579e-05, |
| "loss": 3.4389, |
| "step": 15080 |
| }, |
| { |
| "epoch": 1.9173218361653563, |
| "grad_norm": 0.9586207866668701, |
| "learning_rate": 3.0439782858716527e-05, |
| "loss": 3.4393, |
| "step": 15120 |
| }, |
| { |
| "epoch": 1.9223941161552118, |
| "grad_norm": 0.9862120747566223, |
| "learning_rate": 3.0188097280746263e-05, |
| "loss": 3.4332, |
| "step": 15160 |
| }, |
| { |
| "epoch": 1.9274663961450673, |
| "grad_norm": 0.8921750783920288, |
| "learning_rate": 2.9937005796749905e-05, |
| "loss": 3.4347, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.9274663961450673, |
| "eval_action_accuracy": 0.1422119140625, |
| "eval_loss": 3.418545722961426, |
| "eval_runtime": 269.9638, |
| "eval_samples_per_second": 7.586, |
| "eval_steps_per_second": 0.474, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.9325386761349228, |
| "grad_norm": 0.9488757848739624, |
| "learning_rate": 2.9686515936137393e-05, |
| "loss": 3.4321, |
| "step": 15240 |
| }, |
| { |
| "epoch": 1.937610956124778, |
| "grad_norm": 0.8360820412635803, |
| "learning_rate": 2.9436635210277987e-05, |
| "loss": 3.4293, |
| "step": 15280 |
| }, |
| { |
| "epoch": 1.9426832361146336, |
| "grad_norm": 0.8490392565727234, |
| "learning_rate": 2.9187371112274958e-05, |
| "loss": 3.4273, |
| "step": 15320 |
| }, |
| { |
| "epoch": 1.9477555161044888, |
| "grad_norm": 0.8668012619018555, |
| "learning_rate": 2.893873111674097e-05, |
| "loss": 3.4276, |
| "step": 15360 |
| }, |
| { |
| "epoch": 1.9528277960943443, |
| "grad_norm": 0.933603823184967, |
| "learning_rate": 2.869072267957385e-05, |
| "loss": 3.4244, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.9579000760841998, |
| "grad_norm": 0.9964644908905029, |
| "learning_rate": 2.8443353237733126e-05, |
| "loss": 3.422, |
| "step": 15440 |
| }, |
| { |
| "epoch": 1.9629723560740553, |
| "grad_norm": 1.1415656805038452, |
| "learning_rate": 2.8196630209016878e-05, |
| "loss": 3.4252, |
| "step": 15480 |
| }, |
| { |
| "epoch": 1.9680446360639108, |
| "grad_norm": 1.0899465084075928, |
| "learning_rate": 2.7950560991839437e-05, |
| "loss": 3.4179, |
| "step": 15520 |
| }, |
| { |
| "epoch": 1.9731169160537663, |
| "grad_norm": 1.0375711917877197, |
| "learning_rate": 2.770515296500944e-05, |
| "loss": 3.4191, |
| "step": 15560 |
| }, |
| { |
| "epoch": 1.9781891960436218, |
| "grad_norm": 0.9184868335723877, |
| "learning_rate": 2.7460413487508635e-05, |
| "loss": 3.415, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.9781891960436218, |
| "eval_action_accuracy": 0.14495849609375, |
| "eval_loss": 3.3952927589416504, |
| "eval_runtime": 265.4459, |
| "eval_samples_per_second": 7.715, |
| "eval_steps_per_second": 0.482, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.983261476033477, |
| "grad_norm": 0.939353883266449, |
| "learning_rate": 2.7216349898271098e-05, |
| "loss": 3.4138, |
| "step": 15640 |
| }, |
| { |
| "epoch": 1.9883337560233325, |
| "grad_norm": 0.9041317701339722, |
| "learning_rate": 2.69729695159633e-05, |
| "loss": 3.409, |
| "step": 15680 |
| }, |
| { |
| "epoch": 1.9934060360131878, |
| "grad_norm": 0.8650372624397278, |
| "learning_rate": 2.6730279638764523e-05, |
| "loss": 3.4096, |
| "step": 15720 |
| }, |
| { |
| "epoch": 1.9984783160030433, |
| "grad_norm": 0.9451566338539124, |
| "learning_rate": 2.648828754414811e-05, |
| "loss": 3.4077, |
| "step": 15760 |
| }, |
| { |
| "epoch": 2.0035505959928988, |
| "grad_norm": 1.003873586654663, |
| "learning_rate": 2.624700048866317e-05, |
| "loss": 3.3977, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.0086228759827542, |
| "grad_norm": 0.9338696599006653, |
| "learning_rate": 2.600642570771703e-05, |
| "loss": 3.3862, |
| "step": 15840 |
| }, |
| { |
| "epoch": 2.0136951559726097, |
| "grad_norm": 1.1462304592132568, |
| "learning_rate": 2.5766570415358184e-05, |
| "loss": 3.3863, |
| "step": 15880 |
| }, |
| { |
| "epoch": 2.018767435962465, |
| "grad_norm": 0.9377365708351135, |
| "learning_rate": 2.5527441804060087e-05, |
| "loss": 3.3864, |
| "step": 15920 |
| }, |
| { |
| "epoch": 2.0238397159523207, |
| "grad_norm": 1.0909967422485352, |
| "learning_rate": 2.5289047044505347e-05, |
| "loss": 3.3844, |
| "step": 15960 |
| }, |
| { |
| "epoch": 2.028911995942176, |
| "grad_norm": 0.924224317073822, |
| "learning_rate": 2.505139328537082e-05, |
| "loss": 3.3818, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.028911995942176, |
| "eval_action_accuracy": 0.144775390625, |
| "eval_loss": 3.3769216537475586, |
| "eval_runtime": 29.3615, |
| "eval_samples_per_second": 69.751, |
| "eval_steps_per_second": 4.359, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.0339842759320312, |
| "grad_norm": 1.0833228826522827, |
| "learning_rate": 2.4814487653113172e-05, |
| "loss": 3.3775, |
| "step": 16040 |
| }, |
| { |
| "epoch": 2.0390565559218867, |
| "grad_norm": 1.1306848526000977, |
| "learning_rate": 2.4578337251755213e-05, |
| "loss": 3.375, |
| "step": 16080 |
| }, |
| { |
| "epoch": 2.044128835911742, |
| "grad_norm": 0.9547039866447449, |
| "learning_rate": 2.43429491626728e-05, |
| "loss": 3.3752, |
| "step": 16120 |
| }, |
| { |
| "epoch": 2.0492011159015977, |
| "grad_norm": 0.886420726776123, |
| "learning_rate": 2.4108330444382632e-05, |
| "loss": 3.3733, |
| "step": 16160 |
| }, |
| { |
| "epoch": 2.054273395891453, |
| "grad_norm": 0.9832823276519775, |
| "learning_rate": 2.387448813233041e-05, |
| "loss": 3.3715, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.0593456758813087, |
| "grad_norm": 0.9641093015670776, |
| "learning_rate": 2.364142923868002e-05, |
| "loss": 3.3693, |
| "step": 16240 |
| }, |
| { |
| "epoch": 2.064417955871164, |
| "grad_norm": 0.9319785833358765, |
| "learning_rate": 2.3409160752103183e-05, |
| "loss": 3.3697, |
| "step": 16280 |
| }, |
| { |
| "epoch": 2.0694902358610197, |
| "grad_norm": 0.9033721089363098, |
| "learning_rate": 2.3177689637569932e-05, |
| "loss": 3.3655, |
| "step": 16320 |
| }, |
| { |
| "epoch": 2.074562515850875, |
| "grad_norm": 1.0644468069076538, |
| "learning_rate": 2.294702283613966e-05, |
| "loss": 3.3684, |
| "step": 16360 |
| }, |
| { |
| "epoch": 2.0796347958407306, |
| "grad_norm": 0.9422540664672852, |
| "learning_rate": 2.271716726475312e-05, |
| "loss": 3.3596, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.0796347958407306, |
| "eval_action_accuracy": 0.145751953125, |
| "eval_loss": 3.356306791305542, |
| "eval_runtime": 33.337, |
| "eval_samples_per_second": 61.433, |
| "eval_steps_per_second": 3.84, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.0847070758305857, |
| "grad_norm": 0.9470515847206116, |
| "learning_rate": 2.2488129816024882e-05, |
| "loss": 3.3596, |
| "step": 16440 |
| }, |
| { |
| "epoch": 2.089779355820441, |
| "grad_norm": 0.9862939119338989, |
| "learning_rate": 2.225991735803673e-05, |
| "loss": 3.3618, |
| "step": 16480 |
| }, |
| { |
| "epoch": 2.0948516358102967, |
| "grad_norm": 0.9781101942062378, |
| "learning_rate": 2.2032536734131693e-05, |
| "loss": 3.3537, |
| "step": 16520 |
| }, |
| { |
| "epoch": 2.099923915800152, |
| "grad_norm": 0.9585676789283752, |
| "learning_rate": 2.1805994762708816e-05, |
| "loss": 3.355, |
| "step": 16560 |
| }, |
| { |
| "epoch": 2.1049961957900076, |
| "grad_norm": 1.042656421661377, |
| "learning_rate": 2.1580298237018665e-05, |
| "loss": 3.3558, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.110068475779863, |
| "grad_norm": 1.2123557329177856, |
| "learning_rate": 2.135545392495974e-05, |
| "loss": 3.3507, |
| "step": 16640 |
| }, |
| { |
| "epoch": 2.1151407557697186, |
| "grad_norm": 1.0929179191589355, |
| "learning_rate": 2.1131468568875356e-05, |
| "loss": 3.3528, |
| "step": 16680 |
| }, |
| { |
| "epoch": 2.120213035759574, |
| "grad_norm": 1.0415383577346802, |
| "learning_rate": 2.0908348885351643e-05, |
| "loss": 3.3499, |
| "step": 16720 |
| }, |
| { |
| "epoch": 2.125285315749429, |
| "grad_norm": 0.9704334139823914, |
| "learning_rate": 2.0686101565016002e-05, |
| "loss": 3.3477, |
| "step": 16760 |
| }, |
| { |
| "epoch": 2.1303575957392846, |
| "grad_norm": 0.930077075958252, |
| "learning_rate": 2.046473327233656e-05, |
| "loss": 3.3447, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.1303575957392846, |
| "eval_action_accuracy": 0.145751953125, |
| "eval_loss": 3.337432861328125, |
| "eval_runtime": 207.6445, |
| "eval_samples_per_second": 9.863, |
| "eval_steps_per_second": 0.616, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.13542987572914, |
| "grad_norm": 1.1401617527008057, |
| "learning_rate": 2.0244250645422225e-05, |
| "loss": 3.3486, |
| "step": 16840 |
| }, |
| { |
| "epoch": 2.1405021557189956, |
| "grad_norm": 1.0366841554641724, |
| "learning_rate": 2.0024660295823765e-05, |
| "loss": 3.3392, |
| "step": 16880 |
| }, |
| { |
| "epoch": 2.145574435708851, |
| "grad_norm": 1.0000267028808594, |
| "learning_rate": 1.9805968808335412e-05, |
| "loss": 3.34, |
| "step": 16920 |
| }, |
| { |
| "epoch": 2.1506467156987066, |
| "grad_norm": 1.0371631383895874, |
| "learning_rate": 1.9588182740797527e-05, |
| "loss": 3.3381, |
| "step": 16960 |
| }, |
| { |
| "epoch": 2.155718995688562, |
| "grad_norm": 1.0280026197433472, |
| "learning_rate": 1.9371308623899882e-05, |
| "loss": 3.337, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.1607912756784176, |
| "grad_norm": 1.0037304162979126, |
| "learning_rate": 1.9155352960985833e-05, |
| "loss": 3.3357, |
| "step": 17040 |
| }, |
| { |
| "epoch": 2.165863555668273, |
| "grad_norm": 1.1836119890213013, |
| "learning_rate": 1.89403222278573e-05, |
| "loss": 3.3354, |
| "step": 17080 |
| }, |
| { |
| "epoch": 2.1709358356581285, |
| "grad_norm": 1.016123652458191, |
| "learning_rate": 1.8726222872580624e-05, |
| "loss": 3.3296, |
| "step": 17120 |
| }, |
| { |
| "epoch": 2.1760081156479836, |
| "grad_norm": 1.043012261390686, |
| "learning_rate": 1.8513061315293145e-05, |
| "loss": 3.3346, |
| "step": 17160 |
| }, |
| { |
| "epoch": 2.181080395637839, |
| "grad_norm": 0.9651705026626587, |
| "learning_rate": 1.8300843948010744e-05, |
| "loss": 3.3248, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.181080395637839, |
| "eval_action_accuracy": 0.14715576171875, |
| "eval_loss": 3.3139657974243164, |
| "eval_runtime": 283.1082, |
| "eval_samples_per_second": 7.234, |
| "eval_steps_per_second": 0.452, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.1861526756276946, |
| "grad_norm": 1.0534641742706299, |
| "learning_rate": 1.808957713443613e-05, |
| "loss": 3.3245, |
| "step": 17240 |
| }, |
| { |
| "epoch": 2.19122495561755, |
| "grad_norm": 1.0732275247573853, |
| "learning_rate": 1.787926720976805e-05, |
| "loss": 3.3223, |
| "step": 17280 |
| }, |
| { |
| "epoch": 2.1962972356074055, |
| "grad_norm": 0.9667773842811584, |
| "learning_rate": 1.766992048051126e-05, |
| "loss": 3.3227, |
| "step": 17320 |
| }, |
| { |
| "epoch": 2.201369515597261, |
| "grad_norm": 1.1117255687713623, |
| "learning_rate": 1.746154322428742e-05, |
| "loss": 3.3235, |
| "step": 17360 |
| }, |
| { |
| "epoch": 2.2064417955871165, |
| "grad_norm": 1.161832571029663, |
| "learning_rate": 1.7254141689646948e-05, |
| "loss": 3.3192, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.211514075576972, |
| "grad_norm": 1.0808424949645996, |
| "learning_rate": 1.7047722095881503e-05, |
| "loss": 3.3177, |
| "step": 17440 |
| }, |
| { |
| "epoch": 2.2165863555668275, |
| "grad_norm": 0.9801045656204224, |
| "learning_rate": 1.68422906328376e-05, |
| "loss": 3.3169, |
| "step": 17480 |
| }, |
| { |
| "epoch": 2.2216586355566825, |
| "grad_norm": 1.0057750940322876, |
| "learning_rate": 1.663785346073094e-05, |
| "loss": 3.3187, |
| "step": 17520 |
| }, |
| { |
| "epoch": 2.226730915546538, |
| "grad_norm": 1.193770170211792, |
| "learning_rate": 1.6434416709961726e-05, |
| "loss": 3.3113, |
| "step": 17560 |
| }, |
| { |
| "epoch": 2.2318031955363935, |
| "grad_norm": 1.0598925352096558, |
| "learning_rate": 1.6231986480930743e-05, |
| "loss": 3.3096, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.2318031955363935, |
| "eval_action_accuracy": 0.1495361328125, |
| "eval_loss": 3.299508571624756, |
| "eval_runtime": 317.2452, |
| "eval_samples_per_second": 6.456, |
| "eval_steps_per_second": 0.403, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.236875475526249, |
| "grad_norm": 1.039414644241333, |
| "learning_rate": 1.6030568843856568e-05, |
| "loss": 3.3094, |
| "step": 17640 |
| }, |
| { |
| "epoch": 2.2419477555161045, |
| "grad_norm": 1.2232924699783325, |
| "learning_rate": 1.58301698385934e-05, |
| "loss": 3.3062, |
| "step": 17680 |
| }, |
| { |
| "epoch": 2.24702003550596, |
| "grad_norm": 0.9876456260681152, |
| "learning_rate": 1.5630795474450055e-05, |
| "loss": 3.3048, |
| "step": 17720 |
| }, |
| { |
| "epoch": 2.2520923154958155, |
| "grad_norm": 1.1325111389160156, |
| "learning_rate": 1.5432451730009705e-05, |
| "loss": 3.3004, |
| "step": 17760 |
| }, |
| { |
| "epoch": 2.257164595485671, |
| "grad_norm": 1.1618984937667847, |
| "learning_rate": 1.5235144552950643e-05, |
| "loss": 3.3, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.2622368754755264, |
| "grad_norm": 1.0500718355178833, |
| "learning_rate": 1.5038879859867855e-05, |
| "loss": 3.3005, |
| "step": 17840 |
| }, |
| { |
| "epoch": 2.267309155465382, |
| "grad_norm": 1.2077546119689941, |
| "learning_rate": 1.484366353609571e-05, |
| "loss": 3.3015, |
| "step": 17880 |
| }, |
| { |
| "epoch": 2.272381435455237, |
| "grad_norm": 1.0480679273605347, |
| "learning_rate": 1.4649501435531365e-05, |
| "loss": 3.2968, |
| "step": 17920 |
| }, |
| { |
| "epoch": 2.2774537154450925, |
| "grad_norm": 1.0955984592437744, |
| "learning_rate": 1.4456399380459324e-05, |
| "loss": 3.2985, |
| "step": 17960 |
| }, |
| { |
| "epoch": 2.282525995434948, |
| "grad_norm": 1.0870320796966553, |
| "learning_rate": 1.4264363161376787e-05, |
| "loss": 3.2914, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.282525995434948, |
| "eval_action_accuracy": 0.1488037109375, |
| "eval_loss": 3.281026840209961, |
| "eval_runtime": 256.6774, |
| "eval_samples_per_second": 7.979, |
| "eval_steps_per_second": 0.499, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.2875982754248034, |
| "grad_norm": 1.224732756614685, |
| "learning_rate": 1.4073398536820049e-05, |
| "loss": 3.294, |
| "step": 18040 |
| }, |
| { |
| "epoch": 2.292670555414659, |
| "grad_norm": 1.1285483837127686, |
| "learning_rate": 1.3883511233191748e-05, |
| "loss": 3.2926, |
| "step": 18080 |
| }, |
| { |
| "epoch": 2.2977428354045144, |
| "grad_norm": 1.109192132949829, |
| "learning_rate": 1.3694706944589275e-05, |
| "loss": 3.2901, |
| "step": 18120 |
| }, |
| { |
| "epoch": 2.30281511539437, |
| "grad_norm": 1.0854719877243042, |
| "learning_rate": 1.3506991332633879e-05, |
| "loss": 3.2883, |
| "step": 18160 |
| }, |
| { |
| "epoch": 2.3078873953842254, |
| "grad_norm": 1.1602346897125244, |
| "learning_rate": 1.332037002630101e-05, |
| "loss": 3.2878, |
| "step": 18200 |
| }, |
| { |
| "epoch": 2.3129596753740804, |
| "grad_norm": 1.1119505167007446, |
| "learning_rate": 1.3134848621751483e-05, |
| "loss": 3.2878, |
| "step": 18240 |
| }, |
| { |
| "epoch": 2.318031955363936, |
| "grad_norm": 1.101004719734192, |
| "learning_rate": 1.295043268216367e-05, |
| "loss": 3.2847, |
| "step": 18280 |
| }, |
| { |
| "epoch": 2.3231042353537914, |
| "grad_norm": 1.1685012578964233, |
| "learning_rate": 1.2767127737566631e-05, |
| "loss": 3.2793, |
| "step": 18320 |
| }, |
| { |
| "epoch": 2.328176515343647, |
| "grad_norm": 1.1972112655639648, |
| "learning_rate": 1.2584939284674396e-05, |
| "loss": 3.2767, |
| "step": 18360 |
| }, |
| { |
| "epoch": 2.3332487953335024, |
| "grad_norm": 1.0951577425003052, |
| "learning_rate": 1.2403872786721005e-05, |
| "loss": 3.2748, |
| "step": 18400 |
| }, |
| { |
| "epoch": 2.3332487953335024, |
| "eval_action_accuracy": 0.14862060546875, |
| "eval_loss": 3.2656493186950684, |
| "eval_runtime": 340.411, |
| "eval_samples_per_second": 6.016, |
| "eval_steps_per_second": 0.376, |
| "step": 18400 |
| }, |
| { |
| "epoch": 2.338321075323358, |
| "grad_norm": 1.3195337057113647, |
| "learning_rate": 1.2223933673296795e-05, |
| "loss": 3.2745, |
| "step": 18440 |
| }, |
| { |
| "epoch": 2.3433933553132134, |
| "grad_norm": 1.189841389656067, |
| "learning_rate": 1.204512734018553e-05, |
| "loss": 3.2712, |
| "step": 18480 |
| }, |
| { |
| "epoch": 2.348465635303069, |
| "grad_norm": 1.161684513092041, |
| "learning_rate": 1.186745914920262e-05, |
| "loss": 3.2738, |
| "step": 18520 |
| }, |
| { |
| "epoch": 2.3535379152929243, |
| "grad_norm": 1.2144806385040283, |
| "learning_rate": 1.1690934428034283e-05, |
| "loss": 3.2704, |
| "step": 18560 |
| }, |
| { |
| "epoch": 2.35861019528278, |
| "grad_norm": 1.2111247777938843, |
| "learning_rate": 1.1515558470077875e-05, |
| "loss": 3.2673, |
| "step": 18600 |
| }, |
| { |
| "epoch": 2.363682475272635, |
| "grad_norm": 1.0861356258392334, |
| "learning_rate": 1.1341336534283075e-05, |
| "loss": 3.2668, |
| "step": 18640 |
| }, |
| { |
| "epoch": 2.3687547552624904, |
| "grad_norm": 1.1214416027069092, |
| "learning_rate": 1.1168273844994248e-05, |
| "loss": 3.2679, |
| "step": 18680 |
| }, |
| { |
| "epoch": 2.373827035252346, |
| "grad_norm": 1.1087627410888672, |
| "learning_rate": 1.099637559179375e-05, |
| "loss": 3.2662, |
| "step": 18720 |
| }, |
| { |
| "epoch": 2.3788993152422013, |
| "grad_norm": 1.155854344367981, |
| "learning_rate": 1.082564692934634e-05, |
| "loss": 3.2599, |
| "step": 18760 |
| }, |
| { |
| "epoch": 2.383971595232057, |
| "grad_norm": 1.2042206525802612, |
| "learning_rate": 1.0656092977244536e-05, |
| "loss": 3.2585, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.383971595232057, |
| "eval_action_accuracy": 0.14996337890625, |
| "eval_loss": 3.2426531314849854, |
| "eval_runtime": 306.3779, |
| "eval_samples_per_second": 6.685, |
| "eval_steps_per_second": 0.418, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.3890438752219123, |
| "grad_norm": 1.1641093492507935, |
| "learning_rate": 1.0487718819855196e-05, |
| "loss": 3.2621, |
| "step": 18840 |
| }, |
| { |
| "epoch": 2.394116155211768, |
| "grad_norm": 1.1490825414657593, |
| "learning_rate": 1.032052950616697e-05, |
| "loss": 3.2599, |
| "step": 18880 |
| }, |
| { |
| "epoch": 2.3991884352016233, |
| "grad_norm": 1.1739047765731812, |
| "learning_rate": 1.0154530049638954e-05, |
| "loss": 3.255, |
| "step": 18920 |
| }, |
| { |
| "epoch": 2.4042607151914783, |
| "grad_norm": 1.123138189315796, |
| "learning_rate": 9.989725428050328e-06, |
| "loss": 3.2555, |
| "step": 18960 |
| }, |
| { |
| "epoch": 2.409332995181334, |
| "grad_norm": 1.2329597473144531, |
| "learning_rate": 9.8261205833511e-06, |
| "loss": 3.2542, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.4144052751711893, |
| "grad_norm": 1.2946821451187134, |
| "learning_rate": 9.66372042151386e-06, |
| "loss": 3.2542, |
| "step": 19040 |
| }, |
| { |
| "epoch": 2.419477555161045, |
| "grad_norm": 1.2074557542800903, |
| "learning_rate": 9.50252981238678e-06, |
| "loss": 3.2498, |
| "step": 19080 |
| }, |
| { |
| "epoch": 2.4245498351509003, |
| "grad_norm": 1.1963822841644287, |
| "learning_rate": 9.342553589547438e-06, |
| "loss": 3.2502, |
| "step": 19120 |
| }, |
| { |
| "epoch": 2.4296221151407558, |
| "grad_norm": 1.121868371963501, |
| "learning_rate": 9.183796550158014e-06, |
| "loss": 3.2495, |
| "step": 19160 |
| }, |
| { |
| "epoch": 2.4346943951306113, |
| "grad_norm": 1.1852304935455322, |
| "learning_rate": 9.026263454821343e-06, |
| "loss": 3.2449, |
| "step": 19200 |
| }, |
| { |
| "epoch": 2.4346943951306113, |
| "eval_action_accuracy": 0.14996337890625, |
| "eval_loss": 3.2290799617767334, |
| "eval_runtime": 225.6468, |
| "eval_samples_per_second": 9.076, |
| "eval_steps_per_second": 0.567, |
| "step": 19200 |
| }, |
| { |
| "epoch": 2.4397666751204667, |
| "grad_norm": 1.200818419456482, |
| "learning_rate": 8.869959027438219e-06, |
| "loss": 3.245, |
| "step": 19240 |
| }, |
| { |
| "epoch": 2.4448389551103222, |
| "grad_norm": 1.1980122327804565, |
| "learning_rate": 8.714887955065665e-06, |
| "loss": 3.2476, |
| "step": 19280 |
| }, |
| { |
| "epoch": 2.4499112351001777, |
| "grad_norm": 1.1194685697555542, |
| "learning_rate": 8.561054887776498e-06, |
| "loss": 3.2424, |
| "step": 19320 |
| }, |
| { |
| "epoch": 2.454983515090033, |
| "grad_norm": 1.2032148838043213, |
| "learning_rate": 8.408464438519759e-06, |
| "loss": 3.2398, |
| "step": 19360 |
| }, |
| { |
| "epoch": 2.4600557950798883, |
| "grad_norm": 1.1496555805206299, |
| "learning_rate": 8.257121182982508e-06, |
| "loss": 3.2414, |
| "step": 19400 |
| }, |
| { |
| "epoch": 2.4651280750697437, |
| "grad_norm": 1.1436223983764648, |
| "learning_rate": 8.107029659452498e-06, |
| "loss": 3.2359, |
| "step": 19440 |
| }, |
| { |
| "epoch": 2.4702003550595992, |
| "grad_norm": 1.1477023363113403, |
| "learning_rate": 7.958194368682215e-06, |
| "loss": 3.2399, |
| "step": 19480 |
| }, |
| { |
| "epoch": 2.4752726350494547, |
| "grad_norm": 1.2040438652038574, |
| "learning_rate": 7.810619773753775e-06, |
| "loss": 3.2335, |
| "step": 19520 |
| }, |
| { |
| "epoch": 2.48034491503931, |
| "grad_norm": 1.1406538486480713, |
| "learning_rate": 7.664310299945227e-06, |
| "loss": 3.2381, |
| "step": 19560 |
| }, |
| { |
| "epoch": 2.4854171950291657, |
| "grad_norm": 1.1902375221252441, |
| "learning_rate": 7.519270334597717e-06, |
| "loss": 3.2305, |
| "step": 19600 |
| }, |
| { |
| "epoch": 2.4854171950291657, |
| "eval_action_accuracy": 0.1500244140625, |
| "eval_loss": 3.2100226879119873, |
| "eval_runtime": 280.9496, |
| "eval_samples_per_second": 7.29, |
| "eval_steps_per_second": 0.456, |
| "step": 19600 |
| }, |
| { |
| "epoch": 2.490489475019021, |
| "grad_norm": 1.202954649925232, |
| "learning_rate": 7.375504226984059e-06, |
| "loss": 3.2296, |
| "step": 19640 |
| }, |
| { |
| "epoch": 2.4955617550088767, |
| "grad_norm": 1.4342540502548218, |
| "learning_rate": 7.233016288178213e-06, |
| "loss": 3.2288, |
| "step": 19680 |
| }, |
| { |
| "epoch": 2.5006340349987317, |
| "grad_norm": 1.2139471769332886, |
| "learning_rate": 7.091810790926068e-06, |
| "loss": 3.2284, |
| "step": 19720 |
| }, |
| { |
| "epoch": 2.505706314988587, |
| "grad_norm": 1.219605565071106, |
| "learning_rate": 6.9518919695172935e-06, |
| "loss": 3.2277, |
| "step": 19760 |
| }, |
| { |
| "epoch": 2.5107785949784427, |
| "grad_norm": 1.2192333936691284, |
| "learning_rate": 6.813264019658377e-06, |
| "loss": 3.2288, |
| "step": 19800 |
| }, |
| { |
| "epoch": 2.515850874968298, |
| "grad_norm": 1.1787699460983276, |
| "learning_rate": 6.675931098346783e-06, |
| "loss": 3.223, |
| "step": 19840 |
| }, |
| { |
| "epoch": 2.5209231549581537, |
| "grad_norm": 1.190075397491455, |
| "learning_rate": 6.5398973237463415e-06, |
| "loss": 3.2257, |
| "step": 19880 |
| }, |
| { |
| "epoch": 2.525995434948009, |
| "grad_norm": 1.1866856813430786, |
| "learning_rate": 6.405166775063709e-06, |
| "loss": 3.2212, |
| "step": 19920 |
| }, |
| { |
| "epoch": 2.5310677149378646, |
| "grad_norm": 1.2677091360092163, |
| "learning_rate": 6.271743492426097e-06, |
| "loss": 3.2219, |
| "step": 19960 |
| }, |
| { |
| "epoch": 2.53613999492772, |
| "grad_norm": 1.1606627702713013, |
| "learning_rate": 6.139631476760088e-06, |
| "loss": 3.2219, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.53613999492772, |
| "eval_action_accuracy": 0.154052734375, |
| "eval_loss": 3.199657678604126, |
| "eval_runtime": 148.1379, |
| "eval_samples_per_second": 13.825, |
| "eval_steps_per_second": 0.864, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.5412122749175756, |
| "grad_norm": 1.2358750104904175, |
| "learning_rate": 6.008834689671672e-06, |
| "loss": 3.219, |
| "step": 20040 |
| }, |
| { |
| "epoch": 2.546284554907431, |
| "grad_norm": 1.2563279867172241, |
| "learning_rate": 5.879357053327416e-06, |
| "loss": 3.2216, |
| "step": 20080 |
| }, |
| { |
| "epoch": 2.5513568348972866, |
| "grad_norm": 1.2597004175186157, |
| "learning_rate": 5.751202450336951e-06, |
| "loss": 3.2216, |
| "step": 20120 |
| }, |
| { |
| "epoch": 2.5564291148871416, |
| "grad_norm": 1.2089635133743286, |
| "learning_rate": 5.624374723636399e-06, |
| "loss": 3.2151, |
| "step": 20160 |
| }, |
| { |
| "epoch": 2.561501394876997, |
| "grad_norm": 1.2823585271835327, |
| "learning_rate": 5.49887767637327e-06, |
| "loss": 3.2148, |
| "step": 20200 |
| }, |
| { |
| "epoch": 2.5665736748668526, |
| "grad_norm": 1.1914616823196411, |
| "learning_rate": 5.3747150717923465e-06, |
| "loss": 3.214, |
| "step": 20240 |
| }, |
| { |
| "epoch": 2.571645954856708, |
| "grad_norm": 1.1906664371490479, |
| "learning_rate": 5.251890633122858e-06, |
| "loss": 3.2127, |
| "step": 20280 |
| }, |
| { |
| "epoch": 2.5767182348465636, |
| "grad_norm": 1.2275757789611816, |
| "learning_rate": 5.1304080434668055e-06, |
| "loss": 3.21, |
| "step": 20320 |
| }, |
| { |
| "epoch": 2.581790514836419, |
| "grad_norm": 1.2065469026565552, |
| "learning_rate": 5.010270945688572e-06, |
| "loss": 3.2124, |
| "step": 20360 |
| }, |
| { |
| "epoch": 2.5868627948262746, |
| "grad_norm": 1.2290291786193848, |
| "learning_rate": 4.891482942305614e-06, |
| "loss": 3.2085, |
| "step": 20400 |
| }, |
| { |
| "epoch": 2.5868627948262746, |
| "eval_action_accuracy": 0.15435791015625, |
| "eval_loss": 3.1864919662475586, |
| "eval_runtime": 282.6311, |
| "eval_samples_per_second": 7.246, |
| "eval_steps_per_second": 0.453, |
| "step": 20400 |
| }, |
| { |
| "epoch": 2.5919350748161296, |
| "grad_norm": 1.2060233354568481, |
| "learning_rate": 4.7740475953805096e-06, |
| "loss": 3.21, |
| "step": 20440 |
| }, |
| { |
| "epoch": 2.597007354805985, |
| "grad_norm": 1.2422938346862793, |
| "learning_rate": 4.657968426414095e-06, |
| "loss": 3.2093, |
| "step": 20480 |
| }, |
| { |
| "epoch": 2.6020796347958406, |
| "grad_norm": 1.2729711532592773, |
| "learning_rate": 4.543248916239878e-06, |
| "loss": 3.2056, |
| "step": 20520 |
| }, |
| { |
| "epoch": 2.607151914785696, |
| "grad_norm": 1.3254213333129883, |
| "learning_rate": 4.429892504919636e-06, |
| "loss": 3.2064, |
| "step": 20560 |
| }, |
| { |
| "epoch": 2.6122241947755516, |
| "grad_norm": 1.2693305015563965, |
| "learning_rate": 4.317902591640327e-06, |
| "loss": 3.204, |
| "step": 20600 |
| }, |
| { |
| "epoch": 2.617296474765407, |
| "grad_norm": 1.2927143573760986, |
| "learning_rate": 4.207282534612067e-06, |
| "loss": 3.2015, |
| "step": 20640 |
| }, |
| { |
| "epoch": 2.6223687547552625, |
| "grad_norm": 1.2953568696975708, |
| "learning_rate": 4.098035650967519e-06, |
| "loss": 3.2035, |
| "step": 20680 |
| }, |
| { |
| "epoch": 2.627441034745118, |
| "grad_norm": 1.2383949756622314, |
| "learning_rate": 3.9901652166623615e-06, |
| "loss": 3.2019, |
| "step": 20720 |
| }, |
| { |
| "epoch": 2.6325133147349735, |
| "grad_norm": 1.2561314105987549, |
| "learning_rate": 3.883674466377074e-06, |
| "loss": 3.2014, |
| "step": 20760 |
| }, |
| { |
| "epoch": 2.637585594724829, |
| "grad_norm": 1.3677061796188354, |
| "learning_rate": 3.778566593419924e-06, |
| "loss": 3.1998, |
| "step": 20800 |
| }, |
| { |
| "epoch": 2.637585594724829, |
| "eval_action_accuracy": 0.1546630859375, |
| "eval_loss": 3.1765692234039307, |
| "eval_runtime": 204.9483, |
| "eval_samples_per_second": 9.993, |
| "eval_steps_per_second": 0.625, |
| "step": 20800 |
| }, |
| { |
| "epoch": 2.6426578747146845, |
| "grad_norm": 1.288628101348877, |
| "learning_rate": 3.6748447496312623e-06, |
| "loss": 3.2009, |
| "step": 20840 |
| }, |
| { |
| "epoch": 2.6477301547045395, |
| "grad_norm": 1.3819936513900757, |
| "learning_rate": 3.5725120452889226e-06, |
| "loss": 3.1966, |
| "step": 20880 |
| }, |
| { |
| "epoch": 2.652802434694395, |
| "grad_norm": 1.3419549465179443, |
| "learning_rate": 3.4715715490150403e-06, |
| "loss": 3.1976, |
| "step": 20920 |
| }, |
| { |
| "epoch": 2.6578747146842505, |
| "grad_norm": 1.3197323083877563, |
| "learning_rate": 3.3720262876839827e-06, |
| "loss": 3.2006, |
| "step": 20960 |
| }, |
| { |
| "epoch": 2.662946994674106, |
| "grad_norm": 1.2988895177841187, |
| "learning_rate": 3.273879246331607e-06, |
| "loss": 3.1958, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.6680192746639615, |
| "grad_norm": 1.2606120109558105, |
| "learning_rate": 3.1771333680657157e-06, |
| "loss": 3.1944, |
| "step": 21040 |
| }, |
| { |
| "epoch": 2.673091554653817, |
| "grad_norm": 1.2874621152877808, |
| "learning_rate": 3.0817915539778695e-06, |
| "loss": 3.1949, |
| "step": 21080 |
| }, |
| { |
| "epoch": 2.6781638346436725, |
| "grad_norm": 1.3432596921920776, |
| "learning_rate": 2.9878566630563042e-06, |
| "loss": 3.1932, |
| "step": 21120 |
| }, |
| { |
| "epoch": 2.6832361146335275, |
| "grad_norm": 1.2724778652191162, |
| "learning_rate": 2.895331512100269e-06, |
| "loss": 3.1964, |
| "step": 21160 |
| }, |
| { |
| "epoch": 2.688308394623383, |
| "grad_norm": 1.2716572284698486, |
| "learning_rate": 2.804218875635539e-06, |
| "loss": 3.1928, |
| "step": 21200 |
| }, |
| { |
| "epoch": 2.688308394623383, |
| "eval_action_accuracy": 0.15606689453125, |
| "eval_loss": 3.165931224822998, |
| "eval_runtime": 297.7287, |
| "eval_samples_per_second": 6.879, |
| "eval_steps_per_second": 0.43, |
| "step": 21200 |
| }, |
| { |
| "epoch": 2.6933806746132385, |
| "grad_norm": 1.2943603992462158, |
| "learning_rate": 2.714521485831206e-06, |
| "loss": 3.1892, |
| "step": 21240 |
| }, |
| { |
| "epoch": 2.698452954603094, |
| "grad_norm": 1.3462802171707153, |
| "learning_rate": 2.6262420324177473e-06, |
| "loss": 3.1934, |
| "step": 21280 |
| }, |
| { |
| "epoch": 2.7035252345929495, |
| "grad_norm": 1.3464304208755493, |
| "learning_rate": 2.539383162606407e-06, |
| "loss": 3.1878, |
| "step": 21320 |
| }, |
| { |
| "epoch": 2.708597514582805, |
| "grad_norm": 1.286362886428833, |
| "learning_rate": 2.4539474810097495e-06, |
| "loss": 3.1885, |
| "step": 21360 |
| }, |
| { |
| "epoch": 2.7136697945726604, |
| "grad_norm": 1.3978246450424194, |
| "learning_rate": 2.3699375495636277e-06, |
| "loss": 3.1854, |
| "step": 21400 |
| }, |
| { |
| "epoch": 2.718742074562516, |
| "grad_norm": 1.3346283435821533, |
| "learning_rate": 2.287355887450299e-06, |
| "loss": 3.1846, |
| "step": 21440 |
| }, |
| { |
| "epoch": 2.7238143545523714, |
| "grad_norm": 1.3086987733840942, |
| "learning_rate": 2.2062049710229293e-06, |
| "loss": 3.1911, |
| "step": 21480 |
| }, |
| { |
| "epoch": 2.728886634542227, |
| "grad_norm": 1.2781283855438232, |
| "learning_rate": 2.126487233731289e-06, |
| "loss": 3.1883, |
| "step": 21520 |
| }, |
| { |
| "epoch": 2.7339589145320824, |
| "grad_norm": 1.2688366174697876, |
| "learning_rate": 2.0482050660488417e-06, |
| "loss": 3.1866, |
| "step": 21560 |
| }, |
| { |
| "epoch": 2.739031194521938, |
| "grad_norm": 1.3789799213409424, |
| "learning_rate": 1.971360815400991e-06, |
| "loss": 3.1887, |
| "step": 21600 |
| }, |
| { |
| "epoch": 2.739031194521938, |
| "eval_action_accuracy": 0.15606689453125, |
| "eval_loss": 3.1581926345825195, |
| "eval_runtime": 335.7569, |
| "eval_samples_per_second": 6.1, |
| "eval_steps_per_second": 0.381, |
| "step": 21600 |
| }, |
| { |
| "epoch": 2.744103474511793, |
| "grad_norm": 1.2935245037078857, |
| "learning_rate": 1.8959567860947602e-06, |
| "loss": 3.1853, |
| "step": 21640 |
| }, |
| { |
| "epoch": 2.7491757545016484, |
| "grad_norm": 1.3740415573120117, |
| "learning_rate": 1.821995239249613e-06, |
| "loss": 3.1829, |
| "step": 21680 |
| }, |
| { |
| "epoch": 2.754248034491504, |
| "grad_norm": 1.3145182132720947, |
| "learning_rate": 1.7494783927297643e-06, |
| "loss": 3.1853, |
| "step": 21720 |
| }, |
| { |
| "epoch": 2.7593203144813594, |
| "grad_norm": 1.4077484607696533, |
| "learning_rate": 1.678408421077532e-06, |
| "loss": 3.1842, |
| "step": 21760 |
| }, |
| { |
| "epoch": 2.764392594471215, |
| "grad_norm": 1.2955596446990967, |
| "learning_rate": 1.6087874554482573e-06, |
| "loss": 3.1853, |
| "step": 21800 |
| }, |
| { |
| "epoch": 2.7694648744610704, |
| "grad_norm": 1.329830288887024, |
| "learning_rate": 1.5406175835463111e-06, |
| "loss": 3.1843, |
| "step": 21840 |
| }, |
| { |
| "epoch": 2.7745371544509254, |
| "grad_norm": 1.343755841255188, |
| "learning_rate": 1.4739008495625427e-06, |
| "loss": 3.1841, |
| "step": 21880 |
| }, |
| { |
| "epoch": 2.779609434440781, |
| "grad_norm": 1.320789098739624, |
| "learning_rate": 1.4086392541129366e-06, |
| "loss": 3.1815, |
| "step": 21920 |
| }, |
| { |
| "epoch": 2.7846817144306364, |
| "grad_norm": 1.338318109512329, |
| "learning_rate": 1.344834754178692e-06, |
| "loss": 3.1815, |
| "step": 21960 |
| }, |
| { |
| "epoch": 2.789753994420492, |
| "grad_norm": 1.3006017208099365, |
| "learning_rate": 1.2824892630474484e-06, |
| "loss": 3.1824, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.789753994420492, |
| "eval_action_accuracy": 0.15509033203125, |
| "eval_loss": 3.1539883613586426, |
| "eval_runtime": 99.5814, |
| "eval_samples_per_second": 20.566, |
| "eval_steps_per_second": 1.285, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.7948262744103474, |
| "grad_norm": 1.327972650527954, |
| "learning_rate": 1.2216046502559753e-06, |
| "loss": 3.1804, |
| "step": 22040 |
| }, |
| { |
| "epoch": 2.799898554400203, |
| "grad_norm": 1.422127604484558, |
| "learning_rate": 1.1621827415340958e-06, |
| "loss": 3.1834, |
| "step": 22080 |
| }, |
| { |
| "epoch": 2.8049708343900583, |
| "grad_norm": 1.318961501121521, |
| "learning_rate": 1.104225318749924e-06, |
| "loss": 3.1799, |
| "step": 22120 |
| }, |
| { |
| "epoch": 2.810043114379914, |
| "grad_norm": 1.4058647155761719, |
| "learning_rate": 1.0477341198564582e-06, |
| "loss": 3.1802, |
| "step": 22160 |
| }, |
| { |
| "epoch": 2.8151153943697693, |
| "grad_norm": 1.3461483716964722, |
| "learning_rate": 9.927108388394446e-07, |
| "loss": 3.178, |
| "step": 22200 |
| }, |
| { |
| "epoch": 2.820187674359625, |
| "grad_norm": 1.2846914529800415, |
| "learning_rate": 9.391571256665899e-07, |
| "loss": 3.1817, |
| "step": 22240 |
| }, |
| { |
| "epoch": 2.8252599543494803, |
| "grad_norm": 1.3674983978271484, |
| "learning_rate": 8.870745862380847e-07, |
| "loss": 3.1779, |
| "step": 22280 |
| }, |
| { |
| "epoch": 2.830332234339336, |
| "grad_norm": 1.3590912818908691, |
| "learning_rate": 8.364647823384308e-07, |
| "loss": 3.1788, |
| "step": 22320 |
| }, |
| { |
| "epoch": 2.835404514329191, |
| "grad_norm": 1.2989791631698608, |
| "learning_rate": 7.873292315896453e-07, |
| "loss": 3.1788, |
| "step": 22360 |
| }, |
| { |
| "epoch": 2.8404767943190463, |
| "grad_norm": 1.311061978340149, |
| "learning_rate": 7.39669407405702e-07, |
| "loss": 3.1778, |
| "step": 22400 |
| }, |
| { |
| "epoch": 2.8404767943190463, |
| "eval_action_accuracy": 0.1556396484375, |
| "eval_loss": 3.1505887508392334, |
| "eval_runtime": 336.5852, |
| "eval_samples_per_second": 6.085, |
| "eval_steps_per_second": 0.38, |
| "step": 22400 |
| }, |
| { |
| "epoch": 2.845549074308902, |
| "grad_norm": 1.3574533462524414, |
| "learning_rate": 6.934867389484013e-07, |
| "loss": 3.1798, |
| "step": 22440 |
| }, |
| { |
| "epoch": 2.8506213542987573, |
| "grad_norm": 1.4348208904266357, |
| "learning_rate": 6.487826110844808e-07, |
| "loss": 3.1808, |
| "step": 22480 |
| }, |
| { |
| "epoch": 2.855693634288613, |
| "grad_norm": 1.377144455909729, |
| "learning_rate": 6.055583643440776e-07, |
| "loss": 3.1728, |
| "step": 22520 |
| }, |
| { |
| "epoch": 2.8607659142784683, |
| "grad_norm": 1.2860257625579834, |
| "learning_rate": 5.638152948805819e-07, |
| "loss": 3.1738, |
| "step": 22560 |
| }, |
| { |
| "epoch": 2.8658381942683238, |
| "grad_norm": 1.3123292922973633, |
| "learning_rate": 5.235546544317016e-07, |
| "loss": 3.1754, |
| "step": 22600 |
| }, |
| { |
| "epoch": 2.870910474258179, |
| "grad_norm": 1.3371970653533936, |
| "learning_rate": 4.847776502819867e-07, |
| "loss": 3.174, |
| "step": 22640 |
| }, |
| { |
| "epoch": 2.8759827542480343, |
| "grad_norm": 1.3241571187973022, |
| "learning_rate": 4.47485445226592e-07, |
| "loss": 3.1782, |
| "step": 22680 |
| }, |
| { |
| "epoch": 2.88105503423789, |
| "grad_norm": 1.4008209705352783, |
| "learning_rate": 4.116791575364154e-07, |
| "loss": 3.1776, |
| "step": 22720 |
| }, |
| { |
| "epoch": 2.8861273142277453, |
| "grad_norm": 1.3333276510238647, |
| "learning_rate": 3.7735986092457543e-07, |
| "loss": 3.1787, |
| "step": 22760 |
| }, |
| { |
| "epoch": 2.8911995942176008, |
| "grad_norm": 1.3414257764816284, |
| "learning_rate": 3.445285845141921e-07, |
| "loss": 3.1776, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.8911995942176008, |
| "eval_action_accuracy": 0.15618896484375, |
| "eval_loss": 3.1486361026763916, |
| "eval_runtime": 309.6189, |
| "eval_samples_per_second": 6.615, |
| "eval_steps_per_second": 0.413, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.8962718742074562, |
| "grad_norm": 1.3558026552200317, |
| "learning_rate": 3.1318631280755626e-07, |
| "loss": 3.1795, |
| "step": 22840 |
| }, |
| { |
| "epoch": 2.9013441541973117, |
| "grad_norm": 1.3386595249176025, |
| "learning_rate": 2.833339856565753e-07, |
| "loss": 3.172, |
| "step": 22880 |
| }, |
| { |
| "epoch": 2.906416434187167, |
| "grad_norm": 1.3235087394714355, |
| "learning_rate": 2.549724982346291e-07, |
| "loss": 3.177, |
| "step": 22920 |
| }, |
| { |
| "epoch": 2.9114887141770227, |
| "grad_norm": 1.504699945449829, |
| "learning_rate": 2.2810270100968033e-07, |
| "loss": 3.1757, |
| "step": 22960 |
| }, |
| { |
| "epoch": 2.916560994166878, |
| "grad_norm": 1.3332788944244385, |
| "learning_rate": 2.027253997188172e-07, |
| "loss": 3.1752, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.9216332741567337, |
| "grad_norm": 1.4046924114227295, |
| "learning_rate": 1.78841355344056e-07, |
| "loss": 3.1748, |
| "step": 23040 |
| }, |
| { |
| "epoch": 2.9267055541465887, |
| "grad_norm": 1.3087977170944214, |
| "learning_rate": 1.5645128408953713e-07, |
| "loss": 3.1736, |
| "step": 23080 |
| }, |
| { |
| "epoch": 2.931777834136444, |
| "grad_norm": 1.274903416633606, |
| "learning_rate": 1.355558573600535e-07, |
| "loss": 3.1749, |
| "step": 23120 |
| }, |
| { |
| "epoch": 2.9368501141262997, |
| "grad_norm": 1.5779058933258057, |
| "learning_rate": 1.161557017409165e-07, |
| "loss": 3.1741, |
| "step": 23160 |
| }, |
| { |
| "epoch": 2.941922394116155, |
| "grad_norm": 1.2767544984817505, |
| "learning_rate": 9.825139897915447e-08, |
| "loss": 3.1755, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.941922394116155, |
| "eval_action_accuracy": 0.1553955078125, |
| "eval_loss": 3.1481196880340576, |
| "eval_runtime": 226.2846, |
| "eval_samples_per_second": 9.051, |
| "eval_steps_per_second": 0.566, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.9469946741060107, |
| "grad_norm": 1.3390287160873413, |
| "learning_rate": 8.184348596606551e-08, |
| "loss": 3.174, |
| "step": 23240 |
| }, |
| { |
| "epoch": 2.952066954095866, |
| "grad_norm": 1.3068482875823975, |
| "learning_rate": 6.693245472114695e-08, |
| "loss": 3.1747, |
| "step": 23280 |
| }, |
| { |
| "epoch": 2.9571392340857217, |
| "grad_norm": 1.301686406135559, |
| "learning_rate": 5.3518752377307304e-08, |
| "loss": 3.1731, |
| "step": 23320 |
| }, |
| { |
| "epoch": 2.9622115140755767, |
| "grad_norm": 1.3553813695907593, |
| "learning_rate": 4.1602781167487946e-08, |
| "loss": 3.175, |
| "step": 23360 |
| }, |
| { |
| "epoch": 2.967283794065432, |
| "grad_norm": 1.3697738647460938, |
| "learning_rate": 3.1184898412572886e-08, |
| "loss": 3.1749, |
| "step": 23400 |
| }, |
| { |
| "epoch": 2.9723560740552877, |
| "grad_norm": 1.3143235445022583, |
| "learning_rate": 2.2265416510691693e-08, |
| "loss": 3.1737, |
| "step": 23440 |
| }, |
| { |
| "epoch": 2.977428354045143, |
| "grad_norm": 1.4149271249771118, |
| "learning_rate": 1.4844602927849283e-08, |
| "loss": 3.1723, |
| "step": 23480 |
| }, |
| { |
| "epoch": 2.9825006340349987, |
| "grad_norm": 1.37666916847229, |
| "learning_rate": 8.922680189898946e-09, |
| "loss": 3.1717, |
| "step": 23520 |
| }, |
| { |
| "epoch": 2.987572914024854, |
| "grad_norm": 1.3097264766693115, |
| "learning_rate": 4.499825875886599e-09, |
| "loss": 3.1736, |
| "step": 23560 |
| }, |
| { |
| "epoch": 2.9926451940147096, |
| "grad_norm": 1.2627308368682861, |
| "learning_rate": 1.576172612693938e-09, |
| "loss": 3.175, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.9926451940147096, |
| "eval_action_accuracy": 0.15625, |
| "eval_loss": 3.147233486175537, |
| "eval_runtime": 261.558, |
| "eval_samples_per_second": 7.83, |
| "eval_steps_per_second": 0.489, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.997717474004565, |
| "grad_norm": 1.4098745584487915, |
| "learning_rate": 1.5180807110271034e-10, |
| "loss": 3.1737, |
| "step": 23640 |
| } |
| ], |
| "logging_steps": 40, |
| "max_steps": 23658, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.3886060071684145e+19, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|