| { |
| "best_global_step": 57250, |
| "best_metric": 0.3188753128051758, |
| "best_model_checkpoint": "/shared/nalampara/mattext_ckpt_pretrain/300k/2026-02-13/17-18-37/pretrain/checkpoints/robocrys_rep_test-pretrain/checkpoint-45000", |
| "epoch": 50.0, |
| "eval_steps": 50, |
| "global_step": 58050, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04306632213608958, |
| "grad_norm": 3.4339823722839355, |
| "learning_rate": 0.00019983118001722653, |
| "loss": 23.69191650390625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04306632213608958, |
| "eval_loss": 16.00394630432129, |
| "eval_runtime": 16.8675, |
| "eval_samples_per_second": 1126.664, |
| "eval_steps_per_second": 35.216, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08613264427217916, |
| "grad_norm": 4.098213195800781, |
| "learning_rate": 0.0001996589147286822, |
| "loss": 15.080333251953125, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08613264427217916, |
| "eval_loss": 14.168619155883789, |
| "eval_runtime": 17.0695, |
| "eval_samples_per_second": 1113.33, |
| "eval_steps_per_second": 34.799, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12919896640826872, |
| "grad_norm": 2.9972188472747803, |
| "learning_rate": 0.0001994866494401378, |
| "loss": 13.9048046875, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12919896640826872, |
| "eval_loss": 13.282241821289062, |
| "eval_runtime": 17.0503, |
| "eval_samples_per_second": 1114.583, |
| "eval_steps_per_second": 34.838, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17226528854435832, |
| "grad_norm": 3.62282657623291, |
| "learning_rate": 0.00019931438415159348, |
| "loss": 13.15665283203125, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.17226528854435832, |
| "eval_loss": 12.615082740783691, |
| "eval_runtime": 17.091, |
| "eval_samples_per_second": 1111.932, |
| "eval_steps_per_second": 34.755, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2153316106804479, |
| "grad_norm": 4.212780952453613, |
| "learning_rate": 0.0001991421188630491, |
| "loss": 12.563475341796876, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2153316106804479, |
| "eval_loss": 12.107625007629395, |
| "eval_runtime": 17.3804, |
| "eval_samples_per_second": 1093.413, |
| "eval_steps_per_second": 34.176, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.25839793281653745, |
| "grad_norm": 5.472993850708008, |
| "learning_rate": 0.00019896985357450474, |
| "loss": 12.01086181640625, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.25839793281653745, |
| "eval_loss": 11.567380905151367, |
| "eval_runtime": 17.2108, |
| "eval_samples_per_second": 1104.188, |
| "eval_steps_per_second": 34.513, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.301464254952627, |
| "grad_norm": 6.175601959228516, |
| "learning_rate": 0.00019879758828596038, |
| "loss": 11.541044921875, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.301464254952627, |
| "eval_loss": 10.8231840133667, |
| "eval_runtime": 16.0675, |
| "eval_samples_per_second": 1182.757, |
| "eval_steps_per_second": 36.969, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.34453057708871665, |
| "grad_norm": 7.172627925872803, |
| "learning_rate": 0.00019862532299741602, |
| "loss": 10.729189453125, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.34453057708871665, |
| "eval_loss": 9.469819068908691, |
| "eval_runtime": 17.0542, |
| "eval_samples_per_second": 1114.331, |
| "eval_steps_per_second": 34.83, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3875968992248062, |
| "grad_norm": 6.581453800201416, |
| "learning_rate": 0.0001984530577088717, |
| "loss": 8.910851440429688, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3875968992248062, |
| "eval_loss": 6.483331680297852, |
| "eval_runtime": 17.0895, |
| "eval_samples_per_second": 1112.028, |
| "eval_steps_per_second": 34.758, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4306632213608958, |
| "grad_norm": 4.437243938446045, |
| "learning_rate": 0.0001982807924203273, |
| "loss": 6.259661254882812, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4306632213608958, |
| "eval_loss": 4.317221641540527, |
| "eval_runtime": 17.0015, |
| "eval_samples_per_second": 1117.783, |
| "eval_steps_per_second": 34.938, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4737295434969854, |
| "grad_norm": 3.040710687637329, |
| "learning_rate": 0.00019810852713178297, |
| "loss": 4.48141845703125, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4737295434969854, |
| "eval_loss": 3.2465999126434326, |
| "eval_runtime": 17.1896, |
| "eval_samples_per_second": 1105.55, |
| "eval_steps_per_second": 34.556, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5167958656330749, |
| "grad_norm": 2.620871067047119, |
| "learning_rate": 0.0001979362618432386, |
| "loss": 3.5127716064453125, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5167958656330749, |
| "eval_loss": 2.8293402194976807, |
| "eval_runtime": 17.5816, |
| "eval_samples_per_second": 1080.901, |
| "eval_steps_per_second": 33.785, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5598621877691645, |
| "grad_norm": 2.405362606048584, |
| "learning_rate": 0.00019776399655469423, |
| "loss": 3.18022216796875, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5598621877691645, |
| "eval_loss": 2.5354790687561035, |
| "eval_runtime": 16.9547, |
| "eval_samples_per_second": 1120.867, |
| "eval_steps_per_second": 35.034, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.602928509905254, |
| "grad_norm": 2.6026864051818848, |
| "learning_rate": 0.00019759173126614987, |
| "loss": 2.8754867553710937, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.602928509905254, |
| "eval_loss": 2.357602119445801, |
| "eval_runtime": 16.2184, |
| "eval_samples_per_second": 1171.756, |
| "eval_steps_per_second": 36.625, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6459948320413437, |
| "grad_norm": 2.574624538421631, |
| "learning_rate": 0.00019741946597760552, |
| "loss": 2.6319024658203123, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6459948320413437, |
| "eval_loss": 2.1693274974823, |
| "eval_runtime": 17.0307, |
| "eval_samples_per_second": 1115.867, |
| "eval_steps_per_second": 34.878, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6890611541774333, |
| "grad_norm": 1.7991482019424438, |
| "learning_rate": 0.00019724720068906118, |
| "loss": 2.439267578125, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6890611541774333, |
| "eval_loss": 2.030348300933838, |
| "eval_runtime": 16.7562, |
| "eval_samples_per_second": 1134.147, |
| "eval_steps_per_second": 35.45, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7321274763135228, |
| "grad_norm": 1.7477670907974243, |
| "learning_rate": 0.0001970749354005168, |
| "loss": 2.2564727783203127, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7321274763135228, |
| "eval_loss": 1.9149311780929565, |
| "eval_runtime": 16.688, |
| "eval_samples_per_second": 1138.78, |
| "eval_steps_per_second": 35.594, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7751937984496124, |
| "grad_norm": 1.7949120998382568, |
| "learning_rate": 0.00019690267011197244, |
| "loss": 2.1781280517578123, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7751937984496124, |
| "eval_loss": 1.8148161172866821, |
| "eval_runtime": 17.0557, |
| "eval_samples_per_second": 1114.231, |
| "eval_steps_per_second": 34.827, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.818260120585702, |
| "grad_norm": 2.2657155990600586, |
| "learning_rate": 0.00019673040482342808, |
| "loss": 1.9842291259765625, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.818260120585702, |
| "eval_loss": 1.6966851949691772, |
| "eval_runtime": 17.1068, |
| "eval_samples_per_second": 1110.902, |
| "eval_steps_per_second": 34.723, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8613264427217916, |
| "grad_norm": 2.0038652420043945, |
| "learning_rate": 0.00019655813953488373, |
| "loss": 1.9545573425292968, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8613264427217916, |
| "eval_loss": 1.6101728677749634, |
| "eval_runtime": 17.0809, |
| "eval_samples_per_second": 1112.589, |
| "eval_steps_per_second": 34.776, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9043927648578811, |
| "grad_norm": 1.9319428205490112, |
| "learning_rate": 0.00019638587424633937, |
| "loss": 1.7500227355957032, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.9043927648578811, |
| "eval_loss": 1.504974603652954, |
| "eval_runtime": 17.1529, |
| "eval_samples_per_second": 1107.92, |
| "eval_steps_per_second": 34.63, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.9474590869939707, |
| "grad_norm": 1.970742106437683, |
| "learning_rate": 0.000196213608957795, |
| "loss": 1.6700083923339843, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9474590869939707, |
| "eval_loss": 1.3932300806045532, |
| "eval_runtime": 17.4272, |
| "eval_samples_per_second": 1090.48, |
| "eval_steps_per_second": 34.085, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9905254091300603, |
| "grad_norm": 1.5904282331466675, |
| "learning_rate": 0.00019604134366925065, |
| "loss": 1.5361054992675782, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9905254091300603, |
| "eval_loss": 1.304945945739746, |
| "eval_runtime": 16.8173, |
| "eval_samples_per_second": 1130.028, |
| "eval_steps_per_second": 35.321, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.0335917312661498, |
| "grad_norm": 1.6059073209762573, |
| "learning_rate": 0.0001958690783807063, |
| "loss": 1.4490776062011719, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0335917312661498, |
| "eval_loss": 1.2493284940719604, |
| "eval_runtime": 16.9616, |
| "eval_samples_per_second": 1120.415, |
| "eval_steps_per_second": 35.02, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0766580534022394, |
| "grad_norm": 1.632856011390686, |
| "learning_rate": 0.00019569681309216194, |
| "loss": 1.3666287231445313, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.0766580534022394, |
| "eval_loss": 1.1869384050369263, |
| "eval_runtime": 17.0434, |
| "eval_samples_per_second": 1115.035, |
| "eval_steps_per_second": 34.852, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.119724375538329, |
| "grad_norm": 1.5437358617782593, |
| "learning_rate": 0.00019552454780361758, |
| "loss": 1.3059585571289063, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.119724375538329, |
| "eval_loss": 1.0582355260849, |
| "eval_runtime": 16.5221, |
| "eval_samples_per_second": 1150.216, |
| "eval_steps_per_second": 35.952, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "grad_norm": 1.649256944656372, |
| "learning_rate": 0.00019535228251507322, |
| "loss": 1.0846002197265625, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "eval_loss": 0.9497116804122925, |
| "eval_runtime": 17.1019, |
| "eval_samples_per_second": 1111.221, |
| "eval_steps_per_second": 34.733, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.2058570198105083, |
| "grad_norm": 1.5769813060760498, |
| "learning_rate": 0.00019518001722652886, |
| "loss": 0.9779405212402343, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2058570198105083, |
| "eval_loss": 0.9034150838851929, |
| "eval_runtime": 17.0701, |
| "eval_samples_per_second": 1113.289, |
| "eval_steps_per_second": 34.798, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2489233419465977, |
| "grad_norm": 1.503847599029541, |
| "learning_rate": 0.0001950077519379845, |
| "loss": 0.9594493103027344, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.2489233419465977, |
| "eval_loss": 0.8880420923233032, |
| "eval_runtime": 17.1136, |
| "eval_samples_per_second": 1110.461, |
| "eval_steps_per_second": 34.709, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.2919896640826873, |
| "grad_norm": 1.324310302734375, |
| "learning_rate": 0.00019483548664944015, |
| "loss": 0.9324958801269532, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.2919896640826873, |
| "eval_loss": 0.8554781079292297, |
| "eval_runtime": 17.1878, |
| "eval_samples_per_second": 1105.667, |
| "eval_steps_per_second": 34.559, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.335055986218777, |
| "grad_norm": 1.2940914630889893, |
| "learning_rate": 0.0001946632213608958, |
| "loss": 0.9000779724121094, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.335055986218777, |
| "eval_loss": 0.8320924639701843, |
| "eval_runtime": 16.9956, |
| "eval_samples_per_second": 1118.173, |
| "eval_steps_per_second": 34.95, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.3781223083548664, |
| "grad_norm": 1.3323431015014648, |
| "learning_rate": 0.00019449095607235143, |
| "loss": 0.8746331787109375, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.3781223083548664, |
| "eval_loss": 0.8127567172050476, |
| "eval_runtime": 16.9374, |
| "eval_samples_per_second": 1122.017, |
| "eval_steps_per_second": 35.07, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.421188630490956, |
| "grad_norm": 1.3652969598770142, |
| "learning_rate": 0.00019431869078380707, |
| "loss": 0.8706568145751953, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.421188630490956, |
| "eval_loss": 0.8145562410354614, |
| "eval_runtime": 16.5749, |
| "eval_samples_per_second": 1146.55, |
| "eval_steps_per_second": 35.837, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.4642549526270456, |
| "grad_norm": 1.3932275772094727, |
| "learning_rate": 0.00019414642549526272, |
| "loss": 0.8283468627929688, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.4642549526270456, |
| "eval_loss": 0.7912552356719971, |
| "eval_runtime": 17.1373, |
| "eval_samples_per_second": 1108.929, |
| "eval_steps_per_second": 34.661, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.5073212747631353, |
| "grad_norm": 1.182525873184204, |
| "learning_rate": 0.00019397416020671836, |
| "loss": 0.8269770050048828, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.5073212747631353, |
| "eval_loss": 0.7707593441009521, |
| "eval_runtime": 16.8899, |
| "eval_samples_per_second": 1125.167, |
| "eval_steps_per_second": 35.169, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.550387596899225, |
| "grad_norm": 1.1756706237792969, |
| "learning_rate": 0.000193801894918174, |
| "loss": 0.7841999816894532, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.550387596899225, |
| "eval_loss": 0.7642788887023926, |
| "eval_runtime": 17.2573, |
| "eval_samples_per_second": 1101.218, |
| "eval_steps_per_second": 34.42, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.5934539190353143, |
| "grad_norm": 1.1239345073699951, |
| "learning_rate": 0.00019362962962962964, |
| "loss": 0.778316650390625, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.5934539190353143, |
| "eval_loss": 0.7528692483901978, |
| "eval_runtime": 16.8188, |
| "eval_samples_per_second": 1129.926, |
| "eval_steps_per_second": 35.318, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.636520241171404, |
| "grad_norm": 1.2261638641357422, |
| "learning_rate": 0.00019345736434108528, |
| "loss": 0.7577320098876953, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.636520241171404, |
| "eval_loss": 0.7464138269424438, |
| "eval_runtime": 16.7312, |
| "eval_samples_per_second": 1135.845, |
| "eval_steps_per_second": 35.503, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.6795865633074936, |
| "grad_norm": 1.094499111175537, |
| "learning_rate": 0.00019328509905254093, |
| "loss": 0.7711544799804687, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.6795865633074936, |
| "eval_loss": 0.7377104163169861, |
| "eval_runtime": 16.7677, |
| "eval_samples_per_second": 1133.373, |
| "eval_steps_per_second": 35.425, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.722652885443583, |
| "grad_norm": 1.166304349899292, |
| "learning_rate": 0.00019311283376399657, |
| "loss": 0.7610466003417968, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.722652885443583, |
| "eval_loss": 0.732494056224823, |
| "eval_runtime": 16.6682, |
| "eval_samples_per_second": 1140.132, |
| "eval_steps_per_second": 35.637, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.7657192075796728, |
| "grad_norm": 1.171218991279602, |
| "learning_rate": 0.0001929405684754522, |
| "loss": 0.7330171203613282, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.7657192075796728, |
| "eval_loss": 0.7134495377540588, |
| "eval_runtime": 16.8525, |
| "eval_samples_per_second": 1127.667, |
| "eval_steps_per_second": 35.247, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.8087855297157622, |
| "grad_norm": 1.0657861232757568, |
| "learning_rate": 0.00019276830318690785, |
| "loss": 0.7187896728515625, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.8087855297157622, |
| "eval_loss": 0.6970002055168152, |
| "eval_runtime": 16.3557, |
| "eval_samples_per_second": 1161.916, |
| "eval_steps_per_second": 36.318, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "grad_norm": 0.9776778817176819, |
| "learning_rate": 0.0001925960378983635, |
| "loss": 0.7035723114013672, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "eval_loss": 0.7010369300842285, |
| "eval_runtime": 16.6178, |
| "eval_samples_per_second": 1143.593, |
| "eval_steps_per_second": 35.745, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.8949181739879415, |
| "grad_norm": 1.1066110134124756, |
| "learning_rate": 0.0001924237726098191, |
| "loss": 0.7039942169189453, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.8949181739879415, |
| "eval_loss": 0.7116236090660095, |
| "eval_runtime": 16.6683, |
| "eval_samples_per_second": 1140.126, |
| "eval_steps_per_second": 35.636, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.937984496124031, |
| "grad_norm": 1.0685100555419922, |
| "learning_rate": 0.00019225150732127478, |
| "loss": 0.70912109375, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.937984496124031, |
| "eval_loss": 0.6799043416976929, |
| "eval_runtime": 16.1106, |
| "eval_samples_per_second": 1179.597, |
| "eval_steps_per_second": 36.87, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.9810508182601207, |
| "grad_norm": 1.0897897481918335, |
| "learning_rate": 0.00019207924203273042, |
| "loss": 0.691868896484375, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.9810508182601207, |
| "eval_loss": 0.6700846552848816, |
| "eval_runtime": 16.1839, |
| "eval_samples_per_second": 1174.254, |
| "eval_steps_per_second": 36.703, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.02411714039621, |
| "grad_norm": 0.9901228547096252, |
| "learning_rate": 0.00019190697674418606, |
| "loss": 0.6789529418945313, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.02411714039621, |
| "eval_loss": 0.6777063012123108, |
| "eval_runtime": 16.6687, |
| "eval_samples_per_second": 1140.099, |
| "eval_steps_per_second": 35.636, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.0671834625322996, |
| "grad_norm": 1.12906014919281, |
| "learning_rate": 0.0001917347114556417, |
| "loss": 0.664688720703125, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.0671834625322996, |
| "eval_loss": 0.6748037338256836, |
| "eval_runtime": 16.8591, |
| "eval_samples_per_second": 1127.228, |
| "eval_steps_per_second": 35.233, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.1102497846683894, |
| "grad_norm": 1.0208185911178589, |
| "learning_rate": 0.00019156244616709732, |
| "loss": 0.6541690063476563, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.1102497846683894, |
| "eval_loss": 0.6536487936973572, |
| "eval_runtime": 16.5497, |
| "eval_samples_per_second": 1148.299, |
| "eval_steps_per_second": 35.892, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.153316106804479, |
| "grad_norm": 1.067574381828308, |
| "learning_rate": 0.000191390180878553, |
| "loss": 0.6565885162353515, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.153316106804479, |
| "eval_loss": 0.6487522125244141, |
| "eval_runtime": 16.7171, |
| "eval_samples_per_second": 1136.798, |
| "eval_steps_per_second": 35.532, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.1963824289405687, |
| "grad_norm": 1.065882682800293, |
| "learning_rate": 0.0001912179155900086, |
| "loss": 0.651094970703125, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.1963824289405687, |
| "eval_loss": 0.6502730250358582, |
| "eval_runtime": 17.1893, |
| "eval_samples_per_second": 1105.573, |
| "eval_steps_per_second": 34.556, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.239448751076658, |
| "grad_norm": 0.9784948825836182, |
| "learning_rate": 0.00019104565030146427, |
| "loss": 0.6197249603271484, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.239448751076658, |
| "eval_loss": 0.6479949355125427, |
| "eval_runtime": 16.9796, |
| "eval_samples_per_second": 1119.223, |
| "eval_steps_per_second": 34.983, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.2825150732127475, |
| "grad_norm": 0.925772488117218, |
| "learning_rate": 0.00019087338501291992, |
| "loss": 0.6295633697509766, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.2825150732127475, |
| "eval_loss": 0.6387772560119629, |
| "eval_runtime": 16.5664, |
| "eval_samples_per_second": 1147.14, |
| "eval_steps_per_second": 35.856, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.3255813953488373, |
| "grad_norm": 1.0654851198196411, |
| "learning_rate": 0.00019070111972437553, |
| "loss": 0.631210823059082, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.3255813953488373, |
| "eval_loss": 0.636771559715271, |
| "eval_runtime": 17.4711, |
| "eval_samples_per_second": 1087.738, |
| "eval_steps_per_second": 33.999, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.3686477174849268, |
| "grad_norm": 1.0032150745391846, |
| "learning_rate": 0.0001905288544358312, |
| "loss": 0.6157208633422852, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.3686477174849268, |
| "eval_loss": 0.6328519582748413, |
| "eval_runtime": 17.1571, |
| "eval_samples_per_second": 1107.646, |
| "eval_steps_per_second": 34.621, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.4117140396210166, |
| "grad_norm": 0.869613528251648, |
| "learning_rate": 0.00019035658914728681, |
| "loss": 0.6231210327148438, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.4117140396210166, |
| "eval_loss": 0.6197053790092468, |
| "eval_runtime": 17.5434, |
| "eval_samples_per_second": 1083.258, |
| "eval_steps_per_second": 33.859, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.454780361757106, |
| "grad_norm": 1.018100619316101, |
| "learning_rate": 0.00019018432385874248, |
| "loss": 0.6205888748168945, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.454780361757106, |
| "eval_loss": 0.6245771050453186, |
| "eval_runtime": 17.4136, |
| "eval_samples_per_second": 1091.331, |
| "eval_steps_per_second": 34.111, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.4978466838931954, |
| "grad_norm": 0.9806316494941711, |
| "learning_rate": 0.0001900120585701981, |
| "loss": 0.6005197906494141, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.4978466838931954, |
| "eval_loss": 0.6213578581809998, |
| "eval_runtime": 17.1385, |
| "eval_samples_per_second": 1108.851, |
| "eval_steps_per_second": 34.659, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.5409130060292853, |
| "grad_norm": 0.8172425627708435, |
| "learning_rate": 0.00018983979328165377, |
| "loss": 0.606026611328125, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.5409130060292853, |
| "eval_loss": 0.6219125986099243, |
| "eval_runtime": 16.9845, |
| "eval_samples_per_second": 1118.902, |
| "eval_steps_per_second": 34.973, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.5839793281653747, |
| "grad_norm": 0.9219424724578857, |
| "learning_rate": 0.0001896675279931094, |
| "loss": 0.6011330032348633, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.5839793281653747, |
| "eval_loss": 0.6213803887367249, |
| "eval_runtime": 16.7255, |
| "eval_samples_per_second": 1136.232, |
| "eval_steps_per_second": 35.515, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.627045650301464, |
| "grad_norm": 0.9950935244560242, |
| "learning_rate": 0.00018949526270456503, |
| "loss": 0.5897697448730469, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.627045650301464, |
| "eval_loss": 0.5991339087486267, |
| "eval_runtime": 17.2627, |
| "eval_samples_per_second": 1100.871, |
| "eval_steps_per_second": 34.409, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.670111972437554, |
| "grad_norm": 0.8786163926124573, |
| "learning_rate": 0.0001893229974160207, |
| "loss": 0.5946865844726562, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.670111972437554, |
| "eval_loss": 0.6077448725700378, |
| "eval_runtime": 15.8056, |
| "eval_samples_per_second": 1202.358, |
| "eval_steps_per_second": 37.582, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.7131782945736433, |
| "grad_norm": 0.7736066579818726, |
| "learning_rate": 0.0001891507321274763, |
| "loss": 0.5817356872558593, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.7131782945736433, |
| "eval_loss": 0.5996423959732056, |
| "eval_runtime": 17.1554, |
| "eval_samples_per_second": 1107.754, |
| "eval_steps_per_second": 34.625, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.7562446167097328, |
| "grad_norm": 0.9255921244621277, |
| "learning_rate": 0.00018897846683893198, |
| "loss": 0.5700679016113281, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.7562446167097328, |
| "eval_loss": 0.5959638357162476, |
| "eval_runtime": 17.7994, |
| "eval_samples_per_second": 1067.678, |
| "eval_steps_per_second": 33.372, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.7993109388458226, |
| "grad_norm": 0.8310986757278442, |
| "learning_rate": 0.0001888062015503876, |
| "loss": 0.5805090713500977, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.7993109388458226, |
| "eval_loss": 0.5928221344947815, |
| "eval_runtime": 17.5479, |
| "eval_samples_per_second": 1082.982, |
| "eval_steps_per_second": 33.85, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.842377260981912, |
| "grad_norm": 0.9678044319152832, |
| "learning_rate": 0.00018863393626184324, |
| "loss": 0.5692436599731445, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.842377260981912, |
| "eval_loss": 0.5916841626167297, |
| "eval_runtime": 17.1141, |
| "eval_samples_per_second": 1110.431, |
| "eval_steps_per_second": 34.708, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.885443583118002, |
| "grad_norm": 0.9506468772888184, |
| "learning_rate": 0.0001884616709732989, |
| "loss": 0.5596051406860352, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.885443583118002, |
| "eval_loss": 0.5949987173080444, |
| "eval_runtime": 17.6707, |
| "eval_samples_per_second": 1075.45, |
| "eval_steps_per_second": 33.615, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.9285099052540913, |
| "grad_norm": 0.8617449998855591, |
| "learning_rate": 0.00018828940568475452, |
| "loss": 0.5663171768188476, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.9285099052540913, |
| "eval_loss": 0.5809502601623535, |
| "eval_runtime": 17.0812, |
| "eval_samples_per_second": 1112.571, |
| "eval_steps_per_second": 34.775, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.971576227390181, |
| "grad_norm": 1.022600531578064, |
| "learning_rate": 0.0001881171403962102, |
| "loss": 0.5594380187988282, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.971576227390181, |
| "eval_loss": 0.5832746028900146, |
| "eval_runtime": 15.7435, |
| "eval_samples_per_second": 1207.102, |
| "eval_steps_per_second": 37.73, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.0146425495262705, |
| "grad_norm": 0.8227512240409851, |
| "learning_rate": 0.0001879448751076658, |
| "loss": 0.5425720977783203, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.0146425495262705, |
| "eval_loss": 0.5764813423156738, |
| "eval_runtime": 17.1751, |
| "eval_samples_per_second": 1106.485, |
| "eval_steps_per_second": 34.585, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.05770887166236, |
| "grad_norm": 0.9377761483192444, |
| "learning_rate": 0.00018777260981912145, |
| "loss": 0.5348855590820313, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.05770887166236, |
| "eval_loss": 0.570465624332428, |
| "eval_runtime": 16.6757, |
| "eval_samples_per_second": 1139.621, |
| "eval_steps_per_second": 35.621, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.10077519379845, |
| "grad_norm": 0.854451596736908, |
| "learning_rate": 0.0001876003445305771, |
| "loss": 0.5510664367675782, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.10077519379845, |
| "eval_loss": 0.575946569442749, |
| "eval_runtime": 16.7449, |
| "eval_samples_per_second": 1134.91, |
| "eval_steps_per_second": 35.473, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.143841515934539, |
| "grad_norm": 0.8335583806037903, |
| "learning_rate": 0.00018742807924203273, |
| "loss": 0.5319256591796875, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.143841515934539, |
| "eval_loss": 0.5813077688217163, |
| "eval_runtime": 17.3381, |
| "eval_samples_per_second": 1096.084, |
| "eval_steps_per_second": 34.26, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.1869078380706286, |
| "grad_norm": 0.8312974572181702, |
| "learning_rate": 0.0001872558139534884, |
| "loss": 0.5365386199951172, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.1869078380706286, |
| "eval_loss": 0.5719412565231323, |
| "eval_runtime": 17.729, |
| "eval_samples_per_second": 1071.917, |
| "eval_steps_per_second": 33.504, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.2299741602067185, |
| "grad_norm": 0.7145699858665466, |
| "learning_rate": 0.00018708354866494401, |
| "loss": 0.5286837768554687, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.2299741602067185, |
| "eval_loss": 0.563346266746521, |
| "eval_runtime": 17.1979, |
| "eval_samples_per_second": 1105.021, |
| "eval_steps_per_second": 34.539, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.273040482342808, |
| "grad_norm": 0.7736471891403198, |
| "learning_rate": 0.00018691128337639968, |
| "loss": 0.5384387969970703, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.273040482342808, |
| "eval_loss": 0.562952995300293, |
| "eval_runtime": 17.2225, |
| "eval_samples_per_second": 1103.44, |
| "eval_steps_per_second": 34.49, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.3161068044788973, |
| "grad_norm": 0.8005329966545105, |
| "learning_rate": 0.0001867390180878553, |
| "loss": 0.5317356491088867, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.3161068044788973, |
| "eval_loss": 0.5772661566734314, |
| "eval_runtime": 17.4171, |
| "eval_samples_per_second": 1091.109, |
| "eval_steps_per_second": 34.104, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.359173126614987, |
| "grad_norm": 0.9179701805114746, |
| "learning_rate": 0.00018656675279931094, |
| "loss": 0.5409442138671875, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.359173126614987, |
| "eval_loss": 0.5530401468276978, |
| "eval_runtime": 17.0967, |
| "eval_samples_per_second": 1111.562, |
| "eval_steps_per_second": 34.744, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.4022394487510765, |
| "grad_norm": 0.7085617780685425, |
| "learning_rate": 0.00018639448751076658, |
| "loss": 0.5294640350341797, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.4022394487510765, |
| "eval_loss": 0.5698192119598389, |
| "eval_runtime": 17.0564, |
| "eval_samples_per_second": 1114.183, |
| "eval_steps_per_second": 34.826, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.4453057708871664, |
| "grad_norm": 0.7268469333648682, |
| "learning_rate": 0.00018622222222222223, |
| "loss": 0.5306741333007813, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.4453057708871664, |
| "eval_loss": 0.5615823268890381, |
| "eval_runtime": 17.2492, |
| "eval_samples_per_second": 1101.734, |
| "eval_steps_per_second": 34.436, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.488372093023256, |
| "grad_norm": 0.6962466239929199, |
| "learning_rate": 0.0001860499569336779, |
| "loss": 0.5262623977661133, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.488372093023256, |
| "eval_loss": 0.5616022944450378, |
| "eval_runtime": 16.9577, |
| "eval_samples_per_second": 1120.674, |
| "eval_steps_per_second": 35.028, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.5314384151593456, |
| "grad_norm": 0.8463277816772461, |
| "learning_rate": 0.0001858776916451335, |
| "loss": 0.5265095138549805, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.5314384151593456, |
| "eval_loss": 0.5528528690338135, |
| "eval_runtime": 17.9044, |
| "eval_samples_per_second": 1061.414, |
| "eval_steps_per_second": 33.176, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.574504737295435, |
| "grad_norm": 0.841475784778595, |
| "learning_rate": 0.00018570542635658915, |
| "loss": 0.5126543426513672, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.574504737295435, |
| "eval_loss": 0.5555074214935303, |
| "eval_runtime": 17.3706, |
| "eval_samples_per_second": 1094.032, |
| "eval_steps_per_second": 34.196, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.6175710594315245, |
| "grad_norm": 0.925631046295166, |
| "learning_rate": 0.0001855331610680448, |
| "loss": 0.518094711303711, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.6175710594315245, |
| "eval_loss": 0.5519886612892151, |
| "eval_runtime": 17.2437, |
| "eval_samples_per_second": 1102.083, |
| "eval_steps_per_second": 34.447, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.6606373815676143, |
| "grad_norm": 0.770258903503418, |
| "learning_rate": 0.00018536089577950044, |
| "loss": 0.5245172119140625, |
| "step": 4250 |
| }, |
| { |
| "epoch": 3.6606373815676143, |
| "eval_loss": 0.5562922954559326, |
| "eval_runtime": 17.3784, |
| "eval_samples_per_second": 1093.54, |
| "eval_steps_per_second": 34.18, |
| "step": 4250 |
| }, |
| { |
| "epoch": 3.7037037037037037, |
| "grad_norm": 0.790610134601593, |
| "learning_rate": 0.00018518863049095608, |
| "loss": 0.5096866989135742, |
| "step": 4300 |
| }, |
| { |
| "epoch": 3.7037037037037037, |
| "eval_loss": 0.5537944436073303, |
| "eval_runtime": 17.0179, |
| "eval_samples_per_second": 1116.704, |
| "eval_steps_per_second": 34.904, |
| "step": 4300 |
| }, |
| { |
| "epoch": 3.746770025839793, |
| "grad_norm": 0.8743278384208679, |
| "learning_rate": 0.00018501636520241172, |
| "loss": 0.5054009246826172, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.746770025839793, |
| "eval_loss": 0.5489095449447632, |
| "eval_runtime": 17.5664, |
| "eval_samples_per_second": 1081.839, |
| "eval_steps_per_second": 33.815, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.789836347975883, |
| "grad_norm": 1.0607808828353882, |
| "learning_rate": 0.00018484409991386736, |
| "loss": 0.5099769973754883, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.789836347975883, |
| "eval_loss": 0.5474193692207336, |
| "eval_runtime": 16.9832, |
| "eval_samples_per_second": 1118.988, |
| "eval_steps_per_second": 34.976, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.8329026701119724, |
| "grad_norm": 0.8405239582061768, |
| "learning_rate": 0.000184671834625323, |
| "loss": 0.5094646453857422, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.8329026701119724, |
| "eval_loss": 0.539194643497467, |
| "eval_runtime": 17.3908, |
| "eval_samples_per_second": 1092.763, |
| "eval_steps_per_second": 34.156, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.875968992248062, |
| "grad_norm": 0.7805312275886536, |
| "learning_rate": 0.00018449956933677865, |
| "loss": 0.506949462890625, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.875968992248062, |
| "eval_loss": 0.542855441570282, |
| "eval_runtime": 16.5024, |
| "eval_samples_per_second": 1151.593, |
| "eval_steps_per_second": 35.995, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.9190353143841516, |
| "grad_norm": 0.7860807180404663, |
| "learning_rate": 0.0001843273040482343, |
| "loss": 0.5034315872192383, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.9190353143841516, |
| "eval_loss": 0.5419240593910217, |
| "eval_runtime": 17.33, |
| "eval_samples_per_second": 1096.598, |
| "eval_steps_per_second": 34.276, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.962101636520241, |
| "grad_norm": 0.667561411857605, |
| "learning_rate": 0.00018415503875968993, |
| "loss": 0.48937828063964844, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.962101636520241, |
| "eval_loss": 0.5298347473144531, |
| "eval_runtime": 17.6483, |
| "eval_samples_per_second": 1076.82, |
| "eval_steps_per_second": 33.658, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.0051679586563305, |
| "grad_norm": 0.8568994998931885, |
| "learning_rate": 0.00018398277347114557, |
| "loss": 0.5055955505371094, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.0051679586563305, |
| "eval_loss": 0.5280942320823669, |
| "eval_runtime": 17.4227, |
| "eval_samples_per_second": 1090.759, |
| "eval_steps_per_second": 34.093, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.04823428079242, |
| "grad_norm": 0.9342105984687805, |
| "learning_rate": 0.00018381050818260121, |
| "loss": 0.48168006896972654, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.04823428079242, |
| "eval_loss": 0.5394223928451538, |
| "eval_runtime": 17.1425, |
| "eval_samples_per_second": 1108.592, |
| "eval_steps_per_second": 34.651, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.09130060292851, |
| "grad_norm": 0.7910040616989136, |
| "learning_rate": 0.00018363824289405686, |
| "loss": 0.48789196014404296, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.09130060292851, |
| "eval_loss": 0.534257173538208, |
| "eval_runtime": 17.3471, |
| "eval_samples_per_second": 1095.516, |
| "eval_steps_per_second": 34.242, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.134366925064599, |
| "grad_norm": 0.7728812098503113, |
| "learning_rate": 0.0001834659776055125, |
| "loss": 0.47792926788330076, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.134366925064599, |
| "eval_loss": 0.5376848578453064, |
| "eval_runtime": 17.163, |
| "eval_samples_per_second": 1107.267, |
| "eval_steps_per_second": 34.609, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.177433247200689, |
| "grad_norm": 0.9220482110977173, |
| "learning_rate": 0.00018329371231696814, |
| "loss": 0.48800254821777345, |
| "step": 4850 |
| }, |
| { |
| "epoch": 4.177433247200689, |
| "eval_loss": 0.5448176860809326, |
| "eval_runtime": 16.7685, |
| "eval_samples_per_second": 1133.315, |
| "eval_steps_per_second": 35.424, |
| "step": 4850 |
| }, |
| { |
| "epoch": 4.220499569336779, |
| "grad_norm": 0.7572962641716003, |
| "learning_rate": 0.00018312144702842378, |
| "loss": 0.48915714263916016, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.220499569336779, |
| "eval_loss": 0.5347938537597656, |
| "eval_runtime": 19.5201, |
| "eval_samples_per_second": 973.561, |
| "eval_steps_per_second": 30.43, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.263565891472869, |
| "grad_norm": 0.7922815680503845, |
| "learning_rate": 0.00018294918173987943, |
| "loss": 0.4717945861816406, |
| "step": 4950 |
| }, |
| { |
| "epoch": 4.263565891472869, |
| "eval_loss": 0.5262234807014465, |
| "eval_runtime": 18.6404, |
| "eval_samples_per_second": 1019.509, |
| "eval_steps_per_second": 31.866, |
| "step": 4950 |
| }, |
| { |
| "epoch": 4.306632213608958, |
| "grad_norm": 0.8557692766189575, |
| "learning_rate": 0.00018277691645133507, |
| "loss": 0.48916732788085937, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.306632213608958, |
| "eval_loss": 0.5336983799934387, |
| "eval_runtime": 16.6117, |
| "eval_samples_per_second": 1144.015, |
| "eval_steps_per_second": 35.758, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.3496985357450475, |
| "grad_norm": 0.8177461624145508, |
| "learning_rate": 0.0001826046511627907, |
| "loss": 0.4893519592285156, |
| "step": 5050 |
| }, |
| { |
| "epoch": 4.3496985357450475, |
| "eval_loss": 0.5244340300559998, |
| "eval_runtime": 18.6024, |
| "eval_samples_per_second": 1021.589, |
| "eval_steps_per_second": 31.931, |
| "step": 5050 |
| }, |
| { |
| "epoch": 4.392764857881137, |
| "grad_norm": 0.9127139449119568, |
| "learning_rate": 0.00018243238587424635, |
| "loss": 0.4808861923217773, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.392764857881137, |
| "eval_loss": 0.5295798778533936, |
| "eval_runtime": 17.282, |
| "eval_samples_per_second": 1099.64, |
| "eval_steps_per_second": 34.371, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.435831180017226, |
| "grad_norm": 0.7107005715370178, |
| "learning_rate": 0.000182260120585702, |
| "loss": 0.4700811767578125, |
| "step": 5150 |
| }, |
| { |
| "epoch": 4.435831180017226, |
| "eval_loss": 0.5272426009178162, |
| "eval_runtime": 17.251, |
| "eval_samples_per_second": 1101.619, |
| "eval_steps_per_second": 34.433, |
| "step": 5150 |
| }, |
| { |
| "epoch": 4.478897502153316, |
| "grad_norm": 0.7757647633552551, |
| "learning_rate": 0.00018208785529715764, |
| "loss": 0.48522254943847654, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.478897502153316, |
| "eval_loss": 0.5197687745094299, |
| "eval_runtime": 17.2491, |
| "eval_samples_per_second": 1101.737, |
| "eval_steps_per_second": 34.437, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.521963824289406, |
| "grad_norm": 0.7408074140548706, |
| "learning_rate": 0.00018191559000861328, |
| "loss": 0.47749095916748047, |
| "step": 5250 |
| }, |
| { |
| "epoch": 4.521963824289406, |
| "eval_loss": 0.5270209312438965, |
| "eval_runtime": 17.2465, |
| "eval_samples_per_second": 1101.905, |
| "eval_steps_per_second": 34.442, |
| "step": 5250 |
| }, |
| { |
| "epoch": 4.565030146425495, |
| "grad_norm": 0.7746986150741577, |
| "learning_rate": 0.00018174332472006892, |
| "loss": 0.4729361343383789, |
| "step": 5300 |
| }, |
| { |
| "epoch": 4.565030146425495, |
| "eval_loss": 0.5280850529670715, |
| "eval_runtime": 16.8871, |
| "eval_samples_per_second": 1125.354, |
| "eval_steps_per_second": 35.175, |
| "step": 5300 |
| }, |
| { |
| "epoch": 4.608096468561585, |
| "grad_norm": 0.733068585395813, |
| "learning_rate": 0.00018157105943152456, |
| "loss": 0.475450439453125, |
| "step": 5350 |
| }, |
| { |
| "epoch": 4.608096468561585, |
| "eval_loss": 0.5167151689529419, |
| "eval_runtime": 17.6109, |
| "eval_samples_per_second": 1079.104, |
| "eval_steps_per_second": 33.729, |
| "step": 5350 |
| }, |
| { |
| "epoch": 4.651162790697675, |
| "grad_norm": 0.6393090486526489, |
| "learning_rate": 0.0001813987941429802, |
| "loss": 0.46974494934082034, |
| "step": 5400 |
| }, |
| { |
| "epoch": 4.651162790697675, |
| "eval_loss": 0.5263372659683228, |
| "eval_runtime": 17.1455, |
| "eval_samples_per_second": 1108.395, |
| "eval_steps_per_second": 34.645, |
| "step": 5400 |
| }, |
| { |
| "epoch": 4.694229112833764, |
| "grad_norm": 0.7762560844421387, |
| "learning_rate": 0.00018122652885443585, |
| "loss": 0.4681483459472656, |
| "step": 5450 |
| }, |
| { |
| "epoch": 4.694229112833764, |
| "eval_loss": 0.5209926962852478, |
| "eval_runtime": 16.9614, |
| "eval_samples_per_second": 1120.423, |
| "eval_steps_per_second": 35.021, |
| "step": 5450 |
| }, |
| { |
| "epoch": 4.7372954349698535, |
| "grad_norm": 0.6850082278251648, |
| "learning_rate": 0.0001810542635658915, |
| "loss": 0.47497440338134767, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.7372954349698535, |
| "eval_loss": 0.5202682614326477, |
| "eval_runtime": 17.7827, |
| "eval_samples_per_second": 1068.681, |
| "eval_steps_per_second": 33.403, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.780361757105943, |
| "grad_norm": 0.5083145499229431, |
| "learning_rate": 0.00018088199827734713, |
| "loss": 0.4617390441894531, |
| "step": 5550 |
| }, |
| { |
| "epoch": 4.780361757105943, |
| "eval_loss": 0.5157390832901001, |
| "eval_runtime": 17.164, |
| "eval_samples_per_second": 1107.204, |
| "eval_steps_per_second": 34.607, |
| "step": 5550 |
| }, |
| { |
| "epoch": 4.823428079242033, |
| "grad_norm": 0.6421205401420593, |
| "learning_rate": 0.00018070973298880277, |
| "loss": 0.46330387115478516, |
| "step": 5600 |
| }, |
| { |
| "epoch": 4.823428079242033, |
| "eval_loss": 0.5214293003082275, |
| "eval_runtime": 17.3236, |
| "eval_samples_per_second": 1097.001, |
| "eval_steps_per_second": 34.288, |
| "step": 5600 |
| }, |
| { |
| "epoch": 4.866494401378122, |
| "grad_norm": 0.811347246170044, |
| "learning_rate": 0.00018053746770025841, |
| "loss": 0.44483318328857424, |
| "step": 5650 |
| }, |
| { |
| "epoch": 4.866494401378122, |
| "eval_loss": 0.5206965208053589, |
| "eval_runtime": 18.0419, |
| "eval_samples_per_second": 1053.327, |
| "eval_steps_per_second": 32.923, |
| "step": 5650 |
| }, |
| { |
| "epoch": 4.909560723514212, |
| "grad_norm": 0.723167359828949, |
| "learning_rate": 0.00018036520241171403, |
| "loss": 0.45687950134277344, |
| "step": 5700 |
| }, |
| { |
| "epoch": 4.909560723514212, |
| "eval_loss": 0.5181661248207092, |
| "eval_runtime": 17.6599, |
| "eval_samples_per_second": 1076.113, |
| "eval_steps_per_second": 33.636, |
| "step": 5700 |
| }, |
| { |
| "epoch": 4.952627045650301, |
| "grad_norm": 0.821702241897583, |
| "learning_rate": 0.0001801929371231697, |
| "loss": 0.4651851272583008, |
| "step": 5750 |
| }, |
| { |
| "epoch": 4.952627045650301, |
| "eval_loss": 0.5162400007247925, |
| "eval_runtime": 16.6915, |
| "eval_samples_per_second": 1138.542, |
| "eval_steps_per_second": 35.587, |
| "step": 5750 |
| }, |
| { |
| "epoch": 4.995693367786391, |
| "grad_norm": 0.9089387059211731, |
| "learning_rate": 0.00018002067183462531, |
| "loss": 0.46469066619873045, |
| "step": 5800 |
| }, |
| { |
| "epoch": 4.995693367786391, |
| "eval_loss": 0.5052764415740967, |
| "eval_runtime": 17.7442, |
| "eval_samples_per_second": 1071.0, |
| "eval_steps_per_second": 33.476, |
| "step": 5800 |
| }, |
| { |
| "epoch": 5.038759689922481, |
| "grad_norm": 0.6017876863479614, |
| "learning_rate": 0.00017984840654608098, |
| "loss": 0.44865966796875, |
| "step": 5850 |
| }, |
| { |
| "epoch": 5.038759689922481, |
| "eval_loss": 0.5109136700630188, |
| "eval_runtime": 16.853, |
| "eval_samples_per_second": 1127.636, |
| "eval_steps_per_second": 35.246, |
| "step": 5850 |
| }, |
| { |
| "epoch": 5.0818260120585705, |
| "grad_norm": 0.7853453159332275, |
| "learning_rate": 0.00017967614125753662, |
| "loss": 0.46314373016357424, |
| "step": 5900 |
| }, |
| { |
| "epoch": 5.0818260120585705, |
| "eval_loss": 0.513229250907898, |
| "eval_runtime": 16.6481, |
| "eval_samples_per_second": 1141.512, |
| "eval_steps_per_second": 35.68, |
| "step": 5900 |
| }, |
| { |
| "epoch": 5.1248923341946595, |
| "grad_norm": 0.7348341941833496, |
| "learning_rate": 0.00017950387596899224, |
| "loss": 0.4561290740966797, |
| "step": 5950 |
| }, |
| { |
| "epoch": 5.1248923341946595, |
| "eval_loss": 0.5073517560958862, |
| "eval_runtime": 17.3322, |
| "eval_samples_per_second": 1096.454, |
| "eval_steps_per_second": 34.271, |
| "step": 5950 |
| }, |
| { |
| "epoch": 5.167958656330749, |
| "grad_norm": 0.902717113494873, |
| "learning_rate": 0.0001793316106804479, |
| "loss": 0.4551318359375, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.167958656330749, |
| "eval_loss": 0.5088045001029968, |
| "eval_runtime": 17.2698, |
| "eval_samples_per_second": 1100.421, |
| "eval_steps_per_second": 34.395, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.211024978466839, |
| "grad_norm": 0.7658097743988037, |
| "learning_rate": 0.00017915934539190352, |
| "loss": 0.4525523376464844, |
| "step": 6050 |
| }, |
| { |
| "epoch": 5.211024978466839, |
| "eval_loss": 0.5058748722076416, |
| "eval_runtime": 17.313, |
| "eval_samples_per_second": 1097.669, |
| "eval_steps_per_second": 34.309, |
| "step": 6050 |
| }, |
| { |
| "epoch": 5.254091300602928, |
| "grad_norm": 0.7149024605751038, |
| "learning_rate": 0.0001789870801033592, |
| "loss": 0.45574371337890623, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.254091300602928, |
| "eval_loss": 0.4991587996482849, |
| "eval_runtime": 17.203, |
| "eval_samples_per_second": 1104.694, |
| "eval_steps_per_second": 34.529, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.297157622739018, |
| "grad_norm": 0.6680011749267578, |
| "learning_rate": 0.0001788148148148148, |
| "loss": 0.4474559020996094, |
| "step": 6150 |
| }, |
| { |
| "epoch": 5.297157622739018, |
| "eval_loss": 0.5035756230354309, |
| "eval_runtime": 17.0604, |
| "eval_samples_per_second": 1113.928, |
| "eval_steps_per_second": 34.818, |
| "step": 6150 |
| }, |
| { |
| "epoch": 5.340223944875108, |
| "grad_norm": 0.6084044575691223, |
| "learning_rate": 0.00017864254952627045, |
| "loss": 0.4532883834838867, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.340223944875108, |
| "eval_loss": 0.5061535239219666, |
| "eval_runtime": 17.2044, |
| "eval_samples_per_second": 1104.599, |
| "eval_steps_per_second": 34.526, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.383290267011197, |
| "grad_norm": 0.6588345766067505, |
| "learning_rate": 0.00017847028423772612, |
| "loss": 0.44038040161132813, |
| "step": 6250 |
| }, |
| { |
| "epoch": 5.383290267011197, |
| "eval_loss": 0.5017246603965759, |
| "eval_runtime": 17.2744, |
| "eval_samples_per_second": 1100.123, |
| "eval_steps_per_second": 34.386, |
| "step": 6250 |
| }, |
| { |
| "epoch": 5.426356589147287, |
| "grad_norm": 0.7204316258430481, |
| "learning_rate": 0.00017829801894918173, |
| "loss": 0.45370059967041015, |
| "step": 6300 |
| }, |
| { |
| "epoch": 5.426356589147287, |
| "eval_loss": 0.513796329498291, |
| "eval_runtime": 17.368, |
| "eval_samples_per_second": 1094.195, |
| "eval_steps_per_second": 34.201, |
| "step": 6300 |
| }, |
| { |
| "epoch": 5.4694229112833765, |
| "grad_norm": 0.6742197871208191, |
| "learning_rate": 0.0001781257536606374, |
| "loss": 0.4497013473510742, |
| "step": 6350 |
| }, |
| { |
| "epoch": 5.4694229112833765, |
| "eval_loss": 0.4986371695995331, |
| "eval_runtime": 16.7527, |
| "eval_samples_per_second": 1134.381, |
| "eval_steps_per_second": 35.457, |
| "step": 6350 |
| }, |
| { |
| "epoch": 5.5124892334194655, |
| "grad_norm": 0.6411893963813782, |
| "learning_rate": 0.00017795348837209302, |
| "loss": 0.4475288009643555, |
| "step": 6400 |
| }, |
| { |
| "epoch": 5.5124892334194655, |
| "eval_loss": 0.4998282492160797, |
| "eval_runtime": 17.2908, |
| "eval_samples_per_second": 1099.081, |
| "eval_steps_per_second": 34.354, |
| "step": 6400 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "grad_norm": 0.7940775156021118, |
| "learning_rate": 0.0001777812230835487, |
| "loss": 0.43942253112792967, |
| "step": 6450 |
| }, |
| { |
| "epoch": 5.555555555555555, |
| "eval_loss": 0.5093288421630859, |
| "eval_runtime": 17.0928, |
| "eval_samples_per_second": 1111.813, |
| "eval_steps_per_second": 34.751, |
| "step": 6450 |
| }, |
| { |
| "epoch": 5.598621877691645, |
| "grad_norm": 0.6469661593437195, |
| "learning_rate": 0.0001776089577950043, |
| "loss": 0.44519527435302736, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.598621877691645, |
| "eval_loss": 0.5019457340240479, |
| "eval_runtime": 17.1997, |
| "eval_samples_per_second": 1104.904, |
| "eval_steps_per_second": 34.536, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.641688199827735, |
| "grad_norm": 0.6471430063247681, |
| "learning_rate": 0.00017743669250645995, |
| "loss": 0.4439122772216797, |
| "step": 6550 |
| }, |
| { |
| "epoch": 5.641688199827735, |
| "eval_loss": 0.4955105185508728, |
| "eval_runtime": 17.2722, |
| "eval_samples_per_second": 1100.265, |
| "eval_steps_per_second": 34.391, |
| "step": 6550 |
| }, |
| { |
| "epoch": 5.684754521963824, |
| "grad_norm": 0.7212668061256409, |
| "learning_rate": 0.00017726442721791561, |
| "loss": 0.4470480346679688, |
| "step": 6600 |
| }, |
| { |
| "epoch": 5.684754521963824, |
| "eval_loss": 0.5024229288101196, |
| "eval_runtime": 17.7954, |
| "eval_samples_per_second": 1067.916, |
| "eval_steps_per_second": 33.379, |
| "step": 6600 |
| }, |
| { |
| "epoch": 5.727820844099914, |
| "grad_norm": 0.717725932598114, |
| "learning_rate": 0.00017709216192937123, |
| "loss": 0.44252021789550783, |
| "step": 6650 |
| }, |
| { |
| "epoch": 5.727820844099914, |
| "eval_loss": 0.49507880210876465, |
| "eval_runtime": 17.1214, |
| "eval_samples_per_second": 1109.957, |
| "eval_steps_per_second": 34.693, |
| "step": 6650 |
| }, |
| { |
| "epoch": 5.770887166236004, |
| "grad_norm": 0.6818066835403442, |
| "learning_rate": 0.0001769198966408269, |
| "loss": 0.44801937103271483, |
| "step": 6700 |
| }, |
| { |
| "epoch": 5.770887166236004, |
| "eval_loss": 0.49282634258270264, |
| "eval_runtime": 17.2449, |
| "eval_samples_per_second": 1102.007, |
| "eval_steps_per_second": 34.445, |
| "step": 6700 |
| }, |
| { |
| "epoch": 5.813953488372093, |
| "grad_norm": 0.7912653684616089, |
| "learning_rate": 0.00017674763135228251, |
| "loss": 0.4390088653564453, |
| "step": 6750 |
| }, |
| { |
| "epoch": 5.813953488372093, |
| "eval_loss": 0.49278008937835693, |
| "eval_runtime": 17.1369, |
| "eval_samples_per_second": 1108.954, |
| "eval_steps_per_second": 34.662, |
| "step": 6750 |
| }, |
| { |
| "epoch": 5.8570198105081825, |
| "grad_norm": 0.7058820128440857, |
| "learning_rate": 0.00017657536606373816, |
| "loss": 0.4471379089355469, |
| "step": 6800 |
| }, |
| { |
| "epoch": 5.8570198105081825, |
| "eval_loss": 0.4846435487270355, |
| "eval_runtime": 16.2701, |
| "eval_samples_per_second": 1168.029, |
| "eval_steps_per_second": 36.509, |
| "step": 6800 |
| }, |
| { |
| "epoch": 5.900086132644272, |
| "grad_norm": 0.6577419638633728, |
| "learning_rate": 0.0001764031007751938, |
| "loss": 0.4329195404052734, |
| "step": 6850 |
| }, |
| { |
| "epoch": 5.900086132644272, |
| "eval_loss": 0.4891928732395172, |
| "eval_runtime": 17.5298, |
| "eval_samples_per_second": 1084.097, |
| "eval_steps_per_second": 33.885, |
| "step": 6850 |
| }, |
| { |
| "epoch": 5.943152454780362, |
| "grad_norm": 0.6675143241882324, |
| "learning_rate": 0.00017623083548664944, |
| "loss": 0.4451116180419922, |
| "step": 6900 |
| }, |
| { |
| "epoch": 5.943152454780362, |
| "eval_loss": 0.49225810170173645, |
| "eval_runtime": 17.2842, |
| "eval_samples_per_second": 1099.503, |
| "eval_steps_per_second": 34.367, |
| "step": 6900 |
| }, |
| { |
| "epoch": 5.986218776916451, |
| "grad_norm": 0.637526273727417, |
| "learning_rate": 0.0001760585701981051, |
| "loss": 0.4396438217163086, |
| "step": 6950 |
| }, |
| { |
| "epoch": 5.986218776916451, |
| "eval_loss": 0.4892192780971527, |
| "eval_runtime": 17.4708, |
| "eval_samples_per_second": 1087.757, |
| "eval_steps_per_second": 34.0, |
| "step": 6950 |
| }, |
| { |
| "epoch": 6.029285099052541, |
| "grad_norm": 0.6243614554405212, |
| "learning_rate": 0.00017588630490956072, |
| "loss": 0.43088115692138673, |
| "step": 7000 |
| }, |
| { |
| "epoch": 6.029285099052541, |
| "eval_loss": 0.4946584403514862, |
| "eval_runtime": 17.2236, |
| "eval_samples_per_second": 1103.37, |
| "eval_steps_per_second": 34.488, |
| "step": 7000 |
| }, |
| { |
| "epoch": 6.072351421188631, |
| "grad_norm": 0.6974560618400574, |
| "learning_rate": 0.00017571403962101637, |
| "loss": 0.43844024658203123, |
| "step": 7050 |
| }, |
| { |
| "epoch": 6.072351421188631, |
| "eval_loss": 0.4866611063480377, |
| "eval_runtime": 17.3366, |
| "eval_samples_per_second": 1096.177, |
| "eval_steps_per_second": 34.263, |
| "step": 7050 |
| }, |
| { |
| "epoch": 6.11541774332472, |
| "grad_norm": 0.7083775401115417, |
| "learning_rate": 0.000175541774332472, |
| "loss": 0.4352645111083984, |
| "step": 7100 |
| }, |
| { |
| "epoch": 6.11541774332472, |
| "eval_loss": 0.48218822479248047, |
| "eval_runtime": 17.1233, |
| "eval_samples_per_second": 1109.831, |
| "eval_steps_per_second": 34.69, |
| "step": 7100 |
| }, |
| { |
| "epoch": 6.15848406546081, |
| "grad_norm": 0.7368115782737732, |
| "learning_rate": 0.00017536950904392765, |
| "loss": 0.43624774932861327, |
| "step": 7150 |
| }, |
| { |
| "epoch": 6.15848406546081, |
| "eval_loss": 0.48245272040367126, |
| "eval_runtime": 16.2557, |
| "eval_samples_per_second": 1169.065, |
| "eval_steps_per_second": 36.541, |
| "step": 7150 |
| }, |
| { |
| "epoch": 6.2015503875969, |
| "grad_norm": 0.6461877226829529, |
| "learning_rate": 0.0001751972437553833, |
| "loss": 0.4340004348754883, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.2015503875969, |
| "eval_loss": 0.48431915044784546, |
| "eval_runtime": 17.0182, |
| "eval_samples_per_second": 1116.689, |
| "eval_steps_per_second": 34.904, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.2446167097329885, |
| "grad_norm": 0.6845267415046692, |
| "learning_rate": 0.00017502497846683893, |
| "loss": 0.41942180633544923, |
| "step": 7250 |
| }, |
| { |
| "epoch": 6.2446167097329885, |
| "eval_loss": 0.48767024278640747, |
| "eval_runtime": 15.9606, |
| "eval_samples_per_second": 1190.681, |
| "eval_steps_per_second": 37.217, |
| "step": 7250 |
| }, |
| { |
| "epoch": 6.287683031869078, |
| "grad_norm": 0.6504621505737305, |
| "learning_rate": 0.0001748527131782946, |
| "loss": 0.4262746429443359, |
| "step": 7300 |
| }, |
| { |
| "epoch": 6.287683031869078, |
| "eval_loss": 0.485770046710968, |
| "eval_runtime": 17.3977, |
| "eval_samples_per_second": 1092.326, |
| "eval_steps_per_second": 34.142, |
| "step": 7300 |
| }, |
| { |
| "epoch": 6.330749354005168, |
| "grad_norm": 0.6602944731712341, |
| "learning_rate": 0.00017468044788975022, |
| "loss": 0.4175703048706055, |
| "step": 7350 |
| }, |
| { |
| "epoch": 6.330749354005168, |
| "eval_loss": 0.48184552788734436, |
| "eval_runtime": 17.2587, |
| "eval_samples_per_second": 1101.126, |
| "eval_steps_per_second": 34.417, |
| "step": 7350 |
| }, |
| { |
| "epoch": 6.373815676141257, |
| "grad_norm": 0.686592161655426, |
| "learning_rate": 0.00017450818260120586, |
| "loss": 0.4337629699707031, |
| "step": 7400 |
| }, |
| { |
| "epoch": 6.373815676141257, |
| "eval_loss": 0.4817441403865814, |
| "eval_runtime": 17.4563, |
| "eval_samples_per_second": 1088.659, |
| "eval_steps_per_second": 34.028, |
| "step": 7400 |
| }, |
| { |
| "epoch": 6.416881998277347, |
| "grad_norm": 0.7309668064117432, |
| "learning_rate": 0.0001743359173126615, |
| "loss": 0.42674171447753906, |
| "step": 7450 |
| }, |
| { |
| "epoch": 6.416881998277347, |
| "eval_loss": 0.4860132336616516, |
| "eval_runtime": 17.4504, |
| "eval_samples_per_second": 1089.031, |
| "eval_steps_per_second": 34.039, |
| "step": 7450 |
| }, |
| { |
| "epoch": 6.459948320413437, |
| "grad_norm": 0.7980537414550781, |
| "learning_rate": 0.00017416365202411715, |
| "loss": 0.4265460205078125, |
| "step": 7500 |
| }, |
| { |
| "epoch": 6.459948320413437, |
| "eval_loss": 0.48347485065460205, |
| "eval_runtime": 17.2725, |
| "eval_samples_per_second": 1100.245, |
| "eval_steps_per_second": 34.39, |
| "step": 7500 |
| }, |
| { |
| "epoch": 6.503014642549527, |
| "grad_norm": 0.7159664034843445, |
| "learning_rate": 0.0001739913867355728, |
| "loss": 0.43710147857666015, |
| "step": 7550 |
| }, |
| { |
| "epoch": 6.503014642549527, |
| "eval_loss": 0.47490188479423523, |
| "eval_runtime": 17.4854, |
| "eval_samples_per_second": 1086.852, |
| "eval_steps_per_second": 33.971, |
| "step": 7550 |
| }, |
| { |
| "epoch": 6.546080964685616, |
| "grad_norm": 0.8943643569946289, |
| "learning_rate": 0.00017381912144702843, |
| "loss": 0.42655269622802733, |
| "step": 7600 |
| }, |
| { |
| "epoch": 6.546080964685616, |
| "eval_loss": 0.46805503964424133, |
| "eval_runtime": 16.7689, |
| "eval_samples_per_second": 1133.289, |
| "eval_steps_per_second": 35.423, |
| "step": 7600 |
| }, |
| { |
| "epoch": 6.589147286821706, |
| "grad_norm": 0.6106852889060974, |
| "learning_rate": 0.00017364685615848407, |
| "loss": 0.4123693466186523, |
| "step": 7650 |
| }, |
| { |
| "epoch": 6.589147286821706, |
| "eval_loss": 0.47157835960388184, |
| "eval_runtime": 17.4834, |
| "eval_samples_per_second": 1086.973, |
| "eval_steps_per_second": 33.975, |
| "step": 7650 |
| }, |
| { |
| "epoch": 6.6322136089577945, |
| "grad_norm": 0.5885831117630005, |
| "learning_rate": 0.0001734745908699397, |
| "loss": 0.41778255462646485, |
| "step": 7700 |
| }, |
| { |
| "epoch": 6.6322136089577945, |
| "eval_loss": 0.4761877954006195, |
| "eval_runtime": 16.9739, |
| "eval_samples_per_second": 1119.601, |
| "eval_steps_per_second": 34.995, |
| "step": 7700 |
| }, |
| { |
| "epoch": 6.675279931093884, |
| "grad_norm": 0.5975524187088013, |
| "learning_rate": 0.00017330232558139536, |
| "loss": 0.4212100219726562, |
| "step": 7750 |
| }, |
| { |
| "epoch": 6.675279931093884, |
| "eval_loss": 0.468214750289917, |
| "eval_runtime": 17.1264, |
| "eval_samples_per_second": 1109.633, |
| "eval_steps_per_second": 34.683, |
| "step": 7750 |
| }, |
| { |
| "epoch": 6.718346253229974, |
| "grad_norm": 0.6737027168273926, |
| "learning_rate": 0.000173130060292851, |
| "loss": 0.423582763671875, |
| "step": 7800 |
| }, |
| { |
| "epoch": 6.718346253229974, |
| "eval_loss": 0.47411462664604187, |
| "eval_runtime": 17.3271, |
| "eval_samples_per_second": 1096.78, |
| "eval_steps_per_second": 34.282, |
| "step": 7800 |
| }, |
| { |
| "epoch": 6.761412575366064, |
| "grad_norm": 0.6910893321037292, |
| "learning_rate": 0.00017295779500430664, |
| "loss": 0.4238215637207031, |
| "step": 7850 |
| }, |
| { |
| "epoch": 6.761412575366064, |
| "eval_loss": 0.4713309705257416, |
| "eval_runtime": 17.2882, |
| "eval_samples_per_second": 1099.25, |
| "eval_steps_per_second": 34.359, |
| "step": 7850 |
| }, |
| { |
| "epoch": 6.804478897502153, |
| "grad_norm": 0.7054369449615479, |
| "learning_rate": 0.00017278552971576228, |
| "loss": 0.4182852172851563, |
| "step": 7900 |
| }, |
| { |
| "epoch": 6.804478897502153, |
| "eval_loss": 0.4671083986759186, |
| "eval_runtime": 17.3277, |
| "eval_samples_per_second": 1096.743, |
| "eval_steps_per_second": 34.28, |
| "step": 7900 |
| }, |
| { |
| "epoch": 6.847545219638243, |
| "grad_norm": 0.6247032880783081, |
| "learning_rate": 0.00017261326442721792, |
| "loss": 0.41578125, |
| "step": 7950 |
| }, |
| { |
| "epoch": 6.847545219638243, |
| "eval_loss": 0.47773998975753784, |
| "eval_runtime": 17.4983, |
| "eval_samples_per_second": 1086.046, |
| "eval_steps_per_second": 33.946, |
| "step": 7950 |
| }, |
| { |
| "epoch": 6.890611541774333, |
| "grad_norm": 0.8400962352752686, |
| "learning_rate": 0.00017244099913867357, |
| "loss": 0.41641212463378907, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.890611541774333, |
| "eval_loss": 0.4788215756416321, |
| "eval_runtime": 17.0811, |
| "eval_samples_per_second": 1112.574, |
| "eval_steps_per_second": 34.775, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.933677863910422, |
| "grad_norm": 0.6829719543457031, |
| "learning_rate": 0.0001722687338501292, |
| "loss": 0.41895538330078125, |
| "step": 8050 |
| }, |
| { |
| "epoch": 6.933677863910422, |
| "eval_loss": 0.4660840332508087, |
| "eval_runtime": 16.6677, |
| "eval_samples_per_second": 1140.173, |
| "eval_steps_per_second": 35.638, |
| "step": 8050 |
| }, |
| { |
| "epoch": 6.976744186046512, |
| "grad_norm": 0.7007145881652832, |
| "learning_rate": 0.00017209646856158485, |
| "loss": 0.42030097961425783, |
| "step": 8100 |
| }, |
| { |
| "epoch": 6.976744186046512, |
| "eval_loss": 0.4828939139842987, |
| "eval_runtime": 17.0682, |
| "eval_samples_per_second": 1113.413, |
| "eval_steps_per_second": 34.801, |
| "step": 8100 |
| }, |
| { |
| "epoch": 7.019810508182601, |
| "grad_norm": 0.5981889963150024, |
| "learning_rate": 0.0001719242032730405, |
| "loss": 0.41966373443603516, |
| "step": 8150 |
| }, |
| { |
| "epoch": 7.019810508182601, |
| "eval_loss": 0.4712368845939636, |
| "eval_runtime": 17.6419, |
| "eval_samples_per_second": 1077.205, |
| "eval_steps_per_second": 33.67, |
| "step": 8150 |
| }, |
| { |
| "epoch": 7.06287683031869, |
| "grad_norm": 0.5835450887680054, |
| "learning_rate": 0.00017175193798449613, |
| "loss": 0.40881683349609377, |
| "step": 8200 |
| }, |
| { |
| "epoch": 7.06287683031869, |
| "eval_loss": 0.47391384840011597, |
| "eval_runtime": 16.389, |
| "eval_samples_per_second": 1159.556, |
| "eval_steps_per_second": 36.244, |
| "step": 8200 |
| }, |
| { |
| "epoch": 7.10594315245478, |
| "grad_norm": 0.7046903967857361, |
| "learning_rate": 0.00017157967269595178, |
| "loss": 0.40905754089355467, |
| "step": 8250 |
| }, |
| { |
| "epoch": 7.10594315245478, |
| "eval_loss": 0.46922409534454346, |
| "eval_runtime": 17.5686, |
| "eval_samples_per_second": 1081.705, |
| "eval_steps_per_second": 33.81, |
| "step": 8250 |
| }, |
| { |
| "epoch": 7.14900947459087, |
| "grad_norm": 0.6183799505233765, |
| "learning_rate": 0.00017140740740740742, |
| "loss": 0.4082827377319336, |
| "step": 8300 |
| }, |
| { |
| "epoch": 7.14900947459087, |
| "eval_loss": 0.460680216550827, |
| "eval_runtime": 17.3062, |
| "eval_samples_per_second": 1098.102, |
| "eval_steps_per_second": 34.323, |
| "step": 8300 |
| }, |
| { |
| "epoch": 7.192075796726959, |
| "grad_norm": 0.591304361820221, |
| "learning_rate": 0.00017123514211886306, |
| "loss": 0.40852947235107423, |
| "step": 8350 |
| }, |
| { |
| "epoch": 7.192075796726959, |
| "eval_loss": 0.4756697118282318, |
| "eval_runtime": 17.2275, |
| "eval_samples_per_second": 1103.119, |
| "eval_steps_per_second": 34.48, |
| "step": 8350 |
| }, |
| { |
| "epoch": 7.235142118863049, |
| "grad_norm": 0.6740310192108154, |
| "learning_rate": 0.0001710628768303187, |
| "loss": 0.4172178268432617, |
| "step": 8400 |
| }, |
| { |
| "epoch": 7.235142118863049, |
| "eval_loss": 0.4743621051311493, |
| "eval_runtime": 17.3123, |
| "eval_samples_per_second": 1097.717, |
| "eval_steps_per_second": 34.311, |
| "step": 8400 |
| }, |
| { |
| "epoch": 7.278208440999139, |
| "grad_norm": 0.5578835010528564, |
| "learning_rate": 0.00017089061154177434, |
| "loss": 0.4068143081665039, |
| "step": 8450 |
| }, |
| { |
| "epoch": 7.278208440999139, |
| "eval_loss": 0.47389987111091614, |
| "eval_runtime": 17.3109, |
| "eval_samples_per_second": 1097.803, |
| "eval_steps_per_second": 34.314, |
| "step": 8450 |
| }, |
| { |
| "epoch": 7.321274763135229, |
| "grad_norm": 0.6933954358100891, |
| "learning_rate": 0.00017071834625323, |
| "loss": 0.4053021240234375, |
| "step": 8500 |
| }, |
| { |
| "epoch": 7.321274763135229, |
| "eval_loss": 0.47286462783813477, |
| "eval_runtime": 17.2635, |
| "eval_samples_per_second": 1100.82, |
| "eval_steps_per_second": 34.408, |
| "step": 8500 |
| }, |
| { |
| "epoch": 7.364341085271318, |
| "grad_norm": 0.7197252511978149, |
| "learning_rate": 0.00017054608096468563, |
| "loss": 0.41045791625976563, |
| "step": 8550 |
| }, |
| { |
| "epoch": 7.364341085271318, |
| "eval_loss": 0.46601465344429016, |
| "eval_runtime": 16.1238, |
| "eval_samples_per_second": 1178.628, |
| "eval_steps_per_second": 36.84, |
| "step": 8550 |
| }, |
| { |
| "epoch": 7.407407407407407, |
| "grad_norm": 0.6437709331512451, |
| "learning_rate": 0.00017037381567614124, |
| "loss": 0.4072903823852539, |
| "step": 8600 |
| }, |
| { |
| "epoch": 7.407407407407407, |
| "eval_loss": 0.47019290924072266, |
| "eval_runtime": 17.3946, |
| "eval_samples_per_second": 1092.521, |
| "eval_steps_per_second": 34.148, |
| "step": 8600 |
| }, |
| { |
| "epoch": 7.450473729543497, |
| "grad_norm": 0.5939879417419434, |
| "learning_rate": 0.0001702015503875969, |
| "loss": 0.40440605163574217, |
| "step": 8650 |
| }, |
| { |
| "epoch": 7.450473729543497, |
| "eval_loss": 0.4719381630420685, |
| "eval_runtime": 16.8308, |
| "eval_samples_per_second": 1129.122, |
| "eval_steps_per_second": 35.292, |
| "step": 8650 |
| }, |
| { |
| "epoch": 7.493540051679586, |
| "grad_norm": 0.8631351590156555, |
| "learning_rate": 0.00017002928509905256, |
| "loss": 0.4140526580810547, |
| "step": 8700 |
| }, |
| { |
| "epoch": 7.493540051679586, |
| "eval_loss": 0.46160271763801575, |
| "eval_runtime": 17.1488, |
| "eval_samples_per_second": 1108.18, |
| "eval_steps_per_second": 34.638, |
| "step": 8700 |
| }, |
| { |
| "epoch": 7.536606373815676, |
| "grad_norm": 0.6466717720031738, |
| "learning_rate": 0.0001698570198105082, |
| "loss": 0.40803627014160154, |
| "step": 8750 |
| }, |
| { |
| "epoch": 7.536606373815676, |
| "eval_loss": 0.47316986322402954, |
| "eval_runtime": 17.3823, |
| "eval_samples_per_second": 1093.294, |
| "eval_steps_per_second": 34.173, |
| "step": 8750 |
| }, |
| { |
| "epoch": 7.579672695951766, |
| "grad_norm": 0.7510745525360107, |
| "learning_rate": 0.00016968475452196384, |
| "loss": 0.40083221435546873, |
| "step": 8800 |
| }, |
| { |
| "epoch": 7.579672695951766, |
| "eval_loss": 0.472391813993454, |
| "eval_runtime": 17.7188, |
| "eval_samples_per_second": 1072.532, |
| "eval_steps_per_second": 33.524, |
| "step": 8800 |
| }, |
| { |
| "epoch": 7.622739018087855, |
| "grad_norm": 0.7287890911102295, |
| "learning_rate": 0.00016951248923341948, |
| "loss": 0.40442337036132814, |
| "step": 8850 |
| }, |
| { |
| "epoch": 7.622739018087855, |
| "eval_loss": 0.46329817175865173, |
| "eval_runtime": 17.365, |
| "eval_samples_per_second": 1094.385, |
| "eval_steps_per_second": 34.207, |
| "step": 8850 |
| }, |
| { |
| "epoch": 7.665805340223945, |
| "grad_norm": 0.6469999551773071, |
| "learning_rate": 0.00016934022394487512, |
| "loss": 0.4089640045166016, |
| "step": 8900 |
| }, |
| { |
| "epoch": 7.665805340223945, |
| "eval_loss": 0.4669649004936218, |
| "eval_runtime": 17.1577, |
| "eval_samples_per_second": 1107.607, |
| "eval_steps_per_second": 34.62, |
| "step": 8900 |
| }, |
| { |
| "epoch": 7.708871662360035, |
| "grad_norm": 0.6616798639297485, |
| "learning_rate": 0.00016916795865633074, |
| "loss": 0.40827705383300783, |
| "step": 8950 |
| }, |
| { |
| "epoch": 7.708871662360035, |
| "eval_loss": 0.4632900655269623, |
| "eval_runtime": 17.1597, |
| "eval_samples_per_second": 1107.478, |
| "eval_steps_per_second": 34.616, |
| "step": 8950 |
| }, |
| { |
| "epoch": 7.751937984496124, |
| "grad_norm": 0.8429326415061951, |
| "learning_rate": 0.0001689956933677864, |
| "loss": 0.40378345489501954, |
| "step": 9000 |
| }, |
| { |
| "epoch": 7.751937984496124, |
| "eval_loss": 0.46200963854789734, |
| "eval_runtime": 16.748, |
| "eval_samples_per_second": 1134.703, |
| "eval_steps_per_second": 35.467, |
| "step": 9000 |
| }, |
| { |
| "epoch": 7.795004306632213, |
| "grad_norm": 0.6626513004302979, |
| "learning_rate": 0.00016882342807924202, |
| "loss": 0.3880492401123047, |
| "step": 9050 |
| }, |
| { |
| "epoch": 7.795004306632213, |
| "eval_loss": 0.4620370864868164, |
| "eval_runtime": 17.4679, |
| "eval_samples_per_second": 1087.941, |
| "eval_steps_per_second": 34.005, |
| "step": 9050 |
| }, |
| { |
| "epoch": 7.838070628768303, |
| "grad_norm": 0.588159441947937, |
| "learning_rate": 0.0001686511627906977, |
| "loss": 0.3987490844726562, |
| "step": 9100 |
| }, |
| { |
| "epoch": 7.838070628768303, |
| "eval_loss": 0.46975627541542053, |
| "eval_runtime": 16.9538, |
| "eval_samples_per_second": 1120.932, |
| "eval_steps_per_second": 35.036, |
| "step": 9100 |
| }, |
| { |
| "epoch": 7.881136950904393, |
| "grad_norm": 0.632615327835083, |
| "learning_rate": 0.00016847889750215333, |
| "loss": 0.3987492370605469, |
| "step": 9150 |
| }, |
| { |
| "epoch": 7.881136950904393, |
| "eval_loss": 0.4580506682395935, |
| "eval_runtime": 16.9917, |
| "eval_samples_per_second": 1118.427, |
| "eval_steps_per_second": 34.958, |
| "step": 9150 |
| }, |
| { |
| "epoch": 7.924203273040482, |
| "grad_norm": 0.521962583065033, |
| "learning_rate": 0.00016830663221360895, |
| "loss": 0.3979851531982422, |
| "step": 9200 |
| }, |
| { |
| "epoch": 7.924203273040482, |
| "eval_loss": 0.455099493265152, |
| "eval_runtime": 17.8257, |
| "eval_samples_per_second": 1066.101, |
| "eval_steps_per_second": 33.323, |
| "step": 9200 |
| }, |
| { |
| "epoch": 7.967269595176572, |
| "grad_norm": 0.5939018726348877, |
| "learning_rate": 0.00016813436692506462, |
| "loss": 0.40017799377441404, |
| "step": 9250 |
| }, |
| { |
| "epoch": 7.967269595176572, |
| "eval_loss": 0.4639855921268463, |
| "eval_runtime": 16.0253, |
| "eval_samples_per_second": 1185.872, |
| "eval_steps_per_second": 37.066, |
| "step": 9250 |
| }, |
| { |
| "epoch": 8.010335917312661, |
| "grad_norm": 0.6152825951576233, |
| "learning_rate": 0.00016796210163652023, |
| "loss": 0.40499732971191404, |
| "step": 9300 |
| }, |
| { |
| "epoch": 8.010335917312661, |
| "eval_loss": 0.4626195430755615, |
| "eval_runtime": 17.1638, |
| "eval_samples_per_second": 1107.212, |
| "eval_steps_per_second": 34.608, |
| "step": 9300 |
| }, |
| { |
| "epoch": 8.05340223944875, |
| "grad_norm": 0.7398512959480286, |
| "learning_rate": 0.0001677898363479759, |
| "loss": 0.38915348052978516, |
| "step": 9350 |
| }, |
| { |
| "epoch": 8.05340223944875, |
| "eval_loss": 0.45817697048187256, |
| "eval_runtime": 16.8467, |
| "eval_samples_per_second": 1128.053, |
| "eval_steps_per_second": 35.259, |
| "step": 9350 |
| }, |
| { |
| "epoch": 8.09646856158484, |
| "grad_norm": 0.7731131911277771, |
| "learning_rate": 0.00016761757105943152, |
| "loss": 0.3941301727294922, |
| "step": 9400 |
| }, |
| { |
| "epoch": 8.09646856158484, |
| "eval_loss": 0.44625988602638245, |
| "eval_runtime": 17.3837, |
| "eval_samples_per_second": 1093.21, |
| "eval_steps_per_second": 34.17, |
| "step": 9400 |
| }, |
| { |
| "epoch": 8.13953488372093, |
| "grad_norm": 0.5277883410453796, |
| "learning_rate": 0.00016744530577088716, |
| "loss": 0.4036808776855469, |
| "step": 9450 |
| }, |
| { |
| "epoch": 8.13953488372093, |
| "eval_loss": 0.4603153467178345, |
| "eval_runtime": 17.0968, |
| "eval_samples_per_second": 1111.554, |
| "eval_steps_per_second": 34.743, |
| "step": 9450 |
| }, |
| { |
| "epoch": 8.18260120585702, |
| "grad_norm": 0.6296549439430237, |
| "learning_rate": 0.00016727304048234283, |
| "loss": 0.391630744934082, |
| "step": 9500 |
| }, |
| { |
| "epoch": 8.18260120585702, |
| "eval_loss": 0.46081680059432983, |
| "eval_runtime": 17.0294, |
| "eval_samples_per_second": 1115.951, |
| "eval_steps_per_second": 34.881, |
| "step": 9500 |
| }, |
| { |
| "epoch": 8.22566752799311, |
| "grad_norm": 0.75502610206604, |
| "learning_rate": 0.00016710077519379844, |
| "loss": 0.39266380310058596, |
| "step": 9550 |
| }, |
| { |
| "epoch": 8.22566752799311, |
| "eval_loss": 0.46628537774086, |
| "eval_runtime": 17.2988, |
| "eval_samples_per_second": 1098.574, |
| "eval_steps_per_second": 34.338, |
| "step": 9550 |
| }, |
| { |
| "epoch": 8.268733850129198, |
| "grad_norm": 0.7355108261108398, |
| "learning_rate": 0.0001669285099052541, |
| "loss": 0.38500797271728515, |
| "step": 9600 |
| }, |
| { |
| "epoch": 8.268733850129198, |
| "eval_loss": 0.45412328839302063, |
| "eval_runtime": 16.0582, |
| "eval_samples_per_second": 1183.444, |
| "eval_steps_per_second": 36.99, |
| "step": 9600 |
| }, |
| { |
| "epoch": 8.311800172265288, |
| "grad_norm": 0.5990964770317078, |
| "learning_rate": 0.00016675624461670973, |
| "loss": 0.3953593444824219, |
| "step": 9650 |
| }, |
| { |
| "epoch": 8.311800172265288, |
| "eval_loss": 0.460112065076828, |
| "eval_runtime": 17.2327, |
| "eval_samples_per_second": 1102.785, |
| "eval_steps_per_second": 34.469, |
| "step": 9650 |
| }, |
| { |
| "epoch": 8.354866494401378, |
| "grad_norm": 0.5800876021385193, |
| "learning_rate": 0.0001665839793281654, |
| "loss": 0.39939281463623044, |
| "step": 9700 |
| }, |
| { |
| "epoch": 8.354866494401378, |
| "eval_loss": 0.45985108613967896, |
| "eval_runtime": 16.8147, |
| "eval_samples_per_second": 1130.204, |
| "eval_steps_per_second": 35.326, |
| "step": 9700 |
| }, |
| { |
| "epoch": 8.397932816537468, |
| "grad_norm": 0.6307799816131592, |
| "learning_rate": 0.000166411714039621, |
| "loss": 0.38937881469726565, |
| "step": 9750 |
| }, |
| { |
| "epoch": 8.397932816537468, |
| "eval_loss": 0.45991334319114685, |
| "eval_runtime": 19.4581, |
| "eval_samples_per_second": 976.665, |
| "eval_steps_per_second": 30.527, |
| "step": 9750 |
| }, |
| { |
| "epoch": 8.440999138673558, |
| "grad_norm": 0.5629838109016418, |
| "learning_rate": 0.00016623944875107665, |
| "loss": 0.40010757446289064, |
| "step": 9800 |
| }, |
| { |
| "epoch": 8.440999138673558, |
| "eval_loss": 0.4581594467163086, |
| "eval_runtime": 18.8434, |
| "eval_samples_per_second": 1008.521, |
| "eval_steps_per_second": 31.523, |
| "step": 9800 |
| }, |
| { |
| "epoch": 8.484065460809648, |
| "grad_norm": 0.7436449527740479, |
| "learning_rate": 0.00016606718346253232, |
| "loss": 0.3864497375488281, |
| "step": 9850 |
| }, |
| { |
| "epoch": 8.484065460809648, |
| "eval_loss": 0.4545927345752716, |
| "eval_runtime": 17.6423, |
| "eval_samples_per_second": 1077.184, |
| "eval_steps_per_second": 33.669, |
| "step": 9850 |
| }, |
| { |
| "epoch": 8.527131782945737, |
| "grad_norm": 0.6741047501564026, |
| "learning_rate": 0.00016589491817398794, |
| "loss": 0.39211551666259764, |
| "step": 9900 |
| }, |
| { |
| "epoch": 8.527131782945737, |
| "eval_loss": 0.45410382747650146, |
| "eval_runtime": 19.733, |
| "eval_samples_per_second": 963.059, |
| "eval_steps_per_second": 30.102, |
| "step": 9900 |
| }, |
| { |
| "epoch": 8.570198105081825, |
| "grad_norm": 0.6780059337615967, |
| "learning_rate": 0.0001657226528854436, |
| "loss": 0.3939036178588867, |
| "step": 9950 |
| }, |
| { |
| "epoch": 8.570198105081825, |
| "eval_loss": 0.4540647566318512, |
| "eval_runtime": 17.4929, |
| "eval_samples_per_second": 1086.381, |
| "eval_steps_per_second": 33.957, |
| "step": 9950 |
| }, |
| { |
| "epoch": 8.613264427217915, |
| "grad_norm": 0.6214281916618347, |
| "learning_rate": 0.00016555038759689922, |
| "loss": 0.3902143096923828, |
| "step": 10000 |
| }, |
| { |
| "epoch": 8.613264427217915, |
| "eval_loss": 0.45562663674354553, |
| "eval_runtime": 17.1617, |
| "eval_samples_per_second": 1107.35, |
| "eval_steps_per_second": 34.612, |
| "step": 10000 |
| }, |
| { |
| "epoch": 8.656330749354005, |
| "grad_norm": 0.6021105647087097, |
| "learning_rate": 0.00016537812230835487, |
| "loss": 0.3947802734375, |
| "step": 10050 |
| }, |
| { |
| "epoch": 8.656330749354005, |
| "eval_loss": 0.45046406984329224, |
| "eval_runtime": 16.3866, |
| "eval_samples_per_second": 1159.725, |
| "eval_steps_per_second": 36.249, |
| "step": 10050 |
| }, |
| { |
| "epoch": 8.699397071490095, |
| "grad_norm": 0.6217834949493408, |
| "learning_rate": 0.0001652058570198105, |
| "loss": 0.39479156494140627, |
| "step": 10100 |
| }, |
| { |
| "epoch": 8.699397071490095, |
| "eval_loss": 0.4492938220500946, |
| "eval_runtime": 16.9921, |
| "eval_samples_per_second": 1118.405, |
| "eval_steps_per_second": 34.958, |
| "step": 10100 |
| }, |
| { |
| "epoch": 8.742463393626185, |
| "grad_norm": 0.5626874566078186, |
| "learning_rate": 0.00016503359173126615, |
| "loss": 0.39408538818359373, |
| "step": 10150 |
| }, |
| { |
| "epoch": 8.742463393626185, |
| "eval_loss": 0.44171395897865295, |
| "eval_runtime": 16.0412, |
| "eval_samples_per_second": 1184.701, |
| "eval_steps_per_second": 37.03, |
| "step": 10150 |
| }, |
| { |
| "epoch": 8.785529715762275, |
| "grad_norm": 0.582612931728363, |
| "learning_rate": 0.00016486132644272182, |
| "loss": 0.38269500732421874, |
| "step": 10200 |
| }, |
| { |
| "epoch": 8.785529715762275, |
| "eval_loss": 0.4445774555206299, |
| "eval_runtime": 17.3218, |
| "eval_samples_per_second": 1097.115, |
| "eval_steps_per_second": 34.292, |
| "step": 10200 |
| }, |
| { |
| "epoch": 8.828596037898363, |
| "grad_norm": 0.5538569092750549, |
| "learning_rate": 0.00016468906115417743, |
| "loss": 0.3785874938964844, |
| "step": 10250 |
| }, |
| { |
| "epoch": 8.828596037898363, |
| "eval_loss": 0.44508177042007446, |
| "eval_runtime": 16.4479, |
| "eval_samples_per_second": 1155.404, |
| "eval_steps_per_second": 36.114, |
| "step": 10250 |
| }, |
| { |
| "epoch": 8.871662360034453, |
| "grad_norm": 0.6597520112991333, |
| "learning_rate": 0.00016451679586563308, |
| "loss": 0.38223503112792967, |
| "step": 10300 |
| }, |
| { |
| "epoch": 8.871662360034453, |
| "eval_loss": 0.4491458833217621, |
| "eval_runtime": 17.5632, |
| "eval_samples_per_second": 1082.038, |
| "eval_steps_per_second": 33.821, |
| "step": 10300 |
| }, |
| { |
| "epoch": 8.914728682170542, |
| "grad_norm": 0.6764523983001709, |
| "learning_rate": 0.00016434453057708872, |
| "loss": 0.3907370376586914, |
| "step": 10350 |
| }, |
| { |
| "epoch": 8.914728682170542, |
| "eval_loss": 0.45313188433647156, |
| "eval_runtime": 17.1402, |
| "eval_samples_per_second": 1108.736, |
| "eval_steps_per_second": 34.655, |
| "step": 10350 |
| }, |
| { |
| "epoch": 8.957795004306632, |
| "grad_norm": 0.6652244329452515, |
| "learning_rate": 0.00016417226528854436, |
| "loss": 0.3858833312988281, |
| "step": 10400 |
| }, |
| { |
| "epoch": 8.957795004306632, |
| "eval_loss": 0.43872758746147156, |
| "eval_runtime": 16.8242, |
| "eval_samples_per_second": 1129.565, |
| "eval_steps_per_second": 35.306, |
| "step": 10400 |
| }, |
| { |
| "epoch": 9.000861326442722, |
| "grad_norm": 0.6380756497383118, |
| "learning_rate": 0.000164, |
| "loss": 0.3900408935546875, |
| "step": 10450 |
| }, |
| { |
| "epoch": 9.000861326442722, |
| "eval_loss": 0.4445935785770416, |
| "eval_runtime": 17.3745, |
| "eval_samples_per_second": 1093.789, |
| "eval_steps_per_second": 34.188, |
| "step": 10450 |
| }, |
| { |
| "epoch": 9.043927648578812, |
| "grad_norm": 0.5553951859474182, |
| "learning_rate": 0.00016382773471145564, |
| "loss": 0.3827755355834961, |
| "step": 10500 |
| }, |
| { |
| "epoch": 9.043927648578812, |
| "eval_loss": 0.43815913796424866, |
| "eval_runtime": 16.5732, |
| "eval_samples_per_second": 1146.672, |
| "eval_steps_per_second": 35.841, |
| "step": 10500 |
| }, |
| { |
| "epoch": 9.0869939707149, |
| "grad_norm": 0.6225996613502502, |
| "learning_rate": 0.0001636554694229113, |
| "loss": 0.3869587326049805, |
| "step": 10550 |
| }, |
| { |
| "epoch": 9.0869939707149, |
| "eval_loss": 0.4400031864643097, |
| "eval_runtime": 17.3172, |
| "eval_samples_per_second": 1097.409, |
| "eval_steps_per_second": 34.301, |
| "step": 10550 |
| }, |
| { |
| "epoch": 9.13006029285099, |
| "grad_norm": 0.6141314506530762, |
| "learning_rate": 0.00016348320413436693, |
| "loss": 0.38014041900634765, |
| "step": 10600 |
| }, |
| { |
| "epoch": 9.13006029285099, |
| "eval_loss": 0.4501992166042328, |
| "eval_runtime": 16.1985, |
| "eval_samples_per_second": 1173.191, |
| "eval_steps_per_second": 36.67, |
| "step": 10600 |
| }, |
| { |
| "epoch": 9.17312661498708, |
| "grad_norm": 0.6114876866340637, |
| "learning_rate": 0.00016331093884582257, |
| "loss": 0.3823957824707031, |
| "step": 10650 |
| }, |
| { |
| "epoch": 9.17312661498708, |
| "eval_loss": 0.44051340222358704, |
| "eval_runtime": 17.9518, |
| "eval_samples_per_second": 1058.615, |
| "eval_steps_per_second": 33.089, |
| "step": 10650 |
| }, |
| { |
| "epoch": 9.21619293712317, |
| "grad_norm": 0.7262207865715027, |
| "learning_rate": 0.0001631386735572782, |
| "loss": 0.38280406951904294, |
| "step": 10700 |
| }, |
| { |
| "epoch": 9.21619293712317, |
| "eval_loss": 0.44237202405929565, |
| "eval_runtime": 17.3535, |
| "eval_samples_per_second": 1095.109, |
| "eval_steps_per_second": 34.229, |
| "step": 10700 |
| }, |
| { |
| "epoch": 9.25925925925926, |
| "grad_norm": 0.8027353286743164, |
| "learning_rate": 0.00016296640826873385, |
| "loss": 0.38072364807128906, |
| "step": 10750 |
| }, |
| { |
| "epoch": 9.25925925925926, |
| "eval_loss": 0.4526488482952118, |
| "eval_runtime": 17.7263, |
| "eval_samples_per_second": 1072.078, |
| "eval_steps_per_second": 33.509, |
| "step": 10750 |
| }, |
| { |
| "epoch": 9.30232558139535, |
| "grad_norm": 0.6305018067359924, |
| "learning_rate": 0.0001627941429801895, |
| "loss": 0.37516212463378906, |
| "step": 10800 |
| }, |
| { |
| "epoch": 9.30232558139535, |
| "eval_loss": 0.4463886618614197, |
| "eval_runtime": 17.2375, |
| "eval_samples_per_second": 1102.483, |
| "eval_steps_per_second": 34.46, |
| "step": 10800 |
| }, |
| { |
| "epoch": 9.34539190353144, |
| "grad_norm": 0.5047479867935181, |
| "learning_rate": 0.00016262187769164514, |
| "loss": 0.3838955307006836, |
| "step": 10850 |
| }, |
| { |
| "epoch": 9.34539190353144, |
| "eval_loss": 0.44005444645881653, |
| "eval_runtime": 17.4853, |
| "eval_samples_per_second": 1086.859, |
| "eval_steps_per_second": 33.971, |
| "step": 10850 |
| }, |
| { |
| "epoch": 9.388458225667527, |
| "grad_norm": 0.5472151041030884, |
| "learning_rate": 0.00016244961240310078, |
| "loss": 0.3777126693725586, |
| "step": 10900 |
| }, |
| { |
| "epoch": 9.388458225667527, |
| "eval_loss": 0.4376201033592224, |
| "eval_runtime": 17.3113, |
| "eval_samples_per_second": 1097.783, |
| "eval_steps_per_second": 34.313, |
| "step": 10900 |
| }, |
| { |
| "epoch": 9.431524547803617, |
| "grad_norm": 0.5795921087265015, |
| "learning_rate": 0.00016227734711455642, |
| "loss": 0.3782763671875, |
| "step": 10950 |
| }, |
| { |
| "epoch": 9.431524547803617, |
| "eval_loss": 0.448537677526474, |
| "eval_runtime": 16.4602, |
| "eval_samples_per_second": 1154.545, |
| "eval_steps_per_second": 36.087, |
| "step": 10950 |
| }, |
| { |
| "epoch": 9.474590869939707, |
| "grad_norm": 0.5140406489372253, |
| "learning_rate": 0.00016210508182601206, |
| "loss": 0.3784123611450195, |
| "step": 11000 |
| }, |
| { |
| "epoch": 9.474590869939707, |
| "eval_loss": 0.43698176741600037, |
| "eval_runtime": 18.2099, |
| "eval_samples_per_second": 1043.611, |
| "eval_steps_per_second": 32.62, |
| "step": 11000 |
| }, |
| { |
| "epoch": 9.517657192075797, |
| "grad_norm": 0.7354308366775513, |
| "learning_rate": 0.0001619328165374677, |
| "loss": 0.3827821731567383, |
| "step": 11050 |
| }, |
| { |
| "epoch": 9.517657192075797, |
| "eval_loss": 0.4404692053794861, |
| "eval_runtime": 16.5291, |
| "eval_samples_per_second": 1149.728, |
| "eval_steps_per_second": 35.937, |
| "step": 11050 |
| }, |
| { |
| "epoch": 9.560723514211887, |
| "grad_norm": 0.5452671051025391, |
| "learning_rate": 0.00016176055124892335, |
| "loss": 0.3785516357421875, |
| "step": 11100 |
| }, |
| { |
| "epoch": 9.560723514211887, |
| "eval_loss": 0.43846017122268677, |
| "eval_runtime": 17.2509, |
| "eval_samples_per_second": 1101.623, |
| "eval_steps_per_second": 34.433, |
| "step": 11100 |
| }, |
| { |
| "epoch": 9.603789836347977, |
| "grad_norm": 0.5908451080322266, |
| "learning_rate": 0.000161588285960379, |
| "loss": 0.3758753967285156, |
| "step": 11150 |
| }, |
| { |
| "epoch": 9.603789836347977, |
| "eval_loss": 0.4382474720478058, |
| "eval_runtime": 16.8649, |
| "eval_samples_per_second": 1126.835, |
| "eval_steps_per_second": 35.221, |
| "step": 11150 |
| }, |
| { |
| "epoch": 9.646856158484065, |
| "grad_norm": 0.5804340243339539, |
| "learning_rate": 0.00016141602067183463, |
| "loss": 0.361547966003418, |
| "step": 11200 |
| }, |
| { |
| "epoch": 9.646856158484065, |
| "eval_loss": 0.4456506669521332, |
| "eval_runtime": 17.7288, |
| "eval_samples_per_second": 1071.926, |
| "eval_steps_per_second": 33.505, |
| "step": 11200 |
| }, |
| { |
| "epoch": 9.689922480620154, |
| "grad_norm": 0.6554870009422302, |
| "learning_rate": 0.00016124375538329028, |
| "loss": 0.3801045608520508, |
| "step": 11250 |
| }, |
| { |
| "epoch": 9.689922480620154, |
| "eval_loss": 0.43516239523887634, |
| "eval_runtime": 17.3981, |
| "eval_samples_per_second": 1092.305, |
| "eval_steps_per_second": 34.142, |
| "step": 11250 |
| }, |
| { |
| "epoch": 9.732988802756244, |
| "grad_norm": 0.7969627976417542, |
| "learning_rate": 0.00016107149009474592, |
| "loss": 0.3737542724609375, |
| "step": 11300 |
| }, |
| { |
| "epoch": 9.732988802756244, |
| "eval_loss": 0.44730818271636963, |
| "eval_runtime": 17.3437, |
| "eval_samples_per_second": 1095.73, |
| "eval_steps_per_second": 34.249, |
| "step": 11300 |
| }, |
| { |
| "epoch": 9.776055124892334, |
| "grad_norm": 0.7298914790153503, |
| "learning_rate": 0.00016089922480620156, |
| "loss": 0.3751395797729492, |
| "step": 11350 |
| }, |
| { |
| "epoch": 9.776055124892334, |
| "eval_loss": 0.43920770287513733, |
| "eval_runtime": 17.3811, |
| "eval_samples_per_second": 1093.369, |
| "eval_steps_per_second": 34.175, |
| "step": 11350 |
| }, |
| { |
| "epoch": 9.819121447028424, |
| "grad_norm": 0.6960113048553467, |
| "learning_rate": 0.0001607269595176572, |
| "loss": 0.38081275939941406, |
| "step": 11400 |
| }, |
| { |
| "epoch": 9.819121447028424, |
| "eval_loss": 0.4437948167324066, |
| "eval_runtime": 16.202, |
| "eval_samples_per_second": 1172.942, |
| "eval_steps_per_second": 36.662, |
| "step": 11400 |
| }, |
| { |
| "epoch": 9.862187769164514, |
| "grad_norm": 0.541384756565094, |
| "learning_rate": 0.00016055469422911284, |
| "loss": 0.3859746170043945, |
| "step": 11450 |
| }, |
| { |
| "epoch": 9.862187769164514, |
| "eval_loss": 0.44062063097953796, |
| "eval_runtime": 17.3324, |
| "eval_samples_per_second": 1096.441, |
| "eval_steps_per_second": 34.271, |
| "step": 11450 |
| }, |
| { |
| "epoch": 9.905254091300604, |
| "grad_norm": 0.651509165763855, |
| "learning_rate": 0.00016038242894056849, |
| "loss": 0.37168853759765624, |
| "step": 11500 |
| }, |
| { |
| "epoch": 9.905254091300604, |
| "eval_loss": 0.4349174201488495, |
| "eval_runtime": 16.8433, |
| "eval_samples_per_second": 1128.285, |
| "eval_steps_per_second": 35.266, |
| "step": 11500 |
| }, |
| { |
| "epoch": 9.948320413436692, |
| "grad_norm": 0.7806121706962585, |
| "learning_rate": 0.00016021016365202413, |
| "loss": 0.37619327545166015, |
| "step": 11550 |
| }, |
| { |
| "epoch": 9.948320413436692, |
| "eval_loss": 0.4412415623664856, |
| "eval_runtime": 17.4266, |
| "eval_samples_per_second": 1090.517, |
| "eval_steps_per_second": 34.086, |
| "step": 11550 |
| }, |
| { |
| "epoch": 9.991386735572782, |
| "grad_norm": 0.550398051738739, |
| "learning_rate": 0.00016003789836347977, |
| "loss": 0.37089550018310546, |
| "step": 11600 |
| }, |
| { |
| "epoch": 9.991386735572782, |
| "eval_loss": 0.4356813430786133, |
| "eval_runtime": 16.8274, |
| "eval_samples_per_second": 1129.35, |
| "eval_steps_per_second": 35.3, |
| "step": 11600 |
| }, |
| { |
| "epoch": 10.034453057708872, |
| "grad_norm": 0.6004517674446106, |
| "learning_rate": 0.0001598656330749354, |
| "loss": 0.3658511352539062, |
| "step": 11650 |
| }, |
| { |
| "epoch": 10.034453057708872, |
| "eval_loss": 0.4357646405696869, |
| "eval_runtime": 17.4756, |
| "eval_samples_per_second": 1087.456, |
| "eval_steps_per_second": 33.99, |
| "step": 11650 |
| }, |
| { |
| "epoch": 10.077519379844961, |
| "grad_norm": 0.6227307915687561, |
| "learning_rate": 0.00015969336778639105, |
| "loss": 0.36172927856445314, |
| "step": 11700 |
| }, |
| { |
| "epoch": 10.077519379844961, |
| "eval_loss": 0.44107383489608765, |
| "eval_runtime": 17.4553, |
| "eval_samples_per_second": 1088.724, |
| "eval_steps_per_second": 34.03, |
| "step": 11700 |
| }, |
| { |
| "epoch": 10.120585701981051, |
| "grad_norm": 0.6746264696121216, |
| "learning_rate": 0.0001595211024978467, |
| "loss": 0.3724021911621094, |
| "step": 11750 |
| }, |
| { |
| "epoch": 10.120585701981051, |
| "eval_loss": 0.44043871760368347, |
| "eval_runtime": 16.1534, |
| "eval_samples_per_second": 1176.469, |
| "eval_steps_per_second": 36.772, |
| "step": 11750 |
| }, |
| { |
| "epoch": 10.163652024117141, |
| "grad_norm": 0.7006517648696899, |
| "learning_rate": 0.00015934883720930234, |
| "loss": 0.37673473358154297, |
| "step": 11800 |
| }, |
| { |
| "epoch": 10.163652024117141, |
| "eval_loss": 0.4389665424823761, |
| "eval_runtime": 17.4499, |
| "eval_samples_per_second": 1089.063, |
| "eval_steps_per_second": 34.04, |
| "step": 11800 |
| }, |
| { |
| "epoch": 10.20671834625323, |
| "grad_norm": 0.5790155529975891, |
| "learning_rate": 0.00015917657192075795, |
| "loss": 0.3703031158447266, |
| "step": 11850 |
| }, |
| { |
| "epoch": 10.20671834625323, |
| "eval_loss": 0.44572848081588745, |
| "eval_runtime": 16.6654, |
| "eval_samples_per_second": 1140.325, |
| "eval_steps_per_second": 35.643, |
| "step": 11850 |
| }, |
| { |
| "epoch": 10.249784668389319, |
| "grad_norm": 0.6631970405578613, |
| "learning_rate": 0.00015900430663221362, |
| "loss": 0.374197998046875, |
| "step": 11900 |
| }, |
| { |
| "epoch": 10.249784668389319, |
| "eval_loss": 0.4412307143211365, |
| "eval_runtime": 17.3364, |
| "eval_samples_per_second": 1096.193, |
| "eval_steps_per_second": 34.263, |
| "step": 11900 |
| }, |
| { |
| "epoch": 10.292850990525409, |
| "grad_norm": 0.5273671746253967, |
| "learning_rate": 0.00015883204134366926, |
| "loss": 0.37083282470703127, |
| "step": 11950 |
| }, |
| { |
| "epoch": 10.292850990525409, |
| "eval_loss": 0.43490493297576904, |
| "eval_runtime": 17.5978, |
| "eval_samples_per_second": 1079.907, |
| "eval_steps_per_second": 33.754, |
| "step": 11950 |
| }, |
| { |
| "epoch": 10.335917312661499, |
| "grad_norm": 0.528450608253479, |
| "learning_rate": 0.0001586597760551249, |
| "loss": 0.3701524353027344, |
| "step": 12000 |
| }, |
| { |
| "epoch": 10.335917312661499, |
| "eval_loss": 0.4375080168247223, |
| "eval_runtime": 17.927, |
| "eval_samples_per_second": 1060.078, |
| "eval_steps_per_second": 33.134, |
| "step": 12000 |
| }, |
| { |
| "epoch": 10.378983634797589, |
| "grad_norm": 0.5573973059654236, |
| "learning_rate": 0.00015848751076658055, |
| "loss": 0.3601241683959961, |
| "step": 12050 |
| }, |
| { |
| "epoch": 10.378983634797589, |
| "eval_loss": 0.4350755512714386, |
| "eval_runtime": 17.0222, |
| "eval_samples_per_second": 1116.424, |
| "eval_steps_per_second": 34.896, |
| "step": 12050 |
| }, |
| { |
| "epoch": 10.422049956933678, |
| "grad_norm": 0.6054695844650269, |
| "learning_rate": 0.0001583152454780362, |
| "loss": 0.3784658050537109, |
| "step": 12100 |
| }, |
| { |
| "epoch": 10.422049956933678, |
| "eval_loss": 0.43380206823349, |
| "eval_runtime": 17.5629, |
| "eval_samples_per_second": 1082.056, |
| "eval_steps_per_second": 33.821, |
| "step": 12100 |
| }, |
| { |
| "epoch": 10.465116279069768, |
| "grad_norm": 0.6000425815582275, |
| "learning_rate": 0.00015814298018949183, |
| "loss": 0.3644602966308594, |
| "step": 12150 |
| }, |
| { |
| "epoch": 10.465116279069768, |
| "eval_loss": 0.43688303232192993, |
| "eval_runtime": 16.9392, |
| "eval_samples_per_second": 1121.897, |
| "eval_steps_per_second": 35.067, |
| "step": 12150 |
| }, |
| { |
| "epoch": 10.508182601205856, |
| "grad_norm": 0.6011264324188232, |
| "learning_rate": 0.00015797071490094745, |
| "loss": 0.36014255523681643, |
| "step": 12200 |
| }, |
| { |
| "epoch": 10.508182601205856, |
| "eval_loss": 0.43164920806884766, |
| "eval_runtime": 16.5719, |
| "eval_samples_per_second": 1146.76, |
| "eval_steps_per_second": 35.844, |
| "step": 12200 |
| }, |
| { |
| "epoch": 10.551248923341946, |
| "grad_norm": 0.5600745677947998, |
| "learning_rate": 0.00015779844961240312, |
| "loss": 0.3694350433349609, |
| "step": 12250 |
| }, |
| { |
| "epoch": 10.551248923341946, |
| "eval_loss": 0.43914440274238586, |
| "eval_runtime": 15.8135, |
| "eval_samples_per_second": 1201.757, |
| "eval_steps_per_second": 37.563, |
| "step": 12250 |
| }, |
| { |
| "epoch": 10.594315245478036, |
| "grad_norm": 0.7852029800415039, |
| "learning_rate": 0.00015762618432385876, |
| "loss": 0.3673841094970703, |
| "step": 12300 |
| }, |
| { |
| "epoch": 10.594315245478036, |
| "eval_loss": 0.4305252432823181, |
| "eval_runtime": 16.0232, |
| "eval_samples_per_second": 1186.031, |
| "eval_steps_per_second": 37.071, |
| "step": 12300 |
| }, |
| { |
| "epoch": 10.637381567614126, |
| "grad_norm": 0.4954133629798889, |
| "learning_rate": 0.0001574539190353144, |
| "loss": 0.37212295532226564, |
| "step": 12350 |
| }, |
| { |
| "epoch": 10.637381567614126, |
| "eval_loss": 0.42601069808006287, |
| "eval_runtime": 15.7868, |
| "eval_samples_per_second": 1203.79, |
| "eval_steps_per_second": 37.626, |
| "step": 12350 |
| }, |
| { |
| "epoch": 10.680447889750216, |
| "grad_norm": 0.5571798086166382, |
| "learning_rate": 0.00015728165374677004, |
| "loss": 0.36096405029296874, |
| "step": 12400 |
| }, |
| { |
| "epoch": 10.680447889750216, |
| "eval_loss": 0.4234640598297119, |
| "eval_runtime": 16.6133, |
| "eval_samples_per_second": 1143.9, |
| "eval_steps_per_second": 35.754, |
| "step": 12400 |
| }, |
| { |
| "epoch": 10.723514211886306, |
| "grad_norm": 0.6863204836845398, |
| "learning_rate": 0.00015710938845822566, |
| "loss": 0.370234375, |
| "step": 12450 |
| }, |
| { |
| "epoch": 10.723514211886306, |
| "eval_loss": 0.4254721999168396, |
| "eval_runtime": 17.0259, |
| "eval_samples_per_second": 1116.184, |
| "eval_steps_per_second": 34.888, |
| "step": 12450 |
| }, |
| { |
| "epoch": 10.766580534022394, |
| "grad_norm": 0.6626876592636108, |
| "learning_rate": 0.00015693712316968133, |
| "loss": 0.36537841796875, |
| "step": 12500 |
| }, |
| { |
| "epoch": 10.766580534022394, |
| "eval_loss": 0.4343000054359436, |
| "eval_runtime": 16.9232, |
| "eval_samples_per_second": 1122.957, |
| "eval_steps_per_second": 35.1, |
| "step": 12500 |
| }, |
| { |
| "epoch": 10.809646856158484, |
| "grad_norm": 0.509812593460083, |
| "learning_rate": 0.00015676485788113694, |
| "loss": 0.3594136047363281, |
| "step": 12550 |
| }, |
| { |
| "epoch": 10.809646856158484, |
| "eval_loss": 0.430789589881897, |
| "eval_runtime": 16.2233, |
| "eval_samples_per_second": 1171.401, |
| "eval_steps_per_second": 36.614, |
| "step": 12550 |
| }, |
| { |
| "epoch": 10.852713178294573, |
| "grad_norm": 0.786090075969696, |
| "learning_rate": 0.0001565925925925926, |
| "loss": 0.35622817993164063, |
| "step": 12600 |
| }, |
| { |
| "epoch": 10.852713178294573, |
| "eval_loss": 0.4235756993293762, |
| "eval_runtime": 16.8862, |
| "eval_samples_per_second": 1125.416, |
| "eval_steps_per_second": 35.177, |
| "step": 12600 |
| }, |
| { |
| "epoch": 10.895779500430663, |
| "grad_norm": 0.6675612330436707, |
| "learning_rate": 0.00015642032730404823, |
| "loss": 0.36076969146728516, |
| "step": 12650 |
| }, |
| { |
| "epoch": 10.895779500430663, |
| "eval_loss": 0.432416170835495, |
| "eval_runtime": 16.6454, |
| "eval_samples_per_second": 1141.698, |
| "eval_steps_per_second": 35.686, |
| "step": 12650 |
| }, |
| { |
| "epoch": 10.938845822566753, |
| "grad_norm": 0.6346985697746277, |
| "learning_rate": 0.00015624806201550387, |
| "loss": 0.36723545074462893, |
| "step": 12700 |
| }, |
| { |
| "epoch": 10.938845822566753, |
| "eval_loss": 0.42092081904411316, |
| "eval_runtime": 17.2276, |
| "eval_samples_per_second": 1103.111, |
| "eval_steps_per_second": 34.479, |
| "step": 12700 |
| }, |
| { |
| "epoch": 10.981912144702843, |
| "grad_norm": 0.6402779817581177, |
| "learning_rate": 0.00015607579672695954, |
| "loss": 0.3656714630126953, |
| "step": 12750 |
| }, |
| { |
| "epoch": 10.981912144702843, |
| "eval_loss": 0.4239721894264221, |
| "eval_runtime": 16.6403, |
| "eval_samples_per_second": 1142.05, |
| "eval_steps_per_second": 35.697, |
| "step": 12750 |
| }, |
| { |
| "epoch": 11.024978466838933, |
| "grad_norm": 0.5922400951385498, |
| "learning_rate": 0.00015590353143841515, |
| "loss": 0.3640504455566406, |
| "step": 12800 |
| }, |
| { |
| "epoch": 11.024978466838933, |
| "eval_loss": 0.41864249110221863, |
| "eval_runtime": 17.2535, |
| "eval_samples_per_second": 1101.46, |
| "eval_steps_per_second": 34.428, |
| "step": 12800 |
| }, |
| { |
| "epoch": 11.06804478897502, |
| "grad_norm": 0.5507603287696838, |
| "learning_rate": 0.00015573126614987082, |
| "loss": 0.3542987060546875, |
| "step": 12850 |
| }, |
| { |
| "epoch": 11.06804478897502, |
| "eval_loss": 0.4256451725959778, |
| "eval_runtime": 17.2905, |
| "eval_samples_per_second": 1099.1, |
| "eval_steps_per_second": 34.354, |
| "step": 12850 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 0.548511266708374, |
| "learning_rate": 0.00015555900086132644, |
| "loss": 0.3623085021972656, |
| "step": 12900 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "eval_loss": 0.42673003673553467, |
| "eval_runtime": 17.4446, |
| "eval_samples_per_second": 1089.39, |
| "eval_steps_per_second": 34.051, |
| "step": 12900 |
| }, |
| { |
| "epoch": 11.1541774332472, |
| "grad_norm": 0.6575692296028137, |
| "learning_rate": 0.0001553867355727821, |
| "loss": 0.3683247375488281, |
| "step": 12950 |
| }, |
| { |
| "epoch": 11.1541774332472, |
| "eval_loss": 0.4245961606502533, |
| "eval_runtime": 17.3578, |
| "eval_samples_per_second": 1094.837, |
| "eval_steps_per_second": 34.221, |
| "step": 12950 |
| }, |
| { |
| "epoch": 11.19724375538329, |
| "grad_norm": 0.5141576528549194, |
| "learning_rate": 0.00015521447028423772, |
| "loss": 0.3595258331298828, |
| "step": 13000 |
| }, |
| { |
| "epoch": 11.19724375538329, |
| "eval_loss": 0.4224443733692169, |
| "eval_runtime": 17.488, |
| "eval_samples_per_second": 1086.686, |
| "eval_steps_per_second": 33.966, |
| "step": 13000 |
| }, |
| { |
| "epoch": 11.24031007751938, |
| "grad_norm": 0.4834965169429779, |
| "learning_rate": 0.00015504220499569336, |
| "loss": 0.3647360992431641, |
| "step": 13050 |
| }, |
| { |
| "epoch": 11.24031007751938, |
| "eval_loss": 0.42276498675346375, |
| "eval_runtime": 17.1192, |
| "eval_samples_per_second": 1110.098, |
| "eval_steps_per_second": 34.698, |
| "step": 13050 |
| }, |
| { |
| "epoch": 11.28337639965547, |
| "grad_norm": 0.6599516272544861, |
| "learning_rate": 0.00015486993970714903, |
| "loss": 0.3615505599975586, |
| "step": 13100 |
| }, |
| { |
| "epoch": 11.28337639965547, |
| "eval_loss": 0.4229472577571869, |
| "eval_runtime": 16.418, |
| "eval_samples_per_second": 1157.508, |
| "eval_steps_per_second": 36.18, |
| "step": 13100 |
| }, |
| { |
| "epoch": 11.326442721791558, |
| "grad_norm": 0.6309078931808472, |
| "learning_rate": 0.00015469767441860465, |
| "loss": 0.36210174560546876, |
| "step": 13150 |
| }, |
| { |
| "epoch": 11.326442721791558, |
| "eval_loss": 0.42000076174736023, |
| "eval_runtime": 17.3299, |
| "eval_samples_per_second": 1096.604, |
| "eval_steps_per_second": 34.276, |
| "step": 13150 |
| }, |
| { |
| "epoch": 11.369509043927648, |
| "grad_norm": 0.5616578459739685, |
| "learning_rate": 0.00015452540913006032, |
| "loss": 0.3634178161621094, |
| "step": 13200 |
| }, |
| { |
| "epoch": 11.369509043927648, |
| "eval_loss": 0.4281771779060364, |
| "eval_runtime": 17.3687, |
| "eval_samples_per_second": 1094.15, |
| "eval_steps_per_second": 34.199, |
| "step": 13200 |
| }, |
| { |
| "epoch": 11.412575366063738, |
| "grad_norm": 0.6235555410385132, |
| "learning_rate": 0.00015435314384151593, |
| "loss": 0.35823890686035154, |
| "step": 13250 |
| }, |
| { |
| "epoch": 11.412575366063738, |
| "eval_loss": 0.4235421121120453, |
| "eval_runtime": 16.7581, |
| "eval_samples_per_second": 1134.016, |
| "eval_steps_per_second": 35.445, |
| "step": 13250 |
| }, |
| { |
| "epoch": 11.455641688199828, |
| "grad_norm": 0.5683552026748657, |
| "learning_rate": 0.00015418087855297157, |
| "loss": 0.3551355743408203, |
| "step": 13300 |
| }, |
| { |
| "epoch": 11.455641688199828, |
| "eval_loss": 0.40868687629699707, |
| "eval_runtime": 17.5985, |
| "eval_samples_per_second": 1079.862, |
| "eval_steps_per_second": 33.753, |
| "step": 13300 |
| }, |
| { |
| "epoch": 11.498708010335918, |
| "grad_norm": 0.5393732786178589, |
| "learning_rate": 0.00015400861326442722, |
| "loss": 0.35280082702636717, |
| "step": 13350 |
| }, |
| { |
| "epoch": 11.498708010335918, |
| "eval_loss": 0.4148114025592804, |
| "eval_runtime": 17.5185, |
| "eval_samples_per_second": 1084.798, |
| "eval_steps_per_second": 33.907, |
| "step": 13350 |
| }, |
| { |
| "epoch": 11.541774332472007, |
| "grad_norm": 0.579129159450531, |
| "learning_rate": 0.00015383634797588286, |
| "loss": 0.36021167755126954, |
| "step": 13400 |
| }, |
| { |
| "epoch": 11.541774332472007, |
| "eval_loss": 0.42251133918762207, |
| "eval_runtime": 17.6195, |
| "eval_samples_per_second": 1078.579, |
| "eval_steps_per_second": 33.713, |
| "step": 13400 |
| }, |
| { |
| "epoch": 11.584840654608097, |
| "grad_norm": 0.7097395062446594, |
| "learning_rate": 0.00015366408268733853, |
| "loss": 0.3502839660644531, |
| "step": 13450 |
| }, |
| { |
| "epoch": 11.584840654608097, |
| "eval_loss": 0.42374807596206665, |
| "eval_runtime": 17.2164, |
| "eval_samples_per_second": 1103.831, |
| "eval_steps_per_second": 34.502, |
| "step": 13450 |
| }, |
| { |
| "epoch": 11.627906976744185, |
| "grad_norm": 0.766302227973938, |
| "learning_rate": 0.00015349181739879414, |
| "loss": 0.3584626770019531, |
| "step": 13500 |
| }, |
| { |
| "epoch": 11.627906976744185, |
| "eval_loss": 0.4234767556190491, |
| "eval_runtime": 17.3125, |
| "eval_samples_per_second": 1097.703, |
| "eval_steps_per_second": 34.31, |
| "step": 13500 |
| }, |
| { |
| "epoch": 11.670973298880275, |
| "grad_norm": 0.7396245002746582, |
| "learning_rate": 0.00015331955211024979, |
| "loss": 0.3636350250244141, |
| "step": 13550 |
| }, |
| { |
| "epoch": 11.670973298880275, |
| "eval_loss": 0.42209184169769287, |
| "eval_runtime": 17.216, |
| "eval_samples_per_second": 1103.857, |
| "eval_steps_per_second": 34.503, |
| "step": 13550 |
| }, |
| { |
| "epoch": 11.714039621016365, |
| "grad_norm": 0.6891668438911438, |
| "learning_rate": 0.00015314728682170543, |
| "loss": 0.35415069580078123, |
| "step": 13600 |
| }, |
| { |
| "epoch": 11.714039621016365, |
| "eval_loss": 0.42032742500305176, |
| "eval_runtime": 16.795, |
| "eval_samples_per_second": 1131.529, |
| "eval_steps_per_second": 35.368, |
| "step": 13600 |
| }, |
| { |
| "epoch": 11.757105943152455, |
| "grad_norm": 0.6603532433509827, |
| "learning_rate": 0.00015297502153316107, |
| "loss": 0.3615140533447266, |
| "step": 13650 |
| }, |
| { |
| "epoch": 11.757105943152455, |
| "eval_loss": 0.4193870723247528, |
| "eval_runtime": 17.3607, |
| "eval_samples_per_second": 1094.655, |
| "eval_steps_per_second": 34.215, |
| "step": 13650 |
| }, |
| { |
| "epoch": 11.800172265288545, |
| "grad_norm": 0.6098962426185608, |
| "learning_rate": 0.0001528027562446167, |
| "loss": 0.36224365234375, |
| "step": 13700 |
| }, |
| { |
| "epoch": 11.800172265288545, |
| "eval_loss": 0.4110799729824066, |
| "eval_runtime": 16.6706, |
| "eval_samples_per_second": 1139.972, |
| "eval_steps_per_second": 35.632, |
| "step": 13700 |
| }, |
| { |
| "epoch": 11.843238587424635, |
| "grad_norm": 0.5804024338722229, |
| "learning_rate": 0.00015263049095607235, |
| "loss": 0.35254592895507814, |
| "step": 13750 |
| }, |
| { |
| "epoch": 11.843238587424635, |
| "eval_loss": 0.42059120535850525, |
| "eval_runtime": 17.1304, |
| "eval_samples_per_second": 1109.375, |
| "eval_steps_per_second": 34.675, |
| "step": 13750 |
| }, |
| { |
| "epoch": 11.886304909560723, |
| "grad_norm": 0.6594691872596741, |
| "learning_rate": 0.00015245822566752802, |
| "loss": 0.35712413787841796, |
| "step": 13800 |
| }, |
| { |
| "epoch": 11.886304909560723, |
| "eval_loss": 0.42418330907821655, |
| "eval_runtime": 17.2047, |
| "eval_samples_per_second": 1104.579, |
| "eval_steps_per_second": 34.525, |
| "step": 13800 |
| }, |
| { |
| "epoch": 11.929371231696813, |
| "grad_norm": 0.5514585971832275, |
| "learning_rate": 0.00015228596037898364, |
| "loss": 0.35963973999023435, |
| "step": 13850 |
| }, |
| { |
| "epoch": 11.929371231696813, |
| "eval_loss": 0.41717028617858887, |
| "eval_runtime": 17.6749, |
| "eval_samples_per_second": 1075.2, |
| "eval_steps_per_second": 33.607, |
| "step": 13850 |
| }, |
| { |
| "epoch": 11.972437553832902, |
| "grad_norm": 0.5450137257575989, |
| "learning_rate": 0.00015211369509043928, |
| "loss": 0.34988128662109375, |
| "step": 13900 |
| }, |
| { |
| "epoch": 11.972437553832902, |
| "eval_loss": 0.4247604310512543, |
| "eval_runtime": 17.676, |
| "eval_samples_per_second": 1075.127, |
| "eval_steps_per_second": 33.605, |
| "step": 13900 |
| }, |
| { |
| "epoch": 12.015503875968992, |
| "grad_norm": 0.5637671947479248, |
| "learning_rate": 0.00015194142980189492, |
| "loss": 0.351929931640625, |
| "step": 13950 |
| }, |
| { |
| "epoch": 12.015503875968992, |
| "eval_loss": 0.42415958642959595, |
| "eval_runtime": 17.3608, |
| "eval_samples_per_second": 1094.649, |
| "eval_steps_per_second": 34.215, |
| "step": 13950 |
| }, |
| { |
| "epoch": 12.058570198105082, |
| "grad_norm": 0.7126480937004089, |
| "learning_rate": 0.00015176916451335056, |
| "loss": 0.3480724334716797, |
| "step": 14000 |
| }, |
| { |
| "epoch": 12.058570198105082, |
| "eval_loss": 0.4246508479118347, |
| "eval_runtime": 17.3426, |
| "eval_samples_per_second": 1095.8, |
| "eval_steps_per_second": 34.251, |
| "step": 14000 |
| }, |
| { |
| "epoch": 12.101636520241172, |
| "grad_norm": 0.5521771907806396, |
| "learning_rate": 0.0001515968992248062, |
| "loss": 0.3527442169189453, |
| "step": 14050 |
| }, |
| { |
| "epoch": 12.101636520241172, |
| "eval_loss": 0.42050713300704956, |
| "eval_runtime": 16.8859, |
| "eval_samples_per_second": 1125.438, |
| "eval_steps_per_second": 35.177, |
| "step": 14050 |
| }, |
| { |
| "epoch": 12.144702842377262, |
| "grad_norm": 0.626928985118866, |
| "learning_rate": 0.00015142463393626185, |
| "loss": 0.3487448883056641, |
| "step": 14100 |
| }, |
| { |
| "epoch": 12.144702842377262, |
| "eval_loss": 0.4141220152378082, |
| "eval_runtime": 17.5821, |
| "eval_samples_per_second": 1080.873, |
| "eval_steps_per_second": 33.784, |
| "step": 14100 |
| }, |
| { |
| "epoch": 12.18776916451335, |
| "grad_norm": 0.6588882803916931, |
| "learning_rate": 0.0001512523686477175, |
| "loss": 0.3564021301269531, |
| "step": 14150 |
| }, |
| { |
| "epoch": 12.18776916451335, |
| "eval_loss": 0.421312153339386, |
| "eval_runtime": 16.615, |
| "eval_samples_per_second": 1143.784, |
| "eval_steps_per_second": 35.751, |
| "step": 14150 |
| }, |
| { |
| "epoch": 12.23083548664944, |
| "grad_norm": 0.5131123065948486, |
| "learning_rate": 0.00015108010335917313, |
| "loss": 0.349322509765625, |
| "step": 14200 |
| }, |
| { |
| "epoch": 12.23083548664944, |
| "eval_loss": 0.4248192310333252, |
| "eval_runtime": 17.8653, |
| "eval_samples_per_second": 1063.736, |
| "eval_steps_per_second": 33.249, |
| "step": 14200 |
| }, |
| { |
| "epoch": 12.27390180878553, |
| "grad_norm": 0.6044149994850159, |
| "learning_rate": 0.00015090783807062877, |
| "loss": 0.34683589935302733, |
| "step": 14250 |
| }, |
| { |
| "epoch": 12.27390180878553, |
| "eval_loss": 0.4210798144340515, |
| "eval_runtime": 17.3506, |
| "eval_samples_per_second": 1095.293, |
| "eval_steps_per_second": 34.235, |
| "step": 14250 |
| }, |
| { |
| "epoch": 12.31696813092162, |
| "grad_norm": 0.5438874363899231, |
| "learning_rate": 0.00015073557278208442, |
| "loss": 0.3560785675048828, |
| "step": 14300 |
| }, |
| { |
| "epoch": 12.31696813092162, |
| "eval_loss": 0.4185738265514374, |
| "eval_runtime": 17.1799, |
| "eval_samples_per_second": 1106.179, |
| "eval_steps_per_second": 34.575, |
| "step": 14300 |
| }, |
| { |
| "epoch": 12.36003445305771, |
| "grad_norm": 0.8494352102279663, |
| "learning_rate": 0.00015056330749354006, |
| "loss": 0.34643798828125, |
| "step": 14350 |
| }, |
| { |
| "epoch": 12.36003445305771, |
| "eval_loss": 0.41647806763648987, |
| "eval_runtime": 17.4394, |
| "eval_samples_per_second": 1089.717, |
| "eval_steps_per_second": 34.061, |
| "step": 14350 |
| }, |
| { |
| "epoch": 12.4031007751938, |
| "grad_norm": 0.5796271562576294, |
| "learning_rate": 0.0001503910422049957, |
| "loss": 0.34534194946289065, |
| "step": 14400 |
| }, |
| { |
| "epoch": 12.4031007751938, |
| "eval_loss": 0.41010603308677673, |
| "eval_runtime": 17.4261, |
| "eval_samples_per_second": 1090.549, |
| "eval_steps_per_second": 34.087, |
| "step": 14400 |
| }, |
| { |
| "epoch": 12.446167097329887, |
| "grad_norm": 0.5342833399772644, |
| "learning_rate": 0.00015021877691645134, |
| "loss": 0.34199066162109376, |
| "step": 14450 |
| }, |
| { |
| "epoch": 12.446167097329887, |
| "eval_loss": 0.4103580415248871, |
| "eval_runtime": 17.3016, |
| "eval_samples_per_second": 1098.398, |
| "eval_steps_per_second": 34.332, |
| "step": 14450 |
| }, |
| { |
| "epoch": 12.489233419465977, |
| "grad_norm": 0.5282058715820312, |
| "learning_rate": 0.00015004651162790698, |
| "loss": 0.34900962829589843, |
| "step": 14500 |
| }, |
| { |
| "epoch": 12.489233419465977, |
| "eval_loss": 0.4202011227607727, |
| "eval_runtime": 16.822, |
| "eval_samples_per_second": 1129.713, |
| "eval_steps_per_second": 35.311, |
| "step": 14500 |
| }, |
| { |
| "epoch": 12.532299741602067, |
| "grad_norm": 0.5700145959854126, |
| "learning_rate": 0.00014987424633936263, |
| "loss": 0.34178398132324217, |
| "step": 14550 |
| }, |
| { |
| "epoch": 12.532299741602067, |
| "eval_loss": 0.42386436462402344, |
| "eval_runtime": 19.712, |
| "eval_samples_per_second": 964.082, |
| "eval_steps_per_second": 30.134, |
| "step": 14550 |
| }, |
| { |
| "epoch": 12.575366063738157, |
| "grad_norm": 0.5954127311706543, |
| "learning_rate": 0.00014970198105081827, |
| "loss": 0.3540873718261719, |
| "step": 14600 |
| }, |
| { |
| "epoch": 12.575366063738157, |
| "eval_loss": 0.41673940420150757, |
| "eval_runtime": 18.1461, |
| "eval_samples_per_second": 1047.278, |
| "eval_steps_per_second": 32.734, |
| "step": 14600 |
| }, |
| { |
| "epoch": 12.618432385874247, |
| "grad_norm": 0.5680475831031799, |
| "learning_rate": 0.0001495297157622739, |
| "loss": 0.3487066650390625, |
| "step": 14650 |
| }, |
| { |
| "epoch": 12.618432385874247, |
| "eval_loss": 0.41556957364082336, |
| "eval_runtime": 17.4336, |
| "eval_samples_per_second": 1090.081, |
| "eval_steps_per_second": 34.072, |
| "step": 14650 |
| }, |
| { |
| "epoch": 12.661498708010337, |
| "grad_norm": 0.5987432599067688, |
| "learning_rate": 0.00014935745047372955, |
| "loss": 0.35428131103515625, |
| "step": 14700 |
| }, |
| { |
| "epoch": 12.661498708010337, |
| "eval_loss": 0.41658732295036316, |
| "eval_runtime": 19.2492, |
| "eval_samples_per_second": 987.26, |
| "eval_steps_per_second": 30.858, |
| "step": 14700 |
| }, |
| { |
| "epoch": 12.704565030146426, |
| "grad_norm": 0.5099753737449646, |
| "learning_rate": 0.0001491851851851852, |
| "loss": 0.3417974853515625, |
| "step": 14750 |
| }, |
| { |
| "epoch": 12.704565030146426, |
| "eval_loss": 0.4173789918422699, |
| "eval_runtime": 17.2326, |
| "eval_samples_per_second": 1102.796, |
| "eval_steps_per_second": 34.47, |
| "step": 14750 |
| }, |
| { |
| "epoch": 12.747631352282514, |
| "grad_norm": 0.5181341767311096, |
| "learning_rate": 0.00014901291989664084, |
| "loss": 0.3451295471191406, |
| "step": 14800 |
| }, |
| { |
| "epoch": 12.747631352282514, |
| "eval_loss": 0.41771939396858215, |
| "eval_runtime": 17.5448, |
| "eval_samples_per_second": 1083.171, |
| "eval_steps_per_second": 33.856, |
| "step": 14800 |
| }, |
| { |
| "epoch": 12.790697674418604, |
| "grad_norm": 0.7171940207481384, |
| "learning_rate": 0.00014884065460809648, |
| "loss": 0.3461411285400391, |
| "step": 14850 |
| }, |
| { |
| "epoch": 12.790697674418604, |
| "eval_loss": 0.41813889145851135, |
| "eval_runtime": 17.4502, |
| "eval_samples_per_second": 1089.039, |
| "eval_steps_per_second": 34.04, |
| "step": 14850 |
| }, |
| { |
| "epoch": 12.833763996554694, |
| "grad_norm": 0.5442430377006531, |
| "learning_rate": 0.00014866838931955212, |
| "loss": 0.35834976196289064, |
| "step": 14900 |
| }, |
| { |
| "epoch": 12.833763996554694, |
| "eval_loss": 0.4198405146598816, |
| "eval_runtime": 17.152, |
| "eval_samples_per_second": 1107.977, |
| "eval_steps_per_second": 34.632, |
| "step": 14900 |
| }, |
| { |
| "epoch": 12.876830318690784, |
| "grad_norm": 0.7742429375648499, |
| "learning_rate": 0.00014849612403100776, |
| "loss": 0.35129867553710936, |
| "step": 14950 |
| }, |
| { |
| "epoch": 12.876830318690784, |
| "eval_loss": 0.4203258156776428, |
| "eval_runtime": 16.187, |
| "eval_samples_per_second": 1174.029, |
| "eval_steps_per_second": 36.696, |
| "step": 14950 |
| }, |
| { |
| "epoch": 12.919896640826874, |
| "grad_norm": 0.49093976616859436, |
| "learning_rate": 0.0001483238587424634, |
| "loss": 0.35147186279296877, |
| "step": 15000 |
| }, |
| { |
| "epoch": 12.919896640826874, |
| "eval_loss": 0.41292715072631836, |
| "eval_runtime": 18.151, |
| "eval_samples_per_second": 1046.997, |
| "eval_steps_per_second": 32.726, |
| "step": 15000 |
| }, |
| { |
| "epoch": 12.962962962962964, |
| "grad_norm": 0.5578615069389343, |
| "learning_rate": 0.00014815159345391905, |
| "loss": 0.34760341644287107, |
| "step": 15050 |
| }, |
| { |
| "epoch": 12.962962962962964, |
| "eval_loss": 0.4126093089580536, |
| "eval_runtime": 16.4589, |
| "eval_samples_per_second": 1154.636, |
| "eval_steps_per_second": 36.09, |
| "step": 15050 |
| }, |
| { |
| "epoch": 13.006029285099052, |
| "grad_norm": 0.6248014569282532, |
| "learning_rate": 0.00014797932816537466, |
| "loss": 0.34215831756591797, |
| "step": 15100 |
| }, |
| { |
| "epoch": 13.006029285099052, |
| "eval_loss": 0.4089677631855011, |
| "eval_runtime": 17.221, |
| "eval_samples_per_second": 1103.538, |
| "eval_steps_per_second": 34.493, |
| "step": 15100 |
| }, |
| { |
| "epoch": 13.049095607235142, |
| "grad_norm": 0.4811834990978241, |
| "learning_rate": 0.00014780706287683033, |
| "loss": 0.3458440399169922, |
| "step": 15150 |
| }, |
| { |
| "epoch": 13.049095607235142, |
| "eval_loss": 0.4163960814476013, |
| "eval_runtime": 17.1986, |
| "eval_samples_per_second": 1104.976, |
| "eval_steps_per_second": 34.538, |
| "step": 15150 |
| }, |
| { |
| "epoch": 13.092161929371231, |
| "grad_norm": 0.5372201204299927, |
| "learning_rate": 0.00014763479758828597, |
| "loss": 0.343751220703125, |
| "step": 15200 |
| }, |
| { |
| "epoch": 13.092161929371231, |
| "eval_loss": 0.41129928827285767, |
| "eval_runtime": 17.3258, |
| "eval_samples_per_second": 1096.86, |
| "eval_steps_per_second": 34.284, |
| "step": 15200 |
| }, |
| { |
| "epoch": 13.135228251507321, |
| "grad_norm": 0.48968204855918884, |
| "learning_rate": 0.00014746253229974162, |
| "loss": 0.35028865814208987, |
| "step": 15250 |
| }, |
| { |
| "epoch": 13.135228251507321, |
| "eval_loss": 0.41533833742141724, |
| "eval_runtime": 17.18, |
| "eval_samples_per_second": 1106.172, |
| "eval_steps_per_second": 34.575, |
| "step": 15250 |
| }, |
| { |
| "epoch": 13.178294573643411, |
| "grad_norm": 0.6101740002632141, |
| "learning_rate": 0.00014729026701119726, |
| "loss": 0.3286838912963867, |
| "step": 15300 |
| }, |
| { |
| "epoch": 13.178294573643411, |
| "eval_loss": 0.41756105422973633, |
| "eval_runtime": 17.1329, |
| "eval_samples_per_second": 1109.213, |
| "eval_steps_per_second": 34.67, |
| "step": 15300 |
| }, |
| { |
| "epoch": 13.221360895779501, |
| "grad_norm": 0.5691545009613037, |
| "learning_rate": 0.0001471180017226529, |
| "loss": 0.3424900817871094, |
| "step": 15350 |
| }, |
| { |
| "epoch": 13.221360895779501, |
| "eval_loss": 0.41093096137046814, |
| "eval_runtime": 17.2687, |
| "eval_samples_per_second": 1100.491, |
| "eval_steps_per_second": 34.398, |
| "step": 15350 |
| }, |
| { |
| "epoch": 13.264427217915589, |
| "grad_norm": 0.5297402143478394, |
| "learning_rate": 0.00014694573643410854, |
| "loss": 0.34250846862792966, |
| "step": 15400 |
| }, |
| { |
| "epoch": 13.264427217915589, |
| "eval_loss": 0.40691322088241577, |
| "eval_runtime": 16.9163, |
| "eval_samples_per_second": 1123.415, |
| "eval_steps_per_second": 35.114, |
| "step": 15400 |
| }, |
| { |
| "epoch": 13.307493540051679, |
| "grad_norm": 0.555321455001831, |
| "learning_rate": 0.00014677347114556416, |
| "loss": 0.3355692291259766, |
| "step": 15450 |
| }, |
| { |
| "epoch": 13.307493540051679, |
| "eval_loss": 0.4075530171394348, |
| "eval_runtime": 17.4202, |
| "eval_samples_per_second": 1090.92, |
| "eval_steps_per_second": 34.098, |
| "step": 15450 |
| }, |
| { |
| "epoch": 13.350559862187769, |
| "grad_norm": 0.5421215295791626, |
| "learning_rate": 0.00014660120585701983, |
| "loss": 0.3451258850097656, |
| "step": 15500 |
| }, |
| { |
| "epoch": 13.350559862187769, |
| "eval_loss": 0.40354102849960327, |
| "eval_runtime": 16.8807, |
| "eval_samples_per_second": 1125.781, |
| "eval_steps_per_second": 35.188, |
| "step": 15500 |
| }, |
| { |
| "epoch": 13.393626184323859, |
| "grad_norm": 0.5563757419586182, |
| "learning_rate": 0.00014642894056847547, |
| "loss": 0.3422290802001953, |
| "step": 15550 |
| }, |
| { |
| "epoch": 13.393626184323859, |
| "eval_loss": 0.40993013978004456, |
| "eval_runtime": 17.5299, |
| "eval_samples_per_second": 1084.091, |
| "eval_steps_per_second": 33.885, |
| "step": 15550 |
| }, |
| { |
| "epoch": 13.436692506459949, |
| "grad_norm": 0.4965982437133789, |
| "learning_rate": 0.0001462566752799311, |
| "loss": 0.34627342224121094, |
| "step": 15600 |
| }, |
| { |
| "epoch": 13.436692506459949, |
| "eval_loss": 0.40034791827201843, |
| "eval_runtime": 17.5168, |
| "eval_samples_per_second": 1084.903, |
| "eval_steps_per_second": 33.91, |
| "step": 15600 |
| }, |
| { |
| "epoch": 13.479758828596038, |
| "grad_norm": 0.5663852691650391, |
| "learning_rate": 0.00014608440999138675, |
| "loss": 0.3398374938964844, |
| "step": 15650 |
| }, |
| { |
| "epoch": 13.479758828596038, |
| "eval_loss": 0.41277310252189636, |
| "eval_runtime": 17.3753, |
| "eval_samples_per_second": 1093.739, |
| "eval_steps_per_second": 34.187, |
| "step": 15650 |
| }, |
| { |
| "epoch": 13.522825150732128, |
| "grad_norm": 0.5163738131523132, |
| "learning_rate": 0.00014591214470284237, |
| "loss": 0.34459991455078126, |
| "step": 15700 |
| }, |
| { |
| "epoch": 13.522825150732128, |
| "eval_loss": 0.40066856145858765, |
| "eval_runtime": 17.3668, |
| "eval_samples_per_second": 1094.271, |
| "eval_steps_per_second": 34.203, |
| "step": 15700 |
| }, |
| { |
| "epoch": 13.565891472868216, |
| "grad_norm": 0.6366977691650391, |
| "learning_rate": 0.00014573987941429804, |
| "loss": 0.34204261779785156, |
| "step": 15750 |
| }, |
| { |
| "epoch": 13.565891472868216, |
| "eval_loss": 0.4138205647468567, |
| "eval_runtime": 17.4203, |
| "eval_samples_per_second": 1090.911, |
| "eval_steps_per_second": 34.098, |
| "step": 15750 |
| }, |
| { |
| "epoch": 13.608957795004306, |
| "grad_norm": 0.6208717226982117, |
| "learning_rate": 0.00014556761412575365, |
| "loss": 0.34886016845703127, |
| "step": 15800 |
| }, |
| { |
| "epoch": 13.608957795004306, |
| "eval_loss": 0.4113547205924988, |
| "eval_runtime": 17.3668, |
| "eval_samples_per_second": 1094.27, |
| "eval_steps_per_second": 34.203, |
| "step": 15800 |
| }, |
| { |
| "epoch": 13.652024117140396, |
| "grad_norm": 0.5838480591773987, |
| "learning_rate": 0.00014539534883720932, |
| "loss": 0.340546875, |
| "step": 15850 |
| }, |
| { |
| "epoch": 13.652024117140396, |
| "eval_loss": 0.40785887837409973, |
| "eval_runtime": 16.4475, |
| "eval_samples_per_second": 1155.434, |
| "eval_steps_per_second": 36.115, |
| "step": 15850 |
| }, |
| { |
| "epoch": 13.695090439276486, |
| "grad_norm": 0.6009616851806641, |
| "learning_rate": 0.00014522308354866494, |
| "loss": 0.3441506958007812, |
| "step": 15900 |
| }, |
| { |
| "epoch": 13.695090439276486, |
| "eval_loss": 0.40609824657440186, |
| "eval_runtime": 17.4361, |
| "eval_samples_per_second": 1089.926, |
| "eval_steps_per_second": 34.067, |
| "step": 15900 |
| }, |
| { |
| "epoch": 13.738156761412576, |
| "grad_norm": 0.7058950066566467, |
| "learning_rate": 0.00014505081826012058, |
| "loss": 0.34030899047851565, |
| "step": 15950 |
| }, |
| { |
| "epoch": 13.738156761412576, |
| "eval_loss": 0.4107040464878082, |
| "eval_runtime": 17.0979, |
| "eval_samples_per_second": 1111.482, |
| "eval_steps_per_second": 34.741, |
| "step": 15950 |
| }, |
| { |
| "epoch": 13.781223083548666, |
| "grad_norm": 0.49852386116981506, |
| "learning_rate": 0.00014487855297157625, |
| "loss": 0.3450359344482422, |
| "step": 16000 |
| }, |
| { |
| "epoch": 13.781223083548666, |
| "eval_loss": 0.39904460310935974, |
| "eval_runtime": 17.2069, |
| "eval_samples_per_second": 1104.44, |
| "eval_steps_per_second": 34.521, |
| "step": 16000 |
| }, |
| { |
| "epoch": 13.824289405684755, |
| "grad_norm": 0.5838021636009216, |
| "learning_rate": 0.00014470628768303186, |
| "loss": 0.33926872253417967, |
| "step": 16050 |
| }, |
| { |
| "epoch": 13.824289405684755, |
| "eval_loss": 0.40694746375083923, |
| "eval_runtime": 17.3414, |
| "eval_samples_per_second": 1095.873, |
| "eval_steps_per_second": 34.253, |
| "step": 16050 |
| }, |
| { |
| "epoch": 13.867355727820843, |
| "grad_norm": 0.8022839426994324, |
| "learning_rate": 0.00014453402239448753, |
| "loss": 0.33638832092285154, |
| "step": 16100 |
| }, |
| { |
| "epoch": 13.867355727820843, |
| "eval_loss": 0.41297706961631775, |
| "eval_runtime": 17.5437, |
| "eval_samples_per_second": 1083.237, |
| "eval_steps_per_second": 33.858, |
| "step": 16100 |
| }, |
| { |
| "epoch": 13.910422049956933, |
| "grad_norm": 0.6754311919212341, |
| "learning_rate": 0.00014436175710594315, |
| "loss": 0.33161933898925783, |
| "step": 16150 |
| }, |
| { |
| "epoch": 13.910422049956933, |
| "eval_loss": 0.40214139223098755, |
| "eval_runtime": 17.4008, |
| "eval_samples_per_second": 1092.133, |
| "eval_steps_per_second": 34.136, |
| "step": 16150 |
| }, |
| { |
| "epoch": 13.953488372093023, |
| "grad_norm": 0.6820469498634338, |
| "learning_rate": 0.00014418949181739882, |
| "loss": 0.34790390014648437, |
| "step": 16200 |
| }, |
| { |
| "epoch": 13.953488372093023, |
| "eval_loss": 0.4014909863471985, |
| "eval_runtime": 17.2923, |
| "eval_samples_per_second": 1098.984, |
| "eval_steps_per_second": 34.35, |
| "step": 16200 |
| }, |
| { |
| "epoch": 13.996554694229113, |
| "grad_norm": 0.5553178787231445, |
| "learning_rate": 0.00014401722652885443, |
| "loss": 0.33886314392089845, |
| "step": 16250 |
| }, |
| { |
| "epoch": 13.996554694229113, |
| "eval_loss": 0.40084779262542725, |
| "eval_runtime": 17.3503, |
| "eval_samples_per_second": 1095.31, |
| "eval_steps_per_second": 34.236, |
| "step": 16250 |
| }, |
| { |
| "epoch": 14.039621016365203, |
| "grad_norm": 0.5039793252944946, |
| "learning_rate": 0.00014384496124031007, |
| "loss": 0.33563224792480467, |
| "step": 16300 |
| }, |
| { |
| "epoch": 14.039621016365203, |
| "eval_loss": 0.41018620133399963, |
| "eval_runtime": 16.7245, |
| "eval_samples_per_second": 1136.298, |
| "eval_steps_per_second": 35.517, |
| "step": 16300 |
| }, |
| { |
| "epoch": 14.082687338501293, |
| "grad_norm": 0.49403050541877747, |
| "learning_rate": 0.00014367269595176574, |
| "loss": 0.34347259521484375, |
| "step": 16350 |
| }, |
| { |
| "epoch": 14.082687338501293, |
| "eval_loss": 0.40463119745254517, |
| "eval_runtime": 17.326, |
| "eval_samples_per_second": 1096.846, |
| "eval_steps_per_second": 34.284, |
| "step": 16350 |
| }, |
| { |
| "epoch": 14.12575366063738, |
| "grad_norm": 0.6298602223396301, |
| "learning_rate": 0.00014350043066322136, |
| "loss": 0.33865066528320314, |
| "step": 16400 |
| }, |
| { |
| "epoch": 14.12575366063738, |
| "eval_loss": 0.40046370029449463, |
| "eval_runtime": 17.0738, |
| "eval_samples_per_second": 1113.053, |
| "eval_steps_per_second": 34.79, |
| "step": 16400 |
| }, |
| { |
| "epoch": 14.16881998277347, |
| "grad_norm": 0.611960232257843, |
| "learning_rate": 0.00014332816537467703, |
| "loss": 0.3326186370849609, |
| "step": 16450 |
| }, |
| { |
| "epoch": 14.16881998277347, |
| "eval_loss": 0.4022998511791229, |
| "eval_runtime": 17.3832, |
| "eval_samples_per_second": 1093.241, |
| "eval_steps_per_second": 34.171, |
| "step": 16450 |
| }, |
| { |
| "epoch": 14.21188630490956, |
| "grad_norm": 0.6012845039367676, |
| "learning_rate": 0.00014315590008613264, |
| "loss": 0.3416895294189453, |
| "step": 16500 |
| }, |
| { |
| "epoch": 14.21188630490956, |
| "eval_loss": 0.40356630086898804, |
| "eval_runtime": 17.3881, |
| "eval_samples_per_second": 1092.933, |
| "eval_steps_per_second": 34.161, |
| "step": 16500 |
| }, |
| { |
| "epoch": 14.25495262704565, |
| "grad_norm": 0.5431012511253357, |
| "learning_rate": 0.00014298363479758828, |
| "loss": 0.34037681579589846, |
| "step": 16550 |
| }, |
| { |
| "epoch": 14.25495262704565, |
| "eval_loss": 0.3986068665981293, |
| "eval_runtime": 17.3416, |
| "eval_samples_per_second": 1095.86, |
| "eval_steps_per_second": 34.253, |
| "step": 16550 |
| }, |
| { |
| "epoch": 14.29801894918174, |
| "grad_norm": 0.6223941445350647, |
| "learning_rate": 0.00014281136950904393, |
| "loss": 0.3397150802612305, |
| "step": 16600 |
| }, |
| { |
| "epoch": 14.29801894918174, |
| "eval_loss": 0.40138301253318787, |
| "eval_runtime": 17.4634, |
| "eval_samples_per_second": 1088.219, |
| "eval_steps_per_second": 34.014, |
| "step": 16600 |
| }, |
| { |
| "epoch": 14.34108527131783, |
| "grad_norm": 0.5051292181015015, |
| "learning_rate": 0.00014263910422049957, |
| "loss": 0.33477035522460935, |
| "step": 16650 |
| }, |
| { |
| "epoch": 14.34108527131783, |
| "eval_loss": 0.39406636357307434, |
| "eval_runtime": 17.4451, |
| "eval_samples_per_second": 1089.36, |
| "eval_steps_per_second": 34.05, |
| "step": 16650 |
| }, |
| { |
| "epoch": 14.384151593453918, |
| "grad_norm": 0.5401943922042847, |
| "learning_rate": 0.00014246683893195524, |
| "loss": 0.33589164733886717, |
| "step": 16700 |
| }, |
| { |
| "epoch": 14.384151593453918, |
| "eval_loss": 0.4036507308483124, |
| "eval_runtime": 17.2724, |
| "eval_samples_per_second": 1100.252, |
| "eval_steps_per_second": 34.39, |
| "step": 16700 |
| }, |
| { |
| "epoch": 14.427217915590008, |
| "grad_norm": 0.48140889406204224, |
| "learning_rate": 0.00014229457364341085, |
| "loss": 0.34063690185546874, |
| "step": 16750 |
| }, |
| { |
| "epoch": 14.427217915590008, |
| "eval_loss": 0.3962832987308502, |
| "eval_runtime": 17.325, |
| "eval_samples_per_second": 1096.914, |
| "eval_steps_per_second": 34.286, |
| "step": 16750 |
| }, |
| { |
| "epoch": 14.470284237726098, |
| "grad_norm": 0.48055461049079895, |
| "learning_rate": 0.0001421223083548665, |
| "loss": 0.33268959045410157, |
| "step": 16800 |
| }, |
| { |
| "epoch": 14.470284237726098, |
| "eval_loss": 0.40976482629776, |
| "eval_runtime": 17.4337, |
| "eval_samples_per_second": 1090.07, |
| "eval_steps_per_second": 34.072, |
| "step": 16800 |
| }, |
| { |
| "epoch": 14.513350559862188, |
| "grad_norm": 0.6199280023574829, |
| "learning_rate": 0.00014195004306632214, |
| "loss": 0.336192626953125, |
| "step": 16850 |
| }, |
| { |
| "epoch": 14.513350559862188, |
| "eval_loss": 0.4026069641113281, |
| "eval_runtime": 17.0752, |
| "eval_samples_per_second": 1112.96, |
| "eval_steps_per_second": 34.787, |
| "step": 16850 |
| }, |
| { |
| "epoch": 14.556416881998278, |
| "grad_norm": 0.5128330588340759, |
| "learning_rate": 0.00014177777777777778, |
| "loss": 0.3429254913330078, |
| "step": 16900 |
| }, |
| { |
| "epoch": 14.556416881998278, |
| "eval_loss": 0.40689241886138916, |
| "eval_runtime": 16.679, |
| "eval_samples_per_second": 1139.395, |
| "eval_steps_per_second": 35.614, |
| "step": 16900 |
| }, |
| { |
| "epoch": 14.599483204134367, |
| "grad_norm": 0.5781823992729187, |
| "learning_rate": 0.00014160551248923342, |
| "loss": 0.3337020111083984, |
| "step": 16950 |
| }, |
| { |
| "epoch": 14.599483204134367, |
| "eval_loss": 0.4006907641887665, |
| "eval_runtime": 17.6406, |
| "eval_samples_per_second": 1077.287, |
| "eval_steps_per_second": 33.672, |
| "step": 16950 |
| }, |
| { |
| "epoch": 14.642549526270457, |
| "grad_norm": 0.6032127737998962, |
| "learning_rate": 0.00014143324720068906, |
| "loss": 0.33814579010009765, |
| "step": 17000 |
| }, |
| { |
| "epoch": 14.642549526270457, |
| "eval_loss": 0.39685383439064026, |
| "eval_runtime": 17.4922, |
| "eval_samples_per_second": 1086.429, |
| "eval_steps_per_second": 33.958, |
| "step": 17000 |
| }, |
| { |
| "epoch": 14.685615848406545, |
| "grad_norm": 0.5460181832313538, |
| "learning_rate": 0.00014126098191214473, |
| "loss": 0.33991954803466795, |
| "step": 17050 |
| }, |
| { |
| "epoch": 14.685615848406545, |
| "eval_loss": 0.3965746760368347, |
| "eval_runtime": 17.49, |
| "eval_samples_per_second": 1086.562, |
| "eval_steps_per_second": 33.962, |
| "step": 17050 |
| }, |
| { |
| "epoch": 14.728682170542635, |
| "grad_norm": 0.5615427494049072, |
| "learning_rate": 0.00014108871662360035, |
| "loss": 0.3355008316040039, |
| "step": 17100 |
| }, |
| { |
| "epoch": 14.728682170542635, |
| "eval_loss": 0.4105238914489746, |
| "eval_runtime": 17.4493, |
| "eval_samples_per_second": 1089.1, |
| "eval_steps_per_second": 34.042, |
| "step": 17100 |
| }, |
| { |
| "epoch": 14.771748492678725, |
| "grad_norm": 0.5080145597457886, |
| "learning_rate": 0.000140916451335056, |
| "loss": 0.3328333282470703, |
| "step": 17150 |
| }, |
| { |
| "epoch": 14.771748492678725, |
| "eval_loss": 0.4033212959766388, |
| "eval_runtime": 17.5468, |
| "eval_samples_per_second": 1083.044, |
| "eval_steps_per_second": 33.852, |
| "step": 17150 |
| }, |
| { |
| "epoch": 14.814814814814815, |
| "grad_norm": 0.6350198984146118, |
| "learning_rate": 0.00014074418604651163, |
| "loss": 0.333209228515625, |
| "step": 17200 |
| }, |
| { |
| "epoch": 14.814814814814815, |
| "eval_loss": 0.40370309352874756, |
| "eval_runtime": 16.7941, |
| "eval_samples_per_second": 1131.585, |
| "eval_steps_per_second": 35.369, |
| "step": 17200 |
| }, |
| { |
| "epoch": 14.857881136950905, |
| "grad_norm": 0.49065691232681274, |
| "learning_rate": 0.00014057192075796727, |
| "loss": 0.3375306701660156, |
| "step": 17250 |
| }, |
| { |
| "epoch": 14.857881136950905, |
| "eval_loss": 0.40457409620285034, |
| "eval_runtime": 17.1664, |
| "eval_samples_per_second": 1107.045, |
| "eval_steps_per_second": 34.602, |
| "step": 17250 |
| }, |
| { |
| "epoch": 14.900947459086995, |
| "grad_norm": 0.4902515709400177, |
| "learning_rate": 0.00014039965546942292, |
| "loss": 0.34255237579345704, |
| "step": 17300 |
| }, |
| { |
| "epoch": 14.900947459086995, |
| "eval_loss": 0.4081343412399292, |
| "eval_runtime": 17.5525, |
| "eval_samples_per_second": 1082.697, |
| "eval_steps_per_second": 33.841, |
| "step": 17300 |
| }, |
| { |
| "epoch": 14.944013781223084, |
| "grad_norm": 0.5764937400817871, |
| "learning_rate": 0.00014022739018087856, |
| "loss": 0.34118736267089844, |
| "step": 17350 |
| }, |
| { |
| "epoch": 14.944013781223084, |
| "eval_loss": 0.40441158413887024, |
| "eval_runtime": 17.0335, |
| "eval_samples_per_second": 1115.685, |
| "eval_steps_per_second": 34.872, |
| "step": 17350 |
| }, |
| { |
| "epoch": 14.987080103359173, |
| "grad_norm": 0.6214373707771301, |
| "learning_rate": 0.0001400551248923342, |
| "loss": 0.33193634033203123, |
| "step": 17400 |
| }, |
| { |
| "epoch": 14.987080103359173, |
| "eval_loss": 0.4039756655693054, |
| "eval_runtime": 17.4751, |
| "eval_samples_per_second": 1087.49, |
| "eval_steps_per_second": 33.991, |
| "step": 17400 |
| }, |
| { |
| "epoch": 15.030146425495262, |
| "grad_norm": 0.5778855681419373, |
| "learning_rate": 0.00013988285960378984, |
| "loss": 0.3388556671142578, |
| "step": 17450 |
| }, |
| { |
| "epoch": 15.030146425495262, |
| "eval_loss": 0.39234092831611633, |
| "eval_runtime": 17.4139, |
| "eval_samples_per_second": 1091.31, |
| "eval_steps_per_second": 34.111, |
| "step": 17450 |
| }, |
| { |
| "epoch": 15.073212747631352, |
| "grad_norm": 0.5316303968429565, |
| "learning_rate": 0.00013971059431524548, |
| "loss": 0.33368736267089844, |
| "step": 17500 |
| }, |
| { |
| "epoch": 15.073212747631352, |
| "eval_loss": 0.4007318615913391, |
| "eval_runtime": 17.4159, |
| "eval_samples_per_second": 1091.187, |
| "eval_steps_per_second": 34.107, |
| "step": 17500 |
| }, |
| { |
| "epoch": 15.116279069767442, |
| "grad_norm": 0.5068018436431885, |
| "learning_rate": 0.00013953832902670113, |
| "loss": 0.32500762939453126, |
| "step": 17550 |
| }, |
| { |
| "epoch": 15.116279069767442, |
| "eval_loss": 0.39454713463783264, |
| "eval_runtime": 17.3134, |
| "eval_samples_per_second": 1097.646, |
| "eval_steps_per_second": 34.309, |
| "step": 17550 |
| }, |
| { |
| "epoch": 15.159345391903532, |
| "grad_norm": 0.6217418909072876, |
| "learning_rate": 0.00013936606373815677, |
| "loss": 0.33881416320800783, |
| "step": 17600 |
| }, |
| { |
| "epoch": 15.159345391903532, |
| "eval_loss": 0.3999788463115692, |
| "eval_runtime": 17.5144, |
| "eval_samples_per_second": 1085.049, |
| "eval_steps_per_second": 33.915, |
| "step": 17600 |
| }, |
| { |
| "epoch": 15.202411714039622, |
| "grad_norm": 0.6280364394187927, |
| "learning_rate": 0.0001391937984496124, |
| "loss": 0.33057609558105466, |
| "step": 17650 |
| }, |
| { |
| "epoch": 15.202411714039622, |
| "eval_loss": 0.40249961614608765, |
| "eval_runtime": 17.3541, |
| "eval_samples_per_second": 1095.072, |
| "eval_steps_per_second": 34.228, |
| "step": 17650 |
| }, |
| { |
| "epoch": 15.24547803617571, |
| "grad_norm": 0.5168995261192322, |
| "learning_rate": 0.00013902153316106805, |
| "loss": 0.3267534255981445, |
| "step": 17700 |
| }, |
| { |
| "epoch": 15.24547803617571, |
| "eval_loss": 0.4028068780899048, |
| "eval_runtime": 16.6035, |
| "eval_samples_per_second": 1144.58, |
| "eval_steps_per_second": 35.776, |
| "step": 17700 |
| }, |
| { |
| "epoch": 15.2885443583118, |
| "grad_norm": 0.6022219061851501, |
| "learning_rate": 0.0001388492678725237, |
| "loss": 0.33923439025878904, |
| "step": 17750 |
| }, |
| { |
| "epoch": 15.2885443583118, |
| "eval_loss": 0.3993185758590698, |
| "eval_runtime": 17.489, |
| "eval_samples_per_second": 1086.625, |
| "eval_steps_per_second": 33.964, |
| "step": 17750 |
| }, |
| { |
| "epoch": 15.33161068044789, |
| "grad_norm": 0.5371029376983643, |
| "learning_rate": 0.00013867700258397934, |
| "loss": 0.328874397277832, |
| "step": 17800 |
| }, |
| { |
| "epoch": 15.33161068044789, |
| "eval_loss": 0.3954143822193146, |
| "eval_runtime": 17.0921, |
| "eval_samples_per_second": 1111.859, |
| "eval_steps_per_second": 34.753, |
| "step": 17800 |
| }, |
| { |
| "epoch": 15.37467700258398, |
| "grad_norm": 0.6205661296844482, |
| "learning_rate": 0.00013850473729543498, |
| "loss": 0.33194568634033206, |
| "step": 17850 |
| }, |
| { |
| "epoch": 15.37467700258398, |
| "eval_loss": 0.39704281091690063, |
| "eval_runtime": 17.3119, |
| "eval_samples_per_second": 1097.74, |
| "eval_steps_per_second": 34.312, |
| "step": 17850 |
| }, |
| { |
| "epoch": 15.41774332472007, |
| "grad_norm": 0.571426272392273, |
| "learning_rate": 0.00013833247200689062, |
| "loss": 0.3300050735473633, |
| "step": 17900 |
| }, |
| { |
| "epoch": 15.41774332472007, |
| "eval_loss": 0.39621701836586, |
| "eval_runtime": 17.2981, |
| "eval_samples_per_second": 1098.619, |
| "eval_steps_per_second": 34.339, |
| "step": 17900 |
| }, |
| { |
| "epoch": 15.460809646856159, |
| "grad_norm": 0.7013274431228638, |
| "learning_rate": 0.00013816020671834626, |
| "loss": 0.3340178680419922, |
| "step": 17950 |
| }, |
| { |
| "epoch": 15.460809646856159, |
| "eval_loss": 0.39800071716308594, |
| "eval_runtime": 17.282, |
| "eval_samples_per_second": 1099.64, |
| "eval_steps_per_second": 34.371, |
| "step": 17950 |
| }, |
| { |
| "epoch": 15.503875968992247, |
| "grad_norm": 0.6281255483627319, |
| "learning_rate": 0.0001379879414298019, |
| "loss": 0.3254861068725586, |
| "step": 18000 |
| }, |
| { |
| "epoch": 15.503875968992247, |
| "eval_loss": 0.3988846242427826, |
| "eval_runtime": 17.3939, |
| "eval_samples_per_second": 1092.567, |
| "eval_steps_per_second": 34.15, |
| "step": 18000 |
| }, |
| { |
| "epoch": 15.546942291128337, |
| "grad_norm": 0.6308976411819458, |
| "learning_rate": 0.00013781567614125755, |
| "loss": 0.3320703887939453, |
| "step": 18050 |
| }, |
| { |
| "epoch": 15.546942291128337, |
| "eval_loss": 0.4004760682582855, |
| "eval_runtime": 17.4506, |
| "eval_samples_per_second": 1089.017, |
| "eval_steps_per_second": 34.039, |
| "step": 18050 |
| }, |
| { |
| "epoch": 15.590008613264427, |
| "grad_norm": 0.6034271121025085, |
| "learning_rate": 0.0001376434108527132, |
| "loss": 0.33000999450683594, |
| "step": 18100 |
| }, |
| { |
| "epoch": 15.590008613264427, |
| "eval_loss": 0.3943745791912079, |
| "eval_runtime": 17.2953, |
| "eval_samples_per_second": 1098.795, |
| "eval_steps_per_second": 34.345, |
| "step": 18100 |
| }, |
| { |
| "epoch": 15.633074935400517, |
| "grad_norm": 0.6983925104141235, |
| "learning_rate": 0.00013747114556416883, |
| "loss": 0.3292758178710937, |
| "step": 18150 |
| }, |
| { |
| "epoch": 15.633074935400517, |
| "eval_loss": 0.3915829658508301, |
| "eval_runtime": 16.4769, |
| "eval_samples_per_second": 1153.371, |
| "eval_steps_per_second": 36.05, |
| "step": 18150 |
| }, |
| { |
| "epoch": 15.676141257536607, |
| "grad_norm": 0.6047272682189941, |
| "learning_rate": 0.00013729888027562447, |
| "loss": 0.3290716171264648, |
| "step": 18200 |
| }, |
| { |
| "epoch": 15.676141257536607, |
| "eval_loss": 0.3990899324417114, |
| "eval_runtime": 18.1527, |
| "eval_samples_per_second": 1046.899, |
| "eval_steps_per_second": 32.722, |
| "step": 18200 |
| }, |
| { |
| "epoch": 15.719207579672696, |
| "grad_norm": 0.7523520588874817, |
| "learning_rate": 0.00013712661498708012, |
| "loss": 0.32717063903808596, |
| "step": 18250 |
| }, |
| { |
| "epoch": 15.719207579672696, |
| "eval_loss": 0.3920466899871826, |
| "eval_runtime": 17.4929, |
| "eval_samples_per_second": 1086.385, |
| "eval_steps_per_second": 33.957, |
| "step": 18250 |
| }, |
| { |
| "epoch": 15.762273901808786, |
| "grad_norm": 0.5970990061759949, |
| "learning_rate": 0.00013695434969853576, |
| "loss": 0.3242684936523437, |
| "step": 18300 |
| }, |
| { |
| "epoch": 15.762273901808786, |
| "eval_loss": 0.4024035930633545, |
| "eval_runtime": 16.4655, |
| "eval_samples_per_second": 1154.169, |
| "eval_steps_per_second": 36.075, |
| "step": 18300 |
| }, |
| { |
| "epoch": 15.805340223944874, |
| "grad_norm": 0.480525404214859, |
| "learning_rate": 0.00013678208440999137, |
| "loss": 0.3314341354370117, |
| "step": 18350 |
| }, |
| { |
| "epoch": 15.805340223944874, |
| "eval_loss": 0.3887929320335388, |
| "eval_runtime": 17.5894, |
| "eval_samples_per_second": 1080.422, |
| "eval_steps_per_second": 33.77, |
| "step": 18350 |
| }, |
| { |
| "epoch": 15.848406546080964, |
| "grad_norm": 0.5142746567726135, |
| "learning_rate": 0.00013660981912144704, |
| "loss": 0.32969032287597655, |
| "step": 18400 |
| }, |
| { |
| "epoch": 15.848406546080964, |
| "eval_loss": 0.3979225158691406, |
| "eval_runtime": 17.5068, |
| "eval_samples_per_second": 1085.522, |
| "eval_steps_per_second": 33.93, |
| "step": 18400 |
| }, |
| { |
| "epoch": 15.891472868217054, |
| "grad_norm": 0.5760953426361084, |
| "learning_rate": 0.00013643755383290268, |
| "loss": 0.3342143630981445, |
| "step": 18450 |
| }, |
| { |
| "epoch": 15.891472868217054, |
| "eval_loss": 0.3954464793205261, |
| "eval_runtime": 17.5214, |
| "eval_samples_per_second": 1084.614, |
| "eval_steps_per_second": 33.901, |
| "step": 18450 |
| }, |
| { |
| "epoch": 15.934539190353144, |
| "grad_norm": 0.5239692330360413, |
| "learning_rate": 0.00013626528854435833, |
| "loss": 0.33184822082519533, |
| "step": 18500 |
| }, |
| { |
| "epoch": 15.934539190353144, |
| "eval_loss": 0.39176249504089355, |
| "eval_runtime": 17.4381, |
| "eval_samples_per_second": 1089.798, |
| "eval_steps_per_second": 34.063, |
| "step": 18500 |
| }, |
| { |
| "epoch": 15.977605512489234, |
| "grad_norm": 0.5742284655570984, |
| "learning_rate": 0.00013609302325581397, |
| "loss": 0.33209632873535155, |
| "step": 18550 |
| }, |
| { |
| "epoch": 15.977605512489234, |
| "eval_loss": 0.4047396779060364, |
| "eval_runtime": 17.1881, |
| "eval_samples_per_second": 1105.65, |
| "eval_steps_per_second": 34.559, |
| "step": 18550 |
| }, |
| { |
| "epoch": 16.020671834625322, |
| "grad_norm": 0.5419143438339233, |
| "learning_rate": 0.00013592075796726958, |
| "loss": 0.3236275863647461, |
| "step": 18600 |
| }, |
| { |
| "epoch": 16.020671834625322, |
| "eval_loss": 0.3944310247898102, |
| "eval_runtime": 16.3521, |
| "eval_samples_per_second": 1162.172, |
| "eval_steps_per_second": 36.326, |
| "step": 18600 |
| }, |
| { |
| "epoch": 16.063738156761413, |
| "grad_norm": 0.4870966374874115, |
| "learning_rate": 0.00013574849267872525, |
| "loss": 0.3301403045654297, |
| "step": 18650 |
| }, |
| { |
| "epoch": 16.063738156761413, |
| "eval_loss": 0.39082440733909607, |
| "eval_runtime": 17.0017, |
| "eval_samples_per_second": 1117.773, |
| "eval_steps_per_second": 34.938, |
| "step": 18650 |
| }, |
| { |
| "epoch": 16.1068044788975, |
| "grad_norm": 0.51682448387146, |
| "learning_rate": 0.00013557622739018087, |
| "loss": 0.32326805114746093, |
| "step": 18700 |
| }, |
| { |
| "epoch": 16.1068044788975, |
| "eval_loss": 0.3971370756626129, |
| "eval_runtime": 17.09, |
| "eval_samples_per_second": 1111.997, |
| "eval_steps_per_second": 34.757, |
| "step": 18700 |
| }, |
| { |
| "epoch": 16.149870801033593, |
| "grad_norm": 0.5596668720245361, |
| "learning_rate": 0.00013540396210163654, |
| "loss": 0.3247249603271484, |
| "step": 18750 |
| }, |
| { |
| "epoch": 16.149870801033593, |
| "eval_loss": 0.39040490984916687, |
| "eval_runtime": 16.4002, |
| "eval_samples_per_second": 1158.763, |
| "eval_steps_per_second": 36.219, |
| "step": 18750 |
| }, |
| { |
| "epoch": 16.19293712316968, |
| "grad_norm": 0.5393404960632324, |
| "learning_rate": 0.00013523169681309218, |
| "loss": 0.3202825164794922, |
| "step": 18800 |
| }, |
| { |
| "epoch": 16.19293712316968, |
| "eval_loss": 0.38830479979515076, |
| "eval_runtime": 17.5879, |
| "eval_samples_per_second": 1080.518, |
| "eval_steps_per_second": 33.773, |
| "step": 18800 |
| }, |
| { |
| "epoch": 16.23600344530577, |
| "grad_norm": 0.5143821239471436, |
| "learning_rate": 0.00013505943152454782, |
| "loss": 0.3251231384277344, |
| "step": 18850 |
| }, |
| { |
| "epoch": 16.23600344530577, |
| "eval_loss": 0.3889082670211792, |
| "eval_runtime": 17.6982, |
| "eval_samples_per_second": 1073.781, |
| "eval_steps_per_second": 33.563, |
| "step": 18850 |
| }, |
| { |
| "epoch": 16.27906976744186, |
| "grad_norm": 0.49142956733703613, |
| "learning_rate": 0.00013488716623600346, |
| "loss": 0.3229839324951172, |
| "step": 18900 |
| }, |
| { |
| "epoch": 16.27906976744186, |
| "eval_loss": 0.3905869722366333, |
| "eval_runtime": 17.3362, |
| "eval_samples_per_second": 1096.206, |
| "eval_steps_per_second": 34.264, |
| "step": 18900 |
| }, |
| { |
| "epoch": 16.32213608957795, |
| "grad_norm": 0.5397304892539978, |
| "learning_rate": 0.00013471490094745908, |
| "loss": 0.33159187316894534, |
| "step": 18950 |
| }, |
| { |
| "epoch": 16.32213608957795, |
| "eval_loss": 0.39496833086013794, |
| "eval_runtime": 17.2737, |
| "eval_samples_per_second": 1100.169, |
| "eval_steps_per_second": 34.388, |
| "step": 18950 |
| }, |
| { |
| "epoch": 16.36520241171404, |
| "grad_norm": 0.5198789834976196, |
| "learning_rate": 0.00013454263565891475, |
| "loss": 0.3233955383300781, |
| "step": 19000 |
| }, |
| { |
| "epoch": 16.36520241171404, |
| "eval_loss": 0.3942340910434723, |
| "eval_runtime": 17.4115, |
| "eval_samples_per_second": 1091.465, |
| "eval_steps_per_second": 34.115, |
| "step": 19000 |
| }, |
| { |
| "epoch": 16.40826873385013, |
| "grad_norm": 0.6628881692886353, |
| "learning_rate": 0.00013437037037037036, |
| "loss": 0.3342235565185547, |
| "step": 19050 |
| }, |
| { |
| "epoch": 16.40826873385013, |
| "eval_loss": 0.3900792598724365, |
| "eval_runtime": 17.3181, |
| "eval_samples_per_second": 1097.349, |
| "eval_steps_per_second": 34.299, |
| "step": 19050 |
| }, |
| { |
| "epoch": 16.45133505598622, |
| "grad_norm": 0.44677937030792236, |
| "learning_rate": 0.00013419810508182603, |
| "loss": 0.3183754348754883, |
| "step": 19100 |
| }, |
| { |
| "epoch": 16.45133505598622, |
| "eval_loss": 0.39026835560798645, |
| "eval_runtime": 16.6126, |
| "eval_samples_per_second": 1143.954, |
| "eval_steps_per_second": 35.756, |
| "step": 19100 |
| }, |
| { |
| "epoch": 16.49440137812231, |
| "grad_norm": 0.7109096050262451, |
| "learning_rate": 0.00013402583979328167, |
| "loss": 0.33458770751953126, |
| "step": 19150 |
| }, |
| { |
| "epoch": 16.49440137812231, |
| "eval_loss": 0.39101794362068176, |
| "eval_runtime": 17.4189, |
| "eval_samples_per_second": 1090.998, |
| "eval_steps_per_second": 34.101, |
| "step": 19150 |
| }, |
| { |
| "epoch": 16.537467700258397, |
| "grad_norm": 0.5609311461448669, |
| "learning_rate": 0.0001338535745047373, |
| "loss": 0.32187355041503907, |
| "step": 19200 |
| }, |
| { |
| "epoch": 16.537467700258397, |
| "eval_loss": 0.3992527425289154, |
| "eval_runtime": 17.3996, |
| "eval_samples_per_second": 1092.209, |
| "eval_steps_per_second": 34.139, |
| "step": 19200 |
| }, |
| { |
| "epoch": 16.580534022394488, |
| "grad_norm": 0.545501708984375, |
| "learning_rate": 0.00013368130921619296, |
| "loss": 0.329481315612793, |
| "step": 19250 |
| }, |
| { |
| "epoch": 16.580534022394488, |
| "eval_loss": 0.39770665764808655, |
| "eval_runtime": 17.1122, |
| "eval_samples_per_second": 1110.554, |
| "eval_steps_per_second": 34.712, |
| "step": 19250 |
| }, |
| { |
| "epoch": 16.623600344530576, |
| "grad_norm": 0.5404015779495239, |
| "learning_rate": 0.00013350904392764857, |
| "loss": 0.3238807678222656, |
| "step": 19300 |
| }, |
| { |
| "epoch": 16.623600344530576, |
| "eval_loss": 0.39125141501426697, |
| "eval_runtime": 17.1581, |
| "eval_samples_per_second": 1107.581, |
| "eval_steps_per_second": 34.619, |
| "step": 19300 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.5957887768745422, |
| "learning_rate": 0.00013333677863910424, |
| "loss": 0.3259474945068359, |
| "step": 19350 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "eval_loss": 0.392182856798172, |
| "eval_runtime": 17.339, |
| "eval_samples_per_second": 1096.028, |
| "eval_steps_per_second": 34.258, |
| "step": 19350 |
| }, |
| { |
| "epoch": 16.709732988802756, |
| "grad_norm": 0.5888888239860535, |
| "learning_rate": 0.00013316451335055986, |
| "loss": 0.32502166748046873, |
| "step": 19400 |
| }, |
| { |
| "epoch": 16.709732988802756, |
| "eval_loss": 0.38831356167793274, |
| "eval_runtime": 19.0354, |
| "eval_samples_per_second": 998.352, |
| "eval_steps_per_second": 31.205, |
| "step": 19400 |
| }, |
| { |
| "epoch": 16.752799310938848, |
| "grad_norm": 0.5550042986869812, |
| "learning_rate": 0.0001329922480620155, |
| "loss": 0.3259937286376953, |
| "step": 19450 |
| }, |
| { |
| "epoch": 16.752799310938848, |
| "eval_loss": 0.39002206921577454, |
| "eval_runtime": 18.2055, |
| "eval_samples_per_second": 1043.861, |
| "eval_steps_per_second": 32.628, |
| "step": 19450 |
| }, |
| { |
| "epoch": 16.795865633074936, |
| "grad_norm": 0.5127618908882141, |
| "learning_rate": 0.00013281998277347114, |
| "loss": 0.32699764251708985, |
| "step": 19500 |
| }, |
| { |
| "epoch": 16.795865633074936, |
| "eval_loss": 0.3960529863834381, |
| "eval_runtime": 17.4398, |
| "eval_samples_per_second": 1089.69, |
| "eval_steps_per_second": 34.06, |
| "step": 19500 |
| }, |
| { |
| "epoch": 16.838931955211024, |
| "grad_norm": 0.4416603446006775, |
| "learning_rate": 0.00013264771748492678, |
| "loss": 0.32539680480957034, |
| "step": 19550 |
| }, |
| { |
| "epoch": 16.838931955211024, |
| "eval_loss": 0.39588266611099243, |
| "eval_runtime": 18.3991, |
| "eval_samples_per_second": 1032.874, |
| "eval_steps_per_second": 32.284, |
| "step": 19550 |
| }, |
| { |
| "epoch": 16.881998277347115, |
| "grad_norm": 0.5327856540679932, |
| "learning_rate": 0.00013247545219638245, |
| "loss": 0.3237502288818359, |
| "step": 19600 |
| }, |
| { |
| "epoch": 16.881998277347115, |
| "eval_loss": 0.39336878061294556, |
| "eval_runtime": 17.153, |
| "eval_samples_per_second": 1107.913, |
| "eval_steps_per_second": 34.63, |
| "step": 19600 |
| }, |
| { |
| "epoch": 16.925064599483203, |
| "grad_norm": 0.5136561989784241, |
| "learning_rate": 0.00013230318690783807, |
| "loss": 0.33170738220214846, |
| "step": 19650 |
| }, |
| { |
| "epoch": 16.925064599483203, |
| "eval_loss": 0.39044713973999023, |
| "eval_runtime": 16.8605, |
| "eval_samples_per_second": 1127.13, |
| "eval_steps_per_second": 35.23, |
| "step": 19650 |
| }, |
| { |
| "epoch": 16.968130921619295, |
| "grad_norm": 0.6515547037124634, |
| "learning_rate": 0.00013213092161929374, |
| "loss": 0.3308847427368164, |
| "step": 19700 |
| }, |
| { |
| "epoch": 16.968130921619295, |
| "eval_loss": 0.39422011375427246, |
| "eval_runtime": 17.4151, |
| "eval_samples_per_second": 1091.239, |
| "eval_steps_per_second": 34.108, |
| "step": 19700 |
| }, |
| { |
| "epoch": 17.011197243755383, |
| "grad_norm": 0.6061251163482666, |
| "learning_rate": 0.00013195865633074935, |
| "loss": 0.31996837615966794, |
| "step": 19750 |
| }, |
| { |
| "epoch": 17.011197243755383, |
| "eval_loss": 0.38462579250335693, |
| "eval_runtime": 17.2169, |
| "eval_samples_per_second": 1103.799, |
| "eval_steps_per_second": 34.501, |
| "step": 19750 |
| }, |
| { |
| "epoch": 17.05426356589147, |
| "grad_norm": 0.5053262114524841, |
| "learning_rate": 0.000131786391042205, |
| "loss": 0.32310577392578127, |
| "step": 19800 |
| }, |
| { |
| "epoch": 17.05426356589147, |
| "eval_loss": 0.38237592577934265, |
| "eval_runtime": 16.7211, |
| "eval_samples_per_second": 1136.529, |
| "eval_steps_per_second": 35.524, |
| "step": 19800 |
| }, |
| { |
| "epoch": 17.097329888027563, |
| "grad_norm": 0.5430858731269836, |
| "learning_rate": 0.00013161412575366064, |
| "loss": 0.3208709716796875, |
| "step": 19850 |
| }, |
| { |
| "epoch": 17.097329888027563, |
| "eval_loss": 0.3853854537010193, |
| "eval_runtime": 17.5253, |
| "eval_samples_per_second": 1084.373, |
| "eval_steps_per_second": 33.894, |
| "step": 19850 |
| }, |
| { |
| "epoch": 17.14039621016365, |
| "grad_norm": 0.6029484868049622, |
| "learning_rate": 0.00013144186046511628, |
| "loss": 0.33539260864257814, |
| "step": 19900 |
| }, |
| { |
| "epoch": 17.14039621016365, |
| "eval_loss": 0.3901008367538452, |
| "eval_runtime": 17.3659, |
| "eval_samples_per_second": 1094.326, |
| "eval_steps_per_second": 34.205, |
| "step": 19900 |
| }, |
| { |
| "epoch": 17.183462532299743, |
| "grad_norm": 0.5201014876365662, |
| "learning_rate": 0.00013126959517657195, |
| "loss": 0.3294208908081055, |
| "step": 19950 |
| }, |
| { |
| "epoch": 17.183462532299743, |
| "eval_loss": 0.38815146684646606, |
| "eval_runtime": 17.2059, |
| "eval_samples_per_second": 1104.504, |
| "eval_steps_per_second": 34.523, |
| "step": 19950 |
| }, |
| { |
| "epoch": 17.22652885443583, |
| "grad_norm": 0.501994788646698, |
| "learning_rate": 0.00013109732988802756, |
| "loss": 0.32511749267578127, |
| "step": 20000 |
| }, |
| { |
| "epoch": 17.22652885443583, |
| "eval_loss": 0.38529202342033386, |
| "eval_runtime": 17.3501, |
| "eval_samples_per_second": 1095.327, |
| "eval_steps_per_second": 34.236, |
| "step": 20000 |
| }, |
| { |
| "epoch": 17.269595176571922, |
| "grad_norm": 0.5027745366096497, |
| "learning_rate": 0.0001309250645994832, |
| "loss": 0.31986518859863283, |
| "step": 20050 |
| }, |
| { |
| "epoch": 17.269595176571922, |
| "eval_loss": 0.391549289226532, |
| "eval_runtime": 17.1948, |
| "eval_samples_per_second": 1105.216, |
| "eval_steps_per_second": 34.545, |
| "step": 20050 |
| }, |
| { |
| "epoch": 17.31266149870801, |
| "grad_norm": 0.6108536720275879, |
| "learning_rate": 0.00013075279931093885, |
| "loss": 0.3184634971618652, |
| "step": 20100 |
| }, |
| { |
| "epoch": 17.31266149870801, |
| "eval_loss": 0.38223689794540405, |
| "eval_runtime": 16.5133, |
| "eval_samples_per_second": 1150.827, |
| "eval_steps_per_second": 35.971, |
| "step": 20100 |
| }, |
| { |
| "epoch": 17.3557278208441, |
| "grad_norm": 0.5198239088058472, |
| "learning_rate": 0.0001305805340223945, |
| "loss": 0.3237266540527344, |
| "step": 20150 |
| }, |
| { |
| "epoch": 17.3557278208441, |
| "eval_loss": 0.38378673791885376, |
| "eval_runtime": 17.0237, |
| "eval_samples_per_second": 1116.326, |
| "eval_steps_per_second": 34.893, |
| "step": 20150 |
| }, |
| { |
| "epoch": 17.39879414298019, |
| "grad_norm": 0.5638048052787781, |
| "learning_rate": 0.00013040826873385013, |
| "loss": 0.32793220520019534, |
| "step": 20200 |
| }, |
| { |
| "epoch": 17.39879414298019, |
| "eval_loss": 0.3887383043766022, |
| "eval_runtime": 17.368, |
| "eval_samples_per_second": 1094.196, |
| "eval_steps_per_second": 34.201, |
| "step": 20200 |
| }, |
| { |
| "epoch": 17.441860465116278, |
| "grad_norm": 0.5240247845649719, |
| "learning_rate": 0.00013023600344530577, |
| "loss": 0.3263576889038086, |
| "step": 20250 |
| }, |
| { |
| "epoch": 17.441860465116278, |
| "eval_loss": 0.3850667476654053, |
| "eval_runtime": 16.5853, |
| "eval_samples_per_second": 1145.832, |
| "eval_steps_per_second": 35.815, |
| "step": 20250 |
| }, |
| { |
| "epoch": 17.48492678725237, |
| "grad_norm": 0.4876612424850464, |
| "learning_rate": 0.00013006373815676141, |
| "loss": 0.321444091796875, |
| "step": 20300 |
| }, |
| { |
| "epoch": 17.48492678725237, |
| "eval_loss": 0.3869178295135498, |
| "eval_runtime": 17.3704, |
| "eval_samples_per_second": 1094.042, |
| "eval_steps_per_second": 34.196, |
| "step": 20300 |
| }, |
| { |
| "epoch": 17.527993109388458, |
| "grad_norm": 0.4462520182132721, |
| "learning_rate": 0.00012989147286821706, |
| "loss": 0.31568416595458987, |
| "step": 20350 |
| }, |
| { |
| "epoch": 17.527993109388458, |
| "eval_loss": 0.38926127552986145, |
| "eval_runtime": 17.5271, |
| "eval_samples_per_second": 1084.261, |
| "eval_steps_per_second": 33.89, |
| "step": 20350 |
| }, |
| { |
| "epoch": 17.57105943152455, |
| "grad_norm": 0.44649818539619446, |
| "learning_rate": 0.0001297192075796727, |
| "loss": 0.31538238525390627, |
| "step": 20400 |
| }, |
| { |
| "epoch": 17.57105943152455, |
| "eval_loss": 0.38832584023475647, |
| "eval_runtime": 17.429, |
| "eval_samples_per_second": 1090.364, |
| "eval_steps_per_second": 34.081, |
| "step": 20400 |
| }, |
| { |
| "epoch": 17.614125753660637, |
| "grad_norm": 0.5380814671516418, |
| "learning_rate": 0.00012954694229112834, |
| "loss": 0.3133953857421875, |
| "step": 20450 |
| }, |
| { |
| "epoch": 17.614125753660637, |
| "eval_loss": 0.3826683461666107, |
| "eval_runtime": 17.128, |
| "eval_samples_per_second": 1109.526, |
| "eval_steps_per_second": 34.68, |
| "step": 20450 |
| }, |
| { |
| "epoch": 17.657192075796726, |
| "grad_norm": 0.529960036277771, |
| "learning_rate": 0.00012937467700258398, |
| "loss": 0.32620044708251955, |
| "step": 20500 |
| }, |
| { |
| "epoch": 17.657192075796726, |
| "eval_loss": 0.392609566450119, |
| "eval_runtime": 17.057, |
| "eval_samples_per_second": 1114.15, |
| "eval_steps_per_second": 34.825, |
| "step": 20500 |
| }, |
| { |
| "epoch": 17.700258397932817, |
| "grad_norm": 0.5545978546142578, |
| "learning_rate": 0.00012920241171403962, |
| "loss": 0.3222107696533203, |
| "step": 20550 |
| }, |
| { |
| "epoch": 17.700258397932817, |
| "eval_loss": 0.39162397384643555, |
| "eval_runtime": 17.1676, |
| "eval_samples_per_second": 1106.969, |
| "eval_steps_per_second": 34.6, |
| "step": 20550 |
| }, |
| { |
| "epoch": 17.743324720068905, |
| "grad_norm": 0.4985523819923401, |
| "learning_rate": 0.00012903014642549527, |
| "loss": 0.3192410469055176, |
| "step": 20600 |
| }, |
| { |
| "epoch": 17.743324720068905, |
| "eval_loss": 0.3959045708179474, |
| "eval_runtime": 16.4739, |
| "eval_samples_per_second": 1153.579, |
| "eval_steps_per_second": 36.057, |
| "step": 20600 |
| }, |
| { |
| "epoch": 17.786391042204997, |
| "grad_norm": 0.5570954084396362, |
| "learning_rate": 0.0001288578811369509, |
| "loss": 0.3183399200439453, |
| "step": 20650 |
| }, |
| { |
| "epoch": 17.786391042204997, |
| "eval_loss": 0.3895127773284912, |
| "eval_runtime": 17.7317, |
| "eval_samples_per_second": 1071.752, |
| "eval_steps_per_second": 33.499, |
| "step": 20650 |
| }, |
| { |
| "epoch": 17.829457364341085, |
| "grad_norm": 0.5547237396240234, |
| "learning_rate": 0.00012868561584840655, |
| "loss": 0.32538810729980466, |
| "step": 20700 |
| }, |
| { |
| "epoch": 17.829457364341085, |
| "eval_loss": 0.3861739933490753, |
| "eval_runtime": 17.6484, |
| "eval_samples_per_second": 1076.813, |
| "eval_steps_per_second": 33.657, |
| "step": 20700 |
| }, |
| { |
| "epoch": 17.872523686477173, |
| "grad_norm": 0.447329044342041, |
| "learning_rate": 0.0001285133505598622, |
| "loss": 0.3192084121704102, |
| "step": 20750 |
| }, |
| { |
| "epoch": 17.872523686477173, |
| "eval_loss": 0.3876189887523651, |
| "eval_runtime": 17.7022, |
| "eval_samples_per_second": 1073.537, |
| "eval_steps_per_second": 33.555, |
| "step": 20750 |
| }, |
| { |
| "epoch": 17.915590008613265, |
| "grad_norm": 0.5152373313903809, |
| "learning_rate": 0.00012834108527131784, |
| "loss": 0.3196234130859375, |
| "step": 20800 |
| }, |
| { |
| "epoch": 17.915590008613265, |
| "eval_loss": 0.3957338035106659, |
| "eval_runtime": 17.166, |
| "eval_samples_per_second": 1107.071, |
| "eval_steps_per_second": 34.603, |
| "step": 20800 |
| }, |
| { |
| "epoch": 17.958656330749353, |
| "grad_norm": 0.6933789253234863, |
| "learning_rate": 0.00012816881998277348, |
| "loss": 0.32067276000976563, |
| "step": 20850 |
| }, |
| { |
| "epoch": 17.958656330749353, |
| "eval_loss": 0.3911714255809784, |
| "eval_runtime": 17.1329, |
| "eval_samples_per_second": 1109.209, |
| "eval_steps_per_second": 34.67, |
| "step": 20850 |
| }, |
| { |
| "epoch": 18.001722652885444, |
| "grad_norm": 0.5589037537574768, |
| "learning_rate": 0.00012799655469422912, |
| "loss": 0.3197665214538574, |
| "step": 20900 |
| }, |
| { |
| "epoch": 18.001722652885444, |
| "eval_loss": 0.3835658133029938, |
| "eval_runtime": 17.9174, |
| "eval_samples_per_second": 1060.645, |
| "eval_steps_per_second": 33.152, |
| "step": 20900 |
| }, |
| { |
| "epoch": 18.044788975021532, |
| "grad_norm": 0.506934642791748, |
| "learning_rate": 0.00012782428940568476, |
| "loss": 0.30830619812011717, |
| "step": 20950 |
| }, |
| { |
| "epoch": 18.044788975021532, |
| "eval_loss": 0.386690229177475, |
| "eval_runtime": 16.7282, |
| "eval_samples_per_second": 1136.048, |
| "eval_steps_per_second": 35.509, |
| "step": 20950 |
| }, |
| { |
| "epoch": 18.087855297157624, |
| "grad_norm": 0.5666943788528442, |
| "learning_rate": 0.0001276520241171404, |
| "loss": 0.31840274810791014, |
| "step": 21000 |
| }, |
| { |
| "epoch": 18.087855297157624, |
| "eval_loss": 0.3852052390575409, |
| "eval_runtime": 17.2738, |
| "eval_samples_per_second": 1100.166, |
| "eval_steps_per_second": 34.387, |
| "step": 21000 |
| }, |
| { |
| "epoch": 18.130921619293712, |
| "grad_norm": 0.4481973648071289, |
| "learning_rate": 0.00012747975882859605, |
| "loss": 0.31186540603637697, |
| "step": 21050 |
| }, |
| { |
| "epoch": 18.130921619293712, |
| "eval_loss": 0.3922789394855499, |
| "eval_runtime": 17.4131, |
| "eval_samples_per_second": 1091.36, |
| "eval_steps_per_second": 34.112, |
| "step": 21050 |
| }, |
| { |
| "epoch": 18.1739879414298, |
| "grad_norm": 0.4946073889732361, |
| "learning_rate": 0.0001273074935400517, |
| "loss": 0.31672819137573244, |
| "step": 21100 |
| }, |
| { |
| "epoch": 18.1739879414298, |
| "eval_loss": 0.38604193925857544, |
| "eval_runtime": 16.9427, |
| "eval_samples_per_second": 1121.666, |
| "eval_steps_per_second": 35.059, |
| "step": 21100 |
| }, |
| { |
| "epoch": 18.217054263565892, |
| "grad_norm": 0.6280601024627686, |
| "learning_rate": 0.00012713522825150733, |
| "loss": 0.31234973907470703, |
| "step": 21150 |
| }, |
| { |
| "epoch": 18.217054263565892, |
| "eval_loss": 0.3931697905063629, |
| "eval_runtime": 17.1136, |
| "eval_samples_per_second": 1110.463, |
| "eval_steps_per_second": 34.709, |
| "step": 21150 |
| }, |
| { |
| "epoch": 18.26012058570198, |
| "grad_norm": 0.6457147598266602, |
| "learning_rate": 0.00012696296296296297, |
| "loss": 0.3120527648925781, |
| "step": 21200 |
| }, |
| { |
| "epoch": 18.26012058570198, |
| "eval_loss": 0.3861483931541443, |
| "eval_runtime": 17.1111, |
| "eval_samples_per_second": 1110.622, |
| "eval_steps_per_second": 34.714, |
| "step": 21200 |
| }, |
| { |
| "epoch": 18.30318690783807, |
| "grad_norm": 0.6117618680000305, |
| "learning_rate": 0.00012679069767441861, |
| "loss": 0.3122466850280762, |
| "step": 21250 |
| }, |
| { |
| "epoch": 18.30318690783807, |
| "eval_loss": 0.38948336243629456, |
| "eval_runtime": 16.7751, |
| "eval_samples_per_second": 1132.872, |
| "eval_steps_per_second": 35.41, |
| "step": 21250 |
| }, |
| { |
| "epoch": 18.34625322997416, |
| "grad_norm": 0.5237821340560913, |
| "learning_rate": 0.00012661843238587426, |
| "loss": 0.3228919219970703, |
| "step": 21300 |
| }, |
| { |
| "epoch": 18.34625322997416, |
| "eval_loss": 0.3906412124633789, |
| "eval_runtime": 16.684, |
| "eval_samples_per_second": 1139.054, |
| "eval_steps_per_second": 35.603, |
| "step": 21300 |
| }, |
| { |
| "epoch": 18.38931955211025, |
| "grad_norm": 0.5655460953712463, |
| "learning_rate": 0.0001264461670973299, |
| "loss": 0.3129468154907227, |
| "step": 21350 |
| }, |
| { |
| "epoch": 18.38931955211025, |
| "eval_loss": 0.38534438610076904, |
| "eval_runtime": 16.7814, |
| "eval_samples_per_second": 1132.447, |
| "eval_steps_per_second": 35.396, |
| "step": 21350 |
| }, |
| { |
| "epoch": 18.43238587424634, |
| "grad_norm": 0.5534801483154297, |
| "learning_rate": 0.00012627390180878554, |
| "loss": 0.3185391616821289, |
| "step": 21400 |
| }, |
| { |
| "epoch": 18.43238587424634, |
| "eval_loss": 0.391643762588501, |
| "eval_runtime": 16.3245, |
| "eval_samples_per_second": 1164.138, |
| "eval_steps_per_second": 36.387, |
| "step": 21400 |
| }, |
| { |
| "epoch": 18.475452196382427, |
| "grad_norm": 0.5594217777252197, |
| "learning_rate": 0.00012610163652024118, |
| "loss": 0.321013298034668, |
| "step": 21450 |
| }, |
| { |
| "epoch": 18.475452196382427, |
| "eval_loss": 0.38839325308799744, |
| "eval_runtime": 16.6874, |
| "eval_samples_per_second": 1138.825, |
| "eval_steps_per_second": 35.596, |
| "step": 21450 |
| }, |
| { |
| "epoch": 18.51851851851852, |
| "grad_norm": 0.47908687591552734, |
| "learning_rate": 0.00012592937123169682, |
| "loss": 0.31948158264160154, |
| "step": 21500 |
| }, |
| { |
| "epoch": 18.51851851851852, |
| "eval_loss": 0.38242292404174805, |
| "eval_runtime": 16.8044, |
| "eval_samples_per_second": 1130.893, |
| "eval_steps_per_second": 35.348, |
| "step": 21500 |
| }, |
| { |
| "epoch": 18.561584840654607, |
| "grad_norm": 0.43603160977363586, |
| "learning_rate": 0.00012575710594315247, |
| "loss": 0.31390554428100587, |
| "step": 21550 |
| }, |
| { |
| "epoch": 18.561584840654607, |
| "eval_loss": 0.37871894240379333, |
| "eval_runtime": 16.1361, |
| "eval_samples_per_second": 1177.73, |
| "eval_steps_per_second": 36.812, |
| "step": 21550 |
| }, |
| { |
| "epoch": 18.6046511627907, |
| "grad_norm": 0.5649058222770691, |
| "learning_rate": 0.00012558484065460808, |
| "loss": 0.3198824691772461, |
| "step": 21600 |
| }, |
| { |
| "epoch": 18.6046511627907, |
| "eval_loss": 0.38102349638938904, |
| "eval_runtime": 16.6623, |
| "eval_samples_per_second": 1140.537, |
| "eval_steps_per_second": 35.649, |
| "step": 21600 |
| }, |
| { |
| "epoch": 18.647717484926787, |
| "grad_norm": 0.4506802558898926, |
| "learning_rate": 0.00012541257536606375, |
| "loss": 0.31376161575317385, |
| "step": 21650 |
| }, |
| { |
| "epoch": 18.647717484926787, |
| "eval_loss": 0.38414791226387024, |
| "eval_runtime": 16.5267, |
| "eval_samples_per_second": 1149.899, |
| "eval_steps_per_second": 35.942, |
| "step": 21650 |
| }, |
| { |
| "epoch": 18.69078380706288, |
| "grad_norm": 0.4440782368183136, |
| "learning_rate": 0.0001252403100775194, |
| "loss": 0.3159283447265625, |
| "step": 21700 |
| }, |
| { |
| "epoch": 18.69078380706288, |
| "eval_loss": 0.37915998697280884, |
| "eval_runtime": 16.2176, |
| "eval_samples_per_second": 1171.81, |
| "eval_steps_per_second": 36.627, |
| "step": 21700 |
| }, |
| { |
| "epoch": 18.733850129198967, |
| "grad_norm": 0.6125892400741577, |
| "learning_rate": 0.00012506804478897504, |
| "loss": 0.3128934097290039, |
| "step": 21750 |
| }, |
| { |
| "epoch": 18.733850129198967, |
| "eval_loss": 0.38138166069984436, |
| "eval_runtime": 15.9943, |
| "eval_samples_per_second": 1188.175, |
| "eval_steps_per_second": 37.138, |
| "step": 21750 |
| }, |
| { |
| "epoch": 18.776916451335055, |
| "grad_norm": 0.5350700616836548, |
| "learning_rate": 0.00012489577950043068, |
| "loss": 0.31795137405395507, |
| "step": 21800 |
| }, |
| { |
| "epoch": 18.776916451335055, |
| "eval_loss": 0.38072770833969116, |
| "eval_runtime": 17.0595, |
| "eval_samples_per_second": 1113.985, |
| "eval_steps_per_second": 34.819, |
| "step": 21800 |
| }, |
| { |
| "epoch": 18.819982773471146, |
| "grad_norm": 0.6350408792495728, |
| "learning_rate": 0.0001247235142118863, |
| "loss": 0.3134295463562012, |
| "step": 21850 |
| }, |
| { |
| "epoch": 18.819982773471146, |
| "eval_loss": 0.3758614957332611, |
| "eval_runtime": 17.4302, |
| "eval_samples_per_second": 1090.29, |
| "eval_steps_per_second": 34.079, |
| "step": 21850 |
| }, |
| { |
| "epoch": 18.863049095607234, |
| "grad_norm": 0.5005162954330444, |
| "learning_rate": 0.00012455124892334196, |
| "loss": 0.32502140045166017, |
| "step": 21900 |
| }, |
| { |
| "epoch": 18.863049095607234, |
| "eval_loss": 0.38265460729599, |
| "eval_runtime": 16.6214, |
| "eval_samples_per_second": 1143.347, |
| "eval_steps_per_second": 35.737, |
| "step": 21900 |
| }, |
| { |
| "epoch": 18.906115417743326, |
| "grad_norm": 0.5520798563957214, |
| "learning_rate": 0.00012437898363479758, |
| "loss": 0.3212323760986328, |
| "step": 21950 |
| }, |
| { |
| "epoch": 18.906115417743326, |
| "eval_loss": 0.3830348253250122, |
| "eval_runtime": 17.3028, |
| "eval_samples_per_second": 1098.317, |
| "eval_steps_per_second": 34.33, |
| "step": 21950 |
| }, |
| { |
| "epoch": 18.949181739879414, |
| "grad_norm": 0.41653236746788025, |
| "learning_rate": 0.00012420671834625325, |
| "loss": 0.3083462142944336, |
| "step": 22000 |
| }, |
| { |
| "epoch": 18.949181739879414, |
| "eval_loss": 0.3885103762149811, |
| "eval_runtime": 17.2255, |
| "eval_samples_per_second": 1103.246, |
| "eval_steps_per_second": 34.484, |
| "step": 22000 |
| }, |
| { |
| "epoch": 18.992248062015506, |
| "grad_norm": 0.42078638076782227, |
| "learning_rate": 0.0001240344530577089, |
| "loss": 0.3157729721069336, |
| "step": 22050 |
| }, |
| { |
| "epoch": 18.992248062015506, |
| "eval_loss": 0.38450390100479126, |
| "eval_runtime": 17.1782, |
| "eval_samples_per_second": 1106.283, |
| "eval_steps_per_second": 34.579, |
| "step": 22050 |
| }, |
| { |
| "epoch": 19.035314384151594, |
| "grad_norm": 0.4940221607685089, |
| "learning_rate": 0.00012386218776916453, |
| "loss": 0.3200815963745117, |
| "step": 22100 |
| }, |
| { |
| "epoch": 19.035314384151594, |
| "eval_loss": 0.3834143579006195, |
| "eval_runtime": 17.3493, |
| "eval_samples_per_second": 1095.375, |
| "eval_steps_per_second": 34.238, |
| "step": 22100 |
| }, |
| { |
| "epoch": 19.078380706287682, |
| "grad_norm": 0.5358327031135559, |
| "learning_rate": 0.00012368992248062017, |
| "loss": 0.3109498596191406, |
| "step": 22150 |
| }, |
| { |
| "epoch": 19.078380706287682, |
| "eval_loss": 0.38372257351875305, |
| "eval_runtime": 17.6251, |
| "eval_samples_per_second": 1078.233, |
| "eval_steps_per_second": 33.702, |
| "step": 22150 |
| }, |
| { |
| "epoch": 19.121447028423773, |
| "grad_norm": 0.5010002255439758, |
| "learning_rate": 0.0001235176571920758, |
| "loss": 0.30669967651367186, |
| "step": 22200 |
| }, |
| { |
| "epoch": 19.121447028423773, |
| "eval_loss": 0.3840700387954712, |
| "eval_runtime": 16.9058, |
| "eval_samples_per_second": 1124.114, |
| "eval_steps_per_second": 35.136, |
| "step": 22200 |
| }, |
| { |
| "epoch": 19.16451335055986, |
| "grad_norm": 0.514018714427948, |
| "learning_rate": 0.00012334539190353146, |
| "loss": 0.3106396293640137, |
| "step": 22250 |
| }, |
| { |
| "epoch": 19.16451335055986, |
| "eval_loss": 0.3840588927268982, |
| "eval_runtime": 17.0702, |
| "eval_samples_per_second": 1113.284, |
| "eval_steps_per_second": 34.797, |
| "step": 22250 |
| }, |
| { |
| "epoch": 19.207579672695953, |
| "grad_norm": 0.4882144033908844, |
| "learning_rate": 0.00012317312661498707, |
| "loss": 0.31231227874755857, |
| "step": 22300 |
| }, |
| { |
| "epoch": 19.207579672695953, |
| "eval_loss": 0.3815734088420868, |
| "eval_runtime": 17.1064, |
| "eval_samples_per_second": 1110.929, |
| "eval_steps_per_second": 34.724, |
| "step": 22300 |
| }, |
| { |
| "epoch": 19.25064599483204, |
| "grad_norm": 0.4594323933124542, |
| "learning_rate": 0.00012300086132644274, |
| "loss": 0.3108772659301758, |
| "step": 22350 |
| }, |
| { |
| "epoch": 19.25064599483204, |
| "eval_loss": 0.3820939064025879, |
| "eval_runtime": 15.8721, |
| "eval_samples_per_second": 1197.32, |
| "eval_steps_per_second": 37.424, |
| "step": 22350 |
| }, |
| { |
| "epoch": 19.29371231696813, |
| "grad_norm": 0.740356981754303, |
| "learning_rate": 0.00012282859603789838, |
| "loss": 0.31078941345214844, |
| "step": 22400 |
| }, |
| { |
| "epoch": 19.29371231696813, |
| "eval_loss": 0.3824281394481659, |
| "eval_runtime": 17.3013, |
| "eval_samples_per_second": 1098.412, |
| "eval_steps_per_second": 34.333, |
| "step": 22400 |
| }, |
| { |
| "epoch": 19.33677863910422, |
| "grad_norm": 0.7296916842460632, |
| "learning_rate": 0.000122656330749354, |
| "loss": 0.31134443283081054, |
| "step": 22450 |
| }, |
| { |
| "epoch": 19.33677863910422, |
| "eval_loss": 0.3865135908126831, |
| "eval_runtime": 17.5063, |
| "eval_samples_per_second": 1085.554, |
| "eval_steps_per_second": 33.931, |
| "step": 22450 |
| }, |
| { |
| "epoch": 19.37984496124031, |
| "grad_norm": 0.6394023299217224, |
| "learning_rate": 0.00012248406546080967, |
| "loss": 0.32353458404541013, |
| "step": 22500 |
| }, |
| { |
| "epoch": 19.37984496124031, |
| "eval_loss": 0.38835322856903076, |
| "eval_runtime": 17.2454, |
| "eval_samples_per_second": 1101.977, |
| "eval_steps_per_second": 34.444, |
| "step": 22500 |
| }, |
| { |
| "epoch": 19.4229112833764, |
| "grad_norm": 0.5603283047676086, |
| "learning_rate": 0.00012231180017226528, |
| "loss": 0.31344139099121093, |
| "step": 22550 |
| }, |
| { |
| "epoch": 19.4229112833764, |
| "eval_loss": 0.3792956471443176, |
| "eval_runtime": 17.1044, |
| "eval_samples_per_second": 1111.061, |
| "eval_steps_per_second": 34.728, |
| "step": 22550 |
| }, |
| { |
| "epoch": 19.46597760551249, |
| "grad_norm": 0.5717983245849609, |
| "learning_rate": 0.00012213953488372095, |
| "loss": 0.3134092903137207, |
| "step": 22600 |
| }, |
| { |
| "epoch": 19.46597760551249, |
| "eval_loss": 0.3799424171447754, |
| "eval_runtime": 17.2965, |
| "eval_samples_per_second": 1098.722, |
| "eval_steps_per_second": 34.342, |
| "step": 22600 |
| }, |
| { |
| "epoch": 19.50904392764858, |
| "grad_norm": 0.5377107262611389, |
| "learning_rate": 0.00012196726959517657, |
| "loss": 0.3165386962890625, |
| "step": 22650 |
| }, |
| { |
| "epoch": 19.50904392764858, |
| "eval_loss": 0.3824997544288635, |
| "eval_runtime": 17.1515, |
| "eval_samples_per_second": 1108.009, |
| "eval_steps_per_second": 34.633, |
| "step": 22650 |
| }, |
| { |
| "epoch": 19.55211024978467, |
| "grad_norm": 0.45157045125961304, |
| "learning_rate": 0.00012179500430663222, |
| "loss": 0.315402946472168, |
| "step": 22700 |
| }, |
| { |
| "epoch": 19.55211024978467, |
| "eval_loss": 0.3792726695537567, |
| "eval_runtime": 15.7874, |
| "eval_samples_per_second": 1203.742, |
| "eval_steps_per_second": 37.625, |
| "step": 22700 |
| }, |
| { |
| "epoch": 19.595176571920756, |
| "grad_norm": 0.5813359618186951, |
| "learning_rate": 0.00012162273901808785, |
| "loss": 0.31789175033569333, |
| "step": 22750 |
| }, |
| { |
| "epoch": 19.595176571920756, |
| "eval_loss": 0.38310182094573975, |
| "eval_runtime": 17.083, |
| "eval_samples_per_second": 1112.45, |
| "eval_steps_per_second": 34.771, |
| "step": 22750 |
| }, |
| { |
| "epoch": 19.638242894056848, |
| "grad_norm": 0.4540177881717682, |
| "learning_rate": 0.0001214504737295435, |
| "loss": 0.31223211288452146, |
| "step": 22800 |
| }, |
| { |
| "epoch": 19.638242894056848, |
| "eval_loss": 0.3790459632873535, |
| "eval_runtime": 15.6609, |
| "eval_samples_per_second": 1213.466, |
| "eval_steps_per_second": 37.929, |
| "step": 22800 |
| }, |
| { |
| "epoch": 19.681309216192936, |
| "grad_norm": 0.4714398682117462, |
| "learning_rate": 0.00012127820844099915, |
| "loss": 0.3169035530090332, |
| "step": 22850 |
| }, |
| { |
| "epoch": 19.681309216192936, |
| "eval_loss": 0.38244912028312683, |
| "eval_runtime": 17.3654, |
| "eval_samples_per_second": 1094.362, |
| "eval_steps_per_second": 34.206, |
| "step": 22850 |
| }, |
| { |
| "epoch": 19.724375538329028, |
| "grad_norm": 0.4587773382663727, |
| "learning_rate": 0.00012110594315245478, |
| "loss": 0.317059211730957, |
| "step": 22900 |
| }, |
| { |
| "epoch": 19.724375538329028, |
| "eval_loss": 0.3824186623096466, |
| "eval_runtime": 17.9642, |
| "eval_samples_per_second": 1057.884, |
| "eval_steps_per_second": 33.066, |
| "step": 22900 |
| }, |
| { |
| "epoch": 19.767441860465116, |
| "grad_norm": 0.48488104343414307, |
| "learning_rate": 0.00012093367786391043, |
| "loss": 0.3131584358215332, |
| "step": 22950 |
| }, |
| { |
| "epoch": 19.767441860465116, |
| "eval_loss": 0.377473920583725, |
| "eval_runtime": 18.2414, |
| "eval_samples_per_second": 1041.805, |
| "eval_steps_per_second": 32.563, |
| "step": 22950 |
| }, |
| { |
| "epoch": 19.810508182601207, |
| "grad_norm": 0.45736151933670044, |
| "learning_rate": 0.00012076141257536606, |
| "loss": 0.3098099708557129, |
| "step": 23000 |
| }, |
| { |
| "epoch": 19.810508182601207, |
| "eval_loss": 0.3901560604572296, |
| "eval_runtime": 17.204, |
| "eval_samples_per_second": 1104.628, |
| "eval_steps_per_second": 34.527, |
| "step": 23000 |
| }, |
| { |
| "epoch": 19.853574504737296, |
| "grad_norm": 0.44181448221206665, |
| "learning_rate": 0.00012058914728682172, |
| "loss": 0.3161106872558594, |
| "step": 23050 |
| }, |
| { |
| "epoch": 19.853574504737296, |
| "eval_loss": 0.3850392699241638, |
| "eval_runtime": 17.794, |
| "eval_samples_per_second": 1067.998, |
| "eval_steps_per_second": 33.382, |
| "step": 23050 |
| }, |
| { |
| "epoch": 19.896640826873384, |
| "grad_norm": 0.48947784304618835, |
| "learning_rate": 0.00012041688199827734, |
| "loss": 0.30796041488647463, |
| "step": 23100 |
| }, |
| { |
| "epoch": 19.896640826873384, |
| "eval_loss": 0.3785504996776581, |
| "eval_runtime": 17.2727, |
| "eval_samples_per_second": 1100.234, |
| "eval_steps_per_second": 34.39, |
| "step": 23100 |
| }, |
| { |
| "epoch": 19.939707149009475, |
| "grad_norm": 0.47454968094825745, |
| "learning_rate": 0.00012024461670973299, |
| "loss": 0.32041351318359373, |
| "step": 23150 |
| }, |
| { |
| "epoch": 19.939707149009475, |
| "eval_loss": 0.38033241033554077, |
| "eval_runtime": 16.484, |
| "eval_samples_per_second": 1152.877, |
| "eval_steps_per_second": 36.035, |
| "step": 23150 |
| }, |
| { |
| "epoch": 19.982773471145563, |
| "grad_norm": 0.4740263819694519, |
| "learning_rate": 0.00012007235142118864, |
| "loss": 0.3148014068603516, |
| "step": 23200 |
| }, |
| { |
| "epoch": 19.982773471145563, |
| "eval_loss": 0.38085371255874634, |
| "eval_runtime": 17.3432, |
| "eval_samples_per_second": 1095.761, |
| "eval_steps_per_second": 34.25, |
| "step": 23200 |
| }, |
| { |
| "epoch": 20.025839793281655, |
| "grad_norm": 0.5026439428329468, |
| "learning_rate": 0.00011990008613264427, |
| "loss": 0.3176981353759766, |
| "step": 23250 |
| }, |
| { |
| "epoch": 20.025839793281655, |
| "eval_loss": 0.3808394968509674, |
| "eval_runtime": 16.7675, |
| "eval_samples_per_second": 1133.384, |
| "eval_steps_per_second": 35.426, |
| "step": 23250 |
| }, |
| { |
| "epoch": 20.068906115417743, |
| "grad_norm": 0.4572559595108032, |
| "learning_rate": 0.00011972782084409993, |
| "loss": 0.3045423126220703, |
| "step": 23300 |
| }, |
| { |
| "epoch": 20.068906115417743, |
| "eval_loss": 0.38897034525871277, |
| "eval_runtime": 17.4371, |
| "eval_samples_per_second": 1089.86, |
| "eval_steps_per_second": 34.065, |
| "step": 23300 |
| }, |
| { |
| "epoch": 20.11197243755383, |
| "grad_norm": 0.4959510266780853, |
| "learning_rate": 0.00011955555555555556, |
| "loss": 0.3092060089111328, |
| "step": 23350 |
| }, |
| { |
| "epoch": 20.11197243755383, |
| "eval_loss": 0.3781077265739441, |
| "eval_runtime": 17.1755, |
| "eval_samples_per_second": 1106.461, |
| "eval_steps_per_second": 34.584, |
| "step": 23350 |
| }, |
| { |
| "epoch": 20.155038759689923, |
| "grad_norm": 0.576425313949585, |
| "learning_rate": 0.00011938329026701121, |
| "loss": 0.3072701644897461, |
| "step": 23400 |
| }, |
| { |
| "epoch": 20.155038759689923, |
| "eval_loss": 0.3774796426296234, |
| "eval_runtime": 17.5053, |
| "eval_samples_per_second": 1085.616, |
| "eval_steps_per_second": 33.933, |
| "step": 23400 |
| }, |
| { |
| "epoch": 20.19810508182601, |
| "grad_norm": 0.5191887021064758, |
| "learning_rate": 0.00011921102497846684, |
| "loss": 0.3084998512268066, |
| "step": 23450 |
| }, |
| { |
| "epoch": 20.19810508182601, |
| "eval_loss": 0.3787190318107605, |
| "eval_runtime": 17.5021, |
| "eval_samples_per_second": 1085.811, |
| "eval_steps_per_second": 33.939, |
| "step": 23450 |
| }, |
| { |
| "epoch": 20.241171403962102, |
| "grad_norm": 0.4725388288497925, |
| "learning_rate": 0.00011903875968992248, |
| "loss": 0.30435737609863284, |
| "step": 23500 |
| }, |
| { |
| "epoch": 20.241171403962102, |
| "eval_loss": 0.3799598217010498, |
| "eval_runtime": 17.3816, |
| "eval_samples_per_second": 1093.34, |
| "eval_steps_per_second": 34.174, |
| "step": 23500 |
| }, |
| { |
| "epoch": 20.28423772609819, |
| "grad_norm": 0.460355281829834, |
| "learning_rate": 0.00011886649440137814, |
| "loss": 0.31231136322021485, |
| "step": 23550 |
| }, |
| { |
| "epoch": 20.28423772609819, |
| "eval_loss": 0.3775605857372284, |
| "eval_runtime": 17.0961, |
| "eval_samples_per_second": 1111.596, |
| "eval_steps_per_second": 34.745, |
| "step": 23550 |
| }, |
| { |
| "epoch": 20.327304048234282, |
| "grad_norm": 0.44472384452819824, |
| "learning_rate": 0.00011869422911283377, |
| "loss": 0.3119425964355469, |
| "step": 23600 |
| }, |
| { |
| "epoch": 20.327304048234282, |
| "eval_loss": 0.385606974363327, |
| "eval_runtime": 17.4347, |
| "eval_samples_per_second": 1090.01, |
| "eval_steps_per_second": 34.07, |
| "step": 23600 |
| }, |
| { |
| "epoch": 20.37037037037037, |
| "grad_norm": 0.4899062514305115, |
| "learning_rate": 0.00011852196382428942, |
| "loss": 0.3081985664367676, |
| "step": 23650 |
| }, |
| { |
| "epoch": 20.37037037037037, |
| "eval_loss": 0.3742313086986542, |
| "eval_runtime": 17.4633, |
| "eval_samples_per_second": 1088.223, |
| "eval_steps_per_second": 34.014, |
| "step": 23650 |
| }, |
| { |
| "epoch": 20.41343669250646, |
| "grad_norm": 0.39342230558395386, |
| "learning_rate": 0.00011834969853574505, |
| "loss": 0.3061702919006348, |
| "step": 23700 |
| }, |
| { |
| "epoch": 20.41343669250646, |
| "eval_loss": 0.3832343518733978, |
| "eval_runtime": 16.5699, |
| "eval_samples_per_second": 1146.896, |
| "eval_steps_per_second": 35.848, |
| "step": 23700 |
| }, |
| { |
| "epoch": 20.45650301464255, |
| "grad_norm": 0.5367943644523621, |
| "learning_rate": 0.00011817743324720069, |
| "loss": 0.3173460388183594, |
| "step": 23750 |
| }, |
| { |
| "epoch": 20.45650301464255, |
| "eval_loss": 0.3846309781074524, |
| "eval_runtime": 17.5177, |
| "eval_samples_per_second": 1084.846, |
| "eval_steps_per_second": 33.909, |
| "step": 23750 |
| }, |
| { |
| "epoch": 20.499569336778638, |
| "grad_norm": 0.468249648809433, |
| "learning_rate": 0.00011800516795865632, |
| "loss": 0.3046343994140625, |
| "step": 23800 |
| }, |
| { |
| "epoch": 20.499569336778638, |
| "eval_loss": 0.3752977252006531, |
| "eval_runtime": 17.4727, |
| "eval_samples_per_second": 1087.639, |
| "eval_steps_per_second": 33.996, |
| "step": 23800 |
| }, |
| { |
| "epoch": 20.54263565891473, |
| "grad_norm": 0.4803526997566223, |
| "learning_rate": 0.00011783290267011198, |
| "loss": 0.31560543060302737, |
| "step": 23850 |
| }, |
| { |
| "epoch": 20.54263565891473, |
| "eval_loss": 0.37215656042099, |
| "eval_runtime": 17.4673, |
| "eval_samples_per_second": 1087.973, |
| "eval_steps_per_second": 34.006, |
| "step": 23850 |
| }, |
| { |
| "epoch": 20.585701981050818, |
| "grad_norm": 0.4824640154838562, |
| "learning_rate": 0.00011766063738156763, |
| "loss": 0.3100012397766113, |
| "step": 23900 |
| }, |
| { |
| "epoch": 20.585701981050818, |
| "eval_loss": 0.3795510530471802, |
| "eval_runtime": 17.3565, |
| "eval_samples_per_second": 1094.921, |
| "eval_steps_per_second": 34.223, |
| "step": 23900 |
| }, |
| { |
| "epoch": 20.62876830318691, |
| "grad_norm": 0.5121549963951111, |
| "learning_rate": 0.00011748837209302326, |
| "loss": 0.3087359619140625, |
| "step": 23950 |
| }, |
| { |
| "epoch": 20.62876830318691, |
| "eval_loss": 0.37073156237602234, |
| "eval_runtime": 17.7974, |
| "eval_samples_per_second": 1067.794, |
| "eval_steps_per_second": 33.376, |
| "step": 23950 |
| }, |
| { |
| "epoch": 20.671834625322997, |
| "grad_norm": 0.5719079971313477, |
| "learning_rate": 0.0001173161068044789, |
| "loss": 0.3093043327331543, |
| "step": 24000 |
| }, |
| { |
| "epoch": 20.671834625322997, |
| "eval_loss": 0.3770056366920471, |
| "eval_runtime": 17.2425, |
| "eval_samples_per_second": 1102.16, |
| "eval_steps_per_second": 34.45, |
| "step": 24000 |
| }, |
| { |
| "epoch": 20.714900947459086, |
| "grad_norm": 0.46395695209503174, |
| "learning_rate": 0.00011714384151593454, |
| "loss": 0.3123012161254883, |
| "step": 24050 |
| }, |
| { |
| "epoch": 20.714900947459086, |
| "eval_loss": 0.371019572019577, |
| "eval_runtime": 16.6259, |
| "eval_samples_per_second": 1143.034, |
| "eval_steps_per_second": 35.727, |
| "step": 24050 |
| }, |
| { |
| "epoch": 20.757967269595177, |
| "grad_norm": 0.540714681148529, |
| "learning_rate": 0.00011697157622739019, |
| "loss": 0.31075138092041016, |
| "step": 24100 |
| }, |
| { |
| "epoch": 20.757967269595177, |
| "eval_loss": 0.37732967734336853, |
| "eval_runtime": 17.4927, |
| "eval_samples_per_second": 1086.397, |
| "eval_steps_per_second": 33.957, |
| "step": 24100 |
| }, |
| { |
| "epoch": 20.801033591731265, |
| "grad_norm": 0.4655165374279022, |
| "learning_rate": 0.00011679931093884582, |
| "loss": 0.30832584381103517, |
| "step": 24150 |
| }, |
| { |
| "epoch": 20.801033591731265, |
| "eval_loss": 0.37958091497421265, |
| "eval_runtime": 17.2454, |
| "eval_samples_per_second": 1101.973, |
| "eval_steps_per_second": 34.444, |
| "step": 24150 |
| }, |
| { |
| "epoch": 20.844099913867357, |
| "grad_norm": 0.4928206205368042, |
| "learning_rate": 0.00011662704565030147, |
| "loss": 0.31150699615478517, |
| "step": 24200 |
| }, |
| { |
| "epoch": 20.844099913867357, |
| "eval_loss": 0.37537887692451477, |
| "eval_runtime": 16.4495, |
| "eval_samples_per_second": 1155.295, |
| "eval_steps_per_second": 36.111, |
| "step": 24200 |
| }, |
| { |
| "epoch": 20.887166236003445, |
| "grad_norm": 0.5147706270217896, |
| "learning_rate": 0.00011645478036175713, |
| "loss": 0.3046610069274902, |
| "step": 24250 |
| }, |
| { |
| "epoch": 20.887166236003445, |
| "eval_loss": 0.3709302246570587, |
| "eval_runtime": 17.2509, |
| "eval_samples_per_second": 1101.626, |
| "eval_steps_per_second": 34.433, |
| "step": 24250 |
| }, |
| { |
| "epoch": 20.930232558139537, |
| "grad_norm": 0.4951704740524292, |
| "learning_rate": 0.00011628251507321276, |
| "loss": 0.3043970108032227, |
| "step": 24300 |
| }, |
| { |
| "epoch": 20.930232558139537, |
| "eval_loss": 0.38124656677246094, |
| "eval_runtime": 17.4681, |
| "eval_samples_per_second": 1087.923, |
| "eval_steps_per_second": 34.005, |
| "step": 24300 |
| }, |
| { |
| "epoch": 20.973298880275625, |
| "grad_norm": 0.41925662755966187, |
| "learning_rate": 0.0001161102497846684, |
| "loss": 0.3140296173095703, |
| "step": 24350 |
| }, |
| { |
| "epoch": 20.973298880275625, |
| "eval_loss": 0.3799387514591217, |
| "eval_runtime": 17.5417, |
| "eval_samples_per_second": 1083.363, |
| "eval_steps_per_second": 33.862, |
| "step": 24350 |
| }, |
| { |
| "epoch": 21.016365202411713, |
| "grad_norm": 0.5070242285728455, |
| "learning_rate": 0.00011593798449612403, |
| "loss": 0.3025740814208984, |
| "step": 24400 |
| }, |
| { |
| "epoch": 21.016365202411713, |
| "eval_loss": 0.3791089355945587, |
| "eval_runtime": 17.7997, |
| "eval_samples_per_second": 1067.66, |
| "eval_steps_per_second": 33.371, |
| "step": 24400 |
| }, |
| { |
| "epoch": 21.059431524547804, |
| "grad_norm": 0.42946505546569824, |
| "learning_rate": 0.00011576571920757968, |
| "loss": 0.30825782775878907, |
| "step": 24450 |
| }, |
| { |
| "epoch": 21.059431524547804, |
| "eval_loss": 0.37176281213760376, |
| "eval_runtime": 19.2954, |
| "eval_samples_per_second": 984.899, |
| "eval_steps_per_second": 30.785, |
| "step": 24450 |
| }, |
| { |
| "epoch": 21.102497846683892, |
| "grad_norm": 0.40253499150276184, |
| "learning_rate": 0.00011559345391903531, |
| "loss": 0.3015823745727539, |
| "step": 24500 |
| }, |
| { |
| "epoch": 21.102497846683892, |
| "eval_loss": 0.3752162754535675, |
| "eval_runtime": 18.1997, |
| "eval_samples_per_second": 1044.192, |
| "eval_steps_per_second": 32.638, |
| "step": 24500 |
| }, |
| { |
| "epoch": 21.145564168819984, |
| "grad_norm": 0.42977315187454224, |
| "learning_rate": 0.00011542118863049097, |
| "loss": 0.308193244934082, |
| "step": 24550 |
| }, |
| { |
| "epoch": 21.145564168819984, |
| "eval_loss": 0.3800414502620697, |
| "eval_runtime": 18.7255, |
| "eval_samples_per_second": 1014.872, |
| "eval_steps_per_second": 31.721, |
| "step": 24550 |
| }, |
| { |
| "epoch": 21.188630490956072, |
| "grad_norm": 0.39198407530784607, |
| "learning_rate": 0.00011524892334194661, |
| "loss": 0.3081152725219727, |
| "step": 24600 |
| }, |
| { |
| "epoch": 21.188630490956072, |
| "eval_loss": 0.3643619418144226, |
| "eval_runtime": 17.8789, |
| "eval_samples_per_second": 1062.931, |
| "eval_steps_per_second": 33.224, |
| "step": 24600 |
| }, |
| { |
| "epoch": 21.23169681309216, |
| "grad_norm": 0.5011320114135742, |
| "learning_rate": 0.00011507665805340224, |
| "loss": 0.30673561096191404, |
| "step": 24650 |
| }, |
| { |
| "epoch": 21.23169681309216, |
| "eval_loss": 0.37921005487442017, |
| "eval_runtime": 17.1467, |
| "eval_samples_per_second": 1108.32, |
| "eval_steps_per_second": 34.642, |
| "step": 24650 |
| }, |
| { |
| "epoch": 21.274763135228252, |
| "grad_norm": 0.5653364658355713, |
| "learning_rate": 0.00011490439276485789, |
| "loss": 0.309172248840332, |
| "step": 24700 |
| }, |
| { |
| "epoch": 21.274763135228252, |
| "eval_loss": 0.377411812543869, |
| "eval_runtime": 17.148, |
| "eval_samples_per_second": 1108.234, |
| "eval_steps_per_second": 34.64, |
| "step": 24700 |
| }, |
| { |
| "epoch": 21.31782945736434, |
| "grad_norm": 0.542508602142334, |
| "learning_rate": 0.00011473212747631352, |
| "loss": 0.3056539535522461, |
| "step": 24750 |
| }, |
| { |
| "epoch": 21.31782945736434, |
| "eval_loss": 0.3837369680404663, |
| "eval_runtime": 18.4413, |
| "eval_samples_per_second": 1030.514, |
| "eval_steps_per_second": 32.21, |
| "step": 24750 |
| }, |
| { |
| "epoch": 21.36089577950043, |
| "grad_norm": 0.478859007358551, |
| "learning_rate": 0.00011455986218776918, |
| "loss": 0.30355857849121093, |
| "step": 24800 |
| }, |
| { |
| "epoch": 21.36089577950043, |
| "eval_loss": 0.37471258640289307, |
| "eval_runtime": 17.4466, |
| "eval_samples_per_second": 1089.264, |
| "eval_steps_per_second": 34.047, |
| "step": 24800 |
| }, |
| { |
| "epoch": 21.40396210163652, |
| "grad_norm": 0.45857715606689453, |
| "learning_rate": 0.0001143875968992248, |
| "loss": 0.30617919921875, |
| "step": 24850 |
| }, |
| { |
| "epoch": 21.40396210163652, |
| "eval_loss": 0.3754553198814392, |
| "eval_runtime": 17.5133, |
| "eval_samples_per_second": 1085.116, |
| "eval_steps_per_second": 33.917, |
| "step": 24850 |
| }, |
| { |
| "epoch": 21.44702842377261, |
| "grad_norm": 0.4991483688354492, |
| "learning_rate": 0.00011421533161068046, |
| "loss": 0.30925914764404294, |
| "step": 24900 |
| }, |
| { |
| "epoch": 21.44702842377261, |
| "eval_loss": 0.37263229489326477, |
| "eval_runtime": 16.9128, |
| "eval_samples_per_second": 1123.643, |
| "eval_steps_per_second": 35.121, |
| "step": 24900 |
| }, |
| { |
| "epoch": 21.4900947459087, |
| "grad_norm": 0.4601542055606842, |
| "learning_rate": 0.0001140430663221361, |
| "loss": 0.3022798538208008, |
| "step": 24950 |
| }, |
| { |
| "epoch": 21.4900947459087, |
| "eval_loss": 0.3806911110877991, |
| "eval_runtime": 15.6758, |
| "eval_samples_per_second": 1212.312, |
| "eval_steps_per_second": 37.893, |
| "step": 24950 |
| }, |
| { |
| "epoch": 21.533161068044787, |
| "grad_norm": 0.4917987585067749, |
| "learning_rate": 0.00011387080103359173, |
| "loss": 0.30524206161499023, |
| "step": 25000 |
| }, |
| { |
| "epoch": 21.533161068044787, |
| "eval_loss": 0.3728342056274414, |
| "eval_runtime": 17.1627, |
| "eval_samples_per_second": 1107.283, |
| "eval_steps_per_second": 34.61, |
| "step": 25000 |
| }, |
| { |
| "epoch": 21.57622739018088, |
| "grad_norm": 0.5263481736183167, |
| "learning_rate": 0.00011369853574504739, |
| "loss": 0.30638530731201175, |
| "step": 25050 |
| }, |
| { |
| "epoch": 21.57622739018088, |
| "eval_loss": 0.3712511658668518, |
| "eval_runtime": 16.0233, |
| "eval_samples_per_second": 1186.019, |
| "eval_steps_per_second": 37.071, |
| "step": 25050 |
| }, |
| { |
| "epoch": 21.619293712316967, |
| "grad_norm": 0.48792555928230286, |
| "learning_rate": 0.00011352627045650302, |
| "loss": 0.3103178977966309, |
| "step": 25100 |
| }, |
| { |
| "epoch": 21.619293712316967, |
| "eval_loss": 0.370941698551178, |
| "eval_runtime": 17.3446, |
| "eval_samples_per_second": 1095.674, |
| "eval_steps_per_second": 34.247, |
| "step": 25100 |
| }, |
| { |
| "epoch": 21.66236003445306, |
| "grad_norm": 0.5177699327468872, |
| "learning_rate": 0.00011335400516795867, |
| "loss": 0.3069496154785156, |
| "step": 25150 |
| }, |
| { |
| "epoch": 21.66236003445306, |
| "eval_loss": 0.3749415874481201, |
| "eval_runtime": 17.5087, |
| "eval_samples_per_second": 1085.403, |
| "eval_steps_per_second": 33.926, |
| "step": 25150 |
| }, |
| { |
| "epoch": 21.705426356589147, |
| "grad_norm": 0.4783251881599426, |
| "learning_rate": 0.0001131817398794143, |
| "loss": 0.28859285354614256, |
| "step": 25200 |
| }, |
| { |
| "epoch": 21.705426356589147, |
| "eval_loss": 0.3722775876522064, |
| "eval_runtime": 17.3202, |
| "eval_samples_per_second": 1097.216, |
| "eval_steps_per_second": 34.295, |
| "step": 25200 |
| }, |
| { |
| "epoch": 21.74849267872524, |
| "grad_norm": 0.5633186101913452, |
| "learning_rate": 0.00011300947459086994, |
| "loss": 0.30084590911865233, |
| "step": 25250 |
| }, |
| { |
| "epoch": 21.74849267872524, |
| "eval_loss": 0.3847544491291046, |
| "eval_runtime": 17.2383, |
| "eval_samples_per_second": 1102.427, |
| "eval_steps_per_second": 34.458, |
| "step": 25250 |
| }, |
| { |
| "epoch": 21.791559000861326, |
| "grad_norm": 0.5172644853591919, |
| "learning_rate": 0.0001128372093023256, |
| "loss": 0.30555530548095705, |
| "step": 25300 |
| }, |
| { |
| "epoch": 21.791559000861326, |
| "eval_loss": 0.37794214487075806, |
| "eval_runtime": 18.029, |
| "eval_samples_per_second": 1054.078, |
| "eval_steps_per_second": 32.947, |
| "step": 25300 |
| }, |
| { |
| "epoch": 21.834625322997415, |
| "grad_norm": 0.48205018043518066, |
| "learning_rate": 0.00011266494401378123, |
| "loss": 0.3017234992980957, |
| "step": 25350 |
| }, |
| { |
| "epoch": 21.834625322997415, |
| "eval_loss": 0.3765769302845001, |
| "eval_runtime": 18.2823, |
| "eval_samples_per_second": 1039.476, |
| "eval_steps_per_second": 32.49, |
| "step": 25350 |
| }, |
| { |
| "epoch": 21.877691645133506, |
| "grad_norm": 0.4693802297115326, |
| "learning_rate": 0.00011249267872523688, |
| "loss": 0.3170528793334961, |
| "step": 25400 |
| }, |
| { |
| "epoch": 21.877691645133506, |
| "eval_loss": 0.3685433268547058, |
| "eval_runtime": 16.3179, |
| "eval_samples_per_second": 1164.61, |
| "eval_steps_per_second": 36.402, |
| "step": 25400 |
| }, |
| { |
| "epoch": 21.920757967269594, |
| "grad_norm": 0.5655513405799866, |
| "learning_rate": 0.00011232041343669251, |
| "loss": 0.30839736938476564, |
| "step": 25450 |
| }, |
| { |
| "epoch": 21.920757967269594, |
| "eval_loss": 0.3736245930194855, |
| "eval_runtime": 17.5958, |
| "eval_samples_per_second": 1080.032, |
| "eval_steps_per_second": 33.758, |
| "step": 25450 |
| }, |
| { |
| "epoch": 21.963824289405686, |
| "grad_norm": 0.49325621128082275, |
| "learning_rate": 0.00011214814814814815, |
| "loss": 0.29805870056152345, |
| "step": 25500 |
| }, |
| { |
| "epoch": 21.963824289405686, |
| "eval_loss": 0.37143099308013916, |
| "eval_runtime": 16.6653, |
| "eval_samples_per_second": 1140.336, |
| "eval_steps_per_second": 35.643, |
| "step": 25500 |
| }, |
| { |
| "epoch": 22.006890611541774, |
| "grad_norm": 0.49408647418022156, |
| "learning_rate": 0.00011197588285960378, |
| "loss": 0.30263139724731447, |
| "step": 25550 |
| }, |
| { |
| "epoch": 22.006890611541774, |
| "eval_loss": 0.3813234567642212, |
| "eval_runtime": 17.3618, |
| "eval_samples_per_second": 1094.584, |
| "eval_steps_per_second": 34.213, |
| "step": 25550 |
| }, |
| { |
| "epoch": 22.049956933677866, |
| "grad_norm": 0.40663397312164307, |
| "learning_rate": 0.00011180361757105944, |
| "loss": 0.2942478942871094, |
| "step": 25600 |
| }, |
| { |
| "epoch": 22.049956933677866, |
| "eval_loss": 0.37827885150909424, |
| "eval_runtime": 17.5023, |
| "eval_samples_per_second": 1085.798, |
| "eval_steps_per_second": 33.938, |
| "step": 25600 |
| }, |
| { |
| "epoch": 22.093023255813954, |
| "grad_norm": 0.554619550704956, |
| "learning_rate": 0.00011163135228251509, |
| "loss": 0.29897663116455075, |
| "step": 25650 |
| }, |
| { |
| "epoch": 22.093023255813954, |
| "eval_loss": 0.37729790806770325, |
| "eval_runtime": 17.5268, |
| "eval_samples_per_second": 1084.282, |
| "eval_steps_per_second": 33.891, |
| "step": 25650 |
| }, |
| { |
| "epoch": 22.13608957795004, |
| "grad_norm": 0.5263612866401672, |
| "learning_rate": 0.00011145908699397072, |
| "loss": 0.30237943649291993, |
| "step": 25700 |
| }, |
| { |
| "epoch": 22.13608957795004, |
| "eval_loss": 0.3682263195514679, |
| "eval_runtime": 17.3968, |
| "eval_samples_per_second": 1092.383, |
| "eval_steps_per_second": 34.144, |
| "step": 25700 |
| }, |
| { |
| "epoch": 22.179155900086133, |
| "grad_norm": 0.5653451681137085, |
| "learning_rate": 0.00011128682170542638, |
| "loss": 0.3112063217163086, |
| "step": 25750 |
| }, |
| { |
| "epoch": 22.179155900086133, |
| "eval_loss": 0.3737627863883972, |
| "eval_runtime": 17.199, |
| "eval_samples_per_second": 1104.946, |
| "eval_steps_per_second": 34.537, |
| "step": 25750 |
| }, |
| { |
| "epoch": 22.22222222222222, |
| "grad_norm": 0.45960068702697754, |
| "learning_rate": 0.000111114556416882, |
| "loss": 0.30598041534423825, |
| "step": 25800 |
| }, |
| { |
| "epoch": 22.22222222222222, |
| "eval_loss": 0.3709113895893097, |
| "eval_runtime": 17.2079, |
| "eval_samples_per_second": 1104.376, |
| "eval_steps_per_second": 34.519, |
| "step": 25800 |
| }, |
| { |
| "epoch": 22.265288544358313, |
| "grad_norm": 0.4353873133659363, |
| "learning_rate": 0.00011094229112833765, |
| "loss": 0.3021149444580078, |
| "step": 25850 |
| }, |
| { |
| "epoch": 22.265288544358313, |
| "eval_loss": 0.36920997500419617, |
| "eval_runtime": 17.5125, |
| "eval_samples_per_second": 1085.17, |
| "eval_steps_per_second": 33.919, |
| "step": 25850 |
| }, |
| { |
| "epoch": 22.3083548664944, |
| "grad_norm": 0.4611629247665405, |
| "learning_rate": 0.00011077002583979328, |
| "loss": 0.30381624221801756, |
| "step": 25900 |
| }, |
| { |
| "epoch": 22.3083548664944, |
| "eval_loss": 0.3706386387348175, |
| "eval_runtime": 17.3449, |
| "eval_samples_per_second": 1095.655, |
| "eval_steps_per_second": 34.246, |
| "step": 25900 |
| }, |
| { |
| "epoch": 22.35142118863049, |
| "grad_norm": 0.5272542834281921, |
| "learning_rate": 0.00011059776055124893, |
| "loss": 0.300566520690918, |
| "step": 25950 |
| }, |
| { |
| "epoch": 22.35142118863049, |
| "eval_loss": 0.36453819274902344, |
| "eval_runtime": 16.8474, |
| "eval_samples_per_second": 1128.011, |
| "eval_steps_per_second": 35.258, |
| "step": 25950 |
| }, |
| { |
| "epoch": 22.39448751076658, |
| "grad_norm": 0.4472649395465851, |
| "learning_rate": 0.00011042549526270459, |
| "loss": 0.2959398078918457, |
| "step": 26000 |
| }, |
| { |
| "epoch": 22.39448751076658, |
| "eval_loss": 0.37610924243927, |
| "eval_runtime": 17.4908, |
| "eval_samples_per_second": 1086.515, |
| "eval_steps_per_second": 33.961, |
| "step": 26000 |
| }, |
| { |
| "epoch": 22.43755383290267, |
| "grad_norm": 0.4704062342643738, |
| "learning_rate": 0.00011025322997416022, |
| "loss": 0.2961640548706055, |
| "step": 26050 |
| }, |
| { |
| "epoch": 22.43755383290267, |
| "eval_loss": 0.37994384765625, |
| "eval_runtime": 17.5059, |
| "eval_samples_per_second": 1085.579, |
| "eval_steps_per_second": 33.931, |
| "step": 26050 |
| }, |
| { |
| "epoch": 22.48062015503876, |
| "grad_norm": 0.7996960878372192, |
| "learning_rate": 0.00011008096468561586, |
| "loss": 0.3014845085144043, |
| "step": 26100 |
| }, |
| { |
| "epoch": 22.48062015503876, |
| "eval_loss": 0.3722558319568634, |
| "eval_runtime": 17.5232, |
| "eval_samples_per_second": 1084.505, |
| "eval_steps_per_second": 33.898, |
| "step": 26100 |
| }, |
| { |
| "epoch": 22.52368647717485, |
| "grad_norm": 0.6491348147392273, |
| "learning_rate": 0.00010990869939707149, |
| "loss": 0.301578369140625, |
| "step": 26150 |
| }, |
| { |
| "epoch": 22.52368647717485, |
| "eval_loss": 0.37108728289604187, |
| "eval_runtime": 17.7967, |
| "eval_samples_per_second": 1067.84, |
| "eval_steps_per_second": 33.377, |
| "step": 26150 |
| }, |
| { |
| "epoch": 22.56675279931094, |
| "grad_norm": 0.5225486159324646, |
| "learning_rate": 0.00010973643410852714, |
| "loss": 0.3060628318786621, |
| "step": 26200 |
| }, |
| { |
| "epoch": 22.56675279931094, |
| "eval_loss": 0.3759528696537018, |
| "eval_runtime": 17.1394, |
| "eval_samples_per_second": 1108.788, |
| "eval_steps_per_second": 34.657, |
| "step": 26200 |
| }, |
| { |
| "epoch": 22.60981912144703, |
| "grad_norm": 0.5029892325401306, |
| "learning_rate": 0.00010956416881998277, |
| "loss": 0.3059848785400391, |
| "step": 26250 |
| }, |
| { |
| "epoch": 22.60981912144703, |
| "eval_loss": 0.3835486173629761, |
| "eval_runtime": 16.7296, |
| "eval_samples_per_second": 1135.948, |
| "eval_steps_per_second": 35.506, |
| "step": 26250 |
| }, |
| { |
| "epoch": 22.652885443583116, |
| "grad_norm": 0.46938949823379517, |
| "learning_rate": 0.00010939190353143843, |
| "loss": 0.30714372634887693, |
| "step": 26300 |
| }, |
| { |
| "epoch": 22.652885443583116, |
| "eval_loss": 0.3625224232673645, |
| "eval_runtime": 17.5276, |
| "eval_samples_per_second": 1084.235, |
| "eval_steps_per_second": 33.889, |
| "step": 26300 |
| }, |
| { |
| "epoch": 22.695951765719208, |
| "grad_norm": 0.5187062621116638, |
| "learning_rate": 0.00010921963824289405, |
| "loss": 0.3000494003295898, |
| "step": 26350 |
| }, |
| { |
| "epoch": 22.695951765719208, |
| "eval_loss": 0.3673667013645172, |
| "eval_runtime": 16.9361, |
| "eval_samples_per_second": 1122.098, |
| "eval_steps_per_second": 35.073, |
| "step": 26350 |
| }, |
| { |
| "epoch": 22.739018087855296, |
| "grad_norm": 0.4383411109447479, |
| "learning_rate": 0.0001090473729543497, |
| "loss": 0.30050899505615236, |
| "step": 26400 |
| }, |
| { |
| "epoch": 22.739018087855296, |
| "eval_loss": 0.36597198247909546, |
| "eval_runtime": 16.702, |
| "eval_samples_per_second": 1137.826, |
| "eval_steps_per_second": 35.565, |
| "step": 26400 |
| }, |
| { |
| "epoch": 22.782084409991388, |
| "grad_norm": 0.5613611340522766, |
| "learning_rate": 0.00010887510766580535, |
| "loss": 0.29673213958740235, |
| "step": 26450 |
| }, |
| { |
| "epoch": 22.782084409991388, |
| "eval_loss": 0.37194007635116577, |
| "eval_runtime": 17.8701, |
| "eval_samples_per_second": 1063.454, |
| "eval_steps_per_second": 33.24, |
| "step": 26450 |
| }, |
| { |
| "epoch": 22.825150732127476, |
| "grad_norm": 0.47791770100593567, |
| "learning_rate": 0.00010870284237726098, |
| "loss": 0.30543540954589843, |
| "step": 26500 |
| }, |
| { |
| "epoch": 22.825150732127476, |
| "eval_loss": 0.3654497265815735, |
| "eval_runtime": 18.0311, |
| "eval_samples_per_second": 1053.959, |
| "eval_steps_per_second": 32.943, |
| "step": 26500 |
| }, |
| { |
| "epoch": 22.868217054263567, |
| "grad_norm": 0.42347297072410583, |
| "learning_rate": 0.00010853057708871664, |
| "loss": 0.299227294921875, |
| "step": 26550 |
| }, |
| { |
| "epoch": 22.868217054263567, |
| "eval_loss": 0.36894989013671875, |
| "eval_runtime": 17.2629, |
| "eval_samples_per_second": 1100.861, |
| "eval_steps_per_second": 34.409, |
| "step": 26550 |
| }, |
| { |
| "epoch": 22.911283376399656, |
| "grad_norm": 0.5425981283187866, |
| "learning_rate": 0.00010835831180017226, |
| "loss": 0.30300045013427734, |
| "step": 26600 |
| }, |
| { |
| "epoch": 22.911283376399656, |
| "eval_loss": 0.3654426336288452, |
| "eval_runtime": 17.5264, |
| "eval_samples_per_second": 1084.31, |
| "eval_steps_per_second": 33.892, |
| "step": 26600 |
| }, |
| { |
| "epoch": 22.954349698535744, |
| "grad_norm": 0.5579530596733093, |
| "learning_rate": 0.00010818604651162792, |
| "loss": 0.3015494918823242, |
| "step": 26650 |
| }, |
| { |
| "epoch": 22.954349698535744, |
| "eval_loss": 0.37281379103660583, |
| "eval_runtime": 17.2889, |
| "eval_samples_per_second": 1099.204, |
| "eval_steps_per_second": 34.357, |
| "step": 26650 |
| }, |
| { |
| "epoch": 22.997416020671835, |
| "grad_norm": 0.648759126663208, |
| "learning_rate": 0.00010801378122308355, |
| "loss": 0.30109729766845705, |
| "step": 26700 |
| }, |
| { |
| "epoch": 22.997416020671835, |
| "eval_loss": 0.370487779378891, |
| "eval_runtime": 16.1964, |
| "eval_samples_per_second": 1173.347, |
| "eval_steps_per_second": 36.675, |
| "step": 26700 |
| }, |
| { |
| "epoch": 23.040482342807923, |
| "grad_norm": 0.5155696272850037, |
| "learning_rate": 0.00010784151593453919, |
| "loss": 0.29900886535644533, |
| "step": 26750 |
| }, |
| { |
| "epoch": 23.040482342807923, |
| "eval_loss": 0.3741052448749542, |
| "eval_runtime": 17.5083, |
| "eval_samples_per_second": 1085.43, |
| "eval_steps_per_second": 33.927, |
| "step": 26750 |
| }, |
| { |
| "epoch": 23.083548664944015, |
| "grad_norm": 0.47485584020614624, |
| "learning_rate": 0.00010766925064599485, |
| "loss": 0.29572803497314454, |
| "step": 26800 |
| }, |
| { |
| "epoch": 23.083548664944015, |
| "eval_loss": 0.3721824288368225, |
| "eval_runtime": 17.0469, |
| "eval_samples_per_second": 1114.809, |
| "eval_steps_per_second": 34.845, |
| "step": 26800 |
| }, |
| { |
| "epoch": 23.126614987080103, |
| "grad_norm": 0.4486505091190338, |
| "learning_rate": 0.00010749698535745048, |
| "loss": 0.30237659454345706, |
| "step": 26850 |
| }, |
| { |
| "epoch": 23.126614987080103, |
| "eval_loss": 0.3684123754501343, |
| "eval_runtime": 17.3869, |
| "eval_samples_per_second": 1093.004, |
| "eval_steps_per_second": 34.164, |
| "step": 26850 |
| }, |
| { |
| "epoch": 23.169681309216195, |
| "grad_norm": 0.4725002348423004, |
| "learning_rate": 0.00010732472006890613, |
| "loss": 0.2979306221008301, |
| "step": 26900 |
| }, |
| { |
| "epoch": 23.169681309216195, |
| "eval_loss": 0.37552914023399353, |
| "eval_runtime": 17.4933, |
| "eval_samples_per_second": 1086.358, |
| "eval_steps_per_second": 33.956, |
| "step": 26900 |
| }, |
| { |
| "epoch": 23.212747631352283, |
| "grad_norm": 0.5699407458305359, |
| "learning_rate": 0.00010715245478036176, |
| "loss": 0.3049004364013672, |
| "step": 26950 |
| }, |
| { |
| "epoch": 23.212747631352283, |
| "eval_loss": 0.37610357999801636, |
| "eval_runtime": 17.3892, |
| "eval_samples_per_second": 1092.861, |
| "eval_steps_per_second": 34.159, |
| "step": 26950 |
| }, |
| { |
| "epoch": 23.25581395348837, |
| "grad_norm": 0.5288128852844238, |
| "learning_rate": 0.0001069801894918174, |
| "loss": 0.29878740310668944, |
| "step": 27000 |
| }, |
| { |
| "epoch": 23.25581395348837, |
| "eval_loss": 0.3685736060142517, |
| "eval_runtime": 17.5063, |
| "eval_samples_per_second": 1085.555, |
| "eval_steps_per_second": 33.931, |
| "step": 27000 |
| }, |
| { |
| "epoch": 23.298880275624462, |
| "grad_norm": 0.5954151749610901, |
| "learning_rate": 0.00010680792420327303, |
| "loss": 0.30775325775146484, |
| "step": 27050 |
| }, |
| { |
| "epoch": 23.298880275624462, |
| "eval_loss": 0.3689676523208618, |
| "eval_runtime": 17.4826, |
| "eval_samples_per_second": 1087.022, |
| "eval_steps_per_second": 33.977, |
| "step": 27050 |
| }, |
| { |
| "epoch": 23.34194659776055, |
| "grad_norm": 0.49391523003578186, |
| "learning_rate": 0.00010663565891472869, |
| "loss": 0.30306617736816405, |
| "step": 27100 |
| }, |
| { |
| "epoch": 23.34194659776055, |
| "eval_loss": 0.36533570289611816, |
| "eval_runtime": 17.0356, |
| "eval_samples_per_second": 1115.544, |
| "eval_steps_per_second": 34.868, |
| "step": 27100 |
| }, |
| { |
| "epoch": 23.385012919896642, |
| "grad_norm": 0.5512219071388245, |
| "learning_rate": 0.00010646339362618434, |
| "loss": 0.30098161697387693, |
| "step": 27150 |
| }, |
| { |
| "epoch": 23.385012919896642, |
| "eval_loss": 0.3735466003417969, |
| "eval_runtime": 16.0404, |
| "eval_samples_per_second": 1184.757, |
| "eval_steps_per_second": 37.031, |
| "step": 27150 |
| }, |
| { |
| "epoch": 23.42807924203273, |
| "grad_norm": 0.4772174656391144, |
| "learning_rate": 0.00010629112833763997, |
| "loss": 0.2883674812316894, |
| "step": 27200 |
| }, |
| { |
| "epoch": 23.42807924203273, |
| "eval_loss": 0.37100750207901, |
| "eval_runtime": 17.365, |
| "eval_samples_per_second": 1094.384, |
| "eval_steps_per_second": 34.207, |
| "step": 27200 |
| }, |
| { |
| "epoch": 23.47114556416882, |
| "grad_norm": 0.515988290309906, |
| "learning_rate": 0.00010611886304909561, |
| "loss": 0.2984621810913086, |
| "step": 27250 |
| }, |
| { |
| "epoch": 23.47114556416882, |
| "eval_loss": 0.3775508999824524, |
| "eval_runtime": 15.6519, |
| "eval_samples_per_second": 1214.163, |
| "eval_steps_per_second": 37.951, |
| "step": 27250 |
| }, |
| { |
| "epoch": 23.51421188630491, |
| "grad_norm": 0.5282620191574097, |
| "learning_rate": 0.00010594659776055124, |
| "loss": 0.29439170837402345, |
| "step": 27300 |
| }, |
| { |
| "epoch": 23.51421188630491, |
| "eval_loss": 0.3750689923763275, |
| "eval_runtime": 17.1233, |
| "eval_samples_per_second": 1109.83, |
| "eval_steps_per_second": 34.689, |
| "step": 27300 |
| }, |
| { |
| "epoch": 23.557278208440998, |
| "grad_norm": 0.4884462356567383, |
| "learning_rate": 0.0001057743324720069, |
| "loss": 0.29813528060913086, |
| "step": 27350 |
| }, |
| { |
| "epoch": 23.557278208440998, |
| "eval_loss": 0.36741402745246887, |
| "eval_runtime": 17.1586, |
| "eval_samples_per_second": 1107.548, |
| "eval_steps_per_second": 34.618, |
| "step": 27350 |
| }, |
| { |
| "epoch": 23.60034453057709, |
| "grad_norm": 0.6048156023025513, |
| "learning_rate": 0.00010560206718346252, |
| "loss": 0.30056821823120117, |
| "step": 27400 |
| }, |
| { |
| "epoch": 23.60034453057709, |
| "eval_loss": 0.3732120990753174, |
| "eval_runtime": 17.5348, |
| "eval_samples_per_second": 1083.787, |
| "eval_steps_per_second": 33.875, |
| "step": 27400 |
| }, |
| { |
| "epoch": 23.643410852713178, |
| "grad_norm": 0.41740378737449646, |
| "learning_rate": 0.00010542980189491818, |
| "loss": 0.29642223358154296, |
| "step": 27450 |
| }, |
| { |
| "epoch": 23.643410852713178, |
| "eval_loss": 0.37019461393356323, |
| "eval_runtime": 17.5282, |
| "eval_samples_per_second": 1084.198, |
| "eval_steps_per_second": 33.888, |
| "step": 27450 |
| }, |
| { |
| "epoch": 23.68647717484927, |
| "grad_norm": 0.4838183522224426, |
| "learning_rate": 0.00010525753660637384, |
| "loss": 0.3003374481201172, |
| "step": 27500 |
| }, |
| { |
| "epoch": 23.68647717484927, |
| "eval_loss": 0.3708420991897583, |
| "eval_runtime": 17.6626, |
| "eval_samples_per_second": 1075.946, |
| "eval_steps_per_second": 33.63, |
| "step": 27500 |
| }, |
| { |
| "epoch": 23.729543496985357, |
| "grad_norm": 0.6160274147987366, |
| "learning_rate": 0.00010508527131782946, |
| "loss": 0.29868263244628906, |
| "step": 27550 |
| }, |
| { |
| "epoch": 23.729543496985357, |
| "eval_loss": 0.3727380037307739, |
| "eval_runtime": 17.3239, |
| "eval_samples_per_second": 1096.981, |
| "eval_steps_per_second": 34.288, |
| "step": 27550 |
| }, |
| { |
| "epoch": 23.772609819121445, |
| "grad_norm": 0.5261010527610779, |
| "learning_rate": 0.0001049130060292851, |
| "loss": 0.29750579833984375, |
| "step": 27600 |
| }, |
| { |
| "epoch": 23.772609819121445, |
| "eval_loss": 0.36880743503570557, |
| "eval_runtime": 16.7074, |
| "eval_samples_per_second": 1137.462, |
| "eval_steps_per_second": 35.553, |
| "step": 27600 |
| }, |
| { |
| "epoch": 23.815676141257537, |
| "grad_norm": 0.4147060215473175, |
| "learning_rate": 0.00010474074074074074, |
| "loss": 0.2987259864807129, |
| "step": 27650 |
| }, |
| { |
| "epoch": 23.815676141257537, |
| "eval_loss": 0.37036553025245667, |
| "eval_runtime": 17.5461, |
| "eval_samples_per_second": 1083.092, |
| "eval_steps_per_second": 33.854, |
| "step": 27650 |
| }, |
| { |
| "epoch": 23.858742463393625, |
| "grad_norm": 0.6327623128890991, |
| "learning_rate": 0.00010456847545219639, |
| "loss": 0.2982285308837891, |
| "step": 27700 |
| }, |
| { |
| "epoch": 23.858742463393625, |
| "eval_loss": 0.3701915442943573, |
| "eval_runtime": 15.9311, |
| "eval_samples_per_second": 1192.885, |
| "eval_steps_per_second": 37.285, |
| "step": 27700 |
| }, |
| { |
| "epoch": 23.901808785529717, |
| "grad_norm": 0.48875871300697327, |
| "learning_rate": 0.00010439621016365202, |
| "loss": 0.30142845153808595, |
| "step": 27750 |
| }, |
| { |
| "epoch": 23.901808785529717, |
| "eval_loss": 0.3765123188495636, |
| "eval_runtime": 17.3892, |
| "eval_samples_per_second": 1092.862, |
| "eval_steps_per_second": 34.159, |
| "step": 27750 |
| }, |
| { |
| "epoch": 23.944875107665805, |
| "grad_norm": 0.4395269453525543, |
| "learning_rate": 0.00010422394487510768, |
| "loss": 0.30503551483154295, |
| "step": 27800 |
| }, |
| { |
| "epoch": 23.944875107665805, |
| "eval_loss": 0.3670797348022461, |
| "eval_runtime": 17.7719, |
| "eval_samples_per_second": 1069.328, |
| "eval_steps_per_second": 33.424, |
| "step": 27800 |
| }, |
| { |
| "epoch": 23.987941429801896, |
| "grad_norm": 0.561876118183136, |
| "learning_rate": 0.00010405167958656332, |
| "loss": 0.30540702819824217, |
| "step": 27850 |
| }, |
| { |
| "epoch": 23.987941429801896, |
| "eval_loss": 0.3722612261772156, |
| "eval_runtime": 17.3986, |
| "eval_samples_per_second": 1092.27, |
| "eval_steps_per_second": 34.141, |
| "step": 27850 |
| }, |
| { |
| "epoch": 24.031007751937985, |
| "grad_norm": 0.5003405213356018, |
| "learning_rate": 0.00010387941429801895, |
| "loss": 0.2985442352294922, |
| "step": 27900 |
| }, |
| { |
| "epoch": 24.031007751937985, |
| "eval_loss": 0.3710918426513672, |
| "eval_runtime": 17.3771, |
| "eval_samples_per_second": 1093.624, |
| "eval_steps_per_second": 34.183, |
| "step": 27900 |
| }, |
| { |
| "epoch": 24.074074074074073, |
| "grad_norm": 0.4739573299884796, |
| "learning_rate": 0.0001037071490094746, |
| "loss": 0.29766389846801755, |
| "step": 27950 |
| }, |
| { |
| "epoch": 24.074074074074073, |
| "eval_loss": 0.3728407025337219, |
| "eval_runtime": 17.5212, |
| "eval_samples_per_second": 1084.626, |
| "eval_steps_per_second": 33.902, |
| "step": 27950 |
| }, |
| { |
| "epoch": 24.117140396210164, |
| "grad_norm": 0.6355504989624023, |
| "learning_rate": 0.00010353488372093023, |
| "loss": 0.30111677169799805, |
| "step": 28000 |
| }, |
| { |
| "epoch": 24.117140396210164, |
| "eval_loss": 0.37044641375541687, |
| "eval_runtime": 17.4046, |
| "eval_samples_per_second": 1091.897, |
| "eval_steps_per_second": 34.129, |
| "step": 28000 |
| }, |
| { |
| "epoch": 24.160206718346252, |
| "grad_norm": 0.505262017250061, |
| "learning_rate": 0.00010336261843238589, |
| "loss": 0.29236396789550784, |
| "step": 28050 |
| }, |
| { |
| "epoch": 24.160206718346252, |
| "eval_loss": 0.36897462606430054, |
| "eval_runtime": 15.9831, |
| "eval_samples_per_second": 1189.005, |
| "eval_steps_per_second": 37.164, |
| "step": 28050 |
| }, |
| { |
| "epoch": 24.203273040482344, |
| "grad_norm": 0.5167173147201538, |
| "learning_rate": 0.00010319035314384151, |
| "loss": 0.3093917465209961, |
| "step": 28100 |
| }, |
| { |
| "epoch": 24.203273040482344, |
| "eval_loss": 0.36572420597076416, |
| "eval_runtime": 17.5107, |
| "eval_samples_per_second": 1085.276, |
| "eval_steps_per_second": 33.922, |
| "step": 28100 |
| }, |
| { |
| "epoch": 24.246339362618432, |
| "grad_norm": 0.5450465679168701, |
| "learning_rate": 0.00010301808785529716, |
| "loss": 0.29390304565429687, |
| "step": 28150 |
| }, |
| { |
| "epoch": 24.246339362618432, |
| "eval_loss": 0.36511915922164917, |
| "eval_runtime": 16.6281, |
| "eval_samples_per_second": 1142.885, |
| "eval_steps_per_second": 35.723, |
| "step": 28150 |
| }, |
| { |
| "epoch": 24.289405684754524, |
| "grad_norm": 0.5009111166000366, |
| "learning_rate": 0.00010284582256675281, |
| "loss": 0.29303544998168946, |
| "step": 28200 |
| }, |
| { |
| "epoch": 24.289405684754524, |
| "eval_loss": 0.3654605746269226, |
| "eval_runtime": 17.6784, |
| "eval_samples_per_second": 1074.984, |
| "eval_steps_per_second": 33.6, |
| "step": 28200 |
| }, |
| { |
| "epoch": 24.33247200689061, |
| "grad_norm": 0.41044268012046814, |
| "learning_rate": 0.00010267355727820844, |
| "loss": 0.30087459564208985, |
| "step": 28250 |
| }, |
| { |
| "epoch": 24.33247200689061, |
| "eval_loss": 0.3658417761325836, |
| "eval_runtime": 17.1645, |
| "eval_samples_per_second": 1107.169, |
| "eval_steps_per_second": 34.606, |
| "step": 28250 |
| }, |
| { |
| "epoch": 24.3755383290267, |
| "grad_norm": 0.4747028946876526, |
| "learning_rate": 0.0001025012919896641, |
| "loss": 0.28880956649780276, |
| "step": 28300 |
| }, |
| { |
| "epoch": 24.3755383290267, |
| "eval_loss": 0.37379440665245056, |
| "eval_runtime": 17.5312, |
| "eval_samples_per_second": 1084.01, |
| "eval_steps_per_second": 33.882, |
| "step": 28300 |
| }, |
| { |
| "epoch": 24.41860465116279, |
| "grad_norm": 0.5528591275215149, |
| "learning_rate": 0.00010232902670111972, |
| "loss": 0.30124286651611326, |
| "step": 28350 |
| }, |
| { |
| "epoch": 24.41860465116279, |
| "eval_loss": 0.3714071214199066, |
| "eval_runtime": 17.5251, |
| "eval_samples_per_second": 1084.386, |
| "eval_steps_per_second": 33.894, |
| "step": 28350 |
| }, |
| { |
| "epoch": 24.46167097329888, |
| "grad_norm": 0.4998020529747009, |
| "learning_rate": 0.00010215676141257538, |
| "loss": 0.29641380310058596, |
| "step": 28400 |
| }, |
| { |
| "epoch": 24.46167097329888, |
| "eval_loss": 0.3669067323207855, |
| "eval_runtime": 17.6192, |
| "eval_samples_per_second": 1078.599, |
| "eval_steps_per_second": 33.713, |
| "step": 28400 |
| }, |
| { |
| "epoch": 24.50473729543497, |
| "grad_norm": 0.4717235267162323, |
| "learning_rate": 0.00010198449612403101, |
| "loss": 0.29951982498168944, |
| "step": 28450 |
| }, |
| { |
| "epoch": 24.50473729543497, |
| "eval_loss": 0.3649687170982361, |
| "eval_runtime": 17.4214, |
| "eval_samples_per_second": 1090.842, |
| "eval_steps_per_second": 34.096, |
| "step": 28450 |
| }, |
| { |
| "epoch": 24.54780361757106, |
| "grad_norm": 0.5200428366661072, |
| "learning_rate": 0.00010181223083548665, |
| "loss": 0.30025199890136717, |
| "step": 28500 |
| }, |
| { |
| "epoch": 24.54780361757106, |
| "eval_loss": 0.37060484290122986, |
| "eval_runtime": 16.6984, |
| "eval_samples_per_second": 1138.072, |
| "eval_steps_per_second": 35.572, |
| "step": 28500 |
| }, |
| { |
| "epoch": 24.590869939707147, |
| "grad_norm": 0.5694150328636169, |
| "learning_rate": 0.0001016399655469423, |
| "loss": 0.28833511352539065, |
| "step": 28550 |
| }, |
| { |
| "epoch": 24.590869939707147, |
| "eval_loss": 0.36970415711402893, |
| "eval_runtime": 17.2079, |
| "eval_samples_per_second": 1104.374, |
| "eval_steps_per_second": 34.519, |
| "step": 28550 |
| }, |
| { |
| "epoch": 24.63393626184324, |
| "grad_norm": 0.4342597723007202, |
| "learning_rate": 0.00010146770025839794, |
| "loss": 0.29415096282958986, |
| "step": 28600 |
| }, |
| { |
| "epoch": 24.63393626184324, |
| "eval_loss": 0.3628757894039154, |
| "eval_runtime": 16.1679, |
| "eval_samples_per_second": 1175.418, |
| "eval_steps_per_second": 36.74, |
| "step": 28600 |
| }, |
| { |
| "epoch": 24.677002583979327, |
| "grad_norm": 0.44152048230171204, |
| "learning_rate": 0.00010129543496985359, |
| "loss": 0.28828786849975585, |
| "step": 28650 |
| }, |
| { |
| "epoch": 24.677002583979327, |
| "eval_loss": 0.36343470215797424, |
| "eval_runtime": 17.8291, |
| "eval_samples_per_second": 1065.898, |
| "eval_steps_per_second": 33.316, |
| "step": 28650 |
| }, |
| { |
| "epoch": 24.72006890611542, |
| "grad_norm": 0.4448101222515106, |
| "learning_rate": 0.00010112316968130922, |
| "loss": 0.2990582084655762, |
| "step": 28700 |
| }, |
| { |
| "epoch": 24.72006890611542, |
| "eval_loss": 0.3635788857936859, |
| "eval_runtime": 17.3123, |
| "eval_samples_per_second": 1097.716, |
| "eval_steps_per_second": 34.311, |
| "step": 28700 |
| }, |
| { |
| "epoch": 24.763135228251507, |
| "grad_norm": 0.4918091595172882, |
| "learning_rate": 0.00010095090439276486, |
| "loss": 0.3049300765991211, |
| "step": 28750 |
| }, |
| { |
| "epoch": 24.763135228251507, |
| "eval_loss": 0.3565711975097656, |
| "eval_runtime": 17.3735, |
| "eval_samples_per_second": 1093.85, |
| "eval_steps_per_second": 34.19, |
| "step": 28750 |
| }, |
| { |
| "epoch": 24.8062015503876, |
| "grad_norm": 0.4032810926437378, |
| "learning_rate": 0.00010077863910422049, |
| "loss": 0.2918486022949219, |
| "step": 28800 |
| }, |
| { |
| "epoch": 24.8062015503876, |
| "eval_loss": 0.3619081974029541, |
| "eval_runtime": 17.7009, |
| "eval_samples_per_second": 1073.617, |
| "eval_steps_per_second": 33.558, |
| "step": 28800 |
| }, |
| { |
| "epoch": 24.849267872523686, |
| "grad_norm": 0.4441034495830536, |
| "learning_rate": 0.00010060637381567615, |
| "loss": 0.2930795669555664, |
| "step": 28850 |
| }, |
| { |
| "epoch": 24.849267872523686, |
| "eval_loss": 0.3589094281196594, |
| "eval_runtime": 17.3399, |
| "eval_samples_per_second": 1095.967, |
| "eval_steps_per_second": 34.256, |
| "step": 28850 |
| }, |
| { |
| "epoch": 24.892334194659774, |
| "grad_norm": 0.47586753964424133, |
| "learning_rate": 0.0001004341085271318, |
| "loss": 0.2950699806213379, |
| "step": 28900 |
| }, |
| { |
| "epoch": 24.892334194659774, |
| "eval_loss": 0.37087494134902954, |
| "eval_runtime": 16.9329, |
| "eval_samples_per_second": 1122.313, |
| "eval_steps_per_second": 35.08, |
| "step": 28900 |
| }, |
| { |
| "epoch": 24.935400516795866, |
| "grad_norm": 0.44512951374053955, |
| "learning_rate": 0.00010026184323858743, |
| "loss": 0.2932775688171387, |
| "step": 28950 |
| }, |
| { |
| "epoch": 24.935400516795866, |
| "eval_loss": 0.3673975169658661, |
| "eval_runtime": 15.7482, |
| "eval_samples_per_second": 1206.738, |
| "eval_steps_per_second": 37.718, |
| "step": 28950 |
| }, |
| { |
| "epoch": 24.978466838931954, |
| "grad_norm": 0.46907806396484375, |
| "learning_rate": 0.00010008957795004307, |
| "loss": 0.29798852920532226, |
| "step": 29000 |
| }, |
| { |
| "epoch": 24.978466838931954, |
| "eval_loss": 0.3643653094768524, |
| "eval_runtime": 17.5833, |
| "eval_samples_per_second": 1080.8, |
| "eval_steps_per_second": 33.782, |
| "step": 29000 |
| }, |
| { |
| "epoch": 25.021533161068046, |
| "grad_norm": 0.47138622403144836, |
| "learning_rate": 9.991731266149871e-05, |
| "loss": 0.2976431083679199, |
| "step": 29050 |
| }, |
| { |
| "epoch": 25.021533161068046, |
| "eval_loss": 0.363147497177124, |
| "eval_runtime": 16.2005, |
| "eval_samples_per_second": 1173.052, |
| "eval_steps_per_second": 36.666, |
| "step": 29050 |
| }, |
| { |
| "epoch": 25.064599483204134, |
| "grad_norm": 0.4845311939716339, |
| "learning_rate": 9.974504737295436e-05, |
| "loss": 0.30047725677490233, |
| "step": 29100 |
| }, |
| { |
| "epoch": 25.064599483204134, |
| "eval_loss": 0.3621458411216736, |
| "eval_runtime": 17.7041, |
| "eval_samples_per_second": 1073.426, |
| "eval_steps_per_second": 33.552, |
| "step": 29100 |
| }, |
| { |
| "epoch": 25.107665805340226, |
| "grad_norm": 0.3969680368900299, |
| "learning_rate": 9.957278208441e-05, |
| "loss": 0.29270526885986325, |
| "step": 29150 |
| }, |
| { |
| "epoch": 25.107665805340226, |
| "eval_loss": 0.36505424976348877, |
| "eval_runtime": 17.6953, |
| "eval_samples_per_second": 1073.958, |
| "eval_steps_per_second": 33.568, |
| "step": 29150 |
| }, |
| { |
| "epoch": 25.150732127476314, |
| "grad_norm": 0.4725561738014221, |
| "learning_rate": 9.940051679586564e-05, |
| "loss": 0.2929987907409668, |
| "step": 29200 |
| }, |
| { |
| "epoch": 25.150732127476314, |
| "eval_loss": 0.35975778102874756, |
| "eval_runtime": 18.2441, |
| "eval_samples_per_second": 1041.653, |
| "eval_steps_per_second": 32.559, |
| "step": 29200 |
| }, |
| { |
| "epoch": 25.1937984496124, |
| "grad_norm": 0.44618576765060425, |
| "learning_rate": 9.922825150732128e-05, |
| "loss": 0.29194810867309573, |
| "step": 29250 |
| }, |
| { |
| "epoch": 25.1937984496124, |
| "eval_loss": 0.3662404417991638, |
| "eval_runtime": 19.5998, |
| "eval_samples_per_second": 969.604, |
| "eval_steps_per_second": 30.307, |
| "step": 29250 |
| }, |
| { |
| "epoch": 25.236864771748493, |
| "grad_norm": 0.5035511255264282, |
| "learning_rate": 9.905598621877692e-05, |
| "loss": 0.2904472351074219, |
| "step": 29300 |
| }, |
| { |
| "epoch": 25.236864771748493, |
| "eval_loss": 0.37068793177604675, |
| "eval_runtime": 19.1675, |
| "eval_samples_per_second": 991.468, |
| "eval_steps_per_second": 30.99, |
| "step": 29300 |
| }, |
| { |
| "epoch": 25.27993109388458, |
| "grad_norm": 0.49068546295166016, |
| "learning_rate": 9.888372093023255e-05, |
| "loss": 0.2910709762573242, |
| "step": 29350 |
| }, |
| { |
| "epoch": 25.27993109388458, |
| "eval_loss": 0.3615714907646179, |
| "eval_runtime": 17.9591, |
| "eval_samples_per_second": 1058.184, |
| "eval_steps_per_second": 33.075, |
| "step": 29350 |
| }, |
| { |
| "epoch": 25.322997416020673, |
| "grad_norm": 0.4490571916103363, |
| "learning_rate": 9.87114556416882e-05, |
| "loss": 0.2928461456298828, |
| "step": 29400 |
| }, |
| { |
| "epoch": 25.322997416020673, |
| "eval_loss": 0.366520494222641, |
| "eval_runtime": 16.733, |
| "eval_samples_per_second": 1135.722, |
| "eval_steps_per_second": 35.499, |
| "step": 29400 |
| }, |
| { |
| "epoch": 25.36606373815676, |
| "grad_norm": 0.45667514204978943, |
| "learning_rate": 9.853919035314385e-05, |
| "loss": 0.2925313568115234, |
| "step": 29450 |
| }, |
| { |
| "epoch": 25.36606373815676, |
| "eval_loss": 0.36547213792800903, |
| "eval_runtime": 17.4716, |
| "eval_samples_per_second": 1087.707, |
| "eval_steps_per_second": 33.998, |
| "step": 29450 |
| }, |
| { |
| "epoch": 25.409130060292853, |
| "grad_norm": 0.4668999910354614, |
| "learning_rate": 9.836692506459949e-05, |
| "loss": 0.2990184593200684, |
| "step": 29500 |
| }, |
| { |
| "epoch": 25.409130060292853, |
| "eval_loss": 0.3627609610557556, |
| "eval_runtime": 16.4897, |
| "eval_samples_per_second": 1152.474, |
| "eval_steps_per_second": 36.022, |
| "step": 29500 |
| }, |
| { |
| "epoch": 25.45219638242894, |
| "grad_norm": 0.5063576698303223, |
| "learning_rate": 9.819465977605514e-05, |
| "loss": 0.28932918548583986, |
| "step": 29550 |
| }, |
| { |
| "epoch": 25.45219638242894, |
| "eval_loss": 0.36449888348579407, |
| "eval_runtime": 17.2565, |
| "eval_samples_per_second": 1101.265, |
| "eval_steps_per_second": 34.422, |
| "step": 29550 |
| }, |
| { |
| "epoch": 25.49526270456503, |
| "grad_norm": 0.5766741037368774, |
| "learning_rate": 9.802239448751078e-05, |
| "loss": 0.2939249801635742, |
| "step": 29600 |
| }, |
| { |
| "epoch": 25.49526270456503, |
| "eval_loss": 0.3627641499042511, |
| "eval_runtime": 17.7647, |
| "eval_samples_per_second": 1069.762, |
| "eval_steps_per_second": 33.437, |
| "step": 29600 |
| }, |
| { |
| "epoch": 25.53832902670112, |
| "grad_norm": 0.46216633915901184, |
| "learning_rate": 9.78501291989664e-05, |
| "loss": 0.2854126739501953, |
| "step": 29650 |
| }, |
| { |
| "epoch": 25.53832902670112, |
| "eval_loss": 0.3621639013290405, |
| "eval_runtime": 18.3339, |
| "eval_samples_per_second": 1036.55, |
| "eval_steps_per_second": 32.399, |
| "step": 29650 |
| }, |
| { |
| "epoch": 25.58139534883721, |
| "grad_norm": 0.4835483729839325, |
| "learning_rate": 9.767786391042205e-05, |
| "loss": 0.2938497352600098, |
| "step": 29700 |
| }, |
| { |
| "epoch": 25.58139534883721, |
| "eval_loss": 0.3679318130016327, |
| "eval_runtime": 18.0955, |
| "eval_samples_per_second": 1050.208, |
| "eval_steps_per_second": 32.826, |
| "step": 29700 |
| }, |
| { |
| "epoch": 25.6244616709733, |
| "grad_norm": 0.45520979166030884, |
| "learning_rate": 9.750559862187769e-05, |
| "loss": 0.2909726333618164, |
| "step": 29750 |
| }, |
| { |
| "epoch": 25.6244616709733, |
| "eval_loss": 0.3689843416213989, |
| "eval_runtime": 17.8255, |
| "eval_samples_per_second": 1066.111, |
| "eval_steps_per_second": 33.323, |
| "step": 29750 |
| }, |
| { |
| "epoch": 25.66752799310939, |
| "grad_norm": 0.4701136648654938, |
| "learning_rate": 9.733333333333335e-05, |
| "loss": 0.2963640785217285, |
| "step": 29800 |
| }, |
| { |
| "epoch": 25.66752799310939, |
| "eval_loss": 0.37323909997940063, |
| "eval_runtime": 17.5489, |
| "eval_samples_per_second": 1082.914, |
| "eval_steps_per_second": 33.848, |
| "step": 29800 |
| }, |
| { |
| "epoch": 25.710594315245476, |
| "grad_norm": 0.5356958508491516, |
| "learning_rate": 9.716106804478899e-05, |
| "loss": 0.2972037124633789, |
| "step": 29850 |
| }, |
| { |
| "epoch": 25.710594315245476, |
| "eval_loss": 0.3621019124984741, |
| "eval_runtime": 16.7547, |
| "eval_samples_per_second": 1134.25, |
| "eval_steps_per_second": 35.453, |
| "step": 29850 |
| }, |
| { |
| "epoch": 25.753660637381568, |
| "grad_norm": 0.4646724760532379, |
| "learning_rate": 9.698880275624463e-05, |
| "loss": 0.29597414016723633, |
| "step": 29900 |
| }, |
| { |
| "epoch": 25.753660637381568, |
| "eval_loss": 0.37024474143981934, |
| "eval_runtime": 17.4607, |
| "eval_samples_per_second": 1088.386, |
| "eval_steps_per_second": 34.019, |
| "step": 29900 |
| }, |
| { |
| "epoch": 25.796726959517656, |
| "grad_norm": 0.5215739607810974, |
| "learning_rate": 9.681653746770026e-05, |
| "loss": 0.2891251754760742, |
| "step": 29950 |
| }, |
| { |
| "epoch": 25.796726959517656, |
| "eval_loss": 0.364225834608078, |
| "eval_runtime": 16.6391, |
| "eval_samples_per_second": 1142.126, |
| "eval_steps_per_second": 35.699, |
| "step": 29950 |
| }, |
| { |
| "epoch": 25.839793281653748, |
| "grad_norm": 0.5005343556404114, |
| "learning_rate": 9.66442721791559e-05, |
| "loss": 0.2926528167724609, |
| "step": 30000 |
| }, |
| { |
| "epoch": 25.839793281653748, |
| "eval_loss": 0.36475899815559387, |
| "eval_runtime": 17.5158, |
| "eval_samples_per_second": 1084.966, |
| "eval_steps_per_second": 33.912, |
| "step": 30000 |
| }, |
| { |
| "epoch": 25.882859603789836, |
| "grad_norm": 0.47080203890800476, |
| "learning_rate": 9.647200689061154e-05, |
| "loss": 0.2989816093444824, |
| "step": 30050 |
| }, |
| { |
| "epoch": 25.882859603789836, |
| "eval_loss": 0.360416978597641, |
| "eval_runtime": 17.3856, |
| "eval_samples_per_second": 1093.087, |
| "eval_steps_per_second": 34.166, |
| "step": 30050 |
| }, |
| { |
| "epoch": 25.925925925925927, |
| "grad_norm": 0.4474875330924988, |
| "learning_rate": 9.629974160206718e-05, |
| "loss": 0.292766227722168, |
| "step": 30100 |
| }, |
| { |
| "epoch": 25.925925925925927, |
| "eval_loss": 0.36163151264190674, |
| "eval_runtime": 17.7711, |
| "eval_samples_per_second": 1069.378, |
| "eval_steps_per_second": 33.425, |
| "step": 30100 |
| }, |
| { |
| "epoch": 25.968992248062015, |
| "grad_norm": 0.5251291990280151, |
| "learning_rate": 9.612747631352284e-05, |
| "loss": 0.2900256729125977, |
| "step": 30150 |
| }, |
| { |
| "epoch": 25.968992248062015, |
| "eval_loss": 0.3662976026535034, |
| "eval_runtime": 17.5637, |
| "eval_samples_per_second": 1082.002, |
| "eval_steps_per_second": 33.82, |
| "step": 30150 |
| }, |
| { |
| "epoch": 26.012058570198104, |
| "grad_norm": 0.47462013363838196, |
| "learning_rate": 9.595521102497847e-05, |
| "loss": 0.2990392303466797, |
| "step": 30200 |
| }, |
| { |
| "epoch": 26.012058570198104, |
| "eval_loss": 0.36906498670578003, |
| "eval_runtime": 17.2244, |
| "eval_samples_per_second": 1103.322, |
| "eval_steps_per_second": 34.486, |
| "step": 30200 |
| }, |
| { |
| "epoch": 26.055124892334195, |
| "grad_norm": 0.5367721319198608, |
| "learning_rate": 9.578294573643411e-05, |
| "loss": 0.2976276969909668, |
| "step": 30250 |
| }, |
| { |
| "epoch": 26.055124892334195, |
| "eval_loss": 0.3585287034511566, |
| "eval_runtime": 17.0711, |
| "eval_samples_per_second": 1113.229, |
| "eval_steps_per_second": 34.796, |
| "step": 30250 |
| }, |
| { |
| "epoch": 26.098191214470283, |
| "grad_norm": 0.42540860176086426, |
| "learning_rate": 9.561068044788975e-05, |
| "loss": 0.2866293716430664, |
| "step": 30300 |
| }, |
| { |
| "epoch": 26.098191214470283, |
| "eval_loss": 0.3634384870529175, |
| "eval_runtime": 17.4952, |
| "eval_samples_per_second": 1086.241, |
| "eval_steps_per_second": 33.952, |
| "step": 30300 |
| }, |
| { |
| "epoch": 26.141257536606375, |
| "grad_norm": 0.38212257623672485, |
| "learning_rate": 9.54384151593454e-05, |
| "loss": 0.29723587036132815, |
| "step": 30350 |
| }, |
| { |
| "epoch": 26.141257536606375, |
| "eval_loss": 0.3666623830795288, |
| "eval_runtime": 17.3805, |
| "eval_samples_per_second": 1093.407, |
| "eval_steps_per_second": 34.176, |
| "step": 30350 |
| }, |
| { |
| "epoch": 26.184323858742463, |
| "grad_norm": 0.4894464910030365, |
| "learning_rate": 9.526614987080104e-05, |
| "loss": 0.2930299377441406, |
| "step": 30400 |
| }, |
| { |
| "epoch": 26.184323858742463, |
| "eval_loss": 0.3624574840068817, |
| "eval_runtime": 16.5236, |
| "eval_samples_per_second": 1150.112, |
| "eval_steps_per_second": 35.949, |
| "step": 30400 |
| }, |
| { |
| "epoch": 26.227390180878555, |
| "grad_norm": 0.441383421421051, |
| "learning_rate": 9.509388458225668e-05, |
| "loss": 0.28941658020019534, |
| "step": 30450 |
| }, |
| { |
| "epoch": 26.227390180878555, |
| "eval_loss": 0.3617941439151764, |
| "eval_runtime": 17.8969, |
| "eval_samples_per_second": 1061.86, |
| "eval_steps_per_second": 33.19, |
| "step": 30450 |
| }, |
| { |
| "epoch": 26.270456503014643, |
| "grad_norm": 0.5019906163215637, |
| "learning_rate": 9.492161929371232e-05, |
| "loss": 0.29477603912353517, |
| "step": 30500 |
| }, |
| { |
| "epoch": 26.270456503014643, |
| "eval_loss": 0.3631625473499298, |
| "eval_runtime": 18.874, |
| "eval_samples_per_second": 1006.886, |
| "eval_steps_per_second": 31.472, |
| "step": 30500 |
| }, |
| { |
| "epoch": 26.31352282515073, |
| "grad_norm": 0.4819585084915161, |
| "learning_rate": 9.474935400516796e-05, |
| "loss": 0.29420578002929687, |
| "step": 30550 |
| }, |
| { |
| "epoch": 26.31352282515073, |
| "eval_loss": 0.37077534198760986, |
| "eval_runtime": 17.7539, |
| "eval_samples_per_second": 1070.413, |
| "eval_steps_per_second": 33.457, |
| "step": 30550 |
| }, |
| { |
| "epoch": 26.356589147286822, |
| "grad_norm": 0.5694970488548279, |
| "learning_rate": 9.45770887166236e-05, |
| "loss": 0.2949700355529785, |
| "step": 30600 |
| }, |
| { |
| "epoch": 26.356589147286822, |
| "eval_loss": 0.36437633633613586, |
| "eval_runtime": 17.703, |
| "eval_samples_per_second": 1073.488, |
| "eval_steps_per_second": 33.554, |
| "step": 30600 |
| }, |
| { |
| "epoch": 26.39965546942291, |
| "grad_norm": 0.5988081693649292, |
| "learning_rate": 9.440482342807925e-05, |
| "loss": 0.2924643135070801, |
| "step": 30650 |
| }, |
| { |
| "epoch": 26.39965546942291, |
| "eval_loss": 0.36314231157302856, |
| "eval_runtime": 17.3896, |
| "eval_samples_per_second": 1092.836, |
| "eval_steps_per_second": 34.158, |
| "step": 30650 |
| }, |
| { |
| "epoch": 26.442721791559002, |
| "grad_norm": 0.469237744808197, |
| "learning_rate": 9.423255813953489e-05, |
| "loss": 0.29159337997436524, |
| "step": 30700 |
| }, |
| { |
| "epoch": 26.442721791559002, |
| "eval_loss": 0.3586832284927368, |
| "eval_runtime": 17.0922, |
| "eval_samples_per_second": 1111.855, |
| "eval_steps_per_second": 34.753, |
| "step": 30700 |
| }, |
| { |
| "epoch": 26.48578811369509, |
| "grad_norm": 0.42142677307128906, |
| "learning_rate": 9.406029285099053e-05, |
| "loss": 0.2882324409484863, |
| "step": 30750 |
| }, |
| { |
| "epoch": 26.48578811369509, |
| "eval_loss": 0.36477532982826233, |
| "eval_runtime": 16.5523, |
| "eval_samples_per_second": 1148.122, |
| "eval_steps_per_second": 35.886, |
| "step": 30750 |
| }, |
| { |
| "epoch": 26.528854435831178, |
| "grad_norm": 0.5955842733383179, |
| "learning_rate": 9.388802756244617e-05, |
| "loss": 0.2991225433349609, |
| "step": 30800 |
| }, |
| { |
| "epoch": 26.528854435831178, |
| "eval_loss": 0.36673441529273987, |
| "eval_runtime": 17.4498, |
| "eval_samples_per_second": 1089.068, |
| "eval_steps_per_second": 34.041, |
| "step": 30800 |
| }, |
| { |
| "epoch": 26.57192075796727, |
| "grad_norm": 0.5307620763778687, |
| "learning_rate": 9.37157622739018e-05, |
| "loss": 0.2927092170715332, |
| "step": 30850 |
| }, |
| { |
| "epoch": 26.57192075796727, |
| "eval_loss": 0.3575948476791382, |
| "eval_runtime": 17.1996, |
| "eval_samples_per_second": 1104.91, |
| "eval_steps_per_second": 34.536, |
| "step": 30850 |
| }, |
| { |
| "epoch": 26.614987080103358, |
| "grad_norm": 0.4583006799221039, |
| "learning_rate": 9.354349698535746e-05, |
| "loss": 0.2902518844604492, |
| "step": 30900 |
| }, |
| { |
| "epoch": 26.614987080103358, |
| "eval_loss": 0.3646640181541443, |
| "eval_runtime": 17.8661, |
| "eval_samples_per_second": 1063.688, |
| "eval_steps_per_second": 33.247, |
| "step": 30900 |
| }, |
| { |
| "epoch": 26.65805340223945, |
| "grad_norm": 0.49315473437309265, |
| "learning_rate": 9.33712316968131e-05, |
| "loss": 0.2924757385253906, |
| "step": 30950 |
| }, |
| { |
| "epoch": 26.65805340223945, |
| "eval_loss": 0.3622484803199768, |
| "eval_runtime": 17.2611, |
| "eval_samples_per_second": 1100.975, |
| "eval_steps_per_second": 34.413, |
| "step": 30950 |
| }, |
| { |
| "epoch": 26.701119724375538, |
| "grad_norm": 0.4450501501560211, |
| "learning_rate": 9.319896640826874e-05, |
| "loss": 0.28469310760498046, |
| "step": 31000 |
| }, |
| { |
| "epoch": 26.701119724375538, |
| "eval_loss": 0.36087697744369507, |
| "eval_runtime": 17.6979, |
| "eval_samples_per_second": 1073.802, |
| "eval_steps_per_second": 33.563, |
| "step": 31000 |
| }, |
| { |
| "epoch": 26.74418604651163, |
| "grad_norm": 0.5609347224235535, |
| "learning_rate": 9.302670111972438e-05, |
| "loss": 0.29320995330810545, |
| "step": 31050 |
| }, |
| { |
| "epoch": 26.74418604651163, |
| "eval_loss": 0.3648212254047394, |
| "eval_runtime": 17.5686, |
| "eval_samples_per_second": 1081.7, |
| "eval_steps_per_second": 33.81, |
| "step": 31050 |
| }, |
| { |
| "epoch": 26.787252368647717, |
| "grad_norm": 0.4875686466693878, |
| "learning_rate": 9.285443583118003e-05, |
| "loss": 0.2869133186340332, |
| "step": 31100 |
| }, |
| { |
| "epoch": 26.787252368647717, |
| "eval_loss": 0.3665845990180969, |
| "eval_runtime": 17.5663, |
| "eval_samples_per_second": 1081.847, |
| "eval_steps_per_second": 33.815, |
| "step": 31100 |
| }, |
| { |
| "epoch": 26.830318690783805, |
| "grad_norm": 0.5157934427261353, |
| "learning_rate": 9.268217054263566e-05, |
| "loss": 0.290893497467041, |
| "step": 31150 |
| }, |
| { |
| "epoch": 26.830318690783805, |
| "eval_loss": 0.36622917652130127, |
| "eval_runtime": 17.5382, |
| "eval_samples_per_second": 1083.575, |
| "eval_steps_per_second": 33.869, |
| "step": 31150 |
| }, |
| { |
| "epoch": 26.873385012919897, |
| "grad_norm": 0.5066894292831421, |
| "learning_rate": 9.25099052540913e-05, |
| "loss": 0.29268325805664064, |
| "step": 31200 |
| }, |
| { |
| "epoch": 26.873385012919897, |
| "eval_loss": 0.36323854327201843, |
| "eval_runtime": 17.5281, |
| "eval_samples_per_second": 1084.203, |
| "eval_steps_per_second": 33.888, |
| "step": 31200 |
| }, |
| { |
| "epoch": 26.916451335055985, |
| "grad_norm": 0.40066656470298767, |
| "learning_rate": 9.233763996554695e-05, |
| "loss": 0.29453598022460936, |
| "step": 31250 |
| }, |
| { |
| "epoch": 26.916451335055985, |
| "eval_loss": 0.3665629029273987, |
| "eval_runtime": 17.8401, |
| "eval_samples_per_second": 1065.238, |
| "eval_steps_per_second": 33.296, |
| "step": 31250 |
| }, |
| { |
| "epoch": 26.959517657192077, |
| "grad_norm": 0.3987741470336914, |
| "learning_rate": 9.21653746770026e-05, |
| "loss": 0.2970641326904297, |
| "step": 31300 |
| }, |
| { |
| "epoch": 26.959517657192077, |
| "eval_loss": 0.3582976758480072, |
| "eval_runtime": 17.6993, |
| "eval_samples_per_second": 1073.712, |
| "eval_steps_per_second": 33.561, |
| "step": 31300 |
| }, |
| { |
| "epoch": 27.002583979328165, |
| "grad_norm": 0.4397093653678894, |
| "learning_rate": 9.199310938845824e-05, |
| "loss": 0.2915744400024414, |
| "step": 31350 |
| }, |
| { |
| "epoch": 27.002583979328165, |
| "eval_loss": 0.3566512167453766, |
| "eval_runtime": 17.5664, |
| "eval_samples_per_second": 1081.837, |
| "eval_steps_per_second": 33.815, |
| "step": 31350 |
| }, |
| { |
| "epoch": 27.045650301464256, |
| "grad_norm": 0.48084136843681335, |
| "learning_rate": 9.182084409991387e-05, |
| "loss": 0.2919019317626953, |
| "step": 31400 |
| }, |
| { |
| "epoch": 27.045650301464256, |
| "eval_loss": 0.3649647533893585, |
| "eval_runtime": 17.2612, |
| "eval_samples_per_second": 1100.965, |
| "eval_steps_per_second": 34.412, |
| "step": 31400 |
| }, |
| { |
| "epoch": 27.088716623600344, |
| "grad_norm": 0.4913296699523926, |
| "learning_rate": 9.164857881136951e-05, |
| "loss": 0.28895715713500975, |
| "step": 31450 |
| }, |
| { |
| "epoch": 27.088716623600344, |
| "eval_loss": 0.35735324025154114, |
| "eval_runtime": 17.3916, |
| "eval_samples_per_second": 1092.713, |
| "eval_steps_per_second": 34.154, |
| "step": 31450 |
| }, |
| { |
| "epoch": 27.131782945736433, |
| "grad_norm": 0.4659770429134369, |
| "learning_rate": 9.147631352282515e-05, |
| "loss": 0.2834562110900879, |
| "step": 31500 |
| }, |
| { |
| "epoch": 27.131782945736433, |
| "eval_loss": 0.356306254863739, |
| "eval_runtime": 17.5932, |
| "eval_samples_per_second": 1080.187, |
| "eval_steps_per_second": 33.763, |
| "step": 31500 |
| }, |
| { |
| "epoch": 27.174849267872524, |
| "grad_norm": 0.4521220326423645, |
| "learning_rate": 9.130404823428079e-05, |
| "loss": 0.28911502838134767, |
| "step": 31550 |
| }, |
| { |
| "epoch": 27.174849267872524, |
| "eval_loss": 0.3661551773548126, |
| "eval_runtime": 16.6181, |
| "eval_samples_per_second": 1143.575, |
| "eval_steps_per_second": 35.744, |
| "step": 31550 |
| }, |
| { |
| "epoch": 27.217915590008612, |
| "grad_norm": 0.45324376225471497, |
| "learning_rate": 9.113178294573645e-05, |
| "loss": 0.2861846733093262, |
| "step": 31600 |
| }, |
| { |
| "epoch": 27.217915590008612, |
| "eval_loss": 0.36247044801712036, |
| "eval_runtime": 17.7398, |
| "eval_samples_per_second": 1071.261, |
| "eval_steps_per_second": 33.484, |
| "step": 31600 |
| }, |
| { |
| "epoch": 27.260981912144704, |
| "grad_norm": 0.4110028147697449, |
| "learning_rate": 9.095951765719209e-05, |
| "loss": 0.29361557006835937, |
| "step": 31650 |
| }, |
| { |
| "epoch": 27.260981912144704, |
| "eval_loss": 0.3627540171146393, |
| "eval_runtime": 17.2415, |
| "eval_samples_per_second": 1102.226, |
| "eval_steps_per_second": 34.452, |
| "step": 31650 |
| }, |
| { |
| "epoch": 27.304048234280792, |
| "grad_norm": 0.5808663964271545, |
| "learning_rate": 9.078725236864772e-05, |
| "loss": 0.2974928092956543, |
| "step": 31700 |
| }, |
| { |
| "epoch": 27.304048234280792, |
| "eval_loss": 0.3498687148094177, |
| "eval_runtime": 17.589, |
| "eval_samples_per_second": 1080.45, |
| "eval_steps_per_second": 33.771, |
| "step": 31700 |
| }, |
| { |
| "epoch": 27.347114556416884, |
| "grad_norm": 0.4611378014087677, |
| "learning_rate": 9.061498708010336e-05, |
| "loss": 0.28919679641723633, |
| "step": 31750 |
| }, |
| { |
| "epoch": 27.347114556416884, |
| "eval_loss": 0.3646209239959717, |
| "eval_runtime": 17.4877, |
| "eval_samples_per_second": 1086.705, |
| "eval_steps_per_second": 33.967, |
| "step": 31750 |
| }, |
| { |
| "epoch": 27.39018087855297, |
| "grad_norm": 0.5321835875511169, |
| "learning_rate": 9.0442721791559e-05, |
| "loss": 0.2927637481689453, |
| "step": 31800 |
| }, |
| { |
| "epoch": 27.39018087855297, |
| "eval_loss": 0.36543115973472595, |
| "eval_runtime": 17.3861, |
| "eval_samples_per_second": 1093.058, |
| "eval_steps_per_second": 34.165, |
| "step": 31800 |
| }, |
| { |
| "epoch": 27.43324720068906, |
| "grad_norm": 0.410918653011322, |
| "learning_rate": 9.027045650301464e-05, |
| "loss": 0.28457128524780273, |
| "step": 31850 |
| }, |
| { |
| "epoch": 27.43324720068906, |
| "eval_loss": 0.35619232058525085, |
| "eval_runtime": 17.6293, |
| "eval_samples_per_second": 1077.976, |
| "eval_steps_per_second": 33.694, |
| "step": 31850 |
| }, |
| { |
| "epoch": 27.47631352282515, |
| "grad_norm": 0.45185136795043945, |
| "learning_rate": 9.009819121447029e-05, |
| "loss": 0.29344728469848635, |
| "step": 31900 |
| }, |
| { |
| "epoch": 27.47631352282515, |
| "eval_loss": 0.35773923993110657, |
| "eval_runtime": 16.5833, |
| "eval_samples_per_second": 1145.971, |
| "eval_steps_per_second": 35.819, |
| "step": 31900 |
| }, |
| { |
| "epoch": 27.51937984496124, |
| "grad_norm": 0.46231284737586975, |
| "learning_rate": 8.992592592592594e-05, |
| "loss": 0.28259624481201173, |
| "step": 31950 |
| }, |
| { |
| "epoch": 27.51937984496124, |
| "eval_loss": 0.36896592378616333, |
| "eval_runtime": 17.7625, |
| "eval_samples_per_second": 1069.895, |
| "eval_steps_per_second": 33.441, |
| "step": 31950 |
| }, |
| { |
| "epoch": 27.56244616709733, |
| "grad_norm": 0.6303794384002686, |
| "learning_rate": 8.975366063738157e-05, |
| "loss": 0.28135942459106444, |
| "step": 32000 |
| }, |
| { |
| "epoch": 27.56244616709733, |
| "eval_loss": 0.36224251985549927, |
| "eval_runtime": 17.1832, |
| "eval_samples_per_second": 1105.963, |
| "eval_steps_per_second": 34.569, |
| "step": 32000 |
| }, |
| { |
| "epoch": 27.60551248923342, |
| "grad_norm": 0.5213823318481445, |
| "learning_rate": 8.958139534883721e-05, |
| "loss": 0.2868742370605469, |
| "step": 32050 |
| }, |
| { |
| "epoch": 27.60551248923342, |
| "eval_loss": 0.35869160294532776, |
| "eval_runtime": 17.6586, |
| "eval_samples_per_second": 1076.189, |
| "eval_steps_per_second": 33.638, |
| "step": 32050 |
| }, |
| { |
| "epoch": 27.64857881136951, |
| "grad_norm": 0.4423479735851288, |
| "learning_rate": 8.940913006029286e-05, |
| "loss": 0.28639448165893555, |
| "step": 32100 |
| }, |
| { |
| "epoch": 27.64857881136951, |
| "eval_loss": 0.3628954291343689, |
| "eval_runtime": 17.719, |
| "eval_samples_per_second": 1072.523, |
| "eval_steps_per_second": 33.523, |
| "step": 32100 |
| }, |
| { |
| "epoch": 27.6916451335056, |
| "grad_norm": 0.46652039885520935, |
| "learning_rate": 8.92368647717485e-05, |
| "loss": 0.2934608459472656, |
| "step": 32150 |
| }, |
| { |
| "epoch": 27.6916451335056, |
| "eval_loss": 0.36239707469940186, |
| "eval_runtime": 17.3683, |
| "eval_samples_per_second": 1094.179, |
| "eval_steps_per_second": 34.2, |
| "step": 32150 |
| }, |
| { |
| "epoch": 27.734711455641687, |
| "grad_norm": 0.4929044246673584, |
| "learning_rate": 8.906459948320414e-05, |
| "loss": 0.28652910232543943, |
| "step": 32200 |
| }, |
| { |
| "epoch": 27.734711455641687, |
| "eval_loss": 0.36099740862846375, |
| "eval_runtime": 17.6482, |
| "eval_samples_per_second": 1076.823, |
| "eval_steps_per_second": 33.658, |
| "step": 32200 |
| }, |
| { |
| "epoch": 27.77777777777778, |
| "grad_norm": 0.49651429057121277, |
| "learning_rate": 8.889233419465978e-05, |
| "loss": 0.2868942070007324, |
| "step": 32250 |
| }, |
| { |
| "epoch": 27.77777777777778, |
| "eval_loss": 0.3619493544101715, |
| "eval_runtime": 16.6205, |
| "eval_samples_per_second": 1143.405, |
| "eval_steps_per_second": 35.739, |
| "step": 32250 |
| }, |
| { |
| "epoch": 27.820844099913867, |
| "grad_norm": 0.4310859739780426, |
| "learning_rate": 8.872006890611541e-05, |
| "loss": 0.2868019104003906, |
| "step": 32300 |
| }, |
| { |
| "epoch": 27.820844099913867, |
| "eval_loss": 0.35609355568885803, |
| "eval_runtime": 17.7785, |
| "eval_samples_per_second": 1068.934, |
| "eval_steps_per_second": 33.411, |
| "step": 32300 |
| }, |
| { |
| "epoch": 27.86391042204996, |
| "grad_norm": 1.0042189359664917, |
| "learning_rate": 8.854780361757107e-05, |
| "loss": 0.2852842903137207, |
| "step": 32350 |
| }, |
| { |
| "epoch": 27.86391042204996, |
| "eval_loss": 0.35966190695762634, |
| "eval_runtime": 17.3214, |
| "eval_samples_per_second": 1097.14, |
| "eval_steps_per_second": 34.293, |
| "step": 32350 |
| }, |
| { |
| "epoch": 27.906976744186046, |
| "grad_norm": 0.6012923121452332, |
| "learning_rate": 8.837553832902671e-05, |
| "loss": 0.28470855712890625, |
| "step": 32400 |
| }, |
| { |
| "epoch": 27.906976744186046, |
| "eval_loss": 0.3527715802192688, |
| "eval_runtime": 17.7082, |
| "eval_samples_per_second": 1073.176, |
| "eval_steps_per_second": 33.544, |
| "step": 32400 |
| }, |
| { |
| "epoch": 27.950043066322134, |
| "grad_norm": 0.4886482357978821, |
| "learning_rate": 8.820327304048235e-05, |
| "loss": 0.2916010093688965, |
| "step": 32450 |
| }, |
| { |
| "epoch": 27.950043066322134, |
| "eval_loss": 0.3638048470020294, |
| "eval_runtime": 17.5694, |
| "eval_samples_per_second": 1081.656, |
| "eval_steps_per_second": 33.809, |
| "step": 32450 |
| }, |
| { |
| "epoch": 27.993109388458226, |
| "grad_norm": 0.5361951589584351, |
| "learning_rate": 8.803100775193799e-05, |
| "loss": 0.2956967735290527, |
| "step": 32500 |
| }, |
| { |
| "epoch": 27.993109388458226, |
| "eval_loss": 0.35803547501564026, |
| "eval_runtime": 17.0215, |
| "eval_samples_per_second": 1116.469, |
| "eval_steps_per_second": 34.897, |
| "step": 32500 |
| }, |
| { |
| "epoch": 28.036175710594314, |
| "grad_norm": 0.4958977997303009, |
| "learning_rate": 8.785874246339363e-05, |
| "loss": 0.28964385986328123, |
| "step": 32550 |
| }, |
| { |
| "epoch": 28.036175710594314, |
| "eval_loss": 0.35593509674072266, |
| "eval_runtime": 17.5511, |
| "eval_samples_per_second": 1082.781, |
| "eval_steps_per_second": 33.844, |
| "step": 32550 |
| }, |
| { |
| "epoch": 28.079242032730406, |
| "grad_norm": 0.4543485939502716, |
| "learning_rate": 8.768647717484926e-05, |
| "loss": 0.28495445251464846, |
| "step": 32600 |
| }, |
| { |
| "epoch": 28.079242032730406, |
| "eval_loss": 0.3700936436653137, |
| "eval_runtime": 17.4743, |
| "eval_samples_per_second": 1087.537, |
| "eval_steps_per_second": 33.993, |
| "step": 32600 |
| }, |
| { |
| "epoch": 28.122308354866494, |
| "grad_norm": 0.5868750810623169, |
| "learning_rate": 8.75142118863049e-05, |
| "loss": 0.28287410736083984, |
| "step": 32650 |
| }, |
| { |
| "epoch": 28.122308354866494, |
| "eval_loss": 0.3608900010585785, |
| "eval_runtime": 17.6108, |
| "eval_samples_per_second": 1079.114, |
| "eval_steps_per_second": 33.729, |
| "step": 32650 |
| }, |
| { |
| "epoch": 28.165374677002585, |
| "grad_norm": 0.521497368812561, |
| "learning_rate": 8.734194659776056e-05, |
| "loss": 0.2834699058532715, |
| "step": 32700 |
| }, |
| { |
| "epoch": 28.165374677002585, |
| "eval_loss": 0.35730814933776855, |
| "eval_runtime": 17.7048, |
| "eval_samples_per_second": 1073.379, |
| "eval_steps_per_second": 33.55, |
| "step": 32700 |
| }, |
| { |
| "epoch": 28.208440999138674, |
| "grad_norm": 0.40085679292678833, |
| "learning_rate": 8.71696813092162e-05, |
| "loss": 0.2893166351318359, |
| "step": 32750 |
| }, |
| { |
| "epoch": 28.208440999138674, |
| "eval_loss": 0.3633263111114502, |
| "eval_runtime": 17.657, |
| "eval_samples_per_second": 1076.287, |
| "eval_steps_per_second": 33.641, |
| "step": 32750 |
| }, |
| { |
| "epoch": 28.25150732127476, |
| "grad_norm": 0.46187394857406616, |
| "learning_rate": 8.699741602067184e-05, |
| "loss": 0.2845230484008789, |
| "step": 32800 |
| }, |
| { |
| "epoch": 28.25150732127476, |
| "eval_loss": 0.3593103289604187, |
| "eval_runtime": 17.6326, |
| "eval_samples_per_second": 1077.777, |
| "eval_steps_per_second": 33.688, |
| "step": 32800 |
| }, |
| { |
| "epoch": 28.294573643410853, |
| "grad_norm": 0.5855560302734375, |
| "learning_rate": 8.682515073212749e-05, |
| "loss": 0.2909526824951172, |
| "step": 32850 |
| }, |
| { |
| "epoch": 28.294573643410853, |
| "eval_loss": 0.3548714220523834, |
| "eval_runtime": 17.1281, |
| "eval_samples_per_second": 1109.521, |
| "eval_steps_per_second": 34.68, |
| "step": 32850 |
| }, |
| { |
| "epoch": 28.33763996554694, |
| "grad_norm": 0.5393654704093933, |
| "learning_rate": 8.665288544358312e-05, |
| "loss": 0.28526065826416014, |
| "step": 32900 |
| }, |
| { |
| "epoch": 28.33763996554694, |
| "eval_loss": 0.35168689489364624, |
| "eval_runtime": 17.5555, |
| "eval_samples_per_second": 1082.512, |
| "eval_steps_per_second": 33.836, |
| "step": 32900 |
| }, |
| { |
| "epoch": 28.380706287683033, |
| "grad_norm": 0.5609577298164368, |
| "learning_rate": 8.648062015503876e-05, |
| "loss": 0.28955772399902346, |
| "step": 32950 |
| }, |
| { |
| "epoch": 28.380706287683033, |
| "eval_loss": 0.35503652691841125, |
| "eval_runtime": 17.7573, |
| "eval_samples_per_second": 1070.209, |
| "eval_steps_per_second": 33.451, |
| "step": 32950 |
| }, |
| { |
| "epoch": 28.42377260981912, |
| "grad_norm": 0.48286551237106323, |
| "learning_rate": 8.63083548664944e-05, |
| "loss": 0.2849011993408203, |
| "step": 33000 |
| }, |
| { |
| "epoch": 28.42377260981912, |
| "eval_loss": 0.3584132790565491, |
| "eval_runtime": 17.458, |
| "eval_samples_per_second": 1088.557, |
| "eval_steps_per_second": 34.025, |
| "step": 33000 |
| }, |
| { |
| "epoch": 28.466838931955213, |
| "grad_norm": 0.4265981614589691, |
| "learning_rate": 8.613608957795005e-05, |
| "loss": 0.2775753974914551, |
| "step": 33050 |
| }, |
| { |
| "epoch": 28.466838931955213, |
| "eval_loss": 0.3611355125904083, |
| "eval_runtime": 17.3854, |
| "eval_samples_per_second": 1093.1, |
| "eval_steps_per_second": 34.167, |
| "step": 33050 |
| }, |
| { |
| "epoch": 28.5099052540913, |
| "grad_norm": 0.5069410800933838, |
| "learning_rate": 8.59638242894057e-05, |
| "loss": 0.28409801483154296, |
| "step": 33100 |
| }, |
| { |
| "epoch": 28.5099052540913, |
| "eval_loss": 0.3576951026916504, |
| "eval_runtime": 17.6066, |
| "eval_samples_per_second": 1079.366, |
| "eval_steps_per_second": 33.737, |
| "step": 33100 |
| }, |
| { |
| "epoch": 28.55297157622739, |
| "grad_norm": 0.4650542438030243, |
| "learning_rate": 8.579155900086133e-05, |
| "loss": 0.2903776741027832, |
| "step": 33150 |
| }, |
| { |
| "epoch": 28.55297157622739, |
| "eval_loss": 0.3601308763027191, |
| "eval_runtime": 17.6624, |
| "eval_samples_per_second": 1075.961, |
| "eval_steps_per_second": 33.631, |
| "step": 33150 |
| }, |
| { |
| "epoch": 28.59603789836348, |
| "grad_norm": 0.44358712434768677, |
| "learning_rate": 8.561929371231697e-05, |
| "loss": 0.2828477096557617, |
| "step": 33200 |
| }, |
| { |
| "epoch": 28.59603789836348, |
| "eval_loss": 0.34798339009284973, |
| "eval_runtime": 16.7677, |
| "eval_samples_per_second": 1133.368, |
| "eval_steps_per_second": 35.425, |
| "step": 33200 |
| }, |
| { |
| "epoch": 28.63910422049957, |
| "grad_norm": 0.4460907280445099, |
| "learning_rate": 8.544702842377261e-05, |
| "loss": 0.2873170280456543, |
| "step": 33250 |
| }, |
| { |
| "epoch": 28.63910422049957, |
| "eval_loss": 0.3584803342819214, |
| "eval_runtime": 16.701, |
| "eval_samples_per_second": 1137.898, |
| "eval_steps_per_second": 35.567, |
| "step": 33250 |
| }, |
| { |
| "epoch": 28.68217054263566, |
| "grad_norm": 0.4862593412399292, |
| "learning_rate": 8.527476313522825e-05, |
| "loss": 0.28872970581054686, |
| "step": 33300 |
| }, |
| { |
| "epoch": 28.68217054263566, |
| "eval_loss": 0.35872894525527954, |
| "eval_runtime": 17.6402, |
| "eval_samples_per_second": 1077.313, |
| "eval_steps_per_second": 33.673, |
| "step": 33300 |
| }, |
| { |
| "epoch": 28.725236864771748, |
| "grad_norm": 0.4718325436115265, |
| "learning_rate": 8.51024978466839e-05, |
| "loss": 0.28618146896362306, |
| "step": 33350 |
| }, |
| { |
| "epoch": 28.725236864771748, |
| "eval_loss": 0.35736215114593506, |
| "eval_runtime": 17.2688, |
| "eval_samples_per_second": 1100.48, |
| "eval_steps_per_second": 34.397, |
| "step": 33350 |
| }, |
| { |
| "epoch": 28.768303186907836, |
| "grad_norm": 0.41244471073150635, |
| "learning_rate": 8.493023255813955e-05, |
| "loss": 0.29159786224365236, |
| "step": 33400 |
| }, |
| { |
| "epoch": 28.768303186907836, |
| "eval_loss": 0.36435920000076294, |
| "eval_runtime": 16.7461, |
| "eval_samples_per_second": 1134.833, |
| "eval_steps_per_second": 35.471, |
| "step": 33400 |
| }, |
| { |
| "epoch": 28.811369509043928, |
| "grad_norm": 0.43774139881134033, |
| "learning_rate": 8.475796726959518e-05, |
| "loss": 0.28725341796875, |
| "step": 33450 |
| }, |
| { |
| "epoch": 28.811369509043928, |
| "eval_loss": 0.3537900745868683, |
| "eval_runtime": 17.1804, |
| "eval_samples_per_second": 1106.147, |
| "eval_steps_per_second": 34.574, |
| "step": 33450 |
| }, |
| { |
| "epoch": 28.854435831180016, |
| "grad_norm": 0.46093523502349854, |
| "learning_rate": 8.458570198105082e-05, |
| "loss": 0.27915733337402343, |
| "step": 33500 |
| }, |
| { |
| "epoch": 28.854435831180016, |
| "eval_loss": 0.35684412717819214, |
| "eval_runtime": 17.1789, |
| "eval_samples_per_second": 1106.242, |
| "eval_steps_per_second": 34.577, |
| "step": 33500 |
| }, |
| { |
| "epoch": 28.897502153316108, |
| "grad_norm": 0.4768196642398834, |
| "learning_rate": 8.441343669250646e-05, |
| "loss": 0.29193500518798826, |
| "step": 33550 |
| }, |
| { |
| "epoch": 28.897502153316108, |
| "eval_loss": 0.3524988293647766, |
| "eval_runtime": 16.8537, |
| "eval_samples_per_second": 1127.586, |
| "eval_steps_per_second": 35.244, |
| "step": 33550 |
| }, |
| { |
| "epoch": 28.940568475452196, |
| "grad_norm": 0.4585319459438324, |
| "learning_rate": 8.42411714039621e-05, |
| "loss": 0.28741561889648437, |
| "step": 33600 |
| }, |
| { |
| "epoch": 28.940568475452196, |
| "eval_loss": 0.35232821106910706, |
| "eval_runtime": 16.5397, |
| "eval_samples_per_second": 1148.995, |
| "eval_steps_per_second": 35.914, |
| "step": 33600 |
| }, |
| { |
| "epoch": 28.983634797588287, |
| "grad_norm": 0.41558921337127686, |
| "learning_rate": 8.406890611541775e-05, |
| "loss": 0.2895075035095215, |
| "step": 33650 |
| }, |
| { |
| "epoch": 28.983634797588287, |
| "eval_loss": 0.3447812497615814, |
| "eval_runtime": 17.1759, |
| "eval_samples_per_second": 1106.432, |
| "eval_steps_per_second": 34.583, |
| "step": 33650 |
| }, |
| { |
| "epoch": 29.026701119724375, |
| "grad_norm": 0.4974426031112671, |
| "learning_rate": 8.389664082687339e-05, |
| "loss": 0.286500244140625, |
| "step": 33700 |
| }, |
| { |
| "epoch": 29.026701119724375, |
| "eval_loss": 0.35480424761772156, |
| "eval_runtime": 17.4921, |
| "eval_samples_per_second": 1086.433, |
| "eval_steps_per_second": 33.958, |
| "step": 33700 |
| }, |
| { |
| "epoch": 29.069767441860463, |
| "grad_norm": 0.4396195709705353, |
| "learning_rate": 8.372437553832903e-05, |
| "loss": 0.2853713607788086, |
| "step": 33750 |
| }, |
| { |
| "epoch": 29.069767441860463, |
| "eval_loss": 0.35844433307647705, |
| "eval_runtime": 17.6779, |
| "eval_samples_per_second": 1075.015, |
| "eval_steps_per_second": 33.601, |
| "step": 33750 |
| }, |
| { |
| "epoch": 29.112833763996555, |
| "grad_norm": 0.4725944995880127, |
| "learning_rate": 8.355211024978467e-05, |
| "loss": 0.29019893646240236, |
| "step": 33800 |
| }, |
| { |
| "epoch": 29.112833763996555, |
| "eval_loss": 0.3599490523338318, |
| "eval_runtime": 17.2702, |
| "eval_samples_per_second": 1100.393, |
| "eval_steps_per_second": 34.395, |
| "step": 33800 |
| }, |
| { |
| "epoch": 29.155900086132643, |
| "grad_norm": 0.5145936012268066, |
| "learning_rate": 8.337984496124032e-05, |
| "loss": 0.28777868270874024, |
| "step": 33850 |
| }, |
| { |
| "epoch": 29.155900086132643, |
| "eval_loss": 0.354200154542923, |
| "eval_runtime": 17.7173, |
| "eval_samples_per_second": 1072.626, |
| "eval_steps_per_second": 33.527, |
| "step": 33850 |
| }, |
| { |
| "epoch": 29.198966408268735, |
| "grad_norm": 0.471347838640213, |
| "learning_rate": 8.320757967269596e-05, |
| "loss": 0.28519989013671876, |
| "step": 33900 |
| }, |
| { |
| "epoch": 29.198966408268735, |
| "eval_loss": 0.3546842634677887, |
| "eval_runtime": 17.5708, |
| "eval_samples_per_second": 1081.565, |
| "eval_steps_per_second": 33.806, |
| "step": 33900 |
| }, |
| { |
| "epoch": 29.242032730404823, |
| "grad_norm": 0.5167243480682373, |
| "learning_rate": 8.30353143841516e-05, |
| "loss": 0.28689983367919925, |
| "step": 33950 |
| }, |
| { |
| "epoch": 29.242032730404823, |
| "eval_loss": 0.3506035804748535, |
| "eval_runtime": 17.6473, |
| "eval_samples_per_second": 1076.881, |
| "eval_steps_per_second": 33.66, |
| "step": 33950 |
| }, |
| { |
| "epoch": 29.285099052540914, |
| "grad_norm": 0.49219077825546265, |
| "learning_rate": 8.286304909560724e-05, |
| "loss": 0.28681949615478514, |
| "step": 34000 |
| }, |
| { |
| "epoch": 29.285099052540914, |
| "eval_loss": 0.35144245624542236, |
| "eval_runtime": 17.7227, |
| "eval_samples_per_second": 1072.296, |
| "eval_steps_per_second": 33.516, |
| "step": 34000 |
| }, |
| { |
| "epoch": 29.328165374677003, |
| "grad_norm": 0.5044933557510376, |
| "learning_rate": 8.269078380706288e-05, |
| "loss": 0.28524385452270506, |
| "step": 34050 |
| }, |
| { |
| "epoch": 29.328165374677003, |
| "eval_loss": 0.36039817333221436, |
| "eval_runtime": 18.0279, |
| "eval_samples_per_second": 1054.141, |
| "eval_steps_per_second": 32.949, |
| "step": 34050 |
| }, |
| { |
| "epoch": 29.37123169681309, |
| "grad_norm": 0.4944096803665161, |
| "learning_rate": 8.251851851851851e-05, |
| "loss": 0.2815964698791504, |
| "step": 34100 |
| }, |
| { |
| "epoch": 29.37123169681309, |
| "eval_loss": 0.35990285873413086, |
| "eval_runtime": 19.6306, |
| "eval_samples_per_second": 968.079, |
| "eval_steps_per_second": 30.259, |
| "step": 34100 |
| }, |
| { |
| "epoch": 29.414298018949182, |
| "grad_norm": 0.4206973612308502, |
| "learning_rate": 8.234625322997417e-05, |
| "loss": 0.2808472442626953, |
| "step": 34150 |
| }, |
| { |
| "epoch": 29.414298018949182, |
| "eval_loss": 0.3539174497127533, |
| "eval_runtime": 18.5638, |
| "eval_samples_per_second": 1023.71, |
| "eval_steps_per_second": 31.998, |
| "step": 34150 |
| }, |
| { |
| "epoch": 29.45736434108527, |
| "grad_norm": 0.46597999334335327, |
| "learning_rate": 8.217398794142981e-05, |
| "loss": 0.2880288696289062, |
| "step": 34200 |
| }, |
| { |
| "epoch": 29.45736434108527, |
| "eval_loss": 0.3545425236225128, |
| "eval_runtime": 19.4963, |
| "eval_samples_per_second": 974.748, |
| "eval_steps_per_second": 30.467, |
| "step": 34200 |
| }, |
| { |
| "epoch": 29.500430663221362, |
| "grad_norm": 0.40909266471862793, |
| "learning_rate": 8.200172265288545e-05, |
| "loss": 0.28936180114746096, |
| "step": 34250 |
| }, |
| { |
| "epoch": 29.500430663221362, |
| "eval_loss": 0.3585303723812103, |
| "eval_runtime": 16.8611, |
| "eval_samples_per_second": 1127.091, |
| "eval_steps_per_second": 35.229, |
| "step": 34250 |
| }, |
| { |
| "epoch": 29.54349698535745, |
| "grad_norm": 0.3839239478111267, |
| "learning_rate": 8.18294573643411e-05, |
| "loss": 0.2772235107421875, |
| "step": 34300 |
| }, |
| { |
| "epoch": 29.54349698535745, |
| "eval_loss": 0.3551238775253296, |
| "eval_runtime": 17.57, |
| "eval_samples_per_second": 1081.614, |
| "eval_steps_per_second": 33.808, |
| "step": 34300 |
| }, |
| { |
| "epoch": 29.58656330749354, |
| "grad_norm": 0.43001672625541687, |
| "learning_rate": 8.165719207579672e-05, |
| "loss": 0.2795041847229004, |
| "step": 34350 |
| }, |
| { |
| "epoch": 29.58656330749354, |
| "eval_loss": 0.3595533072948456, |
| "eval_runtime": 17.835, |
| "eval_samples_per_second": 1065.543, |
| "eval_steps_per_second": 33.305, |
| "step": 34350 |
| }, |
| { |
| "epoch": 29.62962962962963, |
| "grad_norm": 0.45755746960639954, |
| "learning_rate": 8.148492678725236e-05, |
| "loss": 0.2803531265258789, |
| "step": 34400 |
| }, |
| { |
| "epoch": 29.62962962962963, |
| "eval_loss": 0.3586844503879547, |
| "eval_runtime": 18.152, |
| "eval_samples_per_second": 1046.935, |
| "eval_steps_per_second": 32.724, |
| "step": 34400 |
| }, |
| { |
| "epoch": 29.672695951765718, |
| "grad_norm": 0.436646968126297, |
| "learning_rate": 8.1312661498708e-05, |
| "loss": 0.28399742126464844, |
| "step": 34450 |
| }, |
| { |
| "epoch": 29.672695951765718, |
| "eval_loss": 0.3531823754310608, |
| "eval_runtime": 17.9012, |
| "eval_samples_per_second": 1061.603, |
| "eval_steps_per_second": 33.182, |
| "step": 34450 |
| }, |
| { |
| "epoch": 29.71576227390181, |
| "grad_norm": 0.41019946336746216, |
| "learning_rate": 8.114039621016366e-05, |
| "loss": 0.2811345100402832, |
| "step": 34500 |
| }, |
| { |
| "epoch": 29.71576227390181, |
| "eval_loss": 0.36139482259750366, |
| "eval_runtime": 17.6563, |
| "eval_samples_per_second": 1076.332, |
| "eval_steps_per_second": 33.642, |
| "step": 34500 |
| }, |
| { |
| "epoch": 29.758828596037898, |
| "grad_norm": 0.38991779088974, |
| "learning_rate": 8.09681309216193e-05, |
| "loss": 0.2825624084472656, |
| "step": 34550 |
| }, |
| { |
| "epoch": 29.758828596037898, |
| "eval_loss": 0.3564010262489319, |
| "eval_runtime": 17.4361, |
| "eval_samples_per_second": 1089.92, |
| "eval_steps_per_second": 34.067, |
| "step": 34550 |
| }, |
| { |
| "epoch": 29.80189491817399, |
| "grad_norm": 0.4471035599708557, |
| "learning_rate": 8.079586563307495e-05, |
| "loss": 0.28816804885864256, |
| "step": 34600 |
| }, |
| { |
| "epoch": 29.80189491817399, |
| "eval_loss": 0.3620011806488037, |
| "eval_runtime": 16.5186, |
| "eval_samples_per_second": 1150.461, |
| "eval_steps_per_second": 35.959, |
| "step": 34600 |
| }, |
| { |
| "epoch": 29.844961240310077, |
| "grad_norm": 0.5122584700584412, |
| "learning_rate": 8.062360034453058e-05, |
| "loss": 0.2832551574707031, |
| "step": 34650 |
| }, |
| { |
| "epoch": 29.844961240310077, |
| "eval_loss": 0.35831230878829956, |
| "eval_runtime": 17.6589, |
| "eval_samples_per_second": 1076.174, |
| "eval_steps_per_second": 33.638, |
| "step": 34650 |
| }, |
| { |
| "epoch": 29.88802756244617, |
| "grad_norm": 0.4535158574581146, |
| "learning_rate": 8.045133505598622e-05, |
| "loss": 0.284300365447998, |
| "step": 34700 |
| }, |
| { |
| "epoch": 29.88802756244617, |
| "eval_loss": 0.35620927810668945, |
| "eval_runtime": 16.3303, |
| "eval_samples_per_second": 1163.724, |
| "eval_steps_per_second": 36.374, |
| "step": 34700 |
| }, |
| { |
| "epoch": 29.931093884582257, |
| "grad_norm": 0.42133525013923645, |
| "learning_rate": 8.027906976744186e-05, |
| "loss": 0.28102462768554687, |
| "step": 34750 |
| }, |
| { |
| "epoch": 29.931093884582257, |
| "eval_loss": 0.3616844713687897, |
| "eval_runtime": 18.5174, |
| "eval_samples_per_second": 1026.276, |
| "eval_steps_per_second": 32.078, |
| "step": 34750 |
| }, |
| { |
| "epoch": 29.974160206718345, |
| "grad_norm": 0.43094122409820557, |
| "learning_rate": 8.01068044788975e-05, |
| "loss": 0.27734567642211916, |
| "step": 34800 |
| }, |
| { |
| "epoch": 29.974160206718345, |
| "eval_loss": 0.3515077233314514, |
| "eval_runtime": 18.8179, |
| "eval_samples_per_second": 1009.892, |
| "eval_steps_per_second": 31.566, |
| "step": 34800 |
| }, |
| { |
| "epoch": 30.017226528854437, |
| "grad_norm": 0.49150723218917847, |
| "learning_rate": 7.993453919035316e-05, |
| "loss": 0.2808255386352539, |
| "step": 34850 |
| }, |
| { |
| "epoch": 30.017226528854437, |
| "eval_loss": 0.3547874987125397, |
| "eval_runtime": 17.868, |
| "eval_samples_per_second": 1063.577, |
| "eval_steps_per_second": 33.244, |
| "step": 34850 |
| }, |
| { |
| "epoch": 30.060292850990525, |
| "grad_norm": 0.40897902846336365, |
| "learning_rate": 7.97622739018088e-05, |
| "loss": 0.2808572387695312, |
| "step": 34900 |
| }, |
| { |
| "epoch": 30.060292850990525, |
| "eval_loss": 0.3521062731742859, |
| "eval_runtime": 17.821, |
| "eval_samples_per_second": 1066.38, |
| "eval_steps_per_second": 33.331, |
| "step": 34900 |
| }, |
| { |
| "epoch": 30.103359173126616, |
| "grad_norm": 0.47192278504371643, |
| "learning_rate": 7.959000861326443e-05, |
| "loss": 0.28496150970458983, |
| "step": 34950 |
| }, |
| { |
| "epoch": 30.103359173126616, |
| "eval_loss": 0.35089150071144104, |
| "eval_runtime": 17.8659, |
| "eval_samples_per_second": 1063.701, |
| "eval_steps_per_second": 33.248, |
| "step": 34950 |
| }, |
| { |
| "epoch": 30.146425495262704, |
| "grad_norm": 0.40365535020828247, |
| "learning_rate": 7.941774332472007e-05, |
| "loss": 0.27695383071899415, |
| "step": 35000 |
| }, |
| { |
| "epoch": 30.146425495262704, |
| "eval_loss": 0.3547033965587616, |
| "eval_runtime": 17.6758, |
| "eval_samples_per_second": 1075.145, |
| "eval_steps_per_second": 33.605, |
| "step": 35000 |
| }, |
| { |
| "epoch": 30.189491817398793, |
| "grad_norm": 0.5050297379493713, |
| "learning_rate": 7.924547803617571e-05, |
| "loss": 0.2835973358154297, |
| "step": 35050 |
| }, |
| { |
| "epoch": 30.189491817398793, |
| "eval_loss": 0.351387619972229, |
| "eval_runtime": 17.0333, |
| "eval_samples_per_second": 1115.696, |
| "eval_steps_per_second": 34.873, |
| "step": 35050 |
| }, |
| { |
| "epoch": 30.232558139534884, |
| "grad_norm": 0.5353424549102783, |
| "learning_rate": 7.907321274763135e-05, |
| "loss": 0.2843050003051758, |
| "step": 35100 |
| }, |
| { |
| "epoch": 30.232558139534884, |
| "eval_loss": 0.36003604531288147, |
| "eval_runtime": 18.0958, |
| "eval_samples_per_second": 1050.189, |
| "eval_steps_per_second": 32.825, |
| "step": 35100 |
| }, |
| { |
| "epoch": 30.275624461670972, |
| "grad_norm": 0.468283474445343, |
| "learning_rate": 7.8900947459087e-05, |
| "loss": 0.28055492401123044, |
| "step": 35150 |
| }, |
| { |
| "epoch": 30.275624461670972, |
| "eval_loss": 0.35894954204559326, |
| "eval_runtime": 17.7049, |
| "eval_samples_per_second": 1073.375, |
| "eval_steps_per_second": 33.55, |
| "step": 35150 |
| }, |
| { |
| "epoch": 30.318690783807064, |
| "grad_norm": 0.4638129770755768, |
| "learning_rate": 7.872868217054264e-05, |
| "loss": 0.2750652503967285, |
| "step": 35200 |
| }, |
| { |
| "epoch": 30.318690783807064, |
| "eval_loss": 0.35567528009414673, |
| "eval_runtime": 17.919, |
| "eval_samples_per_second": 1060.552, |
| "eval_steps_per_second": 33.149, |
| "step": 35200 |
| }, |
| { |
| "epoch": 30.361757105943152, |
| "grad_norm": 0.5625136494636536, |
| "learning_rate": 7.855641688199828e-05, |
| "loss": 0.2850845909118652, |
| "step": 35250 |
| }, |
| { |
| "epoch": 30.361757105943152, |
| "eval_loss": 0.36067649722099304, |
| "eval_runtime": 17.6804, |
| "eval_samples_per_second": 1074.863, |
| "eval_steps_per_second": 33.597, |
| "step": 35250 |
| }, |
| { |
| "epoch": 30.404823428079244, |
| "grad_norm": 0.43467268347740173, |
| "learning_rate": 7.838415159345392e-05, |
| "loss": 0.2873003578186035, |
| "step": 35300 |
| }, |
| { |
| "epoch": 30.404823428079244, |
| "eval_loss": 0.3528839349746704, |
| "eval_runtime": 17.756, |
| "eval_samples_per_second": 1070.287, |
| "eval_steps_per_second": 33.454, |
| "step": 35300 |
| }, |
| { |
| "epoch": 30.44788975021533, |
| "grad_norm": 0.4412539303302765, |
| "learning_rate": 7.821188630490956e-05, |
| "loss": 0.285908203125, |
| "step": 35350 |
| }, |
| { |
| "epoch": 30.44788975021533, |
| "eval_loss": 0.3487206697463989, |
| "eval_runtime": 17.9176, |
| "eval_samples_per_second": 1060.633, |
| "eval_steps_per_second": 33.152, |
| "step": 35350 |
| }, |
| { |
| "epoch": 30.49095607235142, |
| "grad_norm": 0.4791826903820038, |
| "learning_rate": 7.80396210163652e-05, |
| "loss": 0.2815831756591797, |
| "step": 35400 |
| }, |
| { |
| "epoch": 30.49095607235142, |
| "eval_loss": 0.35717254877090454, |
| "eval_runtime": 17.8176, |
| "eval_samples_per_second": 1066.587, |
| "eval_steps_per_second": 33.338, |
| "step": 35400 |
| }, |
| { |
| "epoch": 30.53402239448751, |
| "grad_norm": 0.4532201886177063, |
| "learning_rate": 7.786735572782085e-05, |
| "loss": 0.28284832000732424, |
| "step": 35450 |
| }, |
| { |
| "epoch": 30.53402239448751, |
| "eval_loss": 0.3567640781402588, |
| "eval_runtime": 17.5731, |
| "eval_samples_per_second": 1081.425, |
| "eval_steps_per_second": 33.802, |
| "step": 35450 |
| }, |
| { |
| "epoch": 30.5770887166236, |
| "grad_norm": 0.5390316247940063, |
| "learning_rate": 7.769509043927649e-05, |
| "loss": 0.2846924591064453, |
| "step": 35500 |
| }, |
| { |
| "epoch": 30.5770887166236, |
| "eval_loss": 0.3540796935558319, |
| "eval_runtime": 17.2901, |
| "eval_samples_per_second": 1099.126, |
| "eval_steps_per_second": 34.355, |
| "step": 35500 |
| }, |
| { |
| "epoch": 30.62015503875969, |
| "grad_norm": 0.4142419993877411, |
| "learning_rate": 7.752282515073212e-05, |
| "loss": 0.28491304397583006, |
| "step": 35550 |
| }, |
| { |
| "epoch": 30.62015503875969, |
| "eval_loss": 0.36062875390052795, |
| "eval_runtime": 17.9366, |
| "eval_samples_per_second": 1059.508, |
| "eval_steps_per_second": 33.117, |
| "step": 35550 |
| }, |
| { |
| "epoch": 30.66322136089578, |
| "grad_norm": 0.4472252130508423, |
| "learning_rate": 7.735055986218777e-05, |
| "loss": 0.282122917175293, |
| "step": 35600 |
| }, |
| { |
| "epoch": 30.66322136089578, |
| "eval_loss": 0.35857757925987244, |
| "eval_runtime": 16.936, |
| "eval_samples_per_second": 1122.107, |
| "eval_steps_per_second": 35.073, |
| "step": 35600 |
| }, |
| { |
| "epoch": 30.70628768303187, |
| "grad_norm": 0.5478323101997375, |
| "learning_rate": 7.717829457364342e-05, |
| "loss": 0.2744486999511719, |
| "step": 35650 |
| }, |
| { |
| "epoch": 30.70628768303187, |
| "eval_loss": 0.35813167691230774, |
| "eval_runtime": 17.5761, |
| "eval_samples_per_second": 1081.243, |
| "eval_steps_per_second": 33.796, |
| "step": 35650 |
| }, |
| { |
| "epoch": 30.74935400516796, |
| "grad_norm": 0.4716998338699341, |
| "learning_rate": 7.700602928509906e-05, |
| "loss": 0.28380435943603516, |
| "step": 35700 |
| }, |
| { |
| "epoch": 30.74935400516796, |
| "eval_loss": 0.34930458664894104, |
| "eval_runtime": 18.1096, |
| "eval_samples_per_second": 1049.39, |
| "eval_steps_per_second": 32.8, |
| "step": 35700 |
| }, |
| { |
| "epoch": 30.792420327304047, |
| "grad_norm": 0.4638073742389679, |
| "learning_rate": 7.68337639965547e-05, |
| "loss": 0.28032737731933594, |
| "step": 35750 |
| }, |
| { |
| "epoch": 30.792420327304047, |
| "eval_loss": 0.3495262861251831, |
| "eval_runtime": 17.8334, |
| "eval_samples_per_second": 1065.639, |
| "eval_steps_per_second": 33.308, |
| "step": 35750 |
| }, |
| { |
| "epoch": 30.83548664944014, |
| "grad_norm": 0.42722830176353455, |
| "learning_rate": 7.666149870801034e-05, |
| "loss": 0.2841494560241699, |
| "step": 35800 |
| }, |
| { |
| "epoch": 30.83548664944014, |
| "eval_loss": 0.35409942269325256, |
| "eval_runtime": 17.8052, |
| "eval_samples_per_second": 1067.329, |
| "eval_steps_per_second": 33.361, |
| "step": 35800 |
| }, |
| { |
| "epoch": 30.878552971576227, |
| "grad_norm": 0.6081225872039795, |
| "learning_rate": 7.648923341946597e-05, |
| "loss": 0.27756031036376955, |
| "step": 35850 |
| }, |
| { |
| "epoch": 30.878552971576227, |
| "eval_loss": 0.3533887565135956, |
| "eval_runtime": 18.4344, |
| "eval_samples_per_second": 1030.897, |
| "eval_steps_per_second": 32.222, |
| "step": 35850 |
| }, |
| { |
| "epoch": 30.921619293712318, |
| "grad_norm": 0.4515012502670288, |
| "learning_rate": 7.631696813092161e-05, |
| "loss": 0.2825018310546875, |
| "step": 35900 |
| }, |
| { |
| "epoch": 30.921619293712318, |
| "eval_loss": 0.35392051935195923, |
| "eval_runtime": 17.8146, |
| "eval_samples_per_second": 1066.767, |
| "eval_steps_per_second": 33.343, |
| "step": 35900 |
| }, |
| { |
| "epoch": 30.964685615848406, |
| "grad_norm": 0.5015137195587158, |
| "learning_rate": 7.614470284237727e-05, |
| "loss": 0.276204948425293, |
| "step": 35950 |
| }, |
| { |
| "epoch": 30.964685615848406, |
| "eval_loss": 0.35323023796081543, |
| "eval_runtime": 16.7751, |
| "eval_samples_per_second": 1132.871, |
| "eval_steps_per_second": 35.41, |
| "step": 35950 |
| }, |
| { |
| "epoch": 31.007751937984494, |
| "grad_norm": 0.49482443928718567, |
| "learning_rate": 7.597243755383291e-05, |
| "loss": 0.2751371574401855, |
| "step": 36000 |
| }, |
| { |
| "epoch": 31.007751937984494, |
| "eval_loss": 0.35262081027030945, |
| "eval_runtime": 17.7106, |
| "eval_samples_per_second": 1073.033, |
| "eval_steps_per_second": 33.539, |
| "step": 36000 |
| }, |
| { |
| "epoch": 31.050818260120586, |
| "grad_norm": 0.5488302707672119, |
| "learning_rate": 7.580017226528855e-05, |
| "loss": 0.2832693099975586, |
| "step": 36050 |
| }, |
| { |
| "epoch": 31.050818260120586, |
| "eval_loss": 0.3515873849391937, |
| "eval_runtime": 17.7268, |
| "eval_samples_per_second": 1072.047, |
| "eval_steps_per_second": 33.509, |
| "step": 36050 |
| }, |
| { |
| "epoch": 31.093884582256674, |
| "grad_norm": 0.47250041365623474, |
| "learning_rate": 7.56279069767442e-05, |
| "loss": 0.2822879409790039, |
| "step": 36100 |
| }, |
| { |
| "epoch": 31.093884582256674, |
| "eval_loss": 0.3520713746547699, |
| "eval_runtime": 17.2396, |
| "eval_samples_per_second": 1102.344, |
| "eval_steps_per_second": 34.456, |
| "step": 36100 |
| }, |
| { |
| "epoch": 31.136950904392766, |
| "grad_norm": 0.45473265647888184, |
| "learning_rate": 7.545564168819982e-05, |
| "loss": 0.28120903015136717, |
| "step": 36150 |
| }, |
| { |
| "epoch": 31.136950904392766, |
| "eval_loss": 0.35466763377189636, |
| "eval_runtime": 18.006, |
| "eval_samples_per_second": 1055.423, |
| "eval_steps_per_second": 32.989, |
| "step": 36150 |
| }, |
| { |
| "epoch": 31.180017226528854, |
| "grad_norm": 0.5772674679756165, |
| "learning_rate": 7.528337639965547e-05, |
| "loss": 0.28337194442749025, |
| "step": 36200 |
| }, |
| { |
| "epoch": 31.180017226528854, |
| "eval_loss": 0.34932905435562134, |
| "eval_runtime": 17.878, |
| "eval_samples_per_second": 1062.984, |
| "eval_steps_per_second": 33.225, |
| "step": 36200 |
| }, |
| { |
| "epoch": 31.223083548664945, |
| "grad_norm": 0.510277509689331, |
| "learning_rate": 7.511111111111111e-05, |
| "loss": 0.2886422348022461, |
| "step": 36250 |
| }, |
| { |
| "epoch": 31.223083548664945, |
| "eval_loss": 0.35099226236343384, |
| "eval_runtime": 17.8684, |
| "eval_samples_per_second": 1063.551, |
| "eval_steps_per_second": 33.243, |
| "step": 36250 |
| }, |
| { |
| "epoch": 31.266149870801033, |
| "grad_norm": 0.5357157588005066, |
| "learning_rate": 7.493884582256676e-05, |
| "loss": 0.27557926177978515, |
| "step": 36300 |
| }, |
| { |
| "epoch": 31.266149870801033, |
| "eval_loss": 0.3499546945095062, |
| "eval_runtime": 18.1845, |
| "eval_samples_per_second": 1045.067, |
| "eval_steps_per_second": 32.665, |
| "step": 36300 |
| }, |
| { |
| "epoch": 31.30921619293712, |
| "grad_norm": 0.5062825679779053, |
| "learning_rate": 7.47665805340224e-05, |
| "loss": 0.27824714660644534, |
| "step": 36350 |
| }, |
| { |
| "epoch": 31.30921619293712, |
| "eval_loss": 0.3564305901527405, |
| "eval_runtime": 17.7311, |
| "eval_samples_per_second": 1071.791, |
| "eval_steps_per_second": 33.501, |
| "step": 36350 |
| }, |
| { |
| "epoch": 31.352282515073213, |
| "grad_norm": 0.41484689712524414, |
| "learning_rate": 7.459431524547804e-05, |
| "loss": 0.28137615203857425, |
| "step": 36400 |
| }, |
| { |
| "epoch": 31.352282515073213, |
| "eval_loss": 0.3556700646877289, |
| "eval_runtime": 17.3043, |
| "eval_samples_per_second": 1098.227, |
| "eval_steps_per_second": 34.327, |
| "step": 36400 |
| }, |
| { |
| "epoch": 31.3953488372093, |
| "grad_norm": 0.43241560459136963, |
| "learning_rate": 7.442204995693368e-05, |
| "loss": 0.2793069839477539, |
| "step": 36450 |
| }, |
| { |
| "epoch": 31.3953488372093, |
| "eval_loss": 0.3520503044128418, |
| "eval_runtime": 16.5928, |
| "eval_samples_per_second": 1145.316, |
| "eval_steps_per_second": 35.799, |
| "step": 36450 |
| }, |
| { |
| "epoch": 31.438415159345393, |
| "grad_norm": 0.40315011143684387, |
| "learning_rate": 7.424978466838932e-05, |
| "loss": 0.27872093200683595, |
| "step": 36500 |
| }, |
| { |
| "epoch": 31.438415159345393, |
| "eval_loss": 0.3566995859146118, |
| "eval_runtime": 17.9359, |
| "eval_samples_per_second": 1059.55, |
| "eval_steps_per_second": 33.118, |
| "step": 36500 |
| }, |
| { |
| "epoch": 31.48148148148148, |
| "grad_norm": 0.3954852223396301, |
| "learning_rate": 7.407751937984496e-05, |
| "loss": 0.28445125579833985, |
| "step": 36550 |
| }, |
| { |
| "epoch": 31.48148148148148, |
| "eval_loss": 0.3513743281364441, |
| "eval_runtime": 16.2741, |
| "eval_samples_per_second": 1167.747, |
| "eval_steps_per_second": 36.5, |
| "step": 36550 |
| }, |
| { |
| "epoch": 31.524547803617573, |
| "grad_norm": 0.44541651010513306, |
| "learning_rate": 7.39052540913006e-05, |
| "loss": 0.27661422729492186, |
| "step": 36600 |
| }, |
| { |
| "epoch": 31.524547803617573, |
| "eval_loss": 0.34536993503570557, |
| "eval_runtime": 17.8737, |
| "eval_samples_per_second": 1063.237, |
| "eval_steps_per_second": 33.233, |
| "step": 36600 |
| }, |
| { |
| "epoch": 31.56761412575366, |
| "grad_norm": 0.5039131045341492, |
| "learning_rate": 7.373298880275626e-05, |
| "loss": 0.2753811264038086, |
| "step": 36650 |
| }, |
| { |
| "epoch": 31.56761412575366, |
| "eval_loss": 0.34828585386276245, |
| "eval_runtime": 17.826, |
| "eval_samples_per_second": 1066.083, |
| "eval_steps_per_second": 33.322, |
| "step": 36650 |
| }, |
| { |
| "epoch": 31.61068044788975, |
| "grad_norm": 0.455892950296402, |
| "learning_rate": 7.356072351421189e-05, |
| "loss": 0.28127681732177734, |
| "step": 36700 |
| }, |
| { |
| "epoch": 31.61068044788975, |
| "eval_loss": 0.34847480058670044, |
| "eval_runtime": 17.9038, |
| "eval_samples_per_second": 1061.449, |
| "eval_steps_per_second": 33.177, |
| "step": 36700 |
| }, |
| { |
| "epoch": 31.65374677002584, |
| "grad_norm": 0.481543630361557, |
| "learning_rate": 7.338845822566753e-05, |
| "loss": 0.2735530662536621, |
| "step": 36750 |
| }, |
| { |
| "epoch": 31.65374677002584, |
| "eval_loss": 0.34797731041908264, |
| "eval_runtime": 17.8173, |
| "eval_samples_per_second": 1066.603, |
| "eval_steps_per_second": 33.338, |
| "step": 36750 |
| }, |
| { |
| "epoch": 31.69681309216193, |
| "grad_norm": 0.4258839786052704, |
| "learning_rate": 7.321619293712317e-05, |
| "loss": 0.2800648307800293, |
| "step": 36800 |
| }, |
| { |
| "epoch": 31.69681309216193, |
| "eval_loss": 0.35386908054351807, |
| "eval_runtime": 17.8122, |
| "eval_samples_per_second": 1066.911, |
| "eval_steps_per_second": 33.348, |
| "step": 36800 |
| }, |
| { |
| "epoch": 31.73987941429802, |
| "grad_norm": 0.4986749291419983, |
| "learning_rate": 7.304392764857881e-05, |
| "loss": 0.2886836624145508, |
| "step": 36850 |
| }, |
| { |
| "epoch": 31.73987941429802, |
| "eval_loss": 0.35045093297958374, |
| "eval_runtime": 17.9467, |
| "eval_samples_per_second": 1058.912, |
| "eval_steps_per_second": 33.098, |
| "step": 36850 |
| }, |
| { |
| "epoch": 31.782945736434108, |
| "grad_norm": 0.428501695394516, |
| "learning_rate": 7.287166236003446e-05, |
| "loss": 0.27103757858276367, |
| "step": 36900 |
| }, |
| { |
| "epoch": 31.782945736434108, |
| "eval_loss": 0.349502295255661, |
| "eval_runtime": 17.3496, |
| "eval_samples_per_second": 1095.359, |
| "eval_steps_per_second": 34.237, |
| "step": 36900 |
| }, |
| { |
| "epoch": 31.8260120585702, |
| "grad_norm": 0.44585123658180237, |
| "learning_rate": 7.26993970714901e-05, |
| "loss": 0.282184944152832, |
| "step": 36950 |
| }, |
| { |
| "epoch": 31.8260120585702, |
| "eval_loss": 0.3529902994632721, |
| "eval_runtime": 18.1962, |
| "eval_samples_per_second": 1044.397, |
| "eval_steps_per_second": 32.644, |
| "step": 36950 |
| }, |
| { |
| "epoch": 31.869078380706288, |
| "grad_norm": 0.44630590081214905, |
| "learning_rate": 7.252713178294574e-05, |
| "loss": 0.2770352554321289, |
| "step": 37000 |
| }, |
| { |
| "epoch": 31.869078380706288, |
| "eval_loss": 0.3461504876613617, |
| "eval_runtime": 16.8741, |
| "eval_samples_per_second": 1126.222, |
| "eval_steps_per_second": 35.202, |
| "step": 37000 |
| }, |
| { |
| "epoch": 31.912144702842376, |
| "grad_norm": 0.41770753264427185, |
| "learning_rate": 7.235486649440138e-05, |
| "loss": 0.27702234268188475, |
| "step": 37050 |
| }, |
| { |
| "epoch": 31.912144702842376, |
| "eval_loss": 0.3510807752609253, |
| "eval_runtime": 17.9156, |
| "eval_samples_per_second": 1060.75, |
| "eval_steps_per_second": 33.155, |
| "step": 37050 |
| }, |
| { |
| "epoch": 31.955211024978468, |
| "grad_norm": 0.3740564286708832, |
| "learning_rate": 7.218260120585702e-05, |
| "loss": 0.27722122192382814, |
| "step": 37100 |
| }, |
| { |
| "epoch": 31.955211024978468, |
| "eval_loss": 0.3538920283317566, |
| "eval_runtime": 17.9454, |
| "eval_samples_per_second": 1058.989, |
| "eval_steps_per_second": 33.1, |
| "step": 37100 |
| }, |
| { |
| "epoch": 31.998277347114556, |
| "grad_norm": 0.4577646851539612, |
| "learning_rate": 7.201033591731267e-05, |
| "loss": 0.2829978561401367, |
| "step": 37150 |
| }, |
| { |
| "epoch": 31.998277347114556, |
| "eval_loss": 0.3476633131504059, |
| "eval_runtime": 17.8921, |
| "eval_samples_per_second": 1062.145, |
| "eval_steps_per_second": 33.199, |
| "step": 37150 |
| }, |
| { |
| "epoch": 32.041343669250644, |
| "grad_norm": 0.5443671941757202, |
| "learning_rate": 7.183807062876831e-05, |
| "loss": 0.28137630462646485, |
| "step": 37200 |
| }, |
| { |
| "epoch": 32.041343669250644, |
| "eval_loss": 0.35495200753211975, |
| "eval_runtime": 17.9027, |
| "eval_samples_per_second": 1061.513, |
| "eval_steps_per_second": 33.179, |
| "step": 37200 |
| }, |
| { |
| "epoch": 32.084409991386735, |
| "grad_norm": 0.4282469153404236, |
| "learning_rate": 7.166580534022395e-05, |
| "loss": 0.2752407646179199, |
| "step": 37250 |
| }, |
| { |
| "epoch": 32.084409991386735, |
| "eval_loss": 0.3526252508163452, |
| "eval_runtime": 18.1825, |
| "eval_samples_per_second": 1045.183, |
| "eval_steps_per_second": 32.669, |
| "step": 37250 |
| }, |
| { |
| "epoch": 32.12747631352283, |
| "grad_norm": 0.4998242259025574, |
| "learning_rate": 7.149354005167959e-05, |
| "loss": 0.2797782516479492, |
| "step": 37300 |
| }, |
| { |
| "epoch": 32.12747631352283, |
| "eval_loss": 0.35174521803855896, |
| "eval_runtime": 17.6086, |
| "eval_samples_per_second": 1079.243, |
| "eval_steps_per_second": 33.733, |
| "step": 37300 |
| }, |
| { |
| "epoch": 32.17054263565891, |
| "grad_norm": 0.49339139461517334, |
| "learning_rate": 7.132127476313522e-05, |
| "loss": 0.2803740692138672, |
| "step": 37350 |
| }, |
| { |
| "epoch": 32.17054263565891, |
| "eval_loss": 0.35175102949142456, |
| "eval_runtime": 16.6815, |
| "eval_samples_per_second": 1139.226, |
| "eval_steps_per_second": 35.608, |
| "step": 37350 |
| }, |
| { |
| "epoch": 32.213608957795, |
| "grad_norm": 0.46561405062675476, |
| "learning_rate": 7.114900947459088e-05, |
| "loss": 0.27958194732666014, |
| "step": 37400 |
| }, |
| { |
| "epoch": 32.213608957795, |
| "eval_loss": 0.3542179763317108, |
| "eval_runtime": 17.9346, |
| "eval_samples_per_second": 1059.627, |
| "eval_steps_per_second": 33.12, |
| "step": 37400 |
| }, |
| { |
| "epoch": 32.256675279931095, |
| "grad_norm": 0.5260679125785828, |
| "learning_rate": 7.097674418604652e-05, |
| "loss": 0.27948333740234377, |
| "step": 37450 |
| }, |
| { |
| "epoch": 32.256675279931095, |
| "eval_loss": 0.35432368516921997, |
| "eval_runtime": 18.3926, |
| "eval_samples_per_second": 1033.244, |
| "eval_steps_per_second": 32.296, |
| "step": 37450 |
| }, |
| { |
| "epoch": 32.299741602067186, |
| "grad_norm": 0.40552613139152527, |
| "learning_rate": 7.080447889750216e-05, |
| "loss": 0.2720658493041992, |
| "step": 37500 |
| }, |
| { |
| "epoch": 32.299741602067186, |
| "eval_loss": 0.3506203293800354, |
| "eval_runtime": 17.1944, |
| "eval_samples_per_second": 1105.246, |
| "eval_steps_per_second": 34.546, |
| "step": 37500 |
| }, |
| { |
| "epoch": 32.34280792420327, |
| "grad_norm": 0.574375569820404, |
| "learning_rate": 7.06322136089578e-05, |
| "loss": 0.2815470886230469, |
| "step": 37550 |
| }, |
| { |
| "epoch": 32.34280792420327, |
| "eval_loss": 0.34805968403816223, |
| "eval_runtime": 17.8766, |
| "eval_samples_per_second": 1063.066, |
| "eval_steps_per_second": 33.228, |
| "step": 37550 |
| }, |
| { |
| "epoch": 32.38587424633936, |
| "grad_norm": 0.5432583093643188, |
| "learning_rate": 7.045994832041343e-05, |
| "loss": 0.27453317642211916, |
| "step": 37600 |
| }, |
| { |
| "epoch": 32.38587424633936, |
| "eval_loss": 0.34897035360336304, |
| "eval_runtime": 18.4403, |
| "eval_samples_per_second": 1030.569, |
| "eval_steps_per_second": 32.212, |
| "step": 37600 |
| }, |
| { |
| "epoch": 32.428940568475454, |
| "grad_norm": 0.49719077348709106, |
| "learning_rate": 7.028768303186907e-05, |
| "loss": 0.27978298187255857, |
| "step": 37650 |
| }, |
| { |
| "epoch": 32.428940568475454, |
| "eval_loss": 0.3470863103866577, |
| "eval_runtime": 17.7674, |
| "eval_samples_per_second": 1069.602, |
| "eval_steps_per_second": 33.432, |
| "step": 37650 |
| }, |
| { |
| "epoch": 32.47200689061154, |
| "grad_norm": 0.5088914632797241, |
| "learning_rate": 7.011541774332472e-05, |
| "loss": 0.28130838394165036, |
| "step": 37700 |
| }, |
| { |
| "epoch": 32.47200689061154, |
| "eval_loss": 0.3575519919395447, |
| "eval_runtime": 17.8462, |
| "eval_samples_per_second": 1064.875, |
| "eval_steps_per_second": 33.284, |
| "step": 37700 |
| }, |
| { |
| "epoch": 32.51507321274763, |
| "grad_norm": 0.4396224319934845, |
| "learning_rate": 6.994315245478037e-05, |
| "loss": 0.27438232421875, |
| "step": 37750 |
| }, |
| { |
| "epoch": 32.51507321274763, |
| "eval_loss": 0.3423137664794922, |
| "eval_runtime": 17.8343, |
| "eval_samples_per_second": 1065.588, |
| "eval_steps_per_second": 33.307, |
| "step": 37750 |
| }, |
| { |
| "epoch": 32.55813953488372, |
| "grad_norm": 0.5027320981025696, |
| "learning_rate": 6.977088716623601e-05, |
| "loss": 0.275331974029541, |
| "step": 37800 |
| }, |
| { |
| "epoch": 32.55813953488372, |
| "eval_loss": 0.35206782817840576, |
| "eval_runtime": 17.7441, |
| "eval_samples_per_second": 1071.004, |
| "eval_steps_per_second": 33.476, |
| "step": 37800 |
| }, |
| { |
| "epoch": 32.60120585701981, |
| "grad_norm": 0.3858621418476105, |
| "learning_rate": 6.959862187769166e-05, |
| "loss": 0.27590456008911135, |
| "step": 37850 |
| }, |
| { |
| "epoch": 32.60120585701981, |
| "eval_loss": 0.3495849668979645, |
| "eval_runtime": 16.9167, |
| "eval_samples_per_second": 1123.384, |
| "eval_steps_per_second": 35.113, |
| "step": 37850 |
| }, |
| { |
| "epoch": 32.6442721791559, |
| "grad_norm": 0.37935516238212585, |
| "learning_rate": 6.942635658914728e-05, |
| "loss": 0.26858043670654297, |
| "step": 37900 |
| }, |
| { |
| "epoch": 32.6442721791559, |
| "eval_loss": 0.34352296590805054, |
| "eval_runtime": 17.9597, |
| "eval_samples_per_second": 1058.145, |
| "eval_steps_per_second": 33.074, |
| "step": 37900 |
| }, |
| { |
| "epoch": 32.68733850129199, |
| "grad_norm": 0.4469800889492035, |
| "learning_rate": 6.925409130060293e-05, |
| "loss": 0.28146677017211913, |
| "step": 37950 |
| }, |
| { |
| "epoch": 32.68733850129199, |
| "eval_loss": 0.3506070077419281, |
| "eval_runtime": 17.4011, |
| "eval_samples_per_second": 1092.114, |
| "eval_steps_per_second": 34.136, |
| "step": 37950 |
| }, |
| { |
| "epoch": 32.73040482342808, |
| "grad_norm": 0.42299091815948486, |
| "learning_rate": 6.908182601205857e-05, |
| "loss": 0.280551872253418, |
| "step": 38000 |
| }, |
| { |
| "epoch": 32.73040482342808, |
| "eval_loss": 0.3468368947505951, |
| "eval_runtime": 17.7061, |
| "eval_samples_per_second": 1073.302, |
| "eval_steps_per_second": 33.548, |
| "step": 38000 |
| }, |
| { |
| "epoch": 32.773471145564166, |
| "grad_norm": 0.47024187445640564, |
| "learning_rate": 6.890956072351421e-05, |
| "loss": 0.28213356018066404, |
| "step": 38050 |
| }, |
| { |
| "epoch": 32.773471145564166, |
| "eval_loss": 0.35156771540641785, |
| "eval_runtime": 17.9712, |
| "eval_samples_per_second": 1057.468, |
| "eval_steps_per_second": 33.053, |
| "step": 38050 |
| }, |
| { |
| "epoch": 32.81653746770026, |
| "grad_norm": 0.5036038160324097, |
| "learning_rate": 6.873729543496987e-05, |
| "loss": 0.2840275192260742, |
| "step": 38100 |
| }, |
| { |
| "epoch": 32.81653746770026, |
| "eval_loss": 0.3425002694129944, |
| "eval_runtime": 18.0866, |
| "eval_samples_per_second": 1050.724, |
| "eval_steps_per_second": 32.842, |
| "step": 38100 |
| }, |
| { |
| "epoch": 32.85960378983635, |
| "grad_norm": 0.49322518706321716, |
| "learning_rate": 6.856503014642551e-05, |
| "loss": 0.27961723327636717, |
| "step": 38150 |
| }, |
| { |
| "epoch": 32.85960378983635, |
| "eval_loss": 0.34951549768447876, |
| "eval_runtime": 18.0747, |
| "eval_samples_per_second": 1051.417, |
| "eval_steps_per_second": 32.864, |
| "step": 38150 |
| }, |
| { |
| "epoch": 32.90267011197244, |
| "grad_norm": 0.5208374857902527, |
| "learning_rate": 6.839276485788114e-05, |
| "loss": 0.2792753982543945, |
| "step": 38200 |
| }, |
| { |
| "epoch": 32.90267011197244, |
| "eval_loss": 0.35184648633003235, |
| "eval_runtime": 18.2143, |
| "eval_samples_per_second": 1043.354, |
| "eval_steps_per_second": 32.612, |
| "step": 38200 |
| }, |
| { |
| "epoch": 32.945736434108525, |
| "grad_norm": 0.5315906405448914, |
| "learning_rate": 6.822049956933678e-05, |
| "loss": 0.2755429649353027, |
| "step": 38250 |
| }, |
| { |
| "epoch": 32.945736434108525, |
| "eval_loss": 0.3492070734500885, |
| "eval_runtime": 17.6981, |
| "eval_samples_per_second": 1073.789, |
| "eval_steps_per_second": 33.563, |
| "step": 38250 |
| }, |
| { |
| "epoch": 32.98880275624462, |
| "grad_norm": 0.44382065534591675, |
| "learning_rate": 6.804823428079242e-05, |
| "loss": 0.27883129119873046, |
| "step": 38300 |
| }, |
| { |
| "epoch": 32.98880275624462, |
| "eval_loss": 0.3477207124233246, |
| "eval_runtime": 16.8834, |
| "eval_samples_per_second": 1125.6, |
| "eval_steps_per_second": 35.182, |
| "step": 38300 |
| }, |
| { |
| "epoch": 33.03186907838071, |
| "grad_norm": 0.4330918490886688, |
| "learning_rate": 6.787596899224806e-05, |
| "loss": 0.2802844429016113, |
| "step": 38350 |
| }, |
| { |
| "epoch": 33.03186907838071, |
| "eval_loss": 0.35456958413124084, |
| "eval_runtime": 17.6811, |
| "eval_samples_per_second": 1074.82, |
| "eval_steps_per_second": 33.595, |
| "step": 38350 |
| }, |
| { |
| "epoch": 33.07493540051679, |
| "grad_norm": 0.5079190135002136, |
| "learning_rate": 6.77037037037037e-05, |
| "loss": 0.2710557746887207, |
| "step": 38400 |
| }, |
| { |
| "epoch": 33.07493540051679, |
| "eval_loss": 0.3461274802684784, |
| "eval_runtime": 16.7603, |
| "eval_samples_per_second": 1133.867, |
| "eval_steps_per_second": 35.441, |
| "step": 38400 |
| }, |
| { |
| "epoch": 33.118001722652885, |
| "grad_norm": 0.39602577686309814, |
| "learning_rate": 6.753143841515935e-05, |
| "loss": 0.273748836517334, |
| "step": 38450 |
| }, |
| { |
| "epoch": 33.118001722652885, |
| "eval_loss": 0.35338112711906433, |
| "eval_runtime": 17.5627, |
| "eval_samples_per_second": 1082.063, |
| "eval_steps_per_second": 33.822, |
| "step": 38450 |
| }, |
| { |
| "epoch": 33.161068044788976, |
| "grad_norm": 0.5302131175994873, |
| "learning_rate": 6.735917312661499e-05, |
| "loss": 0.27604515075683594, |
| "step": 38500 |
| }, |
| { |
| "epoch": 33.161068044788976, |
| "eval_loss": 0.3451584577560425, |
| "eval_runtime": 17.8874, |
| "eval_samples_per_second": 1062.425, |
| "eval_steps_per_second": 33.208, |
| "step": 38500 |
| }, |
| { |
| "epoch": 33.20413436692507, |
| "grad_norm": 0.4645986258983612, |
| "learning_rate": 6.718690783807063e-05, |
| "loss": 0.27570083618164065, |
| "step": 38550 |
| }, |
| { |
| "epoch": 33.20413436692507, |
| "eval_loss": 0.3517482280731201, |
| "eval_runtime": 18.0111, |
| "eval_samples_per_second": 1055.125, |
| "eval_steps_per_second": 32.98, |
| "step": 38550 |
| }, |
| { |
| "epoch": 33.24720068906115, |
| "grad_norm": 0.5932883620262146, |
| "learning_rate": 6.701464254952627e-05, |
| "loss": 0.27867019653320313, |
| "step": 38600 |
| }, |
| { |
| "epoch": 33.24720068906115, |
| "eval_loss": 0.3499729037284851, |
| "eval_runtime": 17.9683, |
| "eval_samples_per_second": 1057.639, |
| "eval_steps_per_second": 33.058, |
| "step": 38600 |
| }, |
| { |
| "epoch": 33.290267011197244, |
| "grad_norm": 0.5064970254898071, |
| "learning_rate": 6.684237726098192e-05, |
| "loss": 0.2713764190673828, |
| "step": 38650 |
| }, |
| { |
| "epoch": 33.290267011197244, |
| "eval_loss": 0.3454400599002838, |
| "eval_runtime": 17.9809, |
| "eval_samples_per_second": 1056.896, |
| "eval_steps_per_second": 33.035, |
| "step": 38650 |
| }, |
| { |
| "epoch": 33.333333333333336, |
| "grad_norm": 0.4735126793384552, |
| "learning_rate": 6.667011197243756e-05, |
| "loss": 0.27463886260986325, |
| "step": 38700 |
| }, |
| { |
| "epoch": 33.333333333333336, |
| "eval_loss": 0.34867385029792786, |
| "eval_runtime": 18.1937, |
| "eval_samples_per_second": 1044.539, |
| "eval_steps_per_second": 32.649, |
| "step": 38700 |
| }, |
| { |
| "epoch": 33.37639965546942, |
| "grad_norm": 0.4404645264148712, |
| "learning_rate": 6.64978466838932e-05, |
| "loss": 0.27556772232055665, |
| "step": 38750 |
| }, |
| { |
| "epoch": 33.37639965546942, |
| "eval_loss": 0.345106303691864, |
| "eval_runtime": 17.331, |
| "eval_samples_per_second": 1096.532, |
| "eval_steps_per_second": 34.274, |
| "step": 38750 |
| }, |
| { |
| "epoch": 33.41946597760551, |
| "grad_norm": 0.5270001888275146, |
| "learning_rate": 6.632558139534884e-05, |
| "loss": 0.27397455215454103, |
| "step": 38800 |
| }, |
| { |
| "epoch": 33.41946597760551, |
| "eval_loss": 0.34988826513290405, |
| "eval_runtime": 17.8862, |
| "eval_samples_per_second": 1062.495, |
| "eval_steps_per_second": 33.21, |
| "step": 38800 |
| }, |
| { |
| "epoch": 33.4625322997416, |
| "grad_norm": 0.44960689544677734, |
| "learning_rate": 6.615331610680448e-05, |
| "loss": 0.27975887298583985, |
| "step": 38850 |
| }, |
| { |
| "epoch": 33.4625322997416, |
| "eval_loss": 0.342560738325119, |
| "eval_runtime": 17.4275, |
| "eval_samples_per_second": 1090.461, |
| "eval_steps_per_second": 34.084, |
| "step": 38850 |
| }, |
| { |
| "epoch": 33.505598621877695, |
| "grad_norm": 0.5308107733726501, |
| "learning_rate": 6.598105081826013e-05, |
| "loss": 0.2754442596435547, |
| "step": 38900 |
| }, |
| { |
| "epoch": 33.505598621877695, |
| "eval_loss": 0.3444646894931793, |
| "eval_runtime": 18.9001, |
| "eval_samples_per_second": 1005.496, |
| "eval_steps_per_second": 31.428, |
| "step": 38900 |
| }, |
| { |
| "epoch": 33.54866494401378, |
| "grad_norm": 0.4935412108898163, |
| "learning_rate": 6.580878552971577e-05, |
| "loss": 0.2755685806274414, |
| "step": 38950 |
| }, |
| { |
| "epoch": 33.54866494401378, |
| "eval_loss": 0.3471962809562683, |
| "eval_runtime": 19.5679, |
| "eval_samples_per_second": 971.182, |
| "eval_steps_per_second": 30.356, |
| "step": 38950 |
| }, |
| { |
| "epoch": 33.59173126614987, |
| "grad_norm": 0.44433435797691345, |
| "learning_rate": 6.563652024117141e-05, |
| "loss": 0.2712661170959473, |
| "step": 39000 |
| }, |
| { |
| "epoch": 33.59173126614987, |
| "eval_loss": 0.3420495092868805, |
| "eval_runtime": 19.4068, |
| "eval_samples_per_second": 979.247, |
| "eval_steps_per_second": 30.608, |
| "step": 39000 |
| }, |
| { |
| "epoch": 33.63479758828596, |
| "grad_norm": 0.5094283819198608, |
| "learning_rate": 6.546425495262705e-05, |
| "loss": 0.27865215301513674, |
| "step": 39050 |
| }, |
| { |
| "epoch": 33.63479758828596, |
| "eval_loss": 0.3459348678588867, |
| "eval_runtime": 17.9122, |
| "eval_samples_per_second": 1060.954, |
| "eval_steps_per_second": 33.162, |
| "step": 39050 |
| }, |
| { |
| "epoch": 33.67786391042205, |
| "grad_norm": 0.43909311294555664, |
| "learning_rate": 6.529198966408268e-05, |
| "loss": 0.2744731903076172, |
| "step": 39100 |
| }, |
| { |
| "epoch": 33.67786391042205, |
| "eval_loss": 0.3503783345222473, |
| "eval_runtime": 17.8643, |
| "eval_samples_per_second": 1063.798, |
| "eval_steps_per_second": 33.251, |
| "step": 39100 |
| }, |
| { |
| "epoch": 33.72093023255814, |
| "grad_norm": 0.44677734375, |
| "learning_rate": 6.511972437553832e-05, |
| "loss": 0.2760355567932129, |
| "step": 39150 |
| }, |
| { |
| "epoch": 33.72093023255814, |
| "eval_loss": 0.3437975347042084, |
| "eval_runtime": 17.6977, |
| "eval_samples_per_second": 1073.812, |
| "eval_steps_per_second": 33.564, |
| "step": 39150 |
| }, |
| { |
| "epoch": 33.76399655469423, |
| "grad_norm": 0.5084570050239563, |
| "learning_rate": 6.494745908699398e-05, |
| "loss": 0.2722341537475586, |
| "step": 39200 |
| }, |
| { |
| "epoch": 33.76399655469423, |
| "eval_loss": 0.35166263580322266, |
| "eval_runtime": 17.0033, |
| "eval_samples_per_second": 1117.668, |
| "eval_steps_per_second": 34.934, |
| "step": 39200 |
| }, |
| { |
| "epoch": 33.807062876830315, |
| "grad_norm": 0.38987380266189575, |
| "learning_rate": 6.477519379844962e-05, |
| "loss": 0.28159662246704104, |
| "step": 39250 |
| }, |
| { |
| "epoch": 33.807062876830315, |
| "eval_loss": 0.34989920258522034, |
| "eval_runtime": 17.6583, |
| "eval_samples_per_second": 1076.206, |
| "eval_steps_per_second": 33.639, |
| "step": 39250 |
| }, |
| { |
| "epoch": 33.85012919896641, |
| "grad_norm": 0.4187825918197632, |
| "learning_rate": 6.460292850990526e-05, |
| "loss": 0.27221235275268557, |
| "step": 39300 |
| }, |
| { |
| "epoch": 33.85012919896641, |
| "eval_loss": 0.3462679088115692, |
| "eval_runtime": 17.8256, |
| "eval_samples_per_second": 1066.105, |
| "eval_steps_per_second": 33.323, |
| "step": 39300 |
| }, |
| { |
| "epoch": 33.8931955211025, |
| "grad_norm": 0.4491558372974396, |
| "learning_rate": 6.443066322136089e-05, |
| "loss": 0.2739124870300293, |
| "step": 39350 |
| }, |
| { |
| "epoch": 33.8931955211025, |
| "eval_loss": 0.3501715362071991, |
| "eval_runtime": 17.0734, |
| "eval_samples_per_second": 1113.08, |
| "eval_steps_per_second": 34.791, |
| "step": 39350 |
| }, |
| { |
| "epoch": 33.93626184323859, |
| "grad_norm": 0.41994020342826843, |
| "learning_rate": 6.425839793281653e-05, |
| "loss": 0.27803466796875, |
| "step": 39400 |
| }, |
| { |
| "epoch": 33.93626184323859, |
| "eval_loss": 0.34925201535224915, |
| "eval_runtime": 17.6642, |
| "eval_samples_per_second": 1075.85, |
| "eval_steps_per_second": 33.627, |
| "step": 39400 |
| }, |
| { |
| "epoch": 33.979328165374675, |
| "grad_norm": 0.41478756070137024, |
| "learning_rate": 6.408613264427218e-05, |
| "loss": 0.2822599220275879, |
| "step": 39450 |
| }, |
| { |
| "epoch": 33.979328165374675, |
| "eval_loss": 0.3459310829639435, |
| "eval_runtime": 17.86, |
| "eval_samples_per_second": 1064.055, |
| "eval_steps_per_second": 33.259, |
| "step": 39450 |
| }, |
| { |
| "epoch": 34.022394487510766, |
| "grad_norm": 0.46041932702064514, |
| "learning_rate": 6.391386735572782e-05, |
| "loss": 0.2755163383483887, |
| "step": 39500 |
| }, |
| { |
| "epoch": 34.022394487510766, |
| "eval_loss": 0.3456686735153198, |
| "eval_runtime": 17.9365, |
| "eval_samples_per_second": 1059.515, |
| "eval_steps_per_second": 33.117, |
| "step": 39500 |
| }, |
| { |
| "epoch": 34.06546080964686, |
| "grad_norm": 0.4062725603580475, |
| "learning_rate": 6.374160206718347e-05, |
| "loss": 0.2705917167663574, |
| "step": 39550 |
| }, |
| { |
| "epoch": 34.06546080964686, |
| "eval_loss": 0.34850847721099854, |
| "eval_runtime": 17.8359, |
| "eval_samples_per_second": 1065.489, |
| "eval_steps_per_second": 33.304, |
| "step": 39550 |
| }, |
| { |
| "epoch": 34.10852713178294, |
| "grad_norm": 0.3924962878227234, |
| "learning_rate": 6.356933677863912e-05, |
| "loss": 0.2703757858276367, |
| "step": 39600 |
| }, |
| { |
| "epoch": 34.10852713178294, |
| "eval_loss": 0.344966858625412, |
| "eval_runtime": 17.7503, |
| "eval_samples_per_second": 1070.632, |
| "eval_steps_per_second": 33.464, |
| "step": 39600 |
| }, |
| { |
| "epoch": 34.151593453919034, |
| "grad_norm": 0.4565719962120056, |
| "learning_rate": 6.339707149009474e-05, |
| "loss": 0.2750784683227539, |
| "step": 39650 |
| }, |
| { |
| "epoch": 34.151593453919034, |
| "eval_loss": 0.3534243106842041, |
| "eval_runtime": 17.4653, |
| "eval_samples_per_second": 1088.103, |
| "eval_steps_per_second": 34.01, |
| "step": 39650 |
| }, |
| { |
| "epoch": 34.194659776055126, |
| "grad_norm": 0.4472062289714813, |
| "learning_rate": 6.322480620155039e-05, |
| "loss": 0.2693879890441895, |
| "step": 39700 |
| }, |
| { |
| "epoch": 34.194659776055126, |
| "eval_loss": 0.35429486632347107, |
| "eval_runtime": 17.4596, |
| "eval_samples_per_second": 1088.456, |
| "eval_steps_per_second": 34.021, |
| "step": 39700 |
| }, |
| { |
| "epoch": 34.23772609819122, |
| "grad_norm": 0.4285549223423004, |
| "learning_rate": 6.305254091300603e-05, |
| "loss": 0.26811901092529294, |
| "step": 39750 |
| }, |
| { |
| "epoch": 34.23772609819122, |
| "eval_loss": 0.3482956290245056, |
| "eval_runtime": 17.6328, |
| "eval_samples_per_second": 1077.762, |
| "eval_steps_per_second": 33.687, |
| "step": 39750 |
| }, |
| { |
| "epoch": 34.2807924203273, |
| "grad_norm": 0.4658809006214142, |
| "learning_rate": 6.288027562446167e-05, |
| "loss": 0.27812984466552737, |
| "step": 39800 |
| }, |
| { |
| "epoch": 34.2807924203273, |
| "eval_loss": 0.34879428148269653, |
| "eval_runtime": 17.7477, |
| "eval_samples_per_second": 1070.784, |
| "eval_steps_per_second": 33.469, |
| "step": 39800 |
| }, |
| { |
| "epoch": 34.32385874246339, |
| "grad_norm": 0.434271901845932, |
| "learning_rate": 6.270801033591731e-05, |
| "loss": 0.27308488845825196, |
| "step": 39850 |
| }, |
| { |
| "epoch": 34.32385874246339, |
| "eval_loss": 0.3474234938621521, |
| "eval_runtime": 18.2801, |
| "eval_samples_per_second": 1039.603, |
| "eval_steps_per_second": 32.494, |
| "step": 39850 |
| }, |
| { |
| "epoch": 34.366925064599485, |
| "grad_norm": 0.6403506994247437, |
| "learning_rate": 6.253574504737297e-05, |
| "loss": 0.2769889831542969, |
| "step": 39900 |
| }, |
| { |
| "epoch": 34.366925064599485, |
| "eval_loss": 0.3394336402416229, |
| "eval_runtime": 18.1496, |
| "eval_samples_per_second": 1047.077, |
| "eval_steps_per_second": 32.728, |
| "step": 39900 |
| }, |
| { |
| "epoch": 34.40999138673557, |
| "grad_norm": 0.4239259958267212, |
| "learning_rate": 6.23634797588286e-05, |
| "loss": 0.27345130920410154, |
| "step": 39950 |
| }, |
| { |
| "epoch": 34.40999138673557, |
| "eval_loss": 0.3495025038719177, |
| "eval_runtime": 18.9968, |
| "eval_samples_per_second": 1000.379, |
| "eval_steps_per_second": 31.268, |
| "step": 39950 |
| }, |
| { |
| "epoch": 34.45305770887166, |
| "grad_norm": 0.3801671862602234, |
| "learning_rate": 6.219121447028424e-05, |
| "loss": 0.27393379211425783, |
| "step": 40000 |
| }, |
| { |
| "epoch": 34.45305770887166, |
| "eval_loss": 0.34322065114974976, |
| "eval_runtime": 17.9236, |
| "eval_samples_per_second": 1060.28, |
| "eval_steps_per_second": 33.141, |
| "step": 40000 |
| }, |
| { |
| "epoch": 34.49612403100775, |
| "grad_norm": 0.4332405626773834, |
| "learning_rate": 6.201894918173988e-05, |
| "loss": 0.2788553237915039, |
| "step": 40050 |
| }, |
| { |
| "epoch": 34.49612403100775, |
| "eval_loss": 0.34779658913612366, |
| "eval_runtime": 17.437, |
| "eval_samples_per_second": 1089.869, |
| "eval_steps_per_second": 34.066, |
| "step": 40050 |
| }, |
| { |
| "epoch": 34.539190353143844, |
| "grad_norm": 0.3911673128604889, |
| "learning_rate": 6.184668389319552e-05, |
| "loss": 0.2744323348999023, |
| "step": 40100 |
| }, |
| { |
| "epoch": 34.539190353143844, |
| "eval_loss": 0.34955883026123047, |
| "eval_runtime": 17.213, |
| "eval_samples_per_second": 1104.047, |
| "eval_steps_per_second": 34.509, |
| "step": 40100 |
| }, |
| { |
| "epoch": 34.58225667527993, |
| "grad_norm": 0.5038763284683228, |
| "learning_rate": 6.167441860465117e-05, |
| "loss": 0.2725095558166504, |
| "step": 40150 |
| }, |
| { |
| "epoch": 34.58225667527993, |
| "eval_loss": 0.34505942463874817, |
| "eval_runtime": 17.948, |
| "eval_samples_per_second": 1058.839, |
| "eval_steps_per_second": 33.096, |
| "step": 40150 |
| }, |
| { |
| "epoch": 34.62532299741602, |
| "grad_norm": 0.4034155011177063, |
| "learning_rate": 6.150215331610681e-05, |
| "loss": 0.272111701965332, |
| "step": 40200 |
| }, |
| { |
| "epoch": 34.62532299741602, |
| "eval_loss": 0.3441019654273987, |
| "eval_runtime": 17.5975, |
| "eval_samples_per_second": 1079.924, |
| "eval_steps_per_second": 33.755, |
| "step": 40200 |
| }, |
| { |
| "epoch": 34.66838931955211, |
| "grad_norm": 0.45396238565444946, |
| "learning_rate": 6.132988802756245e-05, |
| "loss": 0.27917551040649413, |
| "step": 40250 |
| }, |
| { |
| "epoch": 34.66838931955211, |
| "eval_loss": 0.3423817455768585, |
| "eval_runtime": 17.1731, |
| "eval_samples_per_second": 1106.616, |
| "eval_steps_per_second": 34.589, |
| "step": 40250 |
| }, |
| { |
| "epoch": 34.7114556416882, |
| "grad_norm": 0.4661038815975189, |
| "learning_rate": 6.115762273901809e-05, |
| "loss": 0.280230770111084, |
| "step": 40300 |
| }, |
| { |
| "epoch": 34.7114556416882, |
| "eval_loss": 0.347525417804718, |
| "eval_runtime": 17.9207, |
| "eval_samples_per_second": 1060.449, |
| "eval_steps_per_second": 33.146, |
| "step": 40300 |
| }, |
| { |
| "epoch": 34.75452196382429, |
| "grad_norm": 0.519680917263031, |
| "learning_rate": 6.0985357450473734e-05, |
| "loss": 0.2720791244506836, |
| "step": 40350 |
| }, |
| { |
| "epoch": 34.75452196382429, |
| "eval_loss": 0.33955568075180054, |
| "eval_runtime": 17.8871, |
| "eval_samples_per_second": 1062.442, |
| "eval_steps_per_second": 33.208, |
| "step": 40350 |
| }, |
| { |
| "epoch": 34.79758828596038, |
| "grad_norm": 0.523288905620575, |
| "learning_rate": 6.0813092161929376e-05, |
| "loss": 0.27809135437011717, |
| "step": 40400 |
| }, |
| { |
| "epoch": 34.79758828596038, |
| "eval_loss": 0.3517861068248749, |
| "eval_runtime": 17.9105, |
| "eval_samples_per_second": 1061.055, |
| "eval_steps_per_second": 33.165, |
| "step": 40400 |
| }, |
| { |
| "epoch": 34.84065460809647, |
| "grad_norm": 0.4025672972202301, |
| "learning_rate": 6.064082687338502e-05, |
| "loss": 0.2726823043823242, |
| "step": 40450 |
| }, |
| { |
| "epoch": 34.84065460809647, |
| "eval_loss": 0.3407162129878998, |
| "eval_runtime": 17.7528, |
| "eval_samples_per_second": 1070.477, |
| "eval_steps_per_second": 33.459, |
| "step": 40450 |
| }, |
| { |
| "epoch": 34.883720930232556, |
| "grad_norm": 0.4430167078971863, |
| "learning_rate": 6.046856158484065e-05, |
| "loss": 0.2641313934326172, |
| "step": 40500 |
| }, |
| { |
| "epoch": 34.883720930232556, |
| "eval_loss": 0.3425806164741516, |
| "eval_runtime": 17.4356, |
| "eval_samples_per_second": 1089.951, |
| "eval_steps_per_second": 34.068, |
| "step": 40500 |
| }, |
| { |
| "epoch": 34.92678725236865, |
| "grad_norm": 0.45299965143203735, |
| "learning_rate": 6.0296296296296295e-05, |
| "loss": 0.2765083312988281, |
| "step": 40550 |
| }, |
| { |
| "epoch": 34.92678725236865, |
| "eval_loss": 0.34253862500190735, |
| "eval_runtime": 16.4253, |
| "eval_samples_per_second": 1156.993, |
| "eval_steps_per_second": 36.164, |
| "step": 40550 |
| }, |
| { |
| "epoch": 34.96985357450474, |
| "grad_norm": 0.5068448781967163, |
| "learning_rate": 6.0124031007751944e-05, |
| "loss": 0.27179483413696287, |
| "step": 40600 |
| }, |
| { |
| "epoch": 34.96985357450474, |
| "eval_loss": 0.33738985657691956, |
| "eval_runtime": 17.5247, |
| "eval_samples_per_second": 1084.411, |
| "eval_steps_per_second": 33.895, |
| "step": 40600 |
| }, |
| { |
| "epoch": 35.012919896640824, |
| "grad_norm": 0.4455919861793518, |
| "learning_rate": 5.9951765719207586e-05, |
| "loss": 0.27295986175537107, |
| "step": 40650 |
| }, |
| { |
| "epoch": 35.012919896640824, |
| "eval_loss": 0.3451545536518097, |
| "eval_runtime": 17.1512, |
| "eval_samples_per_second": 1108.03, |
| "eval_steps_per_second": 34.633, |
| "step": 40650 |
| }, |
| { |
| "epoch": 35.055986218776916, |
| "grad_norm": 0.4618251919746399, |
| "learning_rate": 5.977950043066323e-05, |
| "loss": 0.27406063079833987, |
| "step": 40700 |
| }, |
| { |
| "epoch": 35.055986218776916, |
| "eval_loss": 0.34172993898391724, |
| "eval_runtime": 17.2228, |
| "eval_samples_per_second": 1103.424, |
| "eval_steps_per_second": 34.489, |
| "step": 40700 |
| }, |
| { |
| "epoch": 35.09905254091301, |
| "grad_norm": 0.48039114475250244, |
| "learning_rate": 5.9607235142118864e-05, |
| "loss": 0.2712633514404297, |
| "step": 40750 |
| }, |
| { |
| "epoch": 35.09905254091301, |
| "eval_loss": 0.34705230593681335, |
| "eval_runtime": 17.8501, |
| "eval_samples_per_second": 1064.646, |
| "eval_steps_per_second": 33.277, |
| "step": 40750 |
| }, |
| { |
| "epoch": 35.1421188630491, |
| "grad_norm": 0.39087414741516113, |
| "learning_rate": 5.9434969853574506e-05, |
| "loss": 0.2726176452636719, |
| "step": 40800 |
| }, |
| { |
| "epoch": 35.1421188630491, |
| "eval_loss": 0.35245054960250854, |
| "eval_runtime": 17.9496, |
| "eval_samples_per_second": 1058.745, |
| "eval_steps_per_second": 33.093, |
| "step": 40800 |
| }, |
| { |
| "epoch": 35.18518518518518, |
| "grad_norm": 0.5085448622703552, |
| "learning_rate": 5.926270456503015e-05, |
| "loss": 0.27361579895019533, |
| "step": 40850 |
| }, |
| { |
| "epoch": 35.18518518518518, |
| "eval_loss": 0.34160110354423523, |
| "eval_runtime": 17.7554, |
| "eval_samples_per_second": 1070.322, |
| "eval_steps_per_second": 33.455, |
| "step": 40850 |
| }, |
| { |
| "epoch": 35.228251507321275, |
| "grad_norm": 0.3623582124710083, |
| "learning_rate": 5.909043927648579e-05, |
| "loss": 0.26523059844970703, |
| "step": 40900 |
| }, |
| { |
| "epoch": 35.228251507321275, |
| "eval_loss": 0.3383851945400238, |
| "eval_runtime": 18.3149, |
| "eval_samples_per_second": 1037.623, |
| "eval_steps_per_second": 32.433, |
| "step": 40900 |
| }, |
| { |
| "epoch": 35.27131782945737, |
| "grad_norm": 0.42701098322868347, |
| "learning_rate": 5.8918173987941426e-05, |
| "loss": 0.2780462265014648, |
| "step": 40950 |
| }, |
| { |
| "epoch": 35.27131782945737, |
| "eval_loss": 0.34134912490844727, |
| "eval_runtime": 17.9587, |
| "eval_samples_per_second": 1058.205, |
| "eval_steps_per_second": 33.076, |
| "step": 40950 |
| }, |
| { |
| "epoch": 35.31438415159345, |
| "grad_norm": 0.4851948916912079, |
| "learning_rate": 5.874590869939708e-05, |
| "loss": 0.2788583374023437, |
| "step": 41000 |
| }, |
| { |
| "epoch": 35.31438415159345, |
| "eval_loss": 0.33466637134552, |
| "eval_runtime": 16.9935, |
| "eval_samples_per_second": 1118.307, |
| "eval_steps_per_second": 34.954, |
| "step": 41000 |
| }, |
| { |
| "epoch": 35.35745047372954, |
| "grad_norm": 0.3875599205493927, |
| "learning_rate": 5.8573643410852716e-05, |
| "loss": 0.26727346420288084, |
| "step": 41050 |
| }, |
| { |
| "epoch": 35.35745047372954, |
| "eval_loss": 0.34064584970474243, |
| "eval_runtime": 17.7144, |
| "eval_samples_per_second": 1072.8, |
| "eval_steps_per_second": 33.532, |
| "step": 41050 |
| }, |
| { |
| "epoch": 35.400516795865634, |
| "grad_norm": 0.46586155891418457, |
| "learning_rate": 5.840137812230836e-05, |
| "loss": 0.27534004211425783, |
| "step": 41100 |
| }, |
| { |
| "epoch": 35.400516795865634, |
| "eval_loss": 0.33610883355140686, |
| "eval_runtime": 17.8537, |
| "eval_samples_per_second": 1064.432, |
| "eval_steps_per_second": 33.27, |
| "step": 41100 |
| }, |
| { |
| "epoch": 35.443583118001726, |
| "grad_norm": 0.5082590579986572, |
| "learning_rate": 5.8229112833764e-05, |
| "loss": 0.2773623275756836, |
| "step": 41150 |
| }, |
| { |
| "epoch": 35.443583118001726, |
| "eval_loss": 0.3373280465602875, |
| "eval_runtime": 17.3963, |
| "eval_samples_per_second": 1092.419, |
| "eval_steps_per_second": 34.145, |
| "step": 41150 |
| }, |
| { |
| "epoch": 35.48664944013781, |
| "grad_norm": 0.5587695837020874, |
| "learning_rate": 5.8056847545219636e-05, |
| "loss": 0.27423599243164065, |
| "step": 41200 |
| }, |
| { |
| "epoch": 35.48664944013781, |
| "eval_loss": 0.34136703610420227, |
| "eval_runtime": 17.8605, |
| "eval_samples_per_second": 1064.026, |
| "eval_steps_per_second": 33.258, |
| "step": 41200 |
| }, |
| { |
| "epoch": 35.5297157622739, |
| "grad_norm": 0.4970998466014862, |
| "learning_rate": 5.788458225667528e-05, |
| "loss": 0.27262737274169924, |
| "step": 41250 |
| }, |
| { |
| "epoch": 35.5297157622739, |
| "eval_loss": 0.3357755243778229, |
| "eval_runtime": 17.8891, |
| "eval_samples_per_second": 1062.322, |
| "eval_steps_per_second": 33.205, |
| "step": 41250 |
| }, |
| { |
| "epoch": 35.572782084409994, |
| "grad_norm": 0.4705260694026947, |
| "learning_rate": 5.771231696813092e-05, |
| "loss": 0.276383171081543, |
| "step": 41300 |
| }, |
| { |
| "epoch": 35.572782084409994, |
| "eval_loss": 0.34233272075653076, |
| "eval_runtime": 18.0064, |
| "eval_samples_per_second": 1055.401, |
| "eval_steps_per_second": 32.988, |
| "step": 41300 |
| }, |
| { |
| "epoch": 35.61584840654608, |
| "grad_norm": 0.4661819636821747, |
| "learning_rate": 5.754005167958657e-05, |
| "loss": 0.2638749313354492, |
| "step": 41350 |
| }, |
| { |
| "epoch": 35.61584840654608, |
| "eval_loss": 0.3379902243614197, |
| "eval_runtime": 17.6564, |
| "eval_samples_per_second": 1076.321, |
| "eval_steps_per_second": 33.642, |
| "step": 41350 |
| }, |
| { |
| "epoch": 35.65891472868217, |
| "grad_norm": 0.4111165702342987, |
| "learning_rate": 5.736778639104221e-05, |
| "loss": 0.26883033752441404, |
| "step": 41400 |
| }, |
| { |
| "epoch": 35.65891472868217, |
| "eval_loss": 0.3382817208766937, |
| "eval_runtime": 17.5428, |
| "eval_samples_per_second": 1083.296, |
| "eval_steps_per_second": 33.86, |
| "step": 41400 |
| }, |
| { |
| "epoch": 35.70198105081826, |
| "grad_norm": 0.41107845306396484, |
| "learning_rate": 5.719552110249785e-05, |
| "loss": 0.2747551727294922, |
| "step": 41450 |
| }, |
| { |
| "epoch": 35.70198105081826, |
| "eval_loss": 0.34029775857925415, |
| "eval_runtime": 17.4782, |
| "eval_samples_per_second": 1087.295, |
| "eval_steps_per_second": 33.985, |
| "step": 41450 |
| }, |
| { |
| "epoch": 35.74504737295435, |
| "grad_norm": 0.41739150881767273, |
| "learning_rate": 5.702325581395349e-05, |
| "loss": 0.2748952102661133, |
| "step": 41500 |
| }, |
| { |
| "epoch": 35.74504737295435, |
| "eval_loss": 0.34697577357292175, |
| "eval_runtime": 17.7255, |
| "eval_samples_per_second": 1072.128, |
| "eval_steps_per_second": 33.511, |
| "step": 41500 |
| }, |
| { |
| "epoch": 35.78811369509044, |
| "grad_norm": 0.41582760214805603, |
| "learning_rate": 5.685099052540913e-05, |
| "loss": 0.27996679306030275, |
| "step": 41550 |
| }, |
| { |
| "epoch": 35.78811369509044, |
| "eval_loss": 0.34205004572868347, |
| "eval_runtime": 17.8009, |
| "eval_samples_per_second": 1067.589, |
| "eval_steps_per_second": 33.369, |
| "step": 41550 |
| }, |
| { |
| "epoch": 35.83118001722653, |
| "grad_norm": 0.4034820795059204, |
| "learning_rate": 5.667872523686477e-05, |
| "loss": 0.2681726455688477, |
| "step": 41600 |
| }, |
| { |
| "epoch": 35.83118001722653, |
| "eval_loss": 0.3477557599544525, |
| "eval_runtime": 17.2857, |
| "eval_samples_per_second": 1099.407, |
| "eval_steps_per_second": 34.364, |
| "step": 41600 |
| }, |
| { |
| "epoch": 35.87424633936262, |
| "grad_norm": 0.5131692290306091, |
| "learning_rate": 5.6506459948320415e-05, |
| "loss": 0.2685848617553711, |
| "step": 41650 |
| }, |
| { |
| "epoch": 35.87424633936262, |
| "eval_loss": 0.34424716234207153, |
| "eval_runtime": 17.9277, |
| "eval_samples_per_second": 1060.036, |
| "eval_steps_per_second": 33.133, |
| "step": 41650 |
| }, |
| { |
| "epoch": 35.917312661498705, |
| "grad_norm": 0.38450688123703003, |
| "learning_rate": 5.6334194659776064e-05, |
| "loss": 0.2745730972290039, |
| "step": 41700 |
| }, |
| { |
| "epoch": 35.917312661498705, |
| "eval_loss": 0.3426118791103363, |
| "eval_runtime": 17.6578, |
| "eval_samples_per_second": 1076.236, |
| "eval_steps_per_second": 33.639, |
| "step": 41700 |
| }, |
| { |
| "epoch": 35.9603789836348, |
| "grad_norm": 0.5067197680473328, |
| "learning_rate": 5.6161929371231706e-05, |
| "loss": 0.27472471237182616, |
| "step": 41750 |
| }, |
| { |
| "epoch": 35.9603789836348, |
| "eval_loss": 0.34160706400871277, |
| "eval_runtime": 17.8861, |
| "eval_samples_per_second": 1062.498, |
| "eval_steps_per_second": 33.21, |
| "step": 41750 |
| }, |
| { |
| "epoch": 36.00344530577089, |
| "grad_norm": 0.4307672381401062, |
| "learning_rate": 5.598966408268734e-05, |
| "loss": 0.26797880172729494, |
| "step": 41800 |
| }, |
| { |
| "epoch": 36.00344530577089, |
| "eval_loss": 0.3427271842956543, |
| "eval_runtime": 17.9622, |
| "eval_samples_per_second": 1058.0, |
| "eval_steps_per_second": 33.069, |
| "step": 41800 |
| }, |
| { |
| "epoch": 36.04651162790697, |
| "grad_norm": 0.5117197632789612, |
| "learning_rate": 5.581739879414298e-05, |
| "loss": 0.27051544189453125, |
| "step": 41850 |
| }, |
| { |
| "epoch": 36.04651162790697, |
| "eval_loss": 0.34072092175483704, |
| "eval_runtime": 17.8202, |
| "eval_samples_per_second": 1066.432, |
| "eval_steps_per_second": 33.333, |
| "step": 41850 |
| }, |
| { |
| "epoch": 36.089577950043065, |
| "grad_norm": 0.4320580065250397, |
| "learning_rate": 5.5645133505598625e-05, |
| "loss": 0.2716988754272461, |
| "step": 41900 |
| }, |
| { |
| "epoch": 36.089577950043065, |
| "eval_loss": 0.3381943702697754, |
| "eval_runtime": 17.5501, |
| "eval_samples_per_second": 1082.844, |
| "eval_steps_per_second": 33.846, |
| "step": 41900 |
| }, |
| { |
| "epoch": 36.13264427217916, |
| "grad_norm": 0.4369736909866333, |
| "learning_rate": 5.547286821705426e-05, |
| "loss": 0.27702898025512696, |
| "step": 41950 |
| }, |
| { |
| "epoch": 36.13264427217916, |
| "eval_loss": 0.3371625542640686, |
| "eval_runtime": 17.3949, |
| "eval_samples_per_second": 1092.501, |
| "eval_steps_per_second": 34.148, |
| "step": 41950 |
| }, |
| { |
| "epoch": 36.17571059431525, |
| "grad_norm": 0.44421809911727905, |
| "learning_rate": 5.53006029285099e-05, |
| "loss": 0.2704465866088867, |
| "step": 42000 |
| }, |
| { |
| "epoch": 36.17571059431525, |
| "eval_loss": 0.3457271158695221, |
| "eval_runtime": 17.9037, |
| "eval_samples_per_second": 1061.455, |
| "eval_steps_per_second": 33.177, |
| "step": 42000 |
| }, |
| { |
| "epoch": 36.21877691645133, |
| "grad_norm": 0.3486070930957794, |
| "learning_rate": 5.512833763996555e-05, |
| "loss": 0.26162067413330076, |
| "step": 42050 |
| }, |
| { |
| "epoch": 36.21877691645133, |
| "eval_loss": 0.34183192253112793, |
| "eval_runtime": 16.7764, |
| "eval_samples_per_second": 1132.779, |
| "eval_steps_per_second": 35.407, |
| "step": 42050 |
| }, |
| { |
| "epoch": 36.261843238587424, |
| "grad_norm": 0.42011758685112, |
| "learning_rate": 5.4956072351421194e-05, |
| "loss": 0.26230512619018553, |
| "step": 42100 |
| }, |
| { |
| "epoch": 36.261843238587424, |
| "eval_loss": 0.3458743095397949, |
| "eval_runtime": 17.7406, |
| "eval_samples_per_second": 1071.213, |
| "eval_steps_per_second": 33.482, |
| "step": 42100 |
| }, |
| { |
| "epoch": 36.304909560723516, |
| "grad_norm": 0.5051075220108032, |
| "learning_rate": 5.4783807062876836e-05, |
| "loss": 0.26472633361816406, |
| "step": 42150 |
| }, |
| { |
| "epoch": 36.304909560723516, |
| "eval_loss": 0.3397945761680603, |
| "eval_runtime": 17.5211, |
| "eval_samples_per_second": 1084.636, |
| "eval_steps_per_second": 33.902, |
| "step": 42150 |
| }, |
| { |
| "epoch": 36.3479758828596, |
| "grad_norm": 0.4122444689273834, |
| "learning_rate": 5.461154177433248e-05, |
| "loss": 0.27144794464111327, |
| "step": 42200 |
| }, |
| { |
| "epoch": 36.3479758828596, |
| "eval_loss": 0.34024304151535034, |
| "eval_runtime": 17.8293, |
| "eval_samples_per_second": 1065.883, |
| "eval_steps_per_second": 33.316, |
| "step": 42200 |
| }, |
| { |
| "epoch": 36.39104220499569, |
| "grad_norm": 0.4055521786212921, |
| "learning_rate": 5.443927648578811e-05, |
| "loss": 0.26872739791870115, |
| "step": 42250 |
| }, |
| { |
| "epoch": 36.39104220499569, |
| "eval_loss": 0.3468708395957947, |
| "eval_runtime": 17.6978, |
| "eval_samples_per_second": 1073.804, |
| "eval_steps_per_second": 33.563, |
| "step": 42250 |
| }, |
| { |
| "epoch": 36.434108527131784, |
| "grad_norm": 0.4319891333580017, |
| "learning_rate": 5.4267011197243755e-05, |
| "loss": 0.26956621170043943, |
| "step": 42300 |
| }, |
| { |
| "epoch": 36.434108527131784, |
| "eval_loss": 0.34552404284477234, |
| "eval_runtime": 16.9548, |
| "eval_samples_per_second": 1120.861, |
| "eval_steps_per_second": 35.034, |
| "step": 42300 |
| }, |
| { |
| "epoch": 36.477174849267875, |
| "grad_norm": 0.5158259272575378, |
| "learning_rate": 5.40947459086994e-05, |
| "loss": 0.276561336517334, |
| "step": 42350 |
| }, |
| { |
| "epoch": 36.477174849267875, |
| "eval_loss": 0.34414294362068176, |
| "eval_runtime": 17.7146, |
| "eval_samples_per_second": 1072.787, |
| "eval_steps_per_second": 33.532, |
| "step": 42350 |
| }, |
| { |
| "epoch": 36.52024117140396, |
| "grad_norm": 0.4017482399940491, |
| "learning_rate": 5.392248062015503e-05, |
| "loss": 0.2677594757080078, |
| "step": 42400 |
| }, |
| { |
| "epoch": 36.52024117140396, |
| "eval_loss": 0.34336283802986145, |
| "eval_runtime": 16.3716, |
| "eval_samples_per_second": 1160.791, |
| "eval_steps_per_second": 36.282, |
| "step": 42400 |
| }, |
| { |
| "epoch": 36.56330749354005, |
| "grad_norm": 0.48331621289253235, |
| "learning_rate": 5.375021533161069e-05, |
| "loss": 0.27375417709350586, |
| "step": 42450 |
| }, |
| { |
| "epoch": 36.56330749354005, |
| "eval_loss": 0.3469257652759552, |
| "eval_runtime": 17.6382, |
| "eval_samples_per_second": 1077.433, |
| "eval_steps_per_second": 33.677, |
| "step": 42450 |
| }, |
| { |
| "epoch": 36.60637381567614, |
| "grad_norm": 0.47335347533226013, |
| "learning_rate": 5.357795004306633e-05, |
| "loss": 0.2711253356933594, |
| "step": 42500 |
| }, |
| { |
| "epoch": 36.60637381567614, |
| "eval_loss": 0.3430469334125519, |
| "eval_runtime": 17.6249, |
| "eval_samples_per_second": 1078.249, |
| "eval_steps_per_second": 33.702, |
| "step": 42500 |
| }, |
| { |
| "epoch": 36.64944013781223, |
| "grad_norm": 0.4714546203613281, |
| "learning_rate": 5.3405684754521966e-05, |
| "loss": 0.2674391555786133, |
| "step": 42550 |
| }, |
| { |
| "epoch": 36.64944013781223, |
| "eval_loss": 0.3497195839881897, |
| "eval_runtime": 17.853, |
| "eval_samples_per_second": 1064.473, |
| "eval_steps_per_second": 33.272, |
| "step": 42550 |
| }, |
| { |
| "epoch": 36.69250645994832, |
| "grad_norm": 0.41258716583251953, |
| "learning_rate": 5.323341946597761e-05, |
| "loss": 0.26646110534667966, |
| "step": 42600 |
| }, |
| { |
| "epoch": 36.69250645994832, |
| "eval_loss": 0.34118419885635376, |
| "eval_runtime": 17.8783, |
| "eval_samples_per_second": 1062.967, |
| "eval_steps_per_second": 33.225, |
| "step": 42600 |
| }, |
| { |
| "epoch": 36.73557278208441, |
| "grad_norm": 0.45262160897254944, |
| "learning_rate": 5.306115417743325e-05, |
| "loss": 0.2719459533691406, |
| "step": 42650 |
| }, |
| { |
| "epoch": 36.73557278208441, |
| "eval_loss": 0.33805808424949646, |
| "eval_runtime": 18.0153, |
| "eval_samples_per_second": 1054.881, |
| "eval_steps_per_second": 32.972, |
| "step": 42650 |
| }, |
| { |
| "epoch": 36.7786391042205, |
| "grad_norm": 0.36781540513038635, |
| "learning_rate": 5.2888888888888885e-05, |
| "loss": 0.2655976486206055, |
| "step": 42700 |
| }, |
| { |
| "epoch": 36.7786391042205, |
| "eval_loss": 0.34015172719955444, |
| "eval_runtime": 17.5197, |
| "eval_samples_per_second": 1084.721, |
| "eval_steps_per_second": 33.905, |
| "step": 42700 |
| }, |
| { |
| "epoch": 36.82170542635659, |
| "grad_norm": 0.47751036286354065, |
| "learning_rate": 5.271662360034453e-05, |
| "loss": 0.270654182434082, |
| "step": 42750 |
| }, |
| { |
| "epoch": 36.82170542635659, |
| "eval_loss": 0.3435145318508148, |
| "eval_runtime": 17.1934, |
| "eval_samples_per_second": 1105.309, |
| "eval_steps_per_second": 34.548, |
| "step": 42750 |
| }, |
| { |
| "epoch": 36.86477174849268, |
| "grad_norm": 0.5343551635742188, |
| "learning_rate": 5.2544358311800176e-05, |
| "loss": 0.27388362884521483, |
| "step": 42800 |
| }, |
| { |
| "epoch": 36.86477174849268, |
| "eval_loss": 0.33774811029434204, |
| "eval_runtime": 17.9087, |
| "eval_samples_per_second": 1061.158, |
| "eval_steps_per_second": 33.168, |
| "step": 42800 |
| }, |
| { |
| "epoch": 36.90783807062877, |
| "grad_norm": 0.4724558889865875, |
| "learning_rate": 5.237209302325582e-05, |
| "loss": 0.26862293243408203, |
| "step": 42850 |
| }, |
| { |
| "epoch": 36.90783807062877, |
| "eval_loss": 0.3435251712799072, |
| "eval_runtime": 17.0757, |
| "eval_samples_per_second": 1112.929, |
| "eval_steps_per_second": 34.786, |
| "step": 42850 |
| }, |
| { |
| "epoch": 36.950904392764855, |
| "grad_norm": 0.5167089104652405, |
| "learning_rate": 5.219982773471146e-05, |
| "loss": 0.27372997283935546, |
| "step": 42900 |
| }, |
| { |
| "epoch": 36.950904392764855, |
| "eval_loss": 0.34223997592926025, |
| "eval_runtime": 17.7156, |
| "eval_samples_per_second": 1072.728, |
| "eval_steps_per_second": 33.53, |
| "step": 42900 |
| }, |
| { |
| "epoch": 36.99397071490095, |
| "grad_norm": 0.5571605563163757, |
| "learning_rate": 5.20275624461671e-05, |
| "loss": 0.2704277038574219, |
| "step": 42950 |
| }, |
| { |
| "epoch": 36.99397071490095, |
| "eval_loss": 0.34202906489372253, |
| "eval_runtime": 17.7015, |
| "eval_samples_per_second": 1073.583, |
| "eval_steps_per_second": 33.557, |
| "step": 42950 |
| }, |
| { |
| "epoch": 37.03703703703704, |
| "grad_norm": 0.5319112539291382, |
| "learning_rate": 5.185529715762274e-05, |
| "loss": 0.26464887619018557, |
| "step": 43000 |
| }, |
| { |
| "epoch": 37.03703703703704, |
| "eval_loss": 0.33549952507019043, |
| "eval_runtime": 17.6629, |
| "eval_samples_per_second": 1075.925, |
| "eval_steps_per_second": 33.63, |
| "step": 43000 |
| }, |
| { |
| "epoch": 37.08010335917313, |
| "grad_norm": 0.4691065549850464, |
| "learning_rate": 5.168303186907838e-05, |
| "loss": 0.26262826919555665, |
| "step": 43050 |
| }, |
| { |
| "epoch": 37.08010335917313, |
| "eval_loss": 0.33849185705184937, |
| "eval_runtime": 17.7204, |
| "eval_samples_per_second": 1072.434, |
| "eval_steps_per_second": 33.521, |
| "step": 43050 |
| }, |
| { |
| "epoch": 37.123169681309214, |
| "grad_norm": 0.46527716517448425, |
| "learning_rate": 5.151076658053402e-05, |
| "loss": 0.2713047981262207, |
| "step": 43100 |
| }, |
| { |
| "epoch": 37.123169681309214, |
| "eval_loss": 0.3408891260623932, |
| "eval_runtime": 17.364, |
| "eval_samples_per_second": 1094.448, |
| "eval_steps_per_second": 34.209, |
| "step": 43100 |
| }, |
| { |
| "epoch": 37.166236003445306, |
| "grad_norm": 0.552895188331604, |
| "learning_rate": 5.133850129198967e-05, |
| "loss": 0.2643990707397461, |
| "step": 43150 |
| }, |
| { |
| "epoch": 37.166236003445306, |
| "eval_loss": 0.3359718918800354, |
| "eval_runtime": 17.9648, |
| "eval_samples_per_second": 1057.846, |
| "eval_steps_per_second": 33.065, |
| "step": 43150 |
| }, |
| { |
| "epoch": 37.2093023255814, |
| "grad_norm": 0.3904784917831421, |
| "learning_rate": 5.116623600344531e-05, |
| "loss": 0.2650018310546875, |
| "step": 43200 |
| }, |
| { |
| "epoch": 37.2093023255814, |
| "eval_loss": 0.34700727462768555, |
| "eval_runtime": 17.1055, |
| "eval_samples_per_second": 1110.985, |
| "eval_steps_per_second": 34.726, |
| "step": 43200 |
| }, |
| { |
| "epoch": 37.25236864771748, |
| "grad_norm": 0.6356217265129089, |
| "learning_rate": 5.099397071490095e-05, |
| "loss": 0.27305797576904295, |
| "step": 43250 |
| }, |
| { |
| "epoch": 37.25236864771748, |
| "eval_loss": 0.34335824847221375, |
| "eval_runtime": 17.7907, |
| "eval_samples_per_second": 1068.199, |
| "eval_steps_per_second": 33.388, |
| "step": 43250 |
| }, |
| { |
| "epoch": 37.295434969853574, |
| "grad_norm": 0.38687363266944885, |
| "learning_rate": 5.082170542635659e-05, |
| "loss": 0.2676750946044922, |
| "step": 43300 |
| }, |
| { |
| "epoch": 37.295434969853574, |
| "eval_loss": 0.34360429644584656, |
| "eval_runtime": 17.8278, |
| "eval_samples_per_second": 1065.978, |
| "eval_steps_per_second": 33.319, |
| "step": 43300 |
| }, |
| { |
| "epoch": 37.338501291989665, |
| "grad_norm": 0.5335272550582886, |
| "learning_rate": 5.064944013781223e-05, |
| "loss": 0.2677302360534668, |
| "step": 43350 |
| }, |
| { |
| "epoch": 37.338501291989665, |
| "eval_loss": 0.343604177236557, |
| "eval_runtime": 17.5758, |
| "eval_samples_per_second": 1081.257, |
| "eval_steps_per_second": 33.796, |
| "step": 43350 |
| }, |
| { |
| "epoch": 37.38156761412576, |
| "grad_norm": 0.46221479773521423, |
| "learning_rate": 5.0477174849267875e-05, |
| "loss": 0.2667576789855957, |
| "step": 43400 |
| }, |
| { |
| "epoch": 37.38156761412576, |
| "eval_loss": 0.34761863946914673, |
| "eval_runtime": 17.7665, |
| "eval_samples_per_second": 1069.655, |
| "eval_steps_per_second": 33.434, |
| "step": 43400 |
| }, |
| { |
| "epoch": 37.42463393626184, |
| "grad_norm": 0.48034900426864624, |
| "learning_rate": 5.030490956072351e-05, |
| "loss": 0.2662837219238281, |
| "step": 43450 |
| }, |
| { |
| "epoch": 37.42463393626184, |
| "eval_loss": 0.34295064210891724, |
| "eval_runtime": 17.5091, |
| "eval_samples_per_second": 1085.381, |
| "eval_steps_per_second": 33.925, |
| "step": 43450 |
| }, |
| { |
| "epoch": 37.46770025839793, |
| "grad_norm": 0.44220060110092163, |
| "learning_rate": 5.0132644272179166e-05, |
| "loss": 0.26716251373291017, |
| "step": 43500 |
| }, |
| { |
| "epoch": 37.46770025839793, |
| "eval_loss": 0.33440375328063965, |
| "eval_runtime": 18.4518, |
| "eval_samples_per_second": 1029.927, |
| "eval_steps_per_second": 32.192, |
| "step": 43500 |
| }, |
| { |
| "epoch": 37.510766580534025, |
| "grad_norm": 0.36467257142066956, |
| "learning_rate": 4.99603789836348e-05, |
| "loss": 0.2673937225341797, |
| "step": 43550 |
| }, |
| { |
| "epoch": 37.510766580534025, |
| "eval_loss": 0.3415800631046295, |
| "eval_runtime": 17.1191, |
| "eval_samples_per_second": 1110.104, |
| "eval_steps_per_second": 34.698, |
| "step": 43550 |
| }, |
| { |
| "epoch": 37.55383290267011, |
| "grad_norm": 0.5146782994270325, |
| "learning_rate": 4.978811369509044e-05, |
| "loss": 0.2724634552001953, |
| "step": 43600 |
| }, |
| { |
| "epoch": 37.55383290267011, |
| "eval_loss": 0.33940932154655457, |
| "eval_runtime": 17.7888, |
| "eval_samples_per_second": 1068.311, |
| "eval_steps_per_second": 33.392, |
| "step": 43600 |
| }, |
| { |
| "epoch": 37.5968992248062, |
| "grad_norm": 0.4268561005592346, |
| "learning_rate": 4.9615848406546085e-05, |
| "loss": 0.26595291137695315, |
| "step": 43650 |
| }, |
| { |
| "epoch": 37.5968992248062, |
| "eval_loss": 0.3469422459602356, |
| "eval_runtime": 20.077, |
| "eval_samples_per_second": 946.554, |
| "eval_steps_per_second": 29.586, |
| "step": 43650 |
| }, |
| { |
| "epoch": 37.63996554694229, |
| "grad_norm": 0.4228191673755646, |
| "learning_rate": 4.944358311800172e-05, |
| "loss": 0.27800693511962893, |
| "step": 43700 |
| }, |
| { |
| "epoch": 37.63996554694229, |
| "eval_loss": 0.34704458713531494, |
| "eval_runtime": 19.7857, |
| "eval_samples_per_second": 960.494, |
| "eval_steps_per_second": 30.022, |
| "step": 43700 |
| }, |
| { |
| "epoch": 37.683031869078384, |
| "grad_norm": 0.4571135342121124, |
| "learning_rate": 4.927131782945736e-05, |
| "loss": 0.26782615661621095, |
| "step": 43750 |
| }, |
| { |
| "epoch": 37.683031869078384, |
| "eval_loss": 0.3405662178993225, |
| "eval_runtime": 19.7688, |
| "eval_samples_per_second": 961.311, |
| "eval_steps_per_second": 30.047, |
| "step": 43750 |
| }, |
| { |
| "epoch": 37.72609819121447, |
| "grad_norm": 0.5446920394897461, |
| "learning_rate": 4.909905254091301e-05, |
| "loss": 0.2676548957824707, |
| "step": 43800 |
| }, |
| { |
| "epoch": 37.72609819121447, |
| "eval_loss": 0.3393403887748718, |
| "eval_runtime": 18.1421, |
| "eval_samples_per_second": 1047.508, |
| "eval_steps_per_second": 32.742, |
| "step": 43800 |
| }, |
| { |
| "epoch": 37.76916451335056, |
| "grad_norm": 0.39264625310897827, |
| "learning_rate": 4.892678725236865e-05, |
| "loss": 0.2752561569213867, |
| "step": 43850 |
| }, |
| { |
| "epoch": 37.76916451335056, |
| "eval_loss": 0.3424234390258789, |
| "eval_runtime": 17.7583, |
| "eval_samples_per_second": 1070.148, |
| "eval_steps_per_second": 33.449, |
| "step": 43850 |
| }, |
| { |
| "epoch": 37.81223083548665, |
| "grad_norm": 0.44023290276527405, |
| "learning_rate": 4.875452196382429e-05, |
| "loss": 0.27062139511108396, |
| "step": 43900 |
| }, |
| { |
| "epoch": 37.81223083548665, |
| "eval_loss": 0.3438037931919098, |
| "eval_runtime": 16.617, |
| "eval_samples_per_second": 1143.646, |
| "eval_steps_per_second": 35.746, |
| "step": 43900 |
| }, |
| { |
| "epoch": 37.855297157622736, |
| "grad_norm": 0.4684908092021942, |
| "learning_rate": 4.858225667527994e-05, |
| "loss": 0.27128431320190427, |
| "step": 43950 |
| }, |
| { |
| "epoch": 37.855297157622736, |
| "eval_loss": 0.33739370107650757, |
| "eval_runtime": 17.9564, |
| "eval_samples_per_second": 1058.34, |
| "eval_steps_per_second": 33.08, |
| "step": 43950 |
| }, |
| { |
| "epoch": 37.89836347975883, |
| "grad_norm": 0.468597412109375, |
| "learning_rate": 4.840999138673557e-05, |
| "loss": 0.26442058563232423, |
| "step": 44000 |
| }, |
| { |
| "epoch": 37.89836347975883, |
| "eval_loss": 0.34331783652305603, |
| "eval_runtime": 18.2636, |
| "eval_samples_per_second": 1040.541, |
| "eval_steps_per_second": 32.524, |
| "step": 44000 |
| }, |
| { |
| "epoch": 37.94142980189492, |
| "grad_norm": 0.5711420774459839, |
| "learning_rate": 4.8237726098191215e-05, |
| "loss": 0.2691427993774414, |
| "step": 44050 |
| }, |
| { |
| "epoch": 37.94142980189492, |
| "eval_loss": 0.34428438544273376, |
| "eval_runtime": 18.0829, |
| "eval_samples_per_second": 1050.936, |
| "eval_steps_per_second": 32.849, |
| "step": 44050 |
| }, |
| { |
| "epoch": 37.98449612403101, |
| "grad_norm": 0.3936568796634674, |
| "learning_rate": 4.806546080964686e-05, |
| "loss": 0.2765738868713379, |
| "step": 44100 |
| }, |
| { |
| "epoch": 37.98449612403101, |
| "eval_loss": 0.34059733152389526, |
| "eval_runtime": 17.6121, |
| "eval_samples_per_second": 1079.033, |
| "eval_steps_per_second": 33.727, |
| "step": 44100 |
| }, |
| { |
| "epoch": 38.027562446167096, |
| "grad_norm": 0.47591227293014526, |
| "learning_rate": 4.78931955211025e-05, |
| "loss": 0.2679741668701172, |
| "step": 44150 |
| }, |
| { |
| "epoch": 38.027562446167096, |
| "eval_loss": 0.3416222631931305, |
| "eval_runtime": 17.8412, |
| "eval_samples_per_second": 1065.172, |
| "eval_steps_per_second": 33.294, |
| "step": 44150 |
| }, |
| { |
| "epoch": 38.07062876830319, |
| "grad_norm": 0.3419834077358246, |
| "learning_rate": 4.772093023255814e-05, |
| "loss": 0.26936922073364256, |
| "step": 44200 |
| }, |
| { |
| "epoch": 38.07062876830319, |
| "eval_loss": 0.3447894752025604, |
| "eval_runtime": 17.8211, |
| "eval_samples_per_second": 1066.378, |
| "eval_steps_per_second": 33.331, |
| "step": 44200 |
| }, |
| { |
| "epoch": 38.11369509043928, |
| "grad_norm": 0.4044359028339386, |
| "learning_rate": 4.7548664944013784e-05, |
| "loss": 0.2689459800720215, |
| "step": 44250 |
| }, |
| { |
| "epoch": 38.11369509043928, |
| "eval_loss": 0.33676257729530334, |
| "eval_runtime": 16.7593, |
| "eval_samples_per_second": 1133.935, |
| "eval_steps_per_second": 35.443, |
| "step": 44250 |
| }, |
| { |
| "epoch": 38.156761412575364, |
| "grad_norm": 0.5036713480949402, |
| "learning_rate": 4.7376399655469426e-05, |
| "loss": 0.2690622329711914, |
| "step": 44300 |
| }, |
| { |
| "epoch": 38.156761412575364, |
| "eval_loss": 0.33689960837364197, |
| "eval_runtime": 17.4411, |
| "eval_samples_per_second": 1089.612, |
| "eval_steps_per_second": 34.058, |
| "step": 44300 |
| }, |
| { |
| "epoch": 38.199827734711455, |
| "grad_norm": 0.4989610016345978, |
| "learning_rate": 4.720413436692507e-05, |
| "loss": 0.26547248840332033, |
| "step": 44350 |
| }, |
| { |
| "epoch": 38.199827734711455, |
| "eval_loss": 0.3423798680305481, |
| "eval_runtime": 17.1909, |
| "eval_samples_per_second": 1105.469, |
| "eval_steps_per_second": 34.553, |
| "step": 44350 |
| }, |
| { |
| "epoch": 38.24289405684755, |
| "grad_norm": 0.5024054050445557, |
| "learning_rate": 4.703186907838071e-05, |
| "loss": 0.26653575897216797, |
| "step": 44400 |
| }, |
| { |
| "epoch": 38.24289405684755, |
| "eval_loss": 0.33530908823013306, |
| "eval_runtime": 16.9929, |
| "eval_samples_per_second": 1118.352, |
| "eval_steps_per_second": 34.956, |
| "step": 44400 |
| }, |
| { |
| "epoch": 38.28596037898363, |
| "grad_norm": 0.4604988098144531, |
| "learning_rate": 4.6859603789836345e-05, |
| "loss": 0.2670548439025879, |
| "step": 44450 |
| }, |
| { |
| "epoch": 38.28596037898363, |
| "eval_loss": 0.34150001406669617, |
| "eval_runtime": 16.4311, |
| "eval_samples_per_second": 1156.588, |
| "eval_steps_per_second": 36.151, |
| "step": 44450 |
| }, |
| { |
| "epoch": 38.32902670111972, |
| "grad_norm": 0.45105716586112976, |
| "learning_rate": 4.6687338501291994e-05, |
| "loss": 0.2611412048339844, |
| "step": 44500 |
| }, |
| { |
| "epoch": 38.32902670111972, |
| "eval_loss": 0.34156933426856995, |
| "eval_runtime": 16.6019, |
| "eval_samples_per_second": 1144.686, |
| "eval_steps_per_second": 35.779, |
| "step": 44500 |
| }, |
| { |
| "epoch": 38.372093023255815, |
| "grad_norm": 0.340572327375412, |
| "learning_rate": 4.6515073212747636e-05, |
| "loss": 0.26979669570922854, |
| "step": 44550 |
| }, |
| { |
| "epoch": 38.372093023255815, |
| "eval_loss": 0.33604246377944946, |
| "eval_runtime": 16.975, |
| "eval_samples_per_second": 1119.53, |
| "eval_steps_per_second": 34.993, |
| "step": 44550 |
| }, |
| { |
| "epoch": 38.415159345391906, |
| "grad_norm": 0.4126022458076477, |
| "learning_rate": 4.634280792420327e-05, |
| "loss": 0.26638069152832033, |
| "step": 44600 |
| }, |
| { |
| "epoch": 38.415159345391906, |
| "eval_loss": 0.34387487173080444, |
| "eval_runtime": 16.8252, |
| "eval_samples_per_second": 1129.499, |
| "eval_steps_per_second": 35.304, |
| "step": 44600 |
| }, |
| { |
| "epoch": 38.45822566752799, |
| "grad_norm": 0.437680184841156, |
| "learning_rate": 4.6170542635658914e-05, |
| "loss": 0.26841705322265624, |
| "step": 44650 |
| }, |
| { |
| "epoch": 38.45822566752799, |
| "eval_loss": 0.34003522992134094, |
| "eval_runtime": 16.169, |
| "eval_samples_per_second": 1175.336, |
| "eval_steps_per_second": 36.737, |
| "step": 44650 |
| }, |
| { |
| "epoch": 38.50129198966408, |
| "grad_norm": 0.4710044264793396, |
| "learning_rate": 4.599827734711456e-05, |
| "loss": 0.27303672790527345, |
| "step": 44700 |
| }, |
| { |
| "epoch": 38.50129198966408, |
| "eval_loss": 0.34455159306526184, |
| "eval_runtime": 16.3577, |
| "eval_samples_per_second": 1161.78, |
| "eval_steps_per_second": 36.313, |
| "step": 44700 |
| }, |
| { |
| "epoch": 38.544358311800174, |
| "grad_norm": 0.4871346950531006, |
| "learning_rate": 4.58260120585702e-05, |
| "loss": 0.26453098297119143, |
| "step": 44750 |
| }, |
| { |
| "epoch": 38.544358311800174, |
| "eval_loss": 0.3411996066570282, |
| "eval_runtime": 17.6662, |
| "eval_samples_per_second": 1075.728, |
| "eval_steps_per_second": 33.624, |
| "step": 44750 |
| }, |
| { |
| "epoch": 38.58742463393626, |
| "grad_norm": 0.4753357470035553, |
| "learning_rate": 4.565374677002584e-05, |
| "loss": 0.26396688461303713, |
| "step": 44800 |
| }, |
| { |
| "epoch": 38.58742463393626, |
| "eval_loss": 0.33602163195610046, |
| "eval_runtime": 17.3508, |
| "eval_samples_per_second": 1095.279, |
| "eval_steps_per_second": 34.235, |
| "step": 44800 |
| }, |
| { |
| "epoch": 38.63049095607235, |
| "grad_norm": 0.3800760805606842, |
| "learning_rate": 4.548148148148149e-05, |
| "loss": 0.27464492797851564, |
| "step": 44850 |
| }, |
| { |
| "epoch": 38.63049095607235, |
| "eval_loss": 0.34373241662979126, |
| "eval_runtime": 18.5757, |
| "eval_samples_per_second": 1023.059, |
| "eval_steps_per_second": 31.977, |
| "step": 44850 |
| }, |
| { |
| "epoch": 38.67355727820844, |
| "grad_norm": 0.43500515818595886, |
| "learning_rate": 4.5309216192937124e-05, |
| "loss": 0.26696685791015623, |
| "step": 44900 |
| }, |
| { |
| "epoch": 38.67355727820844, |
| "eval_loss": 0.3394354581832886, |
| "eval_runtime": 18.1744, |
| "eval_samples_per_second": 1045.648, |
| "eval_steps_per_second": 32.683, |
| "step": 44900 |
| }, |
| { |
| "epoch": 38.71662360034453, |
| "grad_norm": 0.5373595356941223, |
| "learning_rate": 4.5136950904392766e-05, |
| "loss": 0.2654395866394043, |
| "step": 44950 |
| }, |
| { |
| "epoch": 38.71662360034453, |
| "eval_loss": 0.34289655089378357, |
| "eval_runtime": 17.7815, |
| "eval_samples_per_second": 1068.754, |
| "eval_steps_per_second": 33.406, |
| "step": 44950 |
| }, |
| { |
| "epoch": 38.75968992248062, |
| "grad_norm": 0.48044517636299133, |
| "learning_rate": 4.496468561584841e-05, |
| "loss": 0.26706460952758787, |
| "step": 45000 |
| }, |
| { |
| "epoch": 38.75968992248062, |
| "eval_loss": 0.3330392837524414, |
| "eval_runtime": 17.8713, |
| "eval_samples_per_second": 1063.381, |
| "eval_steps_per_second": 33.238, |
| "step": 45000 |
| }, |
| { |
| "epoch": 38.80275624461671, |
| "grad_norm": 0.4838259220123291, |
| "learning_rate": 4.479242032730405e-05, |
| "loss": 0.2641607666015625, |
| "step": 45050 |
| }, |
| { |
| "epoch": 38.80275624461671, |
| "eval_loss": 0.33986079692840576, |
| "eval_runtime": 18.0427, |
| "eval_samples_per_second": 1053.278, |
| "eval_steps_per_second": 32.922, |
| "step": 45050 |
| }, |
| { |
| "epoch": 38.8458225667528, |
| "grad_norm": 0.4696730077266693, |
| "learning_rate": 4.462015503875969e-05, |
| "loss": 0.2635121154785156, |
| "step": 45100 |
| }, |
| { |
| "epoch": 38.8458225667528, |
| "eval_loss": 0.332796573638916, |
| "eval_runtime": 17.797, |
| "eval_samples_per_second": 1067.819, |
| "eval_steps_per_second": 33.376, |
| "step": 45100 |
| }, |
| { |
| "epoch": 38.888888888888886, |
| "grad_norm": 0.5020613670349121, |
| "learning_rate": 4.4447889750215335e-05, |
| "loss": 0.26674041748046873, |
| "step": 45150 |
| }, |
| { |
| "epoch": 38.888888888888886, |
| "eval_loss": 0.3357270359992981, |
| "eval_runtime": 17.2271, |
| "eval_samples_per_second": 1103.143, |
| "eval_steps_per_second": 34.48, |
| "step": 45150 |
| }, |
| { |
| "epoch": 38.93195521102498, |
| "grad_norm": 0.4783501625061035, |
| "learning_rate": 4.427562446167098e-05, |
| "loss": 0.26975366592407224, |
| "step": 45200 |
| }, |
| { |
| "epoch": 38.93195521102498, |
| "eval_loss": 0.33885446190834045, |
| "eval_runtime": 18.1031, |
| "eval_samples_per_second": 1049.766, |
| "eval_steps_per_second": 32.812, |
| "step": 45200 |
| }, |
| { |
| "epoch": 38.97502153316107, |
| "grad_norm": 0.45317623019218445, |
| "learning_rate": 4.410335917312662e-05, |
| "loss": 0.2624387741088867, |
| "step": 45250 |
| }, |
| { |
| "epoch": 38.97502153316107, |
| "eval_loss": 0.33671680092811584, |
| "eval_runtime": 17.49, |
| "eval_samples_per_second": 1086.562, |
| "eval_steps_per_second": 33.962, |
| "step": 45250 |
| }, |
| { |
| "epoch": 39.01808785529716, |
| "grad_norm": 0.3463917672634125, |
| "learning_rate": 4.393109388458226e-05, |
| "loss": 0.26262115478515624, |
| "step": 45300 |
| }, |
| { |
| "epoch": 39.01808785529716, |
| "eval_loss": 0.34109896421432495, |
| "eval_runtime": 17.6444, |
| "eval_samples_per_second": 1077.057, |
| "eval_steps_per_second": 33.665, |
| "step": 45300 |
| }, |
| { |
| "epoch": 39.061154177433245, |
| "grad_norm": 0.39748960733413696, |
| "learning_rate": 4.3758828596037896e-05, |
| "loss": 0.2630434989929199, |
| "step": 45350 |
| }, |
| { |
| "epoch": 39.061154177433245, |
| "eval_loss": 0.3328157663345337, |
| "eval_runtime": 18.0689, |
| "eval_samples_per_second": 1051.75, |
| "eval_steps_per_second": 32.874, |
| "step": 45350 |
| }, |
| { |
| "epoch": 39.10422049956934, |
| "grad_norm": 0.3978130519390106, |
| "learning_rate": 4.3586563307493545e-05, |
| "loss": 0.2649136734008789, |
| "step": 45400 |
| }, |
| { |
| "epoch": 39.10422049956934, |
| "eval_loss": 0.3429396152496338, |
| "eval_runtime": 18.9002, |
| "eval_samples_per_second": 1005.493, |
| "eval_steps_per_second": 31.428, |
| "step": 45400 |
| }, |
| { |
| "epoch": 39.14728682170543, |
| "grad_norm": 0.4477211833000183, |
| "learning_rate": 4.341429801894919e-05, |
| "loss": 0.27085290908813475, |
| "step": 45450 |
| }, |
| { |
| "epoch": 39.14728682170543, |
| "eval_loss": 0.3342150151729584, |
| "eval_runtime": 17.9198, |
| "eval_samples_per_second": 1060.505, |
| "eval_steps_per_second": 33.148, |
| "step": 45450 |
| }, |
| { |
| "epoch": 39.19035314384151, |
| "grad_norm": 0.38119587302207947, |
| "learning_rate": 4.324203273040482e-05, |
| "loss": 0.270539436340332, |
| "step": 45500 |
| }, |
| { |
| "epoch": 39.19035314384151, |
| "eval_loss": 0.3424154818058014, |
| "eval_runtime": 17.8606, |
| "eval_samples_per_second": 1064.017, |
| "eval_steps_per_second": 33.258, |
| "step": 45500 |
| }, |
| { |
| "epoch": 39.233419465977605, |
| "grad_norm": 0.3684655725955963, |
| "learning_rate": 4.3069767441860465e-05, |
| "loss": 0.26475446701049804, |
| "step": 45550 |
| }, |
| { |
| "epoch": 39.233419465977605, |
| "eval_loss": 0.34204351902008057, |
| "eval_runtime": 17.8644, |
| "eval_samples_per_second": 1063.789, |
| "eval_steps_per_second": 33.25, |
| "step": 45550 |
| }, |
| { |
| "epoch": 39.276485788113696, |
| "grad_norm": 0.42131876945495605, |
| "learning_rate": 4.2897502153316114e-05, |
| "loss": 0.2601029586791992, |
| "step": 45600 |
| }, |
| { |
| "epoch": 39.276485788113696, |
| "eval_loss": 0.33528417348861694, |
| "eval_runtime": 17.3248, |
| "eval_samples_per_second": 1096.927, |
| "eval_steps_per_second": 34.286, |
| "step": 45600 |
| }, |
| { |
| "epoch": 39.31955211024979, |
| "grad_norm": 0.41459059715270996, |
| "learning_rate": 4.272523686477175e-05, |
| "loss": 0.25860076904296875, |
| "step": 45650 |
| }, |
| { |
| "epoch": 39.31955211024979, |
| "eval_loss": 0.3424885869026184, |
| "eval_runtime": 17.8503, |
| "eval_samples_per_second": 1064.634, |
| "eval_steps_per_second": 33.277, |
| "step": 45650 |
| }, |
| { |
| "epoch": 39.36261843238587, |
| "grad_norm": 0.4517412483692169, |
| "learning_rate": 4.255297157622739e-05, |
| "loss": 0.2675811386108398, |
| "step": 45700 |
| }, |
| { |
| "epoch": 39.36261843238587, |
| "eval_loss": 0.3346919119358063, |
| "eval_runtime": 17.7955, |
| "eval_samples_per_second": 1067.913, |
| "eval_steps_per_second": 33.379, |
| "step": 45700 |
| }, |
| { |
| "epoch": 39.405684754521964, |
| "grad_norm": 0.4856393039226532, |
| "learning_rate": 4.238070628768303e-05, |
| "loss": 0.26250232696533204, |
| "step": 45750 |
| }, |
| { |
| "epoch": 39.405684754521964, |
| "eval_loss": 0.3385407328605652, |
| "eval_runtime": 17.5635, |
| "eval_samples_per_second": 1082.015, |
| "eval_steps_per_second": 33.82, |
| "step": 45750 |
| }, |
| { |
| "epoch": 39.448751076658056, |
| "grad_norm": 0.4645376205444336, |
| "learning_rate": 4.2208440999138675e-05, |
| "loss": 0.267506103515625, |
| "step": 45800 |
| }, |
| { |
| "epoch": 39.448751076658056, |
| "eval_loss": 0.34468886256217957, |
| "eval_runtime": 17.8254, |
| "eval_samples_per_second": 1066.121, |
| "eval_steps_per_second": 33.323, |
| "step": 45800 |
| }, |
| { |
| "epoch": 39.49181739879414, |
| "grad_norm": 0.4534069895744324, |
| "learning_rate": 4.203617571059432e-05, |
| "loss": 0.2699179267883301, |
| "step": 45850 |
| }, |
| { |
| "epoch": 39.49181739879414, |
| "eval_loss": 0.34161004424095154, |
| "eval_runtime": 17.9224, |
| "eval_samples_per_second": 1060.346, |
| "eval_steps_per_second": 33.143, |
| "step": 45850 |
| }, |
| { |
| "epoch": 39.53488372093023, |
| "grad_norm": 0.4326345920562744, |
| "learning_rate": 4.186391042204996e-05, |
| "loss": 0.2619389343261719, |
| "step": 45900 |
| }, |
| { |
| "epoch": 39.53488372093023, |
| "eval_loss": 0.33569958806037903, |
| "eval_runtime": 18.012, |
| "eval_samples_per_second": 1055.072, |
| "eval_steps_per_second": 32.978, |
| "step": 45900 |
| }, |
| { |
| "epoch": 39.57795004306632, |
| "grad_norm": 0.41711539030075073, |
| "learning_rate": 4.16916451335056e-05, |
| "loss": 0.2695113945007324, |
| "step": 45950 |
| }, |
| { |
| "epoch": 39.57795004306632, |
| "eval_loss": 0.3339827358722687, |
| "eval_runtime": 18.1577, |
| "eval_samples_per_second": 1046.609, |
| "eval_steps_per_second": 32.713, |
| "step": 45950 |
| }, |
| { |
| "epoch": 39.621016365202415, |
| "grad_norm": 0.4437963664531708, |
| "learning_rate": 4.1519379844961244e-05, |
| "loss": 0.26169944763183595, |
| "step": 46000 |
| }, |
| { |
| "epoch": 39.621016365202415, |
| "eval_loss": 0.34232571721076965, |
| "eval_runtime": 17.7283, |
| "eval_samples_per_second": 1071.959, |
| "eval_steps_per_second": 33.506, |
| "step": 46000 |
| }, |
| { |
| "epoch": 39.6640826873385, |
| "grad_norm": 0.4615746736526489, |
| "learning_rate": 4.1347114556416886e-05, |
| "loss": 0.2618155288696289, |
| "step": 46050 |
| }, |
| { |
| "epoch": 39.6640826873385, |
| "eval_loss": 0.3370404541492462, |
| "eval_runtime": 17.4379, |
| "eval_samples_per_second": 1089.812, |
| "eval_steps_per_second": 34.064, |
| "step": 46050 |
| }, |
| { |
| "epoch": 39.70714900947459, |
| "grad_norm": 0.4825727045536041, |
| "learning_rate": 4.117484926787253e-05, |
| "loss": 0.2661894226074219, |
| "step": 46100 |
| }, |
| { |
| "epoch": 39.70714900947459, |
| "eval_loss": 0.33807504177093506, |
| "eval_runtime": 17.8459, |
| "eval_samples_per_second": 1064.892, |
| "eval_steps_per_second": 33.285, |
| "step": 46100 |
| }, |
| { |
| "epoch": 39.75021533161068, |
| "grad_norm": 0.4135194420814514, |
| "learning_rate": 4.100258397932817e-05, |
| "loss": 0.26151920318603517, |
| "step": 46150 |
| }, |
| { |
| "epoch": 39.75021533161068, |
| "eval_loss": 0.3375168442726135, |
| "eval_runtime": 18.6642, |
| "eval_samples_per_second": 1018.203, |
| "eval_steps_per_second": 31.826, |
| "step": 46150 |
| }, |
| { |
| "epoch": 39.79328165374677, |
| "grad_norm": 0.42200180888175964, |
| "learning_rate": 4.0830318690783805e-05, |
| "loss": 0.26504571914672853, |
| "step": 46200 |
| }, |
| { |
| "epoch": 39.79328165374677, |
| "eval_loss": 0.3431920111179352, |
| "eval_runtime": 17.0825, |
| "eval_samples_per_second": 1112.483, |
| "eval_steps_per_second": 34.772, |
| "step": 46200 |
| }, |
| { |
| "epoch": 39.83634797588286, |
| "grad_norm": 0.45557790994644165, |
| "learning_rate": 4.065805340223945e-05, |
| "loss": 0.27078224182128907, |
| "step": 46250 |
| }, |
| { |
| "epoch": 39.83634797588286, |
| "eval_loss": 0.33381396532058716, |
| "eval_runtime": 17.8822, |
| "eval_samples_per_second": 1062.735, |
| "eval_steps_per_second": 33.217, |
| "step": 46250 |
| }, |
| { |
| "epoch": 39.87941429801895, |
| "grad_norm": 0.4105677902698517, |
| "learning_rate": 4.0485788113695096e-05, |
| "loss": 0.2642168426513672, |
| "step": 46300 |
| }, |
| { |
| "epoch": 39.87941429801895, |
| "eval_loss": 0.33261778950691223, |
| "eval_runtime": 18.0371, |
| "eval_samples_per_second": 1053.605, |
| "eval_steps_per_second": 32.932, |
| "step": 46300 |
| }, |
| { |
| "epoch": 39.92248062015504, |
| "grad_norm": 0.4780519902706146, |
| "learning_rate": 4.031352282515073e-05, |
| "loss": 0.2639030075073242, |
| "step": 46350 |
| }, |
| { |
| "epoch": 39.92248062015504, |
| "eval_loss": 0.33703893423080444, |
| "eval_runtime": 17.8657, |
| "eval_samples_per_second": 1063.713, |
| "eval_steps_per_second": 33.248, |
| "step": 46350 |
| }, |
| { |
| "epoch": 39.96554694229113, |
| "grad_norm": 0.4140496551990509, |
| "learning_rate": 4.0141257536606374e-05, |
| "loss": 0.2669095230102539, |
| "step": 46400 |
| }, |
| { |
| "epoch": 39.96554694229113, |
| "eval_loss": 0.3341706097126007, |
| "eval_runtime": 17.8602, |
| "eval_samples_per_second": 1064.04, |
| "eval_steps_per_second": 33.258, |
| "step": 46400 |
| }, |
| { |
| "epoch": 40.00861326442722, |
| "grad_norm": 0.4176825284957886, |
| "learning_rate": 3.9968992248062016e-05, |
| "loss": 0.2692737579345703, |
| "step": 46450 |
| }, |
| { |
| "epoch": 40.00861326442722, |
| "eval_loss": 0.3381114900112152, |
| "eval_runtime": 18.0122, |
| "eval_samples_per_second": 1055.065, |
| "eval_steps_per_second": 32.978, |
| "step": 46450 |
| }, |
| { |
| "epoch": 40.05167958656331, |
| "grad_norm": 0.4883769750595093, |
| "learning_rate": 3.979672695951766e-05, |
| "loss": 0.2627450942993164, |
| "step": 46500 |
| }, |
| { |
| "epoch": 40.05167958656331, |
| "eval_loss": 0.3378463685512543, |
| "eval_runtime": 17.9091, |
| "eval_samples_per_second": 1061.137, |
| "eval_steps_per_second": 33.167, |
| "step": 46500 |
| }, |
| { |
| "epoch": 40.094745908699394, |
| "grad_norm": 0.3604494035243988, |
| "learning_rate": 3.96244616709733e-05, |
| "loss": 0.2634261703491211, |
| "step": 46550 |
| }, |
| { |
| "epoch": 40.094745908699394, |
| "eval_loss": 0.3384319245815277, |
| "eval_runtime": 17.3453, |
| "eval_samples_per_second": 1095.63, |
| "eval_steps_per_second": 34.246, |
| "step": 46550 |
| }, |
| { |
| "epoch": 40.137812230835486, |
| "grad_norm": 0.5276474952697754, |
| "learning_rate": 3.945219638242894e-05, |
| "loss": 0.26404022216796874, |
| "step": 46600 |
| }, |
| { |
| "epoch": 40.137812230835486, |
| "eval_loss": 0.33722805976867676, |
| "eval_runtime": 17.9865, |
| "eval_samples_per_second": 1056.568, |
| "eval_steps_per_second": 33.025, |
| "step": 46600 |
| }, |
| { |
| "epoch": 40.18087855297158, |
| "grad_norm": 0.8581849336624146, |
| "learning_rate": 3.9279931093884584e-05, |
| "loss": 0.26176496505737307, |
| "step": 46650 |
| }, |
| { |
| "epoch": 40.18087855297158, |
| "eval_loss": 0.3430112898349762, |
| "eval_runtime": 17.3636, |
| "eval_samples_per_second": 1094.474, |
| "eval_steps_per_second": 34.209, |
| "step": 46650 |
| }, |
| { |
| "epoch": 40.22394487510766, |
| "grad_norm": 0.3712718188762665, |
| "learning_rate": 3.9107665805340226e-05, |
| "loss": 0.26714740753173827, |
| "step": 46700 |
| }, |
| { |
| "epoch": 40.22394487510766, |
| "eval_loss": 0.3383539915084839, |
| "eval_runtime": 18.0349, |
| "eval_samples_per_second": 1053.733, |
| "eval_steps_per_second": 32.936, |
| "step": 46700 |
| }, |
| { |
| "epoch": 40.267011197243754, |
| "grad_norm": 0.37334057688713074, |
| "learning_rate": 3.893540051679587e-05, |
| "loss": 0.26557273864746095, |
| "step": 46750 |
| }, |
| { |
| "epoch": 40.267011197243754, |
| "eval_loss": 0.33767005801200867, |
| "eval_runtime": 17.6561, |
| "eval_samples_per_second": 1076.344, |
| "eval_steps_per_second": 33.643, |
| "step": 46750 |
| }, |
| { |
| "epoch": 40.310077519379846, |
| "grad_norm": 0.45106396079063416, |
| "learning_rate": 3.8763135228251504e-05, |
| "loss": 0.26342580795288084, |
| "step": 46800 |
| }, |
| { |
| "epoch": 40.310077519379846, |
| "eval_loss": 0.33584079146385193, |
| "eval_runtime": 18.1547, |
| "eval_samples_per_second": 1046.781, |
| "eval_steps_per_second": 32.719, |
| "step": 46800 |
| }, |
| { |
| "epoch": 40.35314384151594, |
| "grad_norm": 0.4610295593738556, |
| "learning_rate": 3.859086993970715e-05, |
| "loss": 0.2536019325256348, |
| "step": 46850 |
| }, |
| { |
| "epoch": 40.35314384151594, |
| "eval_loss": 0.3431561589241028, |
| "eval_runtime": 18.4064, |
| "eval_samples_per_second": 1032.465, |
| "eval_steps_per_second": 32.271, |
| "step": 46850 |
| }, |
| { |
| "epoch": 40.39621016365202, |
| "grad_norm": 0.36498934030532837, |
| "learning_rate": 3.8418604651162795e-05, |
| "loss": 0.2718042373657227, |
| "step": 46900 |
| }, |
| { |
| "epoch": 40.39621016365202, |
| "eval_loss": 0.3425901234149933, |
| "eval_runtime": 18.0712, |
| "eval_samples_per_second": 1051.62, |
| "eval_steps_per_second": 32.87, |
| "step": 46900 |
| }, |
| { |
| "epoch": 40.43927648578811, |
| "grad_norm": 0.5364521741867065, |
| "learning_rate": 3.824633936261843e-05, |
| "loss": 0.2712949562072754, |
| "step": 46950 |
| }, |
| { |
| "epoch": 40.43927648578811, |
| "eval_loss": 0.3378094434738159, |
| "eval_runtime": 17.7612, |
| "eval_samples_per_second": 1069.972, |
| "eval_steps_per_second": 33.444, |
| "step": 46950 |
| }, |
| { |
| "epoch": 40.482342807924205, |
| "grad_norm": 0.40720999240875244, |
| "learning_rate": 3.807407407407408e-05, |
| "loss": 0.2558038330078125, |
| "step": 47000 |
| }, |
| { |
| "epoch": 40.482342807924205, |
| "eval_loss": 0.3408735394477844, |
| "eval_runtime": 17.3041, |
| "eval_samples_per_second": 1098.234, |
| "eval_steps_per_second": 34.327, |
| "step": 47000 |
| }, |
| { |
| "epoch": 40.52540913006029, |
| "grad_norm": 0.4001590311527252, |
| "learning_rate": 3.790180878552972e-05, |
| "loss": 0.26414262771606445, |
| "step": 47050 |
| }, |
| { |
| "epoch": 40.52540913006029, |
| "eval_loss": 0.3310953676700592, |
| "eval_runtime": 18.044, |
| "eval_samples_per_second": 1053.203, |
| "eval_steps_per_second": 32.92, |
| "step": 47050 |
| }, |
| { |
| "epoch": 40.56847545219638, |
| "grad_norm": 0.40518489480018616, |
| "learning_rate": 3.7729543496985356e-05, |
| "loss": 0.2695816230773926, |
| "step": 47100 |
| }, |
| { |
| "epoch": 40.56847545219638, |
| "eval_loss": 0.3371044397354126, |
| "eval_runtime": 17.6114, |
| "eval_samples_per_second": 1079.074, |
| "eval_steps_per_second": 33.728, |
| "step": 47100 |
| }, |
| { |
| "epoch": 40.61154177433247, |
| "grad_norm": 0.45011526346206665, |
| "learning_rate": 3.7557278208441e-05, |
| "loss": 0.25704208374023435, |
| "step": 47150 |
| }, |
| { |
| "epoch": 40.61154177433247, |
| "eval_loss": 0.3388862907886505, |
| "eval_runtime": 17.6856, |
| "eval_samples_per_second": 1074.545, |
| "eval_steps_per_second": 33.587, |
| "step": 47150 |
| }, |
| { |
| "epoch": 40.654608096468564, |
| "grad_norm": 0.4376562237739563, |
| "learning_rate": 3.738501291989665e-05, |
| "loss": 0.2663600158691406, |
| "step": 47200 |
| }, |
| { |
| "epoch": 40.654608096468564, |
| "eval_loss": 0.33221572637557983, |
| "eval_runtime": 18.0778, |
| "eval_samples_per_second": 1051.235, |
| "eval_steps_per_second": 32.858, |
| "step": 47200 |
| }, |
| { |
| "epoch": 40.69767441860465, |
| "grad_norm": 0.463076114654541, |
| "learning_rate": 3.721274763135228e-05, |
| "loss": 0.2625453567504883, |
| "step": 47250 |
| }, |
| { |
| "epoch": 40.69767441860465, |
| "eval_loss": 0.3378129303455353, |
| "eval_runtime": 18.3509, |
| "eval_samples_per_second": 1035.591, |
| "eval_steps_per_second": 32.369, |
| "step": 47250 |
| }, |
| { |
| "epoch": 40.74074074074074, |
| "grad_norm": 0.46275097131729126, |
| "learning_rate": 3.7040482342807925e-05, |
| "loss": 0.26067514419555665, |
| "step": 47300 |
| }, |
| { |
| "epoch": 40.74074074074074, |
| "eval_loss": 0.3361418843269348, |
| "eval_runtime": 17.7577, |
| "eval_samples_per_second": 1070.184, |
| "eval_steps_per_second": 33.45, |
| "step": 47300 |
| }, |
| { |
| "epoch": 40.78380706287683, |
| "grad_norm": 0.381462961435318, |
| "learning_rate": 3.686821705426357e-05, |
| "loss": 0.2565507125854492, |
| "step": 47350 |
| }, |
| { |
| "epoch": 40.78380706287683, |
| "eval_loss": 0.33574655652046204, |
| "eval_runtime": 17.9409, |
| "eval_samples_per_second": 1059.255, |
| "eval_steps_per_second": 33.109, |
| "step": 47350 |
| }, |
| { |
| "epoch": 40.82687338501292, |
| "grad_norm": 0.39149317145347595, |
| "learning_rate": 3.669595176571921e-05, |
| "loss": 0.26437042236328123, |
| "step": 47400 |
| }, |
| { |
| "epoch": 40.82687338501292, |
| "eval_loss": 0.33760392665863037, |
| "eval_runtime": 17.7755, |
| "eval_samples_per_second": 1069.112, |
| "eval_steps_per_second": 33.417, |
| "step": 47400 |
| }, |
| { |
| "epoch": 40.86993970714901, |
| "grad_norm": 0.46884697675704956, |
| "learning_rate": 3.652368647717485e-05, |
| "loss": 0.26930549621582034, |
| "step": 47450 |
| }, |
| { |
| "epoch": 40.86993970714901, |
| "eval_loss": 0.3397780656814575, |
| "eval_runtime": 17.4926, |
| "eval_samples_per_second": 1086.403, |
| "eval_steps_per_second": 33.957, |
| "step": 47450 |
| }, |
| { |
| "epoch": 40.9130060292851, |
| "grad_norm": 0.4447813332080841, |
| "learning_rate": 3.635142118863049e-05, |
| "loss": 0.2629986572265625, |
| "step": 47500 |
| }, |
| { |
| "epoch": 40.9130060292851, |
| "eval_loss": 0.33175262808799744, |
| "eval_runtime": 16.8599, |
| "eval_samples_per_second": 1127.169, |
| "eval_steps_per_second": 35.231, |
| "step": 47500 |
| }, |
| { |
| "epoch": 40.95607235142119, |
| "grad_norm": 0.4096522331237793, |
| "learning_rate": 3.6179155900086135e-05, |
| "loss": 0.257623462677002, |
| "step": 47550 |
| }, |
| { |
| "epoch": 40.95607235142119, |
| "eval_loss": 0.3397463262081146, |
| "eval_runtime": 17.5965, |
| "eval_samples_per_second": 1079.99, |
| "eval_steps_per_second": 33.757, |
| "step": 47550 |
| }, |
| { |
| "epoch": 40.999138673557276, |
| "grad_norm": 0.4105287492275238, |
| "learning_rate": 3.600689061154178e-05, |
| "loss": 0.26816184997558595, |
| "step": 47600 |
| }, |
| { |
| "epoch": 40.999138673557276, |
| "eval_loss": 0.33248135447502136, |
| "eval_runtime": 16.1381, |
| "eval_samples_per_second": 1177.587, |
| "eval_steps_per_second": 36.807, |
| "step": 47600 |
| }, |
| { |
| "epoch": 41.04220499569337, |
| "grad_norm": 0.4221794605255127, |
| "learning_rate": 3.583462532299742e-05, |
| "loss": 0.26319524765014646, |
| "step": 47650 |
| }, |
| { |
| "epoch": 41.04220499569337, |
| "eval_loss": 0.33123907446861267, |
| "eval_runtime": 18.0613, |
| "eval_samples_per_second": 1052.193, |
| "eval_steps_per_second": 32.888, |
| "step": 47650 |
| }, |
| { |
| "epoch": 41.08527131782946, |
| "grad_norm": 0.5156925916671753, |
| "learning_rate": 3.5662360034453055e-05, |
| "loss": 0.2654911994934082, |
| "step": 47700 |
| }, |
| { |
| "epoch": 41.08527131782946, |
| "eval_loss": 0.3341188430786133, |
| "eval_runtime": 18.2245, |
| "eval_samples_per_second": 1042.77, |
| "eval_steps_per_second": 32.593, |
| "step": 47700 |
| }, |
| { |
| "epoch": 41.128337639965544, |
| "grad_norm": 0.42098939418792725, |
| "learning_rate": 3.5490094745908704e-05, |
| "loss": 0.26546878814697267, |
| "step": 47750 |
| }, |
| { |
| "epoch": 41.128337639965544, |
| "eval_loss": 0.339838445186615, |
| "eval_runtime": 18.3008, |
| "eval_samples_per_second": 1038.426, |
| "eval_steps_per_second": 32.458, |
| "step": 47750 |
| }, |
| { |
| "epoch": 41.171403962101635, |
| "grad_norm": 0.3625338673591614, |
| "learning_rate": 3.5317829457364346e-05, |
| "loss": 0.25438928604125977, |
| "step": 47800 |
| }, |
| { |
| "epoch": 41.171403962101635, |
| "eval_loss": 0.33727747201919556, |
| "eval_runtime": 18.3263, |
| "eval_samples_per_second": 1036.98, |
| "eval_steps_per_second": 32.412, |
| "step": 47800 |
| }, |
| { |
| "epoch": 41.21447028423773, |
| "grad_norm": 0.4652734398841858, |
| "learning_rate": 3.514556416881998e-05, |
| "loss": 0.2680678939819336, |
| "step": 47850 |
| }, |
| { |
| "epoch": 41.21447028423773, |
| "eval_loss": 0.3393220603466034, |
| "eval_runtime": 17.9053, |
| "eval_samples_per_second": 1061.362, |
| "eval_steps_per_second": 33.175, |
| "step": 47850 |
| }, |
| { |
| "epoch": 41.25753660637382, |
| "grad_norm": 0.46969592571258545, |
| "learning_rate": 3.497329888027562e-05, |
| "loss": 0.26060947418212893, |
| "step": 47900 |
| }, |
| { |
| "epoch": 41.25753660637382, |
| "eval_loss": 0.338578999042511, |
| "eval_runtime": 18.9249, |
| "eval_samples_per_second": 1004.179, |
| "eval_steps_per_second": 31.387, |
| "step": 47900 |
| }, |
| { |
| "epoch": 41.3006029285099, |
| "grad_norm": 0.5016558170318604, |
| "learning_rate": 3.480103359173127e-05, |
| "loss": 0.26476665496826174, |
| "step": 47950 |
| }, |
| { |
| "epoch": 41.3006029285099, |
| "eval_loss": 0.34051215648651123, |
| "eval_runtime": 17.4257, |
| "eval_samples_per_second": 1090.571, |
| "eval_steps_per_second": 34.088, |
| "step": 47950 |
| }, |
| { |
| "epoch": 41.343669250645995, |
| "grad_norm": 0.42962339520454407, |
| "learning_rate": 3.462876830318691e-05, |
| "loss": 0.2569329071044922, |
| "step": 48000 |
| }, |
| { |
| "epoch": 41.343669250645995, |
| "eval_loss": 0.33297860622406006, |
| "eval_runtime": 17.9455, |
| "eval_samples_per_second": 1058.985, |
| "eval_steps_per_second": 33.1, |
| "step": 48000 |
| }, |
| { |
| "epoch": 41.38673557278209, |
| "grad_norm": 0.4176729917526245, |
| "learning_rate": 3.445650301464255e-05, |
| "loss": 0.26670978546142576, |
| "step": 48050 |
| }, |
| { |
| "epoch": 41.38673557278209, |
| "eval_loss": 0.3345198929309845, |
| "eval_runtime": 17.1145, |
| "eval_samples_per_second": 1110.405, |
| "eval_steps_per_second": 34.707, |
| "step": 48050 |
| }, |
| { |
| "epoch": 41.42980189491817, |
| "grad_norm": 0.4664563834667206, |
| "learning_rate": 3.42842377260982e-05, |
| "loss": 0.26591835021972654, |
| "step": 48100 |
| }, |
| { |
| "epoch": 41.42980189491817, |
| "eval_loss": 0.3355900049209595, |
| "eval_runtime": 18.6175, |
| "eval_samples_per_second": 1020.76, |
| "eval_steps_per_second": 31.905, |
| "step": 48100 |
| }, |
| { |
| "epoch": 41.47286821705426, |
| "grad_norm": 0.4347773492336273, |
| "learning_rate": 3.4111972437553834e-05, |
| "loss": 0.26680770874023435, |
| "step": 48150 |
| }, |
| { |
| "epoch": 41.47286821705426, |
| "eval_loss": 0.33552244305610657, |
| "eval_runtime": 17.8257, |
| "eval_samples_per_second": 1066.102, |
| "eval_steps_per_second": 33.323, |
| "step": 48150 |
| }, |
| { |
| "epoch": 41.515934539190354, |
| "grad_norm": 0.3817366361618042, |
| "learning_rate": 3.3939707149009476e-05, |
| "loss": 0.2699763870239258, |
| "step": 48200 |
| }, |
| { |
| "epoch": 41.515934539190354, |
| "eval_loss": 0.3416987359523773, |
| "eval_runtime": 17.8482, |
| "eval_samples_per_second": 1064.756, |
| "eval_steps_per_second": 33.281, |
| "step": 48200 |
| }, |
| { |
| "epoch": 41.559000861326446, |
| "grad_norm": 0.49607574939727783, |
| "learning_rate": 3.376744186046512e-05, |
| "loss": 0.2648237419128418, |
| "step": 48250 |
| }, |
| { |
| "epoch": 41.559000861326446, |
| "eval_loss": 0.33449888229370117, |
| "eval_runtime": 18.244, |
| "eval_samples_per_second": 1041.657, |
| "eval_steps_per_second": 32.559, |
| "step": 48250 |
| }, |
| { |
| "epoch": 41.60206718346253, |
| "grad_norm": 0.4564961791038513, |
| "learning_rate": 3.359517657192076e-05, |
| "loss": 0.26271303176879884, |
| "step": 48300 |
| }, |
| { |
| "epoch": 41.60206718346253, |
| "eval_loss": 0.3342786133289337, |
| "eval_runtime": 18.0953, |
| "eval_samples_per_second": 1050.219, |
| "eval_steps_per_second": 32.826, |
| "step": 48300 |
| }, |
| { |
| "epoch": 41.64513350559862, |
| "grad_norm": 0.41826575994491577, |
| "learning_rate": 3.34229112833764e-05, |
| "loss": 0.2643220329284668, |
| "step": 48350 |
| }, |
| { |
| "epoch": 41.64513350559862, |
| "eval_loss": 0.3378792107105255, |
| "eval_runtime": 18.0764, |
| "eval_samples_per_second": 1051.317, |
| "eval_steps_per_second": 32.861, |
| "step": 48350 |
| }, |
| { |
| "epoch": 41.688199827734714, |
| "grad_norm": 0.3788872957229614, |
| "learning_rate": 3.3250645994832044e-05, |
| "loss": 0.2604197311401367, |
| "step": 48400 |
| }, |
| { |
| "epoch": 41.688199827734714, |
| "eval_loss": 0.3380422592163086, |
| "eval_runtime": 17.7567, |
| "eval_samples_per_second": 1070.243, |
| "eval_steps_per_second": 33.452, |
| "step": 48400 |
| }, |
| { |
| "epoch": 41.7312661498708, |
| "grad_norm": 0.38110384345054626, |
| "learning_rate": 3.3078380706287686e-05, |
| "loss": 0.26157442092895505, |
| "step": 48450 |
| }, |
| { |
| "epoch": 41.7312661498708, |
| "eval_loss": 0.33079907298088074, |
| "eval_runtime": 20.2463, |
| "eval_samples_per_second": 938.642, |
| "eval_steps_per_second": 29.339, |
| "step": 48450 |
| }, |
| { |
| "epoch": 41.77433247200689, |
| "grad_norm": 0.44205212593078613, |
| "learning_rate": 3.290611541774333e-05, |
| "loss": 0.267241325378418, |
| "step": 48500 |
| }, |
| { |
| "epoch": 41.77433247200689, |
| "eval_loss": 0.33610066771507263, |
| "eval_runtime": 19.082, |
| "eval_samples_per_second": 995.915, |
| "eval_steps_per_second": 31.129, |
| "step": 48500 |
| }, |
| { |
| "epoch": 41.81739879414298, |
| "grad_norm": 0.41984260082244873, |
| "learning_rate": 3.273385012919897e-05, |
| "loss": 0.25937877655029296, |
| "step": 48550 |
| }, |
| { |
| "epoch": 41.81739879414298, |
| "eval_loss": 0.3353855609893799, |
| "eval_runtime": 19.6219, |
| "eval_samples_per_second": 968.511, |
| "eval_steps_per_second": 30.272, |
| "step": 48550 |
| }, |
| { |
| "epoch": 41.86046511627907, |
| "grad_norm": 0.3918072283267975, |
| "learning_rate": 3.2561584840654606e-05, |
| "loss": 0.26703338623046874, |
| "step": 48600 |
| }, |
| { |
| "epoch": 41.86046511627907, |
| "eval_loss": 0.3338076174259186, |
| "eval_runtime": 18.1515, |
| "eval_samples_per_second": 1046.967, |
| "eval_steps_per_second": 32.725, |
| "step": 48600 |
| }, |
| { |
| "epoch": 41.90353143841516, |
| "grad_norm": 0.43717628717422485, |
| "learning_rate": 3.2389319552110255e-05, |
| "loss": 0.267167911529541, |
| "step": 48650 |
| }, |
| { |
| "epoch": 41.90353143841516, |
| "eval_loss": 0.3376483619213104, |
| "eval_runtime": 18.5551, |
| "eval_samples_per_second": 1024.192, |
| "eval_steps_per_second": 32.013, |
| "step": 48650 |
| }, |
| { |
| "epoch": 41.94659776055125, |
| "grad_norm": 0.4901149272918701, |
| "learning_rate": 3.221705426356589e-05, |
| "loss": 0.2627080726623535, |
| "step": 48700 |
| }, |
| { |
| "epoch": 41.94659776055125, |
| "eval_loss": 0.33091285824775696, |
| "eval_runtime": 18.4233, |
| "eval_samples_per_second": 1031.522, |
| "eval_steps_per_second": 32.242, |
| "step": 48700 |
| }, |
| { |
| "epoch": 41.98966408268734, |
| "grad_norm": 0.4586520791053772, |
| "learning_rate": 3.204478897502153e-05, |
| "loss": 0.2619011306762695, |
| "step": 48750 |
| }, |
| { |
| "epoch": 41.98966408268734, |
| "eval_loss": 0.3310651183128357, |
| "eval_runtime": 18.994, |
| "eval_samples_per_second": 1000.526, |
| "eval_steps_per_second": 31.273, |
| "step": 48750 |
| }, |
| { |
| "epoch": 42.032730404823425, |
| "grad_norm": 0.5598412156105042, |
| "learning_rate": 3.1872523686477174e-05, |
| "loss": 0.2581977081298828, |
| "step": 48800 |
| }, |
| { |
| "epoch": 42.032730404823425, |
| "eval_loss": 0.333484411239624, |
| "eval_runtime": 17.963, |
| "eval_samples_per_second": 1057.95, |
| "eval_steps_per_second": 33.068, |
| "step": 48800 |
| }, |
| { |
| "epoch": 42.07579672695952, |
| "grad_norm": 0.4388487935066223, |
| "learning_rate": 3.1700258397932816e-05, |
| "loss": 0.2565720367431641, |
| "step": 48850 |
| }, |
| { |
| "epoch": 42.07579672695952, |
| "eval_loss": 0.33121058344841003, |
| "eval_runtime": 17.7479, |
| "eval_samples_per_second": 1070.778, |
| "eval_steps_per_second": 33.469, |
| "step": 48850 |
| }, |
| { |
| "epoch": 42.11886304909561, |
| "grad_norm": 0.4400928318500519, |
| "learning_rate": 3.152799310938846e-05, |
| "loss": 0.2657375717163086, |
| "step": 48900 |
| }, |
| { |
| "epoch": 42.11886304909561, |
| "eval_loss": 0.3297300934791565, |
| "eval_runtime": 17.5994, |
| "eval_samples_per_second": 1079.812, |
| "eval_steps_per_second": 33.751, |
| "step": 48900 |
| }, |
| { |
| "epoch": 42.1619293712317, |
| "grad_norm": 0.4509681761264801, |
| "learning_rate": 3.13557278208441e-05, |
| "loss": 0.26203857421875, |
| "step": 48950 |
| }, |
| { |
| "epoch": 42.1619293712317, |
| "eval_loss": 0.33452802896499634, |
| "eval_runtime": 18.2048, |
| "eval_samples_per_second": 1043.902, |
| "eval_steps_per_second": 32.629, |
| "step": 48950 |
| }, |
| { |
| "epoch": 42.204995693367785, |
| "grad_norm": 0.45013585686683655, |
| "learning_rate": 3.118346253229974e-05, |
| "loss": 0.2583207893371582, |
| "step": 49000 |
| }, |
| { |
| "epoch": 42.204995693367785, |
| "eval_loss": 0.3354299068450928, |
| "eval_runtime": 17.4619, |
| "eval_samples_per_second": 1088.31, |
| "eval_steps_per_second": 34.017, |
| "step": 49000 |
| }, |
| { |
| "epoch": 42.248062015503876, |
| "grad_norm": 0.3658822774887085, |
| "learning_rate": 3.1011197243755385e-05, |
| "loss": 0.269542350769043, |
| "step": 49050 |
| }, |
| { |
| "epoch": 42.248062015503876, |
| "eval_loss": 0.3368607461452484, |
| "eval_runtime": 17.9875, |
| "eval_samples_per_second": 1056.513, |
| "eval_steps_per_second": 33.023, |
| "step": 49050 |
| }, |
| { |
| "epoch": 42.29112833763997, |
| "grad_norm": 0.4629049003124237, |
| "learning_rate": 3.083893195521103e-05, |
| "loss": 0.25855308532714844, |
| "step": 49100 |
| }, |
| { |
| "epoch": 42.29112833763997, |
| "eval_loss": 0.3344835638999939, |
| "eval_runtime": 17.9456, |
| "eval_samples_per_second": 1058.98, |
| "eval_steps_per_second": 33.1, |
| "step": 49100 |
| }, |
| { |
| "epoch": 42.33419465977605, |
| "grad_norm": 0.4146256446838379, |
| "learning_rate": 3.066666666666667e-05, |
| "loss": 0.2610971069335937, |
| "step": 49150 |
| }, |
| { |
| "epoch": 42.33419465977605, |
| "eval_loss": 0.32769039273262024, |
| "eval_runtime": 18.1699, |
| "eval_samples_per_second": 1045.906, |
| "eval_steps_per_second": 32.691, |
| "step": 49150 |
| }, |
| { |
| "epoch": 42.377260981912144, |
| "grad_norm": 0.3447282612323761, |
| "learning_rate": 3.049440137812231e-05, |
| "loss": 0.26025299072265623, |
| "step": 49200 |
| }, |
| { |
| "epoch": 42.377260981912144, |
| "eval_loss": 0.3351108729839325, |
| "eval_runtime": 18.2652, |
| "eval_samples_per_second": 1040.448, |
| "eval_steps_per_second": 32.521, |
| "step": 49200 |
| }, |
| { |
| "epoch": 42.420327304048236, |
| "grad_norm": 0.463405579328537, |
| "learning_rate": 3.0322136089577953e-05, |
| "loss": 0.26323734283447264, |
| "step": 49250 |
| }, |
| { |
| "epoch": 42.420327304048236, |
| "eval_loss": 0.335365355014801, |
| "eval_runtime": 18.0634, |
| "eval_samples_per_second": 1052.071, |
| "eval_steps_per_second": 32.884, |
| "step": 49250 |
| }, |
| { |
| "epoch": 42.46339362618432, |
| "grad_norm": 0.3932636082172394, |
| "learning_rate": 3.0149870801033592e-05, |
| "loss": 0.2605165672302246, |
| "step": 49300 |
| }, |
| { |
| "epoch": 42.46339362618432, |
| "eval_loss": 0.33185839653015137, |
| "eval_runtime": 17.917, |
| "eval_samples_per_second": 1060.669, |
| "eval_steps_per_second": 33.153, |
| "step": 49300 |
| }, |
| { |
| "epoch": 42.50645994832041, |
| "grad_norm": 0.4551706910133362, |
| "learning_rate": 2.9977605512489237e-05, |
| "loss": 0.25998649597167967, |
| "step": 49350 |
| }, |
| { |
| "epoch": 42.50645994832041, |
| "eval_loss": 0.33300530910491943, |
| "eval_runtime": 17.1804, |
| "eval_samples_per_second": 1106.144, |
| "eval_steps_per_second": 34.574, |
| "step": 49350 |
| }, |
| { |
| "epoch": 42.549526270456504, |
| "grad_norm": 0.3885660171508789, |
| "learning_rate": 2.9805340223944876e-05, |
| "loss": 0.2636854553222656, |
| "step": 49400 |
| }, |
| { |
| "epoch": 42.549526270456504, |
| "eval_loss": 0.3332418203353882, |
| "eval_runtime": 18.0481, |
| "eval_samples_per_second": 1052.965, |
| "eval_steps_per_second": 32.912, |
| "step": 49400 |
| }, |
| { |
| "epoch": 42.592592592592595, |
| "grad_norm": 0.4408448338508606, |
| "learning_rate": 2.9633074935400518e-05, |
| "loss": 0.25896127700805666, |
| "step": 49450 |
| }, |
| { |
| "epoch": 42.592592592592595, |
| "eval_loss": 0.330980122089386, |
| "eval_runtime": 17.6177, |
| "eval_samples_per_second": 1078.685, |
| "eval_steps_per_second": 33.716, |
| "step": 49450 |
| }, |
| { |
| "epoch": 42.63565891472868, |
| "grad_norm": 0.43017804622650146, |
| "learning_rate": 2.9460809646856157e-05, |
| "loss": 0.2645112037658691, |
| "step": 49500 |
| }, |
| { |
| "epoch": 42.63565891472868, |
| "eval_loss": 0.32889503240585327, |
| "eval_runtime": 18.5528, |
| "eval_samples_per_second": 1024.322, |
| "eval_steps_per_second": 32.017, |
| "step": 49500 |
| }, |
| { |
| "epoch": 42.67872523686477, |
| "grad_norm": 0.5321599245071411, |
| "learning_rate": 2.9288544358311802e-05, |
| "loss": 0.2638208770751953, |
| "step": 49550 |
| }, |
| { |
| "epoch": 42.67872523686477, |
| "eval_loss": 0.33535560965538025, |
| "eval_runtime": 18.8643, |
| "eval_samples_per_second": 1007.405, |
| "eval_steps_per_second": 31.488, |
| "step": 49550 |
| }, |
| { |
| "epoch": 42.72179155900086, |
| "grad_norm": 0.4819716811180115, |
| "learning_rate": 2.9116279069767444e-05, |
| "loss": 0.2661098289489746, |
| "step": 49600 |
| }, |
| { |
| "epoch": 42.72179155900086, |
| "eval_loss": 0.3298910856246948, |
| "eval_runtime": 17.9128, |
| "eval_samples_per_second": 1060.915, |
| "eval_steps_per_second": 33.161, |
| "step": 49600 |
| }, |
| { |
| "epoch": 42.76485788113695, |
| "grad_norm": 0.3507688343524933, |
| "learning_rate": 2.8944013781223083e-05, |
| "loss": 0.2569438362121582, |
| "step": 49650 |
| }, |
| { |
| "epoch": 42.76485788113695, |
| "eval_loss": 0.32944178581237793, |
| "eval_runtime": 18.1784, |
| "eval_samples_per_second": 1045.419, |
| "eval_steps_per_second": 32.676, |
| "step": 49650 |
| }, |
| { |
| "epoch": 42.80792420327304, |
| "grad_norm": 0.6185017824172974, |
| "learning_rate": 2.8771748492678725e-05, |
| "loss": 0.2623113250732422, |
| "step": 49700 |
| }, |
| { |
| "epoch": 42.80792420327304, |
| "eval_loss": 0.32671746611595154, |
| "eval_runtime": 18.2043, |
| "eval_samples_per_second": 1043.931, |
| "eval_steps_per_second": 32.63, |
| "step": 49700 |
| }, |
| { |
| "epoch": 42.85099052540913, |
| "grad_norm": 0.5226140022277832, |
| "learning_rate": 2.859948320413437e-05, |
| "loss": 0.2594397735595703, |
| "step": 49750 |
| }, |
| { |
| "epoch": 42.85099052540913, |
| "eval_loss": 0.3336170017719269, |
| "eval_runtime": 17.6042, |
| "eval_samples_per_second": 1079.517, |
| "eval_steps_per_second": 33.742, |
| "step": 49750 |
| }, |
| { |
| "epoch": 42.89405684754522, |
| "grad_norm": 0.4084358513355255, |
| "learning_rate": 2.842721791559001e-05, |
| "loss": 0.26164541244506834, |
| "step": 49800 |
| }, |
| { |
| "epoch": 42.89405684754522, |
| "eval_loss": 0.33178937435150146, |
| "eval_runtime": 16.5647, |
| "eval_samples_per_second": 1147.257, |
| "eval_steps_per_second": 35.859, |
| "step": 49800 |
| }, |
| { |
| "epoch": 42.93712316968131, |
| "grad_norm": 0.4608656167984009, |
| "learning_rate": 2.825495262704565e-05, |
| "loss": 0.26180377960205076, |
| "step": 49850 |
| }, |
| { |
| "epoch": 42.93712316968131, |
| "eval_loss": 0.33565860986709595, |
| "eval_runtime": 17.6583, |
| "eval_samples_per_second": 1076.207, |
| "eval_steps_per_second": 33.639, |
| "step": 49850 |
| }, |
| { |
| "epoch": 42.9801894918174, |
| "grad_norm": 0.47300195693969727, |
| "learning_rate": 2.8082687338501297e-05, |
| "loss": 0.25752685546875, |
| "step": 49900 |
| }, |
| { |
| "epoch": 42.9801894918174, |
| "eval_loss": 0.33200475573539734, |
| "eval_runtime": 18.0159, |
| "eval_samples_per_second": 1054.843, |
| "eval_steps_per_second": 32.971, |
| "step": 49900 |
| }, |
| { |
| "epoch": 43.02325581395349, |
| "grad_norm": 0.47372981905937195, |
| "learning_rate": 2.7910422049956936e-05, |
| "loss": 0.25568557739257813, |
| "step": 49950 |
| }, |
| { |
| "epoch": 43.02325581395349, |
| "eval_loss": 0.33379772305488586, |
| "eval_runtime": 17.2294, |
| "eval_samples_per_second": 1103.001, |
| "eval_steps_per_second": 34.476, |
| "step": 49950 |
| }, |
| { |
| "epoch": 43.066322136089575, |
| "grad_norm": 0.4774799942970276, |
| "learning_rate": 2.7738156761412574e-05, |
| "loss": 0.2577634620666504, |
| "step": 50000 |
| }, |
| { |
| "epoch": 43.066322136089575, |
| "eval_loss": 0.33078330755233765, |
| "eval_runtime": 18.2773, |
| "eval_samples_per_second": 1039.762, |
| "eval_steps_per_second": 32.499, |
| "step": 50000 |
| }, |
| { |
| "epoch": 43.109388458225666, |
| "grad_norm": 0.45900219678878784, |
| "learning_rate": 2.7565891472868217e-05, |
| "loss": 0.25360595703125, |
| "step": 50050 |
| }, |
| { |
| "epoch": 43.109388458225666, |
| "eval_loss": 0.3274356424808502, |
| "eval_runtime": 18.2902, |
| "eval_samples_per_second": 1039.027, |
| "eval_steps_per_second": 32.476, |
| "step": 50050 |
| }, |
| { |
| "epoch": 43.15245478036176, |
| "grad_norm": 0.447608083486557, |
| "learning_rate": 2.7393626184323862e-05, |
| "loss": 0.25464582443237305, |
| "step": 50100 |
| }, |
| { |
| "epoch": 43.15245478036176, |
| "eval_loss": 0.33124667406082153, |
| "eval_runtime": 18.1672, |
| "eval_samples_per_second": 1046.06, |
| "eval_steps_per_second": 32.696, |
| "step": 50100 |
| }, |
| { |
| "epoch": 43.19552110249785, |
| "grad_norm": 0.40590664744377136, |
| "learning_rate": 2.72213608957795e-05, |
| "loss": 0.2649323844909668, |
| "step": 50150 |
| }, |
| { |
| "epoch": 43.19552110249785, |
| "eval_loss": 0.33773353695869446, |
| "eval_runtime": 18.3985, |
| "eval_samples_per_second": 1032.912, |
| "eval_steps_per_second": 32.285, |
| "step": 50150 |
| }, |
| { |
| "epoch": 43.238587424633934, |
| "grad_norm": 0.42521902918815613, |
| "learning_rate": 2.7049095607235143e-05, |
| "loss": 0.2657030487060547, |
| "step": 50200 |
| }, |
| { |
| "epoch": 43.238587424633934, |
| "eval_loss": 0.3342694342136383, |
| "eval_runtime": 18.241, |
| "eval_samples_per_second": 1041.829, |
| "eval_steps_per_second": 32.564, |
| "step": 50200 |
| }, |
| { |
| "epoch": 43.281653746770026, |
| "grad_norm": 0.4506593644618988, |
| "learning_rate": 2.687683031869079e-05, |
| "loss": 0.257874755859375, |
| "step": 50250 |
| }, |
| { |
| "epoch": 43.281653746770026, |
| "eval_loss": 0.33657172322273254, |
| "eval_runtime": 17.7859, |
| "eval_samples_per_second": 1068.486, |
| "eval_steps_per_second": 33.397, |
| "step": 50250 |
| }, |
| { |
| "epoch": 43.32472006890612, |
| "grad_norm": 0.4413757026195526, |
| "learning_rate": 2.6704565030146427e-05, |
| "loss": 0.26257347106933593, |
| "step": 50300 |
| }, |
| { |
| "epoch": 43.32472006890612, |
| "eval_loss": 0.33235645294189453, |
| "eval_runtime": 16.8435, |
| "eval_samples_per_second": 1128.27, |
| "eval_steps_per_second": 35.266, |
| "step": 50300 |
| }, |
| { |
| "epoch": 43.3677863910422, |
| "grad_norm": 0.46813246607780457, |
| "learning_rate": 2.653229974160207e-05, |
| "loss": 0.25420507431030276, |
| "step": 50350 |
| }, |
| { |
| "epoch": 43.3677863910422, |
| "eval_loss": 0.33105531334877014, |
| "eval_runtime": 18.0603, |
| "eval_samples_per_second": 1052.255, |
| "eval_steps_per_second": 32.89, |
| "step": 50350 |
| }, |
| { |
| "epoch": 43.41085271317829, |
| "grad_norm": 0.36964699625968933, |
| "learning_rate": 2.6360034453057708e-05, |
| "loss": 0.2610732841491699, |
| "step": 50400 |
| }, |
| { |
| "epoch": 43.41085271317829, |
| "eval_loss": 0.33543485403060913, |
| "eval_runtime": 17.4611, |
| "eval_samples_per_second": 1088.362, |
| "eval_steps_per_second": 34.018, |
| "step": 50400 |
| }, |
| { |
| "epoch": 43.453919035314385, |
| "grad_norm": 0.3838389813899994, |
| "learning_rate": 2.6187769164513353e-05, |
| "loss": 0.2597215270996094, |
| "step": 50450 |
| }, |
| { |
| "epoch": 43.453919035314385, |
| "eval_loss": 0.33240464329719543, |
| "eval_runtime": 18.156, |
| "eval_samples_per_second": 1046.706, |
| "eval_steps_per_second": 32.716, |
| "step": 50450 |
| }, |
| { |
| "epoch": 43.49698535745048, |
| "grad_norm": 0.5293746590614319, |
| "learning_rate": 2.6015503875968995e-05, |
| "loss": 0.257132568359375, |
| "step": 50500 |
| }, |
| { |
| "epoch": 43.49698535745048, |
| "eval_loss": 0.3345556855201721, |
| "eval_runtime": 18.2182, |
| "eval_samples_per_second": 1043.133, |
| "eval_steps_per_second": 32.605, |
| "step": 50500 |
| }, |
| { |
| "epoch": 43.54005167958656, |
| "grad_norm": 0.47793522477149963, |
| "learning_rate": 2.5843238587424634e-05, |
| "loss": 0.25999725341796875, |
| "step": 50550 |
| }, |
| { |
| "epoch": 43.54005167958656, |
| "eval_loss": 0.3289826512336731, |
| "eval_runtime": 18.4921, |
| "eval_samples_per_second": 1027.683, |
| "eval_steps_per_second": 32.122, |
| "step": 50550 |
| }, |
| { |
| "epoch": 43.58311800172265, |
| "grad_norm": 0.38100966811180115, |
| "learning_rate": 2.5670973298880273e-05, |
| "loss": 0.2568831443786621, |
| "step": 50600 |
| }, |
| { |
| "epoch": 43.58311800172265, |
| "eval_loss": 0.33748096227645874, |
| "eval_runtime": 18.0335, |
| "eval_samples_per_second": 1053.816, |
| "eval_steps_per_second": 32.939, |
| "step": 50600 |
| }, |
| { |
| "epoch": 43.626184323858745, |
| "grad_norm": 0.4159799814224243, |
| "learning_rate": 2.549870801033592e-05, |
| "loss": 0.26660945892333987, |
| "step": 50650 |
| }, |
| { |
| "epoch": 43.626184323858745, |
| "eval_loss": 0.33200082182884216, |
| "eval_runtime": 18.123, |
| "eval_samples_per_second": 1048.611, |
| "eval_steps_per_second": 32.776, |
| "step": 50650 |
| }, |
| { |
| "epoch": 43.66925064599483, |
| "grad_norm": 0.3828164339065552, |
| "learning_rate": 2.532644272179156e-05, |
| "loss": 0.260919189453125, |
| "step": 50700 |
| }, |
| { |
| "epoch": 43.66925064599483, |
| "eval_loss": 0.32596173882484436, |
| "eval_runtime": 18.1437, |
| "eval_samples_per_second": 1047.418, |
| "eval_steps_per_second": 32.739, |
| "step": 50700 |
| }, |
| { |
| "epoch": 43.71231696813092, |
| "grad_norm": 0.4727261960506439, |
| "learning_rate": 2.51541774332472e-05, |
| "loss": 0.26335853576660156, |
| "step": 50750 |
| }, |
| { |
| "epoch": 43.71231696813092, |
| "eval_loss": 0.3335762321949005, |
| "eval_runtime": 17.467, |
| "eval_samples_per_second": 1087.997, |
| "eval_steps_per_second": 34.007, |
| "step": 50750 |
| }, |
| { |
| "epoch": 43.75538329026701, |
| "grad_norm": 0.46335121989250183, |
| "learning_rate": 2.4981912144702845e-05, |
| "loss": 0.2584223747253418, |
| "step": 50800 |
| }, |
| { |
| "epoch": 43.75538329026701, |
| "eval_loss": 0.33142974972724915, |
| "eval_runtime": 19.7474, |
| "eval_samples_per_second": 962.354, |
| "eval_steps_per_second": 30.08, |
| "step": 50800 |
| }, |
| { |
| "epoch": 43.798449612403104, |
| "grad_norm": 0.3923814594745636, |
| "learning_rate": 2.4809646856158487e-05, |
| "loss": 0.2604896926879883, |
| "step": 50850 |
| }, |
| { |
| "epoch": 43.798449612403104, |
| "eval_loss": 0.3408214747905731, |
| "eval_runtime": 17.6173, |
| "eval_samples_per_second": 1078.711, |
| "eval_steps_per_second": 33.717, |
| "step": 50850 |
| }, |
| { |
| "epoch": 43.84151593453919, |
| "grad_norm": 0.4811161160469055, |
| "learning_rate": 2.4637381567614126e-05, |
| "loss": 0.2605669593811035, |
| "step": 50900 |
| }, |
| { |
| "epoch": 43.84151593453919, |
| "eval_loss": 0.3232431709766388, |
| "eval_runtime": 18.0865, |
| "eval_samples_per_second": 1050.729, |
| "eval_steps_per_second": 32.842, |
| "step": 50900 |
| }, |
| { |
| "epoch": 43.88458225667528, |
| "grad_norm": 0.5609408617019653, |
| "learning_rate": 2.4465116279069768e-05, |
| "loss": 0.2697344398498535, |
| "step": 50950 |
| }, |
| { |
| "epoch": 43.88458225667528, |
| "eval_loss": 0.3291715979576111, |
| "eval_runtime": 18.1056, |
| "eval_samples_per_second": 1049.618, |
| "eval_steps_per_second": 32.807, |
| "step": 50950 |
| }, |
| { |
| "epoch": 43.92764857881137, |
| "grad_norm": 0.4350355565547943, |
| "learning_rate": 2.429285099052541e-05, |
| "loss": 0.25872198104858396, |
| "step": 51000 |
| }, |
| { |
| "epoch": 43.92764857881137, |
| "eval_loss": 0.33080291748046875, |
| "eval_runtime": 18.1436, |
| "eval_samples_per_second": 1047.421, |
| "eval_steps_per_second": 32.739, |
| "step": 51000 |
| }, |
| { |
| "epoch": 43.970714900947456, |
| "grad_norm": 0.39226019382476807, |
| "learning_rate": 2.4120585701981052e-05, |
| "loss": 0.2671630668640137, |
| "step": 51050 |
| }, |
| { |
| "epoch": 43.970714900947456, |
| "eval_loss": 0.32811781764030457, |
| "eval_runtime": 18.179, |
| "eval_samples_per_second": 1045.38, |
| "eval_steps_per_second": 32.675, |
| "step": 51050 |
| }, |
| { |
| "epoch": 44.01378122308355, |
| "grad_norm": 0.38461270928382874, |
| "learning_rate": 2.3948320413436694e-05, |
| "loss": 0.25831233978271484, |
| "step": 51100 |
| }, |
| { |
| "epoch": 44.01378122308355, |
| "eval_loss": 0.33200836181640625, |
| "eval_runtime": 18.2577, |
| "eval_samples_per_second": 1040.877, |
| "eval_steps_per_second": 32.534, |
| "step": 51100 |
| }, |
| { |
| "epoch": 44.05684754521964, |
| "grad_norm": 0.44967833161354065, |
| "learning_rate": 2.3776055124892336e-05, |
| "loss": 0.2555442428588867, |
| "step": 51150 |
| }, |
| { |
| "epoch": 44.05684754521964, |
| "eval_loss": 0.3291064500808716, |
| "eval_runtime": 17.8675, |
| "eval_samples_per_second": 1063.607, |
| "eval_steps_per_second": 33.245, |
| "step": 51150 |
| }, |
| { |
| "epoch": 44.09991386735573, |
| "grad_norm": 0.41032636165618896, |
| "learning_rate": 2.3603789836347978e-05, |
| "loss": 0.2579687690734863, |
| "step": 51200 |
| }, |
| { |
| "epoch": 44.09991386735573, |
| "eval_loss": 0.32970529794692993, |
| "eval_runtime": 17.543, |
| "eval_samples_per_second": 1083.283, |
| "eval_steps_per_second": 33.86, |
| "step": 51200 |
| }, |
| { |
| "epoch": 44.142980189491816, |
| "grad_norm": 0.3892042338848114, |
| "learning_rate": 2.3431524547803617e-05, |
| "loss": 0.263288688659668, |
| "step": 51250 |
| }, |
| { |
| "epoch": 44.142980189491816, |
| "eval_loss": 0.33633163571357727, |
| "eval_runtime": 17.6106, |
| "eval_samples_per_second": 1079.123, |
| "eval_steps_per_second": 33.73, |
| "step": 51250 |
| }, |
| { |
| "epoch": 44.18604651162791, |
| "grad_norm": 0.44527554512023926, |
| "learning_rate": 2.3259259259259262e-05, |
| "loss": 0.2592180252075195, |
| "step": 51300 |
| }, |
| { |
| "epoch": 44.18604651162791, |
| "eval_loss": 0.33094218373298645, |
| "eval_runtime": 18.2222, |
| "eval_samples_per_second": 1042.904, |
| "eval_steps_per_second": 32.598, |
| "step": 51300 |
| }, |
| { |
| "epoch": 44.229112833764, |
| "grad_norm": 0.3995368480682373, |
| "learning_rate": 2.30869939707149e-05, |
| "loss": 0.25935218811035154, |
| "step": 51350 |
| }, |
| { |
| "epoch": 44.229112833764, |
| "eval_loss": 0.3350081741809845, |
| "eval_runtime": 16.471, |
| "eval_samples_per_second": 1153.786, |
| "eval_steps_per_second": 36.063, |
| "step": 51350 |
| }, |
| { |
| "epoch": 44.27217915590008, |
| "grad_norm": 0.49399951100349426, |
| "learning_rate": 2.2914728682170543e-05, |
| "loss": 0.26251346588134766, |
| "step": 51400 |
| }, |
| { |
| "epoch": 44.27217915590008, |
| "eval_loss": 0.3334828317165375, |
| "eval_runtime": 18.0529, |
| "eval_samples_per_second": 1052.682, |
| "eval_steps_per_second": 32.903, |
| "step": 51400 |
| }, |
| { |
| "epoch": 44.315245478036175, |
| "grad_norm": 0.42008745670318604, |
| "learning_rate": 2.2742463393626185e-05, |
| "loss": 0.25890609741210935, |
| "step": 51450 |
| }, |
| { |
| "epoch": 44.315245478036175, |
| "eval_loss": 0.3312210440635681, |
| "eval_runtime": 18.1708, |
| "eval_samples_per_second": 1045.852, |
| "eval_steps_per_second": 32.69, |
| "step": 51450 |
| }, |
| { |
| "epoch": 44.35831180017227, |
| "grad_norm": 0.4206209182739258, |
| "learning_rate": 2.2570198105081827e-05, |
| "loss": 0.2611784744262695, |
| "step": 51500 |
| }, |
| { |
| "epoch": 44.35831180017227, |
| "eval_loss": 0.3322766125202179, |
| "eval_runtime": 18.1357, |
| "eval_samples_per_second": 1047.879, |
| "eval_steps_per_second": 32.753, |
| "step": 51500 |
| }, |
| { |
| "epoch": 44.40137812230836, |
| "grad_norm": 0.41025853157043457, |
| "learning_rate": 2.2397932816537466e-05, |
| "loss": 0.2615481185913086, |
| "step": 51550 |
| }, |
| { |
| "epoch": 44.40137812230836, |
| "eval_loss": 0.33196064829826355, |
| "eval_runtime": 18.1456, |
| "eval_samples_per_second": 1047.305, |
| "eval_steps_per_second": 32.735, |
| "step": 51550 |
| }, |
| { |
| "epoch": 44.44444444444444, |
| "grad_norm": 0.39682939648628235, |
| "learning_rate": 2.222566752799311e-05, |
| "loss": 0.259089298248291, |
| "step": 51600 |
| }, |
| { |
| "epoch": 44.44444444444444, |
| "eval_loss": 0.32832977175712585, |
| "eval_runtime": 17.8389, |
| "eval_samples_per_second": 1065.313, |
| "eval_steps_per_second": 33.298, |
| "step": 51600 |
| }, |
| { |
| "epoch": 44.487510766580534, |
| "grad_norm": 0.41068243980407715, |
| "learning_rate": 2.2053402239448754e-05, |
| "loss": 0.25956289291381834, |
| "step": 51650 |
| }, |
| { |
| "epoch": 44.487510766580534, |
| "eval_loss": 0.3320268988609314, |
| "eval_runtime": 18.1041, |
| "eval_samples_per_second": 1049.708, |
| "eval_steps_per_second": 32.81, |
| "step": 51650 |
| }, |
| { |
| "epoch": 44.530577088716626, |
| "grad_norm": 0.43129387497901917, |
| "learning_rate": 2.1881136950904392e-05, |
| "loss": 0.25620647430419924, |
| "step": 51700 |
| }, |
| { |
| "epoch": 44.530577088716626, |
| "eval_loss": 0.332096666097641, |
| "eval_runtime": 16.4704, |
| "eval_samples_per_second": 1153.828, |
| "eval_steps_per_second": 36.065, |
| "step": 51700 |
| }, |
| { |
| "epoch": 44.57364341085271, |
| "grad_norm": 0.478359580039978, |
| "learning_rate": 2.1708871662360038e-05, |
| "loss": 0.2545368766784668, |
| "step": 51750 |
| }, |
| { |
| "epoch": 44.57364341085271, |
| "eval_loss": 0.3265804946422577, |
| "eval_runtime": 17.8095, |
| "eval_samples_per_second": 1067.072, |
| "eval_steps_per_second": 33.353, |
| "step": 51750 |
| }, |
| { |
| "epoch": 44.6167097329888, |
| "grad_norm": 0.4747825860977173, |
| "learning_rate": 2.1536606373815677e-05, |
| "loss": 0.2612367820739746, |
| "step": 51800 |
| }, |
| { |
| "epoch": 44.6167097329888, |
| "eval_loss": 0.3282797634601593, |
| "eval_runtime": 16.8719, |
| "eval_samples_per_second": 1126.371, |
| "eval_steps_per_second": 35.207, |
| "step": 51800 |
| }, |
| { |
| "epoch": 44.659776055124894, |
| "grad_norm": 0.41625022888183594, |
| "learning_rate": 2.136434108527132e-05, |
| "loss": 0.2548894691467285, |
| "step": 51850 |
| }, |
| { |
| "epoch": 44.659776055124894, |
| "eval_loss": 0.33373549580574036, |
| "eval_runtime": 17.9156, |
| "eval_samples_per_second": 1060.752, |
| "eval_steps_per_second": 33.155, |
| "step": 51850 |
| }, |
| { |
| "epoch": 44.70284237726098, |
| "grad_norm": 0.40073272585868835, |
| "learning_rate": 2.119207579672696e-05, |
| "loss": 0.2608194923400879, |
| "step": 51900 |
| }, |
| { |
| "epoch": 44.70284237726098, |
| "eval_loss": 0.32843106985092163, |
| "eval_runtime": 18.2982, |
| "eval_samples_per_second": 1038.571, |
| "eval_steps_per_second": 32.462, |
| "step": 51900 |
| }, |
| { |
| "epoch": 44.74590869939707, |
| "grad_norm": 0.4103309214115143, |
| "learning_rate": 2.1019810508182603e-05, |
| "loss": 0.25819131851196286, |
| "step": 51950 |
| }, |
| { |
| "epoch": 44.74590869939707, |
| "eval_loss": 0.32719510793685913, |
| "eval_runtime": 18.27, |
| "eval_samples_per_second": 1040.176, |
| "eval_steps_per_second": 32.512, |
| "step": 51950 |
| }, |
| { |
| "epoch": 44.78897502153316, |
| "grad_norm": 0.4365871846675873, |
| "learning_rate": 2.084754521963824e-05, |
| "loss": 0.2647978591918945, |
| "step": 52000 |
| }, |
| { |
| "epoch": 44.78897502153316, |
| "eval_loss": 0.3374394476413727, |
| "eval_runtime": 17.8003, |
| "eval_samples_per_second": 1067.624, |
| "eval_steps_per_second": 33.37, |
| "step": 52000 |
| }, |
| { |
| "epoch": 44.83204134366925, |
| "grad_norm": 0.4309022128582001, |
| "learning_rate": 2.0675279931093887e-05, |
| "loss": 0.25435049057006837, |
| "step": 52050 |
| }, |
| { |
| "epoch": 44.83204134366925, |
| "eval_loss": 0.33556151390075684, |
| "eval_runtime": 18.5283, |
| "eval_samples_per_second": 1025.676, |
| "eval_steps_per_second": 32.059, |
| "step": 52050 |
| }, |
| { |
| "epoch": 44.87510766580534, |
| "grad_norm": 0.4415874183177948, |
| "learning_rate": 2.050301464254953e-05, |
| "loss": 0.2644994354248047, |
| "step": 52100 |
| }, |
| { |
| "epoch": 44.87510766580534, |
| "eval_loss": 0.3290407061576843, |
| "eval_runtime": 18.4239, |
| "eval_samples_per_second": 1031.488, |
| "eval_steps_per_second": 32.241, |
| "step": 52100 |
| }, |
| { |
| "epoch": 44.91817398794143, |
| "grad_norm": 0.4384152293205261, |
| "learning_rate": 2.0330749354005168e-05, |
| "loss": 0.2529031372070312, |
| "step": 52150 |
| }, |
| { |
| "epoch": 44.91817398794143, |
| "eval_loss": 0.3312751054763794, |
| "eval_runtime": 17.7215, |
| "eval_samples_per_second": 1072.371, |
| "eval_steps_per_second": 33.519, |
| "step": 52150 |
| }, |
| { |
| "epoch": 44.96124031007752, |
| "grad_norm": 0.40394484996795654, |
| "learning_rate": 2.015848406546081e-05, |
| "loss": 0.25720306396484377, |
| "step": 52200 |
| }, |
| { |
| "epoch": 44.96124031007752, |
| "eval_loss": 0.3306456208229065, |
| "eval_runtime": 17.7033, |
| "eval_samples_per_second": 1073.47, |
| "eval_steps_per_second": 33.553, |
| "step": 52200 |
| }, |
| { |
| "epoch": 45.004306632213606, |
| "grad_norm": 0.3723146319389343, |
| "learning_rate": 1.9986218776916452e-05, |
| "loss": 0.2507796859741211, |
| "step": 52250 |
| }, |
| { |
| "epoch": 45.004306632213606, |
| "eval_loss": 0.3309019207954407, |
| "eval_runtime": 17.9496, |
| "eval_samples_per_second": 1058.741, |
| "eval_steps_per_second": 33.093, |
| "step": 52250 |
| }, |
| { |
| "epoch": 45.0473729543497, |
| "grad_norm": 0.4547770619392395, |
| "learning_rate": 1.9813953488372094e-05, |
| "loss": 0.2613633346557617, |
| "step": 52300 |
| }, |
| { |
| "epoch": 45.0473729543497, |
| "eval_loss": 0.33252662420272827, |
| "eval_runtime": 17.1825, |
| "eval_samples_per_second": 1106.008, |
| "eval_steps_per_second": 34.57, |
| "step": 52300 |
| }, |
| { |
| "epoch": 45.09043927648579, |
| "grad_norm": 0.3941754698753357, |
| "learning_rate": 1.9641688199827736e-05, |
| "loss": 0.25863794326782225, |
| "step": 52350 |
| }, |
| { |
| "epoch": 45.09043927648579, |
| "eval_loss": 0.3304085433483124, |
| "eval_runtime": 18.1853, |
| "eval_samples_per_second": 1045.019, |
| "eval_steps_per_second": 32.664, |
| "step": 52350 |
| }, |
| { |
| "epoch": 45.13350559862188, |
| "grad_norm": 0.5553924441337585, |
| "learning_rate": 1.946942291128338e-05, |
| "loss": 0.25572742462158204, |
| "step": 52400 |
| }, |
| { |
| "epoch": 45.13350559862188, |
| "eval_loss": 0.3342275321483612, |
| "eval_runtime": 18.1163, |
| "eval_samples_per_second": 1048.997, |
| "eval_steps_per_second": 32.788, |
| "step": 52400 |
| }, |
| { |
| "epoch": 45.176571920757965, |
| "grad_norm": 0.4295162260532379, |
| "learning_rate": 1.9297157622739017e-05, |
| "loss": 0.25522724151611326, |
| "step": 52450 |
| }, |
| { |
| "epoch": 45.176571920757965, |
| "eval_loss": 0.33520230650901794, |
| "eval_runtime": 18.0909, |
| "eval_samples_per_second": 1050.475, |
| "eval_steps_per_second": 32.834, |
| "step": 52450 |
| }, |
| { |
| "epoch": 45.21963824289406, |
| "grad_norm": 0.4033823311328888, |
| "learning_rate": 1.912489233419466e-05, |
| "loss": 0.2575173568725586, |
| "step": 52500 |
| }, |
| { |
| "epoch": 45.21963824289406, |
| "eval_loss": 0.3277575671672821, |
| "eval_runtime": 18.049, |
| "eval_samples_per_second": 1052.913, |
| "eval_steps_per_second": 32.91, |
| "step": 52500 |
| }, |
| { |
| "epoch": 45.26270456503015, |
| "grad_norm": 0.40482550859451294, |
| "learning_rate": 1.8952627045650305e-05, |
| "loss": 0.25760974884033205, |
| "step": 52550 |
| }, |
| { |
| "epoch": 45.26270456503015, |
| "eval_loss": 0.32644209265708923, |
| "eval_runtime": 18.4841, |
| "eval_samples_per_second": 1028.129, |
| "eval_steps_per_second": 32.136, |
| "step": 52550 |
| }, |
| { |
| "epoch": 45.30577088716623, |
| "grad_norm": 0.4248245358467102, |
| "learning_rate": 1.8780361757105943e-05, |
| "loss": 0.2546268081665039, |
| "step": 52600 |
| }, |
| { |
| "epoch": 45.30577088716623, |
| "eval_loss": 0.329515278339386, |
| "eval_runtime": 18.1088, |
| "eval_samples_per_second": 1049.433, |
| "eval_steps_per_second": 32.802, |
| "step": 52600 |
| }, |
| { |
| "epoch": 45.348837209302324, |
| "grad_norm": 0.4354708790779114, |
| "learning_rate": 1.8608096468561585e-05, |
| "loss": 0.262480640411377, |
| "step": 52650 |
| }, |
| { |
| "epoch": 45.348837209302324, |
| "eval_loss": 0.3266183137893677, |
| "eval_runtime": 17.4725, |
| "eval_samples_per_second": 1087.655, |
| "eval_steps_per_second": 33.996, |
| "step": 52650 |
| }, |
| { |
| "epoch": 45.391903531438416, |
| "grad_norm": 0.4125041663646698, |
| "learning_rate": 1.8435831180017228e-05, |
| "loss": 0.24627891540527344, |
| "step": 52700 |
| }, |
| { |
| "epoch": 45.391903531438416, |
| "eval_loss": 0.33478984236717224, |
| "eval_runtime": 18.0794, |
| "eval_samples_per_second": 1051.138, |
| "eval_steps_per_second": 32.855, |
| "step": 52700 |
| }, |
| { |
| "epoch": 45.43496985357451, |
| "grad_norm": 0.40405088663101196, |
| "learning_rate": 1.826356589147287e-05, |
| "loss": 0.2658543014526367, |
| "step": 52750 |
| }, |
| { |
| "epoch": 45.43496985357451, |
| "eval_loss": 0.32871609926223755, |
| "eval_runtime": 17.3887, |
| "eval_samples_per_second": 1092.896, |
| "eval_steps_per_second": 34.16, |
| "step": 52750 |
| }, |
| { |
| "epoch": 45.47803617571059, |
| "grad_norm": 0.5048729181289673, |
| "learning_rate": 1.809130060292851e-05, |
| "loss": 0.2580097198486328, |
| "step": 52800 |
| }, |
| { |
| "epoch": 45.47803617571059, |
| "eval_loss": 0.3327273428440094, |
| "eval_runtime": 18.192, |
| "eval_samples_per_second": 1044.637, |
| "eval_steps_per_second": 32.652, |
| "step": 52800 |
| }, |
| { |
| "epoch": 45.521102497846684, |
| "grad_norm": 0.44812899827957153, |
| "learning_rate": 1.7919035314384154e-05, |
| "loss": 0.26092041015625, |
| "step": 52850 |
| }, |
| { |
| "epoch": 45.521102497846684, |
| "eval_loss": 0.3265608847141266, |
| "eval_runtime": 18.1555, |
| "eval_samples_per_second": 1046.735, |
| "eval_steps_per_second": 32.717, |
| "step": 52850 |
| }, |
| { |
| "epoch": 45.564168819982775, |
| "grad_norm": 0.4317931532859802, |
| "learning_rate": 1.7746770025839793e-05, |
| "loss": 0.25728765487670896, |
| "step": 52900 |
| }, |
| { |
| "epoch": 45.564168819982775, |
| "eval_loss": 0.32284799218177795, |
| "eval_runtime": 17.9284, |
| "eval_samples_per_second": 1059.994, |
| "eval_steps_per_second": 33.132, |
| "step": 52900 |
| }, |
| { |
| "epoch": 45.60723514211886, |
| "grad_norm": 0.4485609233379364, |
| "learning_rate": 1.7574504737295435e-05, |
| "loss": 0.2618018341064453, |
| "step": 52950 |
| }, |
| { |
| "epoch": 45.60723514211886, |
| "eval_loss": 0.3257175385951996, |
| "eval_runtime": 18.0633, |
| "eval_samples_per_second": 1052.076, |
| "eval_steps_per_second": 32.884, |
| "step": 52950 |
| }, |
| { |
| "epoch": 45.65030146425495, |
| "grad_norm": 0.45407286286354065, |
| "learning_rate": 1.740223944875108e-05, |
| "loss": 0.2605839157104492, |
| "step": 53000 |
| }, |
| { |
| "epoch": 45.65030146425495, |
| "eval_loss": 0.32873979210853577, |
| "eval_runtime": 18.0677, |
| "eval_samples_per_second": 1051.825, |
| "eval_steps_per_second": 32.876, |
| "step": 53000 |
| }, |
| { |
| "epoch": 45.69336778639104, |
| "grad_norm": 0.40683576464653015, |
| "learning_rate": 1.722997416020672e-05, |
| "loss": 0.25455360412597655, |
| "step": 53050 |
| }, |
| { |
| "epoch": 45.69336778639104, |
| "eval_loss": 0.33273980021476746, |
| "eval_runtime": 17.9958, |
| "eval_samples_per_second": 1056.026, |
| "eval_steps_per_second": 33.008, |
| "step": 53050 |
| }, |
| { |
| "epoch": 45.736434108527135, |
| "grad_norm": 0.48655572533607483, |
| "learning_rate": 1.705770887166236e-05, |
| "loss": 0.260802116394043, |
| "step": 53100 |
| }, |
| { |
| "epoch": 45.736434108527135, |
| "eval_loss": 0.33060774207115173, |
| "eval_runtime": 17.1445, |
| "eval_samples_per_second": 1108.46, |
| "eval_steps_per_second": 34.647, |
| "step": 53100 |
| }, |
| { |
| "epoch": 45.77950043066322, |
| "grad_norm": 0.4370211660861969, |
| "learning_rate": 1.6885443583118003e-05, |
| "loss": 0.25209793090820315, |
| "step": 53150 |
| }, |
| { |
| "epoch": 45.77950043066322, |
| "eval_loss": 0.3310869038105011, |
| "eval_runtime": 18.1304, |
| "eval_samples_per_second": 1048.182, |
| "eval_steps_per_second": 32.763, |
| "step": 53150 |
| }, |
| { |
| "epoch": 45.82256675279931, |
| "grad_norm": 0.41281944513320923, |
| "learning_rate": 1.6713178294573645e-05, |
| "loss": 0.2589684295654297, |
| "step": 53200 |
| }, |
| { |
| "epoch": 45.82256675279931, |
| "eval_loss": 0.3299771547317505, |
| "eval_runtime": 18.0123, |
| "eval_samples_per_second": 1055.055, |
| "eval_steps_per_second": 32.977, |
| "step": 53200 |
| }, |
| { |
| "epoch": 45.8656330749354, |
| "grad_norm": 0.4713948965072632, |
| "learning_rate": 1.6540913006029284e-05, |
| "loss": 0.2571216011047363, |
| "step": 53250 |
| }, |
| { |
| "epoch": 45.8656330749354, |
| "eval_loss": 0.3286995589733124, |
| "eval_runtime": 19.7704, |
| "eval_samples_per_second": 961.233, |
| "eval_steps_per_second": 30.045, |
| "step": 53250 |
| }, |
| { |
| "epoch": 45.90869939707149, |
| "grad_norm": 0.35535186529159546, |
| "learning_rate": 1.636864771748493e-05, |
| "loss": 0.2573351287841797, |
| "step": 53300 |
| }, |
| { |
| "epoch": 45.90869939707149, |
| "eval_loss": 0.3261243402957916, |
| "eval_runtime": 17.9332, |
| "eval_samples_per_second": 1059.709, |
| "eval_steps_per_second": 33.123, |
| "step": 53300 |
| }, |
| { |
| "epoch": 45.95176571920758, |
| "grad_norm": 0.40924257040023804, |
| "learning_rate": 1.6196382428940568e-05, |
| "loss": 0.2555196189880371, |
| "step": 53350 |
| }, |
| { |
| "epoch": 45.95176571920758, |
| "eval_loss": 0.3238484859466553, |
| "eval_runtime": 18.0699, |
| "eval_samples_per_second": 1051.695, |
| "eval_steps_per_second": 32.872, |
| "step": 53350 |
| }, |
| { |
| "epoch": 45.99483204134367, |
| "grad_norm": 0.49290111660957336, |
| "learning_rate": 1.602411714039621e-05, |
| "loss": 0.2545882034301758, |
| "step": 53400 |
| }, |
| { |
| "epoch": 45.99483204134367, |
| "eval_loss": 0.33049580454826355, |
| "eval_runtime": 19.3384, |
| "eval_samples_per_second": 982.707, |
| "eval_steps_per_second": 30.716, |
| "step": 53400 |
| }, |
| { |
| "epoch": 46.03789836347976, |
| "grad_norm": 0.37689730525016785, |
| "learning_rate": 1.5851851851851852e-05, |
| "loss": 0.2548778533935547, |
| "step": 53450 |
| }, |
| { |
| "epoch": 46.03789836347976, |
| "eval_loss": 0.33154818415641785, |
| "eval_runtime": 19.374, |
| "eval_samples_per_second": 980.902, |
| "eval_steps_per_second": 30.66, |
| "step": 53450 |
| }, |
| { |
| "epoch": 46.08096468561585, |
| "grad_norm": 0.4257497489452362, |
| "learning_rate": 1.5679586563307494e-05, |
| "loss": 0.26114877700805667, |
| "step": 53500 |
| }, |
| { |
| "epoch": 46.08096468561585, |
| "eval_loss": 0.33142903447151184, |
| "eval_runtime": 18.3151, |
| "eval_samples_per_second": 1037.612, |
| "eval_steps_per_second": 32.432, |
| "step": 53500 |
| }, |
| { |
| "epoch": 46.12403100775194, |
| "grad_norm": 0.45820152759552, |
| "learning_rate": 1.5507321274763136e-05, |
| "loss": 0.25467180252075194, |
| "step": 53550 |
| }, |
| { |
| "epoch": 46.12403100775194, |
| "eval_loss": 0.33298689126968384, |
| "eval_runtime": 17.1221, |
| "eval_samples_per_second": 1109.909, |
| "eval_steps_per_second": 34.692, |
| "step": 53550 |
| }, |
| { |
| "epoch": 46.16709732988803, |
| "grad_norm": 0.3875587284564972, |
| "learning_rate": 1.533505598621878e-05, |
| "loss": 0.2580219268798828, |
| "step": 53600 |
| }, |
| { |
| "epoch": 46.16709732988803, |
| "eval_loss": 0.3247772455215454, |
| "eval_runtime": 17.6361, |
| "eval_samples_per_second": 1077.56, |
| "eval_steps_per_second": 33.681, |
| "step": 53600 |
| }, |
| { |
| "epoch": 46.210163652024114, |
| "grad_norm": 0.401325523853302, |
| "learning_rate": 1.516279069767442e-05, |
| "loss": 0.2550651168823242, |
| "step": 53650 |
| }, |
| { |
| "epoch": 46.210163652024114, |
| "eval_loss": 0.3269961476325989, |
| "eval_runtime": 17.6496, |
| "eval_samples_per_second": 1076.735, |
| "eval_steps_per_second": 33.655, |
| "step": 53650 |
| }, |
| { |
| "epoch": 46.253229974160206, |
| "grad_norm": 0.3880271315574646, |
| "learning_rate": 1.499052540913006e-05, |
| "loss": 0.26004016876220704, |
| "step": 53700 |
| }, |
| { |
| "epoch": 46.253229974160206, |
| "eval_loss": 0.329743891954422, |
| "eval_runtime": 17.3231, |
| "eval_samples_per_second": 1097.03, |
| "eval_steps_per_second": 34.289, |
| "step": 53700 |
| }, |
| { |
| "epoch": 46.2962962962963, |
| "grad_norm": 0.43584710359573364, |
| "learning_rate": 1.4818260120585703e-05, |
| "loss": 0.26099538803100586, |
| "step": 53750 |
| }, |
| { |
| "epoch": 46.2962962962963, |
| "eval_loss": 0.32898804545402527, |
| "eval_runtime": 18.4366, |
| "eval_samples_per_second": 1030.774, |
| "eval_steps_per_second": 32.218, |
| "step": 53750 |
| }, |
| { |
| "epoch": 46.33936261843239, |
| "grad_norm": 0.42027243971824646, |
| "learning_rate": 1.4645994832041344e-05, |
| "loss": 0.2571408462524414, |
| "step": 53800 |
| }, |
| { |
| "epoch": 46.33936261843239, |
| "eval_loss": 0.3268490135669708, |
| "eval_runtime": 17.572, |
| "eval_samples_per_second": 1081.493, |
| "eval_steps_per_second": 33.804, |
| "step": 53800 |
| }, |
| { |
| "epoch": 46.382428940568474, |
| "grad_norm": 0.45979616045951843, |
| "learning_rate": 1.4473729543496986e-05, |
| "loss": 0.26052181243896483, |
| "step": 53850 |
| }, |
| { |
| "epoch": 46.382428940568474, |
| "eval_loss": 0.32973024249076843, |
| "eval_runtime": 17.8772, |
| "eval_samples_per_second": 1063.033, |
| "eval_steps_per_second": 33.227, |
| "step": 53850 |
| }, |
| { |
| "epoch": 46.425495262704565, |
| "grad_norm": 0.42952507734298706, |
| "learning_rate": 1.430146425495263e-05, |
| "loss": 0.26365802764892576, |
| "step": 53900 |
| }, |
| { |
| "epoch": 46.425495262704565, |
| "eval_loss": 0.3266441226005554, |
| "eval_runtime": 17.8894, |
| "eval_samples_per_second": 1062.304, |
| "eval_steps_per_second": 33.204, |
| "step": 53900 |
| }, |
| { |
| "epoch": 46.46856158484066, |
| "grad_norm": 0.42997005581855774, |
| "learning_rate": 1.412919896640827e-05, |
| "loss": 0.25490142822265627, |
| "step": 53950 |
| }, |
| { |
| "epoch": 46.46856158484066, |
| "eval_loss": 0.3287542164325714, |
| "eval_runtime": 18.2943, |
| "eval_samples_per_second": 1038.796, |
| "eval_steps_per_second": 32.469, |
| "step": 53950 |
| }, |
| { |
| "epoch": 46.51162790697674, |
| "grad_norm": 0.5448318719863892, |
| "learning_rate": 1.3956933677863912e-05, |
| "loss": 0.2567383575439453, |
| "step": 54000 |
| }, |
| { |
| "epoch": 46.51162790697674, |
| "eval_loss": 0.3273468613624573, |
| "eval_runtime": 17.8252, |
| "eval_samples_per_second": 1066.132, |
| "eval_steps_per_second": 33.324, |
| "step": 54000 |
| }, |
| { |
| "epoch": 46.55469422911283, |
| "grad_norm": 0.4054015874862671, |
| "learning_rate": 1.3784668389319552e-05, |
| "loss": 0.26060123443603517, |
| "step": 54050 |
| }, |
| { |
| "epoch": 46.55469422911283, |
| "eval_loss": 0.3238947093486786, |
| "eval_runtime": 18.5551, |
| "eval_samples_per_second": 1024.192, |
| "eval_steps_per_second": 32.013, |
| "step": 54050 |
| }, |
| { |
| "epoch": 46.597760551248925, |
| "grad_norm": 0.4301529824733734, |
| "learning_rate": 1.3612403100775195e-05, |
| "loss": 0.2529969787597656, |
| "step": 54100 |
| }, |
| { |
| "epoch": 46.597760551248925, |
| "eval_loss": 0.32576170563697815, |
| "eval_runtime": 19.3566, |
| "eval_samples_per_second": 981.785, |
| "eval_steps_per_second": 30.687, |
| "step": 54100 |
| }, |
| { |
| "epoch": 46.64082687338501, |
| "grad_norm": 0.4531412422657013, |
| "learning_rate": 1.3440137812230835e-05, |
| "loss": 0.25750720977783204, |
| "step": 54150 |
| }, |
| { |
| "epoch": 46.64082687338501, |
| "eval_loss": 0.3297915458679199, |
| "eval_runtime": 18.2668, |
| "eval_samples_per_second": 1040.358, |
| "eval_steps_per_second": 32.518, |
| "step": 54150 |
| }, |
| { |
| "epoch": 46.6838931955211, |
| "grad_norm": 0.4417518377304077, |
| "learning_rate": 1.3267872523686479e-05, |
| "loss": 0.25784589767456056, |
| "step": 54200 |
| }, |
| { |
| "epoch": 46.6838931955211, |
| "eval_loss": 0.325896292924881, |
| "eval_runtime": 18.1482, |
| "eval_samples_per_second": 1047.157, |
| "eval_steps_per_second": 32.731, |
| "step": 54200 |
| }, |
| { |
| "epoch": 46.72695951765719, |
| "grad_norm": 0.49990609288215637, |
| "learning_rate": 1.3095607235142119e-05, |
| "loss": 0.25872652053833006, |
| "step": 54250 |
| }, |
| { |
| "epoch": 46.72695951765719, |
| "eval_loss": 0.3264675438404083, |
| "eval_runtime": 17.9997, |
| "eval_samples_per_second": 1055.796, |
| "eval_steps_per_second": 33.001, |
| "step": 54250 |
| }, |
| { |
| "epoch": 46.770025839793284, |
| "grad_norm": 0.38507169485092163, |
| "learning_rate": 1.2923341946597761e-05, |
| "loss": 0.25351362228393554, |
| "step": 54300 |
| }, |
| { |
| "epoch": 46.770025839793284, |
| "eval_loss": 0.3271394371986389, |
| "eval_runtime": 17.7696, |
| "eval_samples_per_second": 1069.467, |
| "eval_steps_per_second": 33.428, |
| "step": 54300 |
| }, |
| { |
| "epoch": 46.81309216192937, |
| "grad_norm": 0.38704913854599, |
| "learning_rate": 1.2751076658053403e-05, |
| "loss": 0.2537462615966797, |
| "step": 54350 |
| }, |
| { |
| "epoch": 46.81309216192937, |
| "eval_loss": 0.328862726688385, |
| "eval_runtime": 18.6196, |
| "eval_samples_per_second": 1020.647, |
| "eval_steps_per_second": 31.902, |
| "step": 54350 |
| }, |
| { |
| "epoch": 46.85615848406546, |
| "grad_norm": 0.41383081674575806, |
| "learning_rate": 1.2578811369509044e-05, |
| "loss": 0.2505047607421875, |
| "step": 54400 |
| }, |
| { |
| "epoch": 46.85615848406546, |
| "eval_loss": 0.32361650466918945, |
| "eval_runtime": 18.0463, |
| "eval_samples_per_second": 1053.071, |
| "eval_steps_per_second": 32.915, |
| "step": 54400 |
| }, |
| { |
| "epoch": 46.89922480620155, |
| "grad_norm": 0.45698070526123047, |
| "learning_rate": 1.2406546080964686e-05, |
| "loss": 0.2587569618225098, |
| "step": 54450 |
| }, |
| { |
| "epoch": 46.89922480620155, |
| "eval_loss": 0.3258303701877594, |
| "eval_runtime": 17.9164, |
| "eval_samples_per_second": 1060.705, |
| "eval_steps_per_second": 33.154, |
| "step": 54450 |
| }, |
| { |
| "epoch": 46.94229112833764, |
| "grad_norm": 0.47596439719200134, |
| "learning_rate": 1.2234280792420328e-05, |
| "loss": 0.25485145568847656, |
| "step": 54500 |
| }, |
| { |
| "epoch": 46.94229112833764, |
| "eval_loss": 0.32724428176879883, |
| "eval_runtime": 18.2902, |
| "eval_samples_per_second": 1039.028, |
| "eval_steps_per_second": 32.476, |
| "step": 54500 |
| }, |
| { |
| "epoch": 46.98535745047373, |
| "grad_norm": 0.5511539578437805, |
| "learning_rate": 1.206201550387597e-05, |
| "loss": 0.2561703872680664, |
| "step": 54550 |
| }, |
| { |
| "epoch": 46.98535745047373, |
| "eval_loss": 0.3254358172416687, |
| "eval_runtime": 18.259, |
| "eval_samples_per_second": 1040.8, |
| "eval_steps_per_second": 32.532, |
| "step": 54550 |
| }, |
| { |
| "epoch": 47.02842377260982, |
| "grad_norm": 0.5038396120071411, |
| "learning_rate": 1.1889750215331612e-05, |
| "loss": 0.25494846343994143, |
| "step": 54600 |
| }, |
| { |
| "epoch": 47.02842377260982, |
| "eval_loss": 0.3333961069583893, |
| "eval_runtime": 18.4274, |
| "eval_samples_per_second": 1031.293, |
| "eval_steps_per_second": 32.235, |
| "step": 54600 |
| }, |
| { |
| "epoch": 47.07149009474591, |
| "grad_norm": 0.45146816968917847, |
| "learning_rate": 1.1717484926787253e-05, |
| "loss": 0.2557457733154297, |
| "step": 54650 |
| }, |
| { |
| "epoch": 47.07149009474591, |
| "eval_loss": 0.3291355073451996, |
| "eval_runtime": 18.674, |
| "eval_samples_per_second": 1017.673, |
| "eval_steps_per_second": 31.809, |
| "step": 54650 |
| }, |
| { |
| "epoch": 47.114556416881996, |
| "grad_norm": 0.4485445022583008, |
| "learning_rate": 1.1545219638242895e-05, |
| "loss": 0.2512784957885742, |
| "step": 54700 |
| }, |
| { |
| "epoch": 47.114556416881996, |
| "eval_loss": 0.32779595255851746, |
| "eval_runtime": 17.9957, |
| "eval_samples_per_second": 1056.03, |
| "eval_steps_per_second": 33.008, |
| "step": 54700 |
| }, |
| { |
| "epoch": 47.15762273901809, |
| "grad_norm": 0.3778316080570221, |
| "learning_rate": 1.1372954349698537e-05, |
| "loss": 0.25392818450927734, |
| "step": 54750 |
| }, |
| { |
| "epoch": 47.15762273901809, |
| "eval_loss": 0.32727065682411194, |
| "eval_runtime": 17.4622, |
| "eval_samples_per_second": 1088.295, |
| "eval_steps_per_second": 34.016, |
| "step": 54750 |
| }, |
| { |
| "epoch": 47.20068906115418, |
| "grad_norm": 0.3127480447292328, |
| "learning_rate": 1.1200689061154177e-05, |
| "loss": 0.2568006324768066, |
| "step": 54800 |
| }, |
| { |
| "epoch": 47.20068906115418, |
| "eval_loss": 0.3252197802066803, |
| "eval_runtime": 18.0907, |
| "eval_samples_per_second": 1050.487, |
| "eval_steps_per_second": 32.835, |
| "step": 54800 |
| }, |
| { |
| "epoch": 47.243755383290264, |
| "grad_norm": 0.37857651710510254, |
| "learning_rate": 1.102842377260982e-05, |
| "loss": 0.2547434234619141, |
| "step": 54850 |
| }, |
| { |
| "epoch": 47.243755383290264, |
| "eval_loss": 0.3276120126247406, |
| "eval_runtime": 17.9336, |
| "eval_samples_per_second": 1059.684, |
| "eval_steps_per_second": 33.122, |
| "step": 54850 |
| }, |
| { |
| "epoch": 47.286821705426355, |
| "grad_norm": 0.411051869392395, |
| "learning_rate": 1.0856158484065461e-05, |
| "loss": 0.2587508010864258, |
| "step": 54900 |
| }, |
| { |
| "epoch": 47.286821705426355, |
| "eval_loss": 0.3328407108783722, |
| "eval_runtime": 18.5652, |
| "eval_samples_per_second": 1023.636, |
| "eval_steps_per_second": 31.995, |
| "step": 54900 |
| }, |
| { |
| "epoch": 47.32988802756245, |
| "grad_norm": 0.4203979969024658, |
| "learning_rate": 1.0683893195521102e-05, |
| "loss": 0.2552628517150879, |
| "step": 54950 |
| }, |
| { |
| "epoch": 47.32988802756245, |
| "eval_loss": 0.3313995897769928, |
| "eval_runtime": 18.193, |
| "eval_samples_per_second": 1044.576, |
| "eval_steps_per_second": 32.65, |
| "step": 54950 |
| }, |
| { |
| "epoch": 47.37295434969854, |
| "grad_norm": 0.39956605434417725, |
| "learning_rate": 1.0511627906976746e-05, |
| "loss": 0.2482987403869629, |
| "step": 55000 |
| }, |
| { |
| "epoch": 47.37295434969854, |
| "eval_loss": 0.3247491121292114, |
| "eval_runtime": 18.1318, |
| "eval_samples_per_second": 1048.104, |
| "eval_steps_per_second": 32.76, |
| "step": 55000 |
| }, |
| { |
| "epoch": 47.41602067183462, |
| "grad_norm": 0.4590926170349121, |
| "learning_rate": 1.0339362618432388e-05, |
| "loss": 0.25707162857055665, |
| "step": 55050 |
| }, |
| { |
| "epoch": 47.41602067183462, |
| "eval_loss": 0.323581337928772, |
| "eval_runtime": 18.2993, |
| "eval_samples_per_second": 1038.512, |
| "eval_steps_per_second": 32.46, |
| "step": 55050 |
| }, |
| { |
| "epoch": 47.459086993970715, |
| "grad_norm": 0.3866632282733917, |
| "learning_rate": 1.0167097329888028e-05, |
| "loss": 0.25558311462402344, |
| "step": 55100 |
| }, |
| { |
| "epoch": 47.459086993970715, |
| "eval_loss": 0.3233027160167694, |
| "eval_runtime": 18.1385, |
| "eval_samples_per_second": 1047.718, |
| "eval_steps_per_second": 32.748, |
| "step": 55100 |
| }, |
| { |
| "epoch": 47.502153316106806, |
| "grad_norm": 0.5196178555488586, |
| "learning_rate": 9.99483204134367e-06, |
| "loss": 0.2581501007080078, |
| "step": 55150 |
| }, |
| { |
| "epoch": 47.502153316106806, |
| "eval_loss": 0.3227282166481018, |
| "eval_runtime": 17.7718, |
| "eval_samples_per_second": 1069.334, |
| "eval_steps_per_second": 33.424, |
| "step": 55150 |
| }, |
| { |
| "epoch": 47.54521963824289, |
| "grad_norm": 0.9048319458961487, |
| "learning_rate": 9.822566752799312e-06, |
| "loss": 0.2537050628662109, |
| "step": 55200 |
| }, |
| { |
| "epoch": 47.54521963824289, |
| "eval_loss": 0.3279529809951782, |
| "eval_runtime": 17.8061, |
| "eval_samples_per_second": 1067.276, |
| "eval_steps_per_second": 33.359, |
| "step": 55200 |
| }, |
| { |
| "epoch": 47.58828596037898, |
| "grad_norm": 0.4730582535266876, |
| "learning_rate": 9.650301464254953e-06, |
| "loss": 0.2550501251220703, |
| "step": 55250 |
| }, |
| { |
| "epoch": 47.58828596037898, |
| "eval_loss": 0.3323853313922882, |
| "eval_runtime": 18.4314, |
| "eval_samples_per_second": 1031.065, |
| "eval_steps_per_second": 32.228, |
| "step": 55250 |
| }, |
| { |
| "epoch": 47.631352282515074, |
| "grad_norm": 0.4794359803199768, |
| "learning_rate": 9.478036175710595e-06, |
| "loss": 0.25382413864135744, |
| "step": 55300 |
| }, |
| { |
| "epoch": 47.631352282515074, |
| "eval_loss": 0.3281784951686859, |
| "eval_runtime": 18.7928, |
| "eval_samples_per_second": 1011.237, |
| "eval_steps_per_second": 31.608, |
| "step": 55300 |
| }, |
| { |
| "epoch": 47.674418604651166, |
| "grad_norm": 0.45010513067245483, |
| "learning_rate": 9.305770887166237e-06, |
| "loss": 0.256310920715332, |
| "step": 55350 |
| }, |
| { |
| "epoch": 47.674418604651166, |
| "eval_loss": 0.32857298851013184, |
| "eval_runtime": 18.0712, |
| "eval_samples_per_second": 1051.618, |
| "eval_steps_per_second": 32.87, |
| "step": 55350 |
| }, |
| { |
| "epoch": 47.71748492678725, |
| "grad_norm": 0.37499430775642395, |
| "learning_rate": 9.133505598621877e-06, |
| "loss": 0.2561494827270508, |
| "step": 55400 |
| }, |
| { |
| "epoch": 47.71748492678725, |
| "eval_loss": 0.3282778263092041, |
| "eval_runtime": 16.3235, |
| "eval_samples_per_second": 1164.211, |
| "eval_steps_per_second": 36.389, |
| "step": 55400 |
| }, |
| { |
| "epoch": 47.76055124892334, |
| "grad_norm": 0.4058770537376404, |
| "learning_rate": 8.96124031007752e-06, |
| "loss": 0.251326904296875, |
| "step": 55450 |
| }, |
| { |
| "epoch": 47.76055124892334, |
| "eval_loss": 0.3290371298789978, |
| "eval_runtime": 17.7668, |
| "eval_samples_per_second": 1069.636, |
| "eval_steps_per_second": 33.433, |
| "step": 55450 |
| }, |
| { |
| "epoch": 47.80361757105943, |
| "grad_norm": 0.36981111764907837, |
| "learning_rate": 8.788975021533161e-06, |
| "loss": 0.2633692359924316, |
| "step": 55500 |
| }, |
| { |
| "epoch": 47.80361757105943, |
| "eval_loss": 0.323087602853775, |
| "eval_runtime": 18.1501, |
| "eval_samples_per_second": 1047.047, |
| "eval_steps_per_second": 32.727, |
| "step": 55500 |
| }, |
| { |
| "epoch": 47.84668389319552, |
| "grad_norm": 0.4622083306312561, |
| "learning_rate": 8.616709732988804e-06, |
| "loss": 0.2480868911743164, |
| "step": 55550 |
| }, |
| { |
| "epoch": 47.84668389319552, |
| "eval_loss": 0.3338957726955414, |
| "eval_runtime": 19.5237, |
| "eval_samples_per_second": 973.38, |
| "eval_steps_per_second": 30.425, |
| "step": 55550 |
| }, |
| { |
| "epoch": 47.88975021533161, |
| "grad_norm": 0.5124086141586304, |
| "learning_rate": 8.444444444444446e-06, |
| "loss": 0.2595103645324707, |
| "step": 55600 |
| }, |
| { |
| "epoch": 47.88975021533161, |
| "eval_loss": 0.3314855098724365, |
| "eval_runtime": 16.1957, |
| "eval_samples_per_second": 1173.398, |
| "eval_steps_per_second": 36.676, |
| "step": 55600 |
| }, |
| { |
| "epoch": 47.9328165374677, |
| "grad_norm": 0.4180132746696472, |
| "learning_rate": 8.272179155900086e-06, |
| "loss": 0.25893829345703123, |
| "step": 55650 |
| }, |
| { |
| "epoch": 47.9328165374677, |
| "eval_loss": 0.3274875581264496, |
| "eval_runtime": 16.6508, |
| "eval_samples_per_second": 1141.326, |
| "eval_steps_per_second": 35.674, |
| "step": 55650 |
| }, |
| { |
| "epoch": 47.97588285960379, |
| "grad_norm": 0.39777863025665283, |
| "learning_rate": 8.099913867355728e-06, |
| "loss": 0.2580837059020996, |
| "step": 55700 |
| }, |
| { |
| "epoch": 47.97588285960379, |
| "eval_loss": 0.32824796438217163, |
| "eval_runtime": 17.4958, |
| "eval_samples_per_second": 1086.202, |
| "eval_steps_per_second": 33.951, |
| "step": 55700 |
| }, |
| { |
| "epoch": 48.01894918173988, |
| "grad_norm": 0.3993493914604187, |
| "learning_rate": 7.92764857881137e-06, |
| "loss": 0.26053876876831056, |
| "step": 55750 |
| }, |
| { |
| "epoch": 48.01894918173988, |
| "eval_loss": 0.32810813188552856, |
| "eval_runtime": 17.3915, |
| "eval_samples_per_second": 1092.721, |
| "eval_steps_per_second": 34.155, |
| "step": 55750 |
| }, |
| { |
| "epoch": 48.06201550387597, |
| "grad_norm": 0.4241078495979309, |
| "learning_rate": 7.75538329026701e-06, |
| "loss": 0.25140480041503904, |
| "step": 55800 |
| }, |
| { |
| "epoch": 48.06201550387597, |
| "eval_loss": 0.32518553733825684, |
| "eval_runtime": 17.5472, |
| "eval_samples_per_second": 1083.024, |
| "eval_steps_per_second": 33.852, |
| "step": 55800 |
| }, |
| { |
| "epoch": 48.10508182601206, |
| "grad_norm": 0.4179937243461609, |
| "learning_rate": 7.583118001722653e-06, |
| "loss": 0.2537392044067383, |
| "step": 55850 |
| }, |
| { |
| "epoch": 48.10508182601206, |
| "eval_loss": 0.3269120454788208, |
| "eval_runtime": 16.5361, |
| "eval_samples_per_second": 1149.244, |
| "eval_steps_per_second": 35.921, |
| "step": 55850 |
| }, |
| { |
| "epoch": 48.148148148148145, |
| "grad_norm": 0.4175696074962616, |
| "learning_rate": 7.410852713178294e-06, |
| "loss": 0.2580715560913086, |
| "step": 55900 |
| }, |
| { |
| "epoch": 48.148148148148145, |
| "eval_loss": 0.333906352519989, |
| "eval_runtime": 17.1017, |
| "eval_samples_per_second": 1111.232, |
| "eval_steps_per_second": 34.733, |
| "step": 55900 |
| }, |
| { |
| "epoch": 48.19121447028424, |
| "grad_norm": 0.4846994876861572, |
| "learning_rate": 7.238587424633937e-06, |
| "loss": 0.26174249649047854, |
| "step": 55950 |
| }, |
| { |
| "epoch": 48.19121447028424, |
| "eval_loss": 0.3237777352333069, |
| "eval_runtime": 17.4587, |
| "eval_samples_per_second": 1088.511, |
| "eval_steps_per_second": 34.023, |
| "step": 55950 |
| }, |
| { |
| "epoch": 48.23428079242033, |
| "grad_norm": 0.4444175660610199, |
| "learning_rate": 7.066322136089579e-06, |
| "loss": 0.25842620849609377, |
| "step": 56000 |
| }, |
| { |
| "epoch": 48.23428079242033, |
| "eval_loss": 0.334178626537323, |
| "eval_runtime": 17.5253, |
| "eval_samples_per_second": 1084.378, |
| "eval_steps_per_second": 33.894, |
| "step": 56000 |
| }, |
| { |
| "epoch": 48.27734711455642, |
| "grad_norm": 0.3756895065307617, |
| "learning_rate": 6.89405684754522e-06, |
| "loss": 0.25008062362670896, |
| "step": 56050 |
| }, |
| { |
| "epoch": 48.27734711455642, |
| "eval_loss": 0.3291133940219879, |
| "eval_runtime": 18.0697, |
| "eval_samples_per_second": 1051.705, |
| "eval_steps_per_second": 32.873, |
| "step": 56050 |
| }, |
| { |
| "epoch": 48.320413436692505, |
| "grad_norm": 0.43787986040115356, |
| "learning_rate": 6.721791559000862e-06, |
| "loss": 0.2632747268676758, |
| "step": 56100 |
| }, |
| { |
| "epoch": 48.320413436692505, |
| "eval_loss": 0.32688236236572266, |
| "eval_runtime": 17.8228, |
| "eval_samples_per_second": 1066.273, |
| "eval_steps_per_second": 33.328, |
| "step": 56100 |
| }, |
| { |
| "epoch": 48.363479758828596, |
| "grad_norm": 0.37965553998947144, |
| "learning_rate": 6.549526270456504e-06, |
| "loss": 0.2511505699157715, |
| "step": 56150 |
| }, |
| { |
| "epoch": 48.363479758828596, |
| "eval_loss": 0.33095940947532654, |
| "eval_runtime": 17.6923, |
| "eval_samples_per_second": 1074.138, |
| "eval_steps_per_second": 33.574, |
| "step": 56150 |
| }, |
| { |
| "epoch": 48.40654608096469, |
| "grad_norm": 0.44011443853378296, |
| "learning_rate": 6.377260981912145e-06, |
| "loss": 0.2500026893615723, |
| "step": 56200 |
| }, |
| { |
| "epoch": 48.40654608096469, |
| "eval_loss": 0.3213208317756653, |
| "eval_runtime": 18.091, |
| "eval_samples_per_second": 1050.465, |
| "eval_steps_per_second": 32.834, |
| "step": 56200 |
| }, |
| { |
| "epoch": 48.44961240310077, |
| "grad_norm": 0.41959789395332336, |
| "learning_rate": 6.204995693367786e-06, |
| "loss": 0.25453189849853514, |
| "step": 56250 |
| }, |
| { |
| "epoch": 48.44961240310077, |
| "eval_loss": 0.32835957407951355, |
| "eval_runtime": 18.0779, |
| "eval_samples_per_second": 1051.228, |
| "eval_steps_per_second": 32.858, |
| "step": 56250 |
| }, |
| { |
| "epoch": 48.492678725236864, |
| "grad_norm": 0.4679143726825714, |
| "learning_rate": 6.032730404823428e-06, |
| "loss": 0.2501845359802246, |
| "step": 56300 |
| }, |
| { |
| "epoch": 48.492678725236864, |
| "eval_loss": 0.3308468163013458, |
| "eval_runtime": 18.1336, |
| "eval_samples_per_second": 1048.0, |
| "eval_steps_per_second": 32.757, |
| "step": 56300 |
| }, |
| { |
| "epoch": 48.535745047372956, |
| "grad_norm": 0.4774787724018097, |
| "learning_rate": 5.86046511627907e-06, |
| "loss": 0.26023746490478517, |
| "step": 56350 |
| }, |
| { |
| "epoch": 48.535745047372956, |
| "eval_loss": 0.33171480894088745, |
| "eval_runtime": 18.1585, |
| "eval_samples_per_second": 1046.562, |
| "eval_steps_per_second": 32.712, |
| "step": 56350 |
| }, |
| { |
| "epoch": 48.57881136950905, |
| "grad_norm": 0.4261847138404846, |
| "learning_rate": 5.688199827734712e-06, |
| "loss": 0.25504995346069337, |
| "step": 56400 |
| }, |
| { |
| "epoch": 48.57881136950905, |
| "eval_loss": 0.330726683139801, |
| "eval_runtime": 17.8733, |
| "eval_samples_per_second": 1063.26, |
| "eval_steps_per_second": 33.234, |
| "step": 56400 |
| }, |
| { |
| "epoch": 48.62187769164513, |
| "grad_norm": 0.45377856492996216, |
| "learning_rate": 5.515934539190353e-06, |
| "loss": 0.25870571136474607, |
| "step": 56450 |
| }, |
| { |
| "epoch": 48.62187769164513, |
| "eval_loss": 0.32752057909965515, |
| "eval_runtime": 16.8981, |
| "eval_samples_per_second": 1124.625, |
| "eval_steps_per_second": 35.152, |
| "step": 56450 |
| }, |
| { |
| "epoch": 48.66494401378122, |
| "grad_norm": 0.4299466609954834, |
| "learning_rate": 5.343669250645995e-06, |
| "loss": 0.24694084167480468, |
| "step": 56500 |
| }, |
| { |
| "epoch": 48.66494401378122, |
| "eval_loss": 0.3338819146156311, |
| "eval_runtime": 18.0458, |
| "eval_samples_per_second": 1053.096, |
| "eval_steps_per_second": 32.916, |
| "step": 56500 |
| }, |
| { |
| "epoch": 48.708010335917315, |
| "grad_norm": 0.3479226529598236, |
| "learning_rate": 5.171403962101637e-06, |
| "loss": 0.26142990112304687, |
| "step": 56550 |
| }, |
| { |
| "epoch": 48.708010335917315, |
| "eval_loss": 0.32899126410484314, |
| "eval_runtime": 18.0383, |
| "eval_samples_per_second": 1053.535, |
| "eval_steps_per_second": 32.93, |
| "step": 56550 |
| }, |
| { |
| "epoch": 48.7510766580534, |
| "grad_norm": 0.47992098331451416, |
| "learning_rate": 4.999138673557278e-06, |
| "loss": 0.25217716217041014, |
| "step": 56600 |
| }, |
| { |
| "epoch": 48.7510766580534, |
| "eval_loss": 0.33228740096092224, |
| "eval_runtime": 17.5929, |
| "eval_samples_per_second": 1080.206, |
| "eval_steps_per_second": 33.764, |
| "step": 56600 |
| }, |
| { |
| "epoch": 48.79414298018949, |
| "grad_norm": 0.3716151714324951, |
| "learning_rate": 4.82687338501292e-06, |
| "loss": 0.2575519943237305, |
| "step": 56650 |
| }, |
| { |
| "epoch": 48.79414298018949, |
| "eval_loss": 0.32792726159095764, |
| "eval_runtime": 17.7377, |
| "eval_samples_per_second": 1071.389, |
| "eval_steps_per_second": 33.488, |
| "step": 56650 |
| }, |
| { |
| "epoch": 48.83720930232558, |
| "grad_norm": 0.5173632502555847, |
| "learning_rate": 4.654608096468562e-06, |
| "loss": 0.2547392272949219, |
| "step": 56700 |
| }, |
| { |
| "epoch": 48.83720930232558, |
| "eval_loss": 0.3242984712123871, |
| "eval_runtime": 17.8788, |
| "eval_samples_per_second": 1062.933, |
| "eval_steps_per_second": 33.224, |
| "step": 56700 |
| }, |
| { |
| "epoch": 48.880275624461675, |
| "grad_norm": 0.4812866747379303, |
| "learning_rate": 4.482342807924204e-06, |
| "loss": 0.25394062042236326, |
| "step": 56750 |
| }, |
| { |
| "epoch": 48.880275624461675, |
| "eval_loss": 0.3332490622997284, |
| "eval_runtime": 18.2249, |
| "eval_samples_per_second": 1042.75, |
| "eval_steps_per_second": 32.593, |
| "step": 56750 |
| }, |
| { |
| "epoch": 48.92334194659776, |
| "grad_norm": 0.3856547772884369, |
| "learning_rate": 4.310077519379845e-06, |
| "loss": 0.25180145263671877, |
| "step": 56800 |
| }, |
| { |
| "epoch": 48.92334194659776, |
| "eval_loss": 0.3311121463775635, |
| "eval_runtime": 18.1641, |
| "eval_samples_per_second": 1046.238, |
| "eval_steps_per_second": 32.702, |
| "step": 56800 |
| }, |
| { |
| "epoch": 48.96640826873385, |
| "grad_norm": 0.39660078287124634, |
| "learning_rate": 4.137812230835487e-06, |
| "loss": 0.253763427734375, |
| "step": 56850 |
| }, |
| { |
| "epoch": 48.96640826873385, |
| "eval_loss": 0.32043248414993286, |
| "eval_runtime": 18.0899, |
| "eval_samples_per_second": 1050.53, |
| "eval_steps_per_second": 32.836, |
| "step": 56850 |
| }, |
| { |
| "epoch": 49.00947459086994, |
| "grad_norm": 0.49255695939064026, |
| "learning_rate": 3.965546942291128e-06, |
| "loss": 0.25162689208984373, |
| "step": 56900 |
| }, |
| { |
| "epoch": 49.00947459086994, |
| "eval_loss": 0.3301583528518677, |
| "eval_runtime": 18.2011, |
| "eval_samples_per_second": 1044.111, |
| "eval_steps_per_second": 32.635, |
| "step": 56900 |
| }, |
| { |
| "epoch": 49.05254091300603, |
| "grad_norm": 0.46763548254966736, |
| "learning_rate": 3.7932816537467705e-06, |
| "loss": 0.25125923156738283, |
| "step": 56950 |
| }, |
| { |
| "epoch": 49.05254091300603, |
| "eval_loss": 0.3262256383895874, |
| "eval_runtime": 17.8076, |
| "eval_samples_per_second": 1067.186, |
| "eval_steps_per_second": 33.357, |
| "step": 56950 |
| }, |
| { |
| "epoch": 49.09560723514212, |
| "grad_norm": 0.4103085994720459, |
| "learning_rate": 3.621016365202412e-06, |
| "loss": 0.2495307731628418, |
| "step": 57000 |
| }, |
| { |
| "epoch": 49.09560723514212, |
| "eval_loss": 0.3282013237476349, |
| "eval_runtime": 17.9406, |
| "eval_samples_per_second": 1059.273, |
| "eval_steps_per_second": 33.109, |
| "step": 57000 |
| }, |
| { |
| "epoch": 49.13867355727821, |
| "grad_norm": 0.34167978167533875, |
| "learning_rate": 3.4487510766580534e-06, |
| "loss": 0.2514477729797363, |
| "step": 57050 |
| }, |
| { |
| "epoch": 49.13867355727821, |
| "eval_loss": 0.3242286145687103, |
| "eval_runtime": 17.5082, |
| "eval_samples_per_second": 1085.432, |
| "eval_steps_per_second": 33.927, |
| "step": 57050 |
| }, |
| { |
| "epoch": 49.181739879414295, |
| "grad_norm": 0.4692245125770569, |
| "learning_rate": 3.276485788113695e-06, |
| "loss": 0.25927200317382815, |
| "step": 57100 |
| }, |
| { |
| "epoch": 49.181739879414295, |
| "eval_loss": 0.3265242278575897, |
| "eval_runtime": 18.1814, |
| "eval_samples_per_second": 1045.241, |
| "eval_steps_per_second": 32.671, |
| "step": 57100 |
| }, |
| { |
| "epoch": 49.224806201550386, |
| "grad_norm": 0.48123684525489807, |
| "learning_rate": 3.104220499569337e-06, |
| "loss": 0.2532792663574219, |
| "step": 57150 |
| }, |
| { |
| "epoch": 49.224806201550386, |
| "eval_loss": 0.324819952249527, |
| "eval_runtime": 17.9832, |
| "eval_samples_per_second": 1056.762, |
| "eval_steps_per_second": 33.031, |
| "step": 57150 |
| }, |
| { |
| "epoch": 49.26787252368648, |
| "grad_norm": 0.3839048147201538, |
| "learning_rate": 2.9319552110249785e-06, |
| "loss": 0.2522504234313965, |
| "step": 57200 |
| }, |
| { |
| "epoch": 49.26787252368648, |
| "eval_loss": 0.3209121823310852, |
| "eval_runtime": 17.9498, |
| "eval_samples_per_second": 1058.728, |
| "eval_steps_per_second": 33.092, |
| "step": 57200 |
| }, |
| { |
| "epoch": 49.31093884582257, |
| "grad_norm": 0.38812488317489624, |
| "learning_rate": 2.7596899224806206e-06, |
| "loss": 0.2546123504638672, |
| "step": 57250 |
| }, |
| { |
| "epoch": 49.31093884582257, |
| "eval_loss": 0.3188753128051758, |
| "eval_runtime": 17.8365, |
| "eval_samples_per_second": 1065.453, |
| "eval_steps_per_second": 33.302, |
| "step": 57250 |
| }, |
| { |
| "epoch": 49.354005167958654, |
| "grad_norm": 0.42530906200408936, |
| "learning_rate": 2.587424633936262e-06, |
| "loss": 0.2523210525512695, |
| "step": 57300 |
| }, |
| { |
| "epoch": 49.354005167958654, |
| "eval_loss": 0.322894424200058, |
| "eval_runtime": 18.1021, |
| "eval_samples_per_second": 1049.822, |
| "eval_steps_per_second": 32.814, |
| "step": 57300 |
| }, |
| { |
| "epoch": 49.397071490094746, |
| "grad_norm": 0.46608835458755493, |
| "learning_rate": 2.415159345391904e-06, |
| "loss": 0.2563011169433594, |
| "step": 57350 |
| }, |
| { |
| "epoch": 49.397071490094746, |
| "eval_loss": 0.3301422595977783, |
| "eval_runtime": 18.03, |
| "eval_samples_per_second": 1054.021, |
| "eval_steps_per_second": 32.945, |
| "step": 57350 |
| }, |
| { |
| "epoch": 49.44013781223084, |
| "grad_norm": 0.3985242247581482, |
| "learning_rate": 2.242894056847545e-06, |
| "loss": 0.2541914939880371, |
| "step": 57400 |
| }, |
| { |
| "epoch": 49.44013781223084, |
| "eval_loss": 0.3331603705883026, |
| "eval_runtime": 17.8584, |
| "eval_samples_per_second": 1064.148, |
| "eval_steps_per_second": 33.262, |
| "step": 57400 |
| }, |
| { |
| "epoch": 49.48320413436692, |
| "grad_norm": 0.4562121033668518, |
| "learning_rate": 2.070628768303187e-06, |
| "loss": 0.2531538391113281, |
| "step": 57450 |
| }, |
| { |
| "epoch": 49.48320413436692, |
| "eval_loss": 0.3276031017303467, |
| "eval_runtime": 17.8464, |
| "eval_samples_per_second": 1064.864, |
| "eval_steps_per_second": 33.284, |
| "step": 57450 |
| }, |
| { |
| "epoch": 49.52627045650301, |
| "grad_norm": 0.39313840866088867, |
| "learning_rate": 1.8983634797588287e-06, |
| "loss": 0.26208810806274413, |
| "step": 57500 |
| }, |
| { |
| "epoch": 49.52627045650301, |
| "eval_loss": 0.33253973722457886, |
| "eval_runtime": 17.1618, |
| "eval_samples_per_second": 1107.343, |
| "eval_steps_per_second": 34.612, |
| "step": 57500 |
| }, |
| { |
| "epoch": 49.569336778639105, |
| "grad_norm": 0.4557619094848633, |
| "learning_rate": 1.7260981912144702e-06, |
| "loss": 0.2460823440551758, |
| "step": 57550 |
| }, |
| { |
| "epoch": 49.569336778639105, |
| "eval_loss": 0.327955424785614, |
| "eval_runtime": 18.1273, |
| "eval_samples_per_second": 1048.364, |
| "eval_steps_per_second": 32.768, |
| "step": 57550 |
| }, |
| { |
| "epoch": 49.6124031007752, |
| "grad_norm": 0.5313220620155334, |
| "learning_rate": 1.553832902670112e-06, |
| "loss": 0.2594928359985352, |
| "step": 57600 |
| }, |
| { |
| "epoch": 49.6124031007752, |
| "eval_loss": 0.32700130343437195, |
| "eval_runtime": 18.1934, |
| "eval_samples_per_second": 1044.556, |
| "eval_steps_per_second": 32.649, |
| "step": 57600 |
| }, |
| { |
| "epoch": 49.65546942291128, |
| "grad_norm": 0.42723724246025085, |
| "learning_rate": 1.3815676141257536e-06, |
| "loss": 0.25847515106201174, |
| "step": 57650 |
| }, |
| { |
| "epoch": 49.65546942291128, |
| "eval_loss": 0.3303566575050354, |
| "eval_runtime": 18.0919, |
| "eval_samples_per_second": 1050.416, |
| "eval_steps_per_second": 32.832, |
| "step": 57650 |
| }, |
| { |
| "epoch": 49.69853574504737, |
| "grad_norm": 0.4465530216693878, |
| "learning_rate": 1.2093023255813954e-06, |
| "loss": 0.2579035186767578, |
| "step": 57700 |
| }, |
| { |
| "epoch": 49.69853574504737, |
| "eval_loss": 0.32566210627555847, |
| "eval_runtime": 18.1173, |
| "eval_samples_per_second": 1048.941, |
| "eval_steps_per_second": 32.786, |
| "step": 57700 |
| }, |
| { |
| "epoch": 49.741602067183464, |
| "grad_norm": 0.46042123436927795, |
| "learning_rate": 1.0370370370370371e-06, |
| "loss": 0.25134170532226563, |
| "step": 57750 |
| }, |
| { |
| "epoch": 49.741602067183464, |
| "eval_loss": 0.33262744545936584, |
| "eval_runtime": 18.0792, |
| "eval_samples_per_second": 1051.155, |
| "eval_steps_per_second": 32.856, |
| "step": 57750 |
| }, |
| { |
| "epoch": 49.78466838931955, |
| "grad_norm": 0.3972814977169037, |
| "learning_rate": 8.647717484926788e-07, |
| "loss": 0.2506429290771484, |
| "step": 57800 |
| }, |
| { |
| "epoch": 49.78466838931955, |
| "eval_loss": 0.32905587553977966, |
| "eval_runtime": 17.5711, |
| "eval_samples_per_second": 1081.55, |
| "eval_steps_per_second": 33.806, |
| "step": 57800 |
| }, |
| { |
| "epoch": 49.82773471145564, |
| "grad_norm": 0.40822234749794006, |
| "learning_rate": 6.925064599483205e-07, |
| "loss": 0.2546335220336914, |
| "step": 57850 |
| }, |
| { |
| "epoch": 49.82773471145564, |
| "eval_loss": 0.3248952031135559, |
| "eval_runtime": 17.4019, |
| "eval_samples_per_second": 1092.064, |
| "eval_steps_per_second": 34.134, |
| "step": 57850 |
| }, |
| { |
| "epoch": 49.87080103359173, |
| "grad_norm": 0.3996763825416565, |
| "learning_rate": 5.202411714039622e-07, |
| "loss": 0.25602977752685546, |
| "step": 57900 |
| }, |
| { |
| "epoch": 49.87080103359173, |
| "eval_loss": 0.32748210430145264, |
| "eval_runtime": 18.3587, |
| "eval_samples_per_second": 1035.148, |
| "eval_steps_per_second": 32.355, |
| "step": 57900 |
| }, |
| { |
| "epoch": 49.913867355727824, |
| "grad_norm": 0.5123440027236938, |
| "learning_rate": 3.4797588285960383e-07, |
| "loss": 0.25229768753051757, |
| "step": 57950 |
| }, |
| { |
| "epoch": 49.913867355727824, |
| "eval_loss": 0.32939910888671875, |
| "eval_runtime": 18.5038, |
| "eval_samples_per_second": 1027.032, |
| "eval_steps_per_second": 32.101, |
| "step": 57950 |
| }, |
| { |
| "epoch": 49.95693367786391, |
| "grad_norm": 0.4721235930919647, |
| "learning_rate": 1.7571059431524548e-07, |
| "loss": 0.25507354736328125, |
| "step": 58000 |
| }, |
| { |
| "epoch": 49.95693367786391, |
| "eval_loss": 0.3246816396713257, |
| "eval_runtime": 18.9481, |
| "eval_samples_per_second": 1002.948, |
| "eval_steps_per_second": 31.349, |
| "step": 58000 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.35689032077789307, |
| "learning_rate": 3.4453057708871665e-09, |
| "loss": 0.25033538818359374, |
| "step": 58050 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 0.3281475007534027, |
| "eval_runtime": 17.9258, |
| "eval_samples_per_second": 1060.145, |
| "eval_steps_per_second": 33.137, |
| "step": 58050 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 58050, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.561600021990736e+18, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|