| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 726327, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002065185515614868, |
| "grad_norm": 0.7036675810813904, |
| "learning_rate": 0.0001996, |
| "loss": 7.8213, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.004130371031229736, |
| "grad_norm": 1.0079172849655151, |
| "learning_rate": 0.0003996, |
| "loss": 6.7143, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.006195556546844603, |
| "grad_norm": 1.0771256685256958, |
| "learning_rate": 0.0005996, |
| "loss": 6.5273, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.008260742062459471, |
| "grad_norm": 1.1658340692520142, |
| "learning_rate": 0.0007996, |
| "loss": 6.3948, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.010325927578074339, |
| "grad_norm": 1.4879825115203857, |
| "learning_rate": 0.0009996, |
| "loss": 5.9994, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.012391113093689206, |
| "grad_norm": 1.6412945985794067, |
| "learning_rate": 0.0009993106087504335, |
| "loss": 4.7896, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.014456298609304074, |
| "grad_norm": 1.4124268293380737, |
| "learning_rate": 0.0009986198359552766, |
| "loss": 3.8145, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.016521484124918943, |
| "grad_norm": 1.2819844484329224, |
| "learning_rate": 0.0009979290631601198, |
| "loss": 3.4161, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.01858666964053381, |
| "grad_norm": 1.155612826347351, |
| "learning_rate": 0.0009972382903649629, |
| "loss": 3.1921, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.020651855156148678, |
| "grad_norm": 1.1647284030914307, |
| "learning_rate": 0.0009965475175698062, |
| "loss": 3.055, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.022717040671763545, |
| "grad_norm": 1.1216390132904053, |
| "learning_rate": 0.0009958567447746491, |
| "loss": 2.9393, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.024782226187378412, |
| "grad_norm": 1.1327152252197266, |
| "learning_rate": 0.0009951659719794922, |
| "loss": 2.8641, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.02684741170299328, |
| "grad_norm": 1.0822185277938843, |
| "learning_rate": 0.0009944751991843356, |
| "loss": 2.7994, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.028912597218608147, |
| "grad_norm": 1.0448203086853027, |
| "learning_rate": 0.0009937844263891787, |
| "loss": 2.7461, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.030977782734223015, |
| "grad_norm": 1.0597904920578003, |
| "learning_rate": 0.0009930936535940218, |
| "loss": 2.7034, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.033042968249837885, |
| "grad_norm": 1.0737932920455933, |
| "learning_rate": 0.000992402880798865, |
| "loss": 2.6603, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.03510815376545275, |
| "grad_norm": 1.033523440361023, |
| "learning_rate": 0.000991712108003708, |
| "loss": 2.6265, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.03717333928106762, |
| "grad_norm": 0.976208508014679, |
| "learning_rate": 0.0009910213352085512, |
| "loss": 2.5961, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.03923852479668249, |
| "grad_norm": 0.9439292550086975, |
| "learning_rate": 0.0009903305624133943, |
| "loss": 2.576, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.041303710312297355, |
| "grad_norm": 0.9609880447387695, |
| "learning_rate": 0.0009896397896182376, |
| "loss": 2.5466, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.04336889582791222, |
| "grad_norm": 0.9652389883995056, |
| "learning_rate": 0.0009889490168230807, |
| "loss": 2.527, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.04543408134352709, |
| "grad_norm": 1.0029548406600952, |
| "learning_rate": 0.0009882582440279238, |
| "loss": 2.5086, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.04749926685914196, |
| "grad_norm": 0.9536625742912292, |
| "learning_rate": 0.000987567471232767, |
| "loss": 2.4902, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.049564452374756825, |
| "grad_norm": 0.9976128339767456, |
| "learning_rate": 0.00098687669843761, |
| "loss": 2.4657, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.05162963789037169, |
| "grad_norm": 1.0109055042266846, |
| "learning_rate": 0.0009861859256424532, |
| "loss": 2.4605, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.05369482340598656, |
| "grad_norm": 0.9560060501098633, |
| "learning_rate": 0.0009854951528472963, |
| "loss": 2.4431, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.05576000892160143, |
| "grad_norm": 0.9709720015525818, |
| "learning_rate": 0.0009848043800521396, |
| "loss": 2.4307, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.057825194437216294, |
| "grad_norm": 0.9962353706359863, |
| "learning_rate": 0.0009841136072569828, |
| "loss": 2.4115, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.05989037995283116, |
| "grad_norm": 0.9110284447669983, |
| "learning_rate": 0.0009834228344618259, |
| "loss": 2.4021, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.06195556546844603, |
| "grad_norm": 0.9498186707496643, |
| "learning_rate": 0.000982732061666669, |
| "loss": 2.3856, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.0640207509840609, |
| "grad_norm": 0.8862460851669312, |
| "learning_rate": 0.0009820412888715121, |
| "loss": 2.3762, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.06608593649967577, |
| "grad_norm": 0.9397904276847839, |
| "learning_rate": 0.0009813505160763552, |
| "loss": 2.3679, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.06815112201529064, |
| "grad_norm": 0.9054779410362244, |
| "learning_rate": 0.0009806597432811984, |
| "loss": 2.3561, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.0702163075309055, |
| "grad_norm": 0.8556115627288818, |
| "learning_rate": 0.0009799689704860417, |
| "loss": 2.353, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.07228149304652037, |
| "grad_norm": 0.9651133418083191, |
| "learning_rate": 0.0009792781976908848, |
| "loss": 2.3416, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.07434667856213524, |
| "grad_norm": 0.9362500309944153, |
| "learning_rate": 0.000978587424895728, |
| "loss": 2.3328, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.07641186407775011, |
| "grad_norm": 0.9050174951553345, |
| "learning_rate": 0.000977896652100571, |
| "loss": 2.3216, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.07847704959336498, |
| "grad_norm": 0.8851823806762695, |
| "learning_rate": 0.0009772058793054142, |
| "loss": 2.3177, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.08054223510897984, |
| "grad_norm": 0.8814013600349426, |
| "learning_rate": 0.0009765151065102573, |
| "loss": 2.3002, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.08260742062459471, |
| "grad_norm": 0.9048078656196594, |
| "learning_rate": 0.0009758243337151005, |
| "loss": 2.305, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.08467260614020958, |
| "grad_norm": 0.8821763396263123, |
| "learning_rate": 0.0009751335609199436, |
| "loss": 2.2925, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.08673779165582444, |
| "grad_norm": 0.873921811580658, |
| "learning_rate": 0.0009744427881247867, |
| "loss": 2.2861, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.08880297717143931, |
| "grad_norm": 0.8664683699607849, |
| "learning_rate": 0.0009737520153296299, |
| "loss": 2.2809, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.09086816268705418, |
| "grad_norm": 0.9115278720855713, |
| "learning_rate": 0.0009730612425344731, |
| "loss": 2.2765, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.09293334820266905, |
| "grad_norm": 0.875135064125061, |
| "learning_rate": 0.0009723704697393162, |
| "loss": 2.2699, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.09499853371828391, |
| "grad_norm": 0.8888856172561646, |
| "learning_rate": 0.0009716796969441593, |
| "loss": 2.2637, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.09706371923389878, |
| "grad_norm": 0.8921205401420593, |
| "learning_rate": 0.0009709889241490025, |
| "loss": 2.2591, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.09912890474951365, |
| "grad_norm": 0.840370774269104, |
| "learning_rate": 0.0009702981513538456, |
| "loss": 2.25, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.10119409026512852, |
| "grad_norm": 0.8678010702133179, |
| "learning_rate": 0.0009696073785586888, |
| "loss": 2.2472, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.10325927578074338, |
| "grad_norm": 0.8795912265777588, |
| "learning_rate": 0.0009689166057635319, |
| "loss": 2.2403, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.10532446129635825, |
| "grad_norm": 0.909457266330719, |
| "learning_rate": 0.0009682258329683751, |
| "loss": 2.2362, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.10738964681197312, |
| "grad_norm": 0.8739911317825317, |
| "learning_rate": 0.0009675350601732182, |
| "loss": 2.2285, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.10945483232758799, |
| "grad_norm": 0.8885407447814941, |
| "learning_rate": 0.0009668442873780614, |
| "loss": 2.2268, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.11152001784320285, |
| "grad_norm": 0.8348733186721802, |
| "learning_rate": 0.0009661535145829045, |
| "loss": 2.2201, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.11358520335881772, |
| "grad_norm": 0.8733665943145752, |
| "learning_rate": 0.0009654627417877476, |
| "loss": 2.219, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.11565038887443259, |
| "grad_norm": 0.8849397897720337, |
| "learning_rate": 0.0009647719689925908, |
| "loss": 2.2115, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.11771557439004746, |
| "grad_norm": 0.8752795457839966, |
| "learning_rate": 0.0009640811961974339, |
| "loss": 2.2051, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.11978075990566232, |
| "grad_norm": 0.8557031750679016, |
| "learning_rate": 0.0009633904234022772, |
| "loss": 2.2029, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.12184594542127719, |
| "grad_norm": 0.8175500631332397, |
| "learning_rate": 0.0009626996506071203, |
| "loss": 2.1967, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.12391113093689206, |
| "grad_norm": 0.8393064737319946, |
| "learning_rate": 0.0009620088778119633, |
| "loss": 2.1949, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.12597631645250693, |
| "grad_norm": 0.8515117764472961, |
| "learning_rate": 0.0009613181050168065, |
| "loss": 2.1909, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.1280415019681218, |
| "grad_norm": 0.8967178463935852, |
| "learning_rate": 0.0009606273322216496, |
| "loss": 2.1858, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.13010668748373666, |
| "grad_norm": 0.8990112543106079, |
| "learning_rate": 0.0009599365594264929, |
| "loss": 2.1814, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.13217187299935154, |
| "grad_norm": 0.8051102161407471, |
| "learning_rate": 0.000959245786631336, |
| "loss": 2.1803, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.1342370585149664, |
| "grad_norm": 0.8505108952522278, |
| "learning_rate": 0.0009585550138361792, |
| "loss": 2.1775, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.13630224403058128, |
| "grad_norm": 0.8586075305938721, |
| "learning_rate": 0.0009578642410410222, |
| "loss": 2.1671, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.13836742954619613, |
| "grad_norm": 0.830560028553009, |
| "learning_rate": 0.0009571734682458653, |
| "loss": 2.1697, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.140432615061811, |
| "grad_norm": 0.8251802325248718, |
| "learning_rate": 0.0009564826954507086, |
| "loss": 2.1646, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.14249780057742586, |
| "grad_norm": 0.8522030711174011, |
| "learning_rate": 0.0009557919226555517, |
| "loss": 2.1609, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.14456298609304075, |
| "grad_norm": 0.8344951272010803, |
| "learning_rate": 0.0009551011498603949, |
| "loss": 2.1542, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.1466281716086556, |
| "grad_norm": 0.8527629375457764, |
| "learning_rate": 0.000954410377065238, |
| "loss": 2.1584, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.14869335712427048, |
| "grad_norm": 0.8409314155578613, |
| "learning_rate": 0.0009537196042700811, |
| "loss": 2.1472, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.15075854263988533, |
| "grad_norm": 0.8568186163902283, |
| "learning_rate": 0.0009530288314749243, |
| "loss": 2.146, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.15282372815550022, |
| "grad_norm": 0.8142380118370056, |
| "learning_rate": 0.0009523380586797674, |
| "loss": 2.1467, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.15488891367111507, |
| "grad_norm": 0.8309258222579956, |
| "learning_rate": 0.0009516472858846106, |
| "loss": 2.142, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.15695409918672995, |
| "grad_norm": 0.8471255302429199, |
| "learning_rate": 0.0009509565130894537, |
| "loss": 2.1425, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.1590192847023448, |
| "grad_norm": 0.8846974968910217, |
| "learning_rate": 0.0009502657402942969, |
| "loss": 2.1377, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.16108447021795969, |
| "grad_norm": 0.8476696014404297, |
| "learning_rate": 0.00094957496749914, |
| "loss": 2.1356, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.16314965573357454, |
| "grad_norm": 0.8468635678291321, |
| "learning_rate": 0.0009488841947039832, |
| "loss": 2.1316, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.16521484124918942, |
| "grad_norm": 0.8356343507766724, |
| "learning_rate": 0.0009481934219088263, |
| "loss": 2.1269, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.16728002676480427, |
| "grad_norm": 0.7726144790649414, |
| "learning_rate": 0.0009475026491136694, |
| "loss": 2.1277, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.16934521228041916, |
| "grad_norm": 0.8468815684318542, |
| "learning_rate": 0.0009468118763185126, |
| "loss": 2.1332, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.171410397796034, |
| "grad_norm": 0.78179931640625, |
| "learning_rate": 0.0009461211035233558, |
| "loss": 2.1195, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.1734755833116489, |
| "grad_norm": 0.8012422919273376, |
| "learning_rate": 0.0009454303307281989, |
| "loss": 2.115, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.17554076882726374, |
| "grad_norm": 0.8458732962608337, |
| "learning_rate": 0.000944739557933042, |
| "loss": 2.1146, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.17760595434287862, |
| "grad_norm": 0.8353042006492615, |
| "learning_rate": 0.0009440487851378852, |
| "loss": 2.1123, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.17967113985849348, |
| "grad_norm": 0.8672284483909607, |
| "learning_rate": 0.0009433580123427283, |
| "loss": 2.1114, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.18173632537410836, |
| "grad_norm": 0.7824869751930237, |
| "learning_rate": 0.0009426672395475715, |
| "loss": 2.1082, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.1838015108897232, |
| "grad_norm": 0.7976692318916321, |
| "learning_rate": 0.0009419764667524147, |
| "loss": 2.1052, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.1858666964053381, |
| "grad_norm": 0.876670777797699, |
| "learning_rate": 0.0009412856939572577, |
| "loss": 2.1033, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.18793188192095295, |
| "grad_norm": 0.7947434186935425, |
| "learning_rate": 0.0009405949211621009, |
| "loss": 2.0971, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.18999706743656783, |
| "grad_norm": 0.823627233505249, |
| "learning_rate": 0.000939904148366944, |
| "loss": 2.0984, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.19206225295218268, |
| "grad_norm": 0.8043273091316223, |
| "learning_rate": 0.0009392133755717873, |
| "loss": 2.0958, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.19412743846779756, |
| "grad_norm": 0.8782801032066345, |
| "learning_rate": 0.0009385226027766304, |
| "loss": 2.0914, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.19619262398341242, |
| "grad_norm": 0.8043196201324463, |
| "learning_rate": 0.0009378318299814735, |
| "loss": 2.0888, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.1982578094990273, |
| "grad_norm": 0.8064476251602173, |
| "learning_rate": 0.0009371410571863166, |
| "loss": 2.0847, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.20032299501464215, |
| "grad_norm": 0.801071047782898, |
| "learning_rate": 0.0009364502843911597, |
| "loss": 2.0844, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.20238818053025703, |
| "grad_norm": 0.8486244678497314, |
| "learning_rate": 0.000935759511596003, |
| "loss": 2.0853, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.2044533660458719, |
| "grad_norm": 0.813061535358429, |
| "learning_rate": 0.0009350687388008461, |
| "loss": 2.0812, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.20651855156148677, |
| "grad_norm": 0.8625230193138123, |
| "learning_rate": 0.0009343779660056893, |
| "loss": 2.0832, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.20858373707710165, |
| "grad_norm": 0.8224324584007263, |
| "learning_rate": 0.0009336871932105324, |
| "loss": 2.0785, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.2106489225927165, |
| "grad_norm": 0.8722664713859558, |
| "learning_rate": 0.0009329964204153754, |
| "loss": 2.074, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.21271410810833138, |
| "grad_norm": 0.8052055239677429, |
| "learning_rate": 0.0009323056476202187, |
| "loss": 2.074, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.21477929362394624, |
| "grad_norm": 0.8521301746368408, |
| "learning_rate": 0.0009316148748250618, |
| "loss": 2.0681, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.21684447913956112, |
| "grad_norm": 0.846494197845459, |
| "learning_rate": 0.000930924102029905, |
| "loss": 2.073, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.21890966465517597, |
| "grad_norm": 0.8026652336120605, |
| "learning_rate": 0.0009302333292347481, |
| "loss": 2.0685, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.22097485017079085, |
| "grad_norm": 0.8246744871139526, |
| "learning_rate": 0.0009295425564395913, |
| "loss": 2.0653, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.2230400356864057, |
| "grad_norm": 0.8326907157897949, |
| "learning_rate": 0.0009288517836444344, |
| "loss": 2.0643, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.2251052212020206, |
| "grad_norm": 0.7792090177536011, |
| "learning_rate": 0.0009281610108492775, |
| "loss": 2.0622, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.22717040671763544, |
| "grad_norm": 0.8691778779029846, |
| "learning_rate": 0.0009274702380541207, |
| "loss": 2.0624, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.22923559223325032, |
| "grad_norm": 0.7907185554504395, |
| "learning_rate": 0.0009267794652589638, |
| "loss": 2.0571, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.23130077774886518, |
| "grad_norm": 0.8440839052200317, |
| "learning_rate": 0.000926088692463807, |
| "loss": 2.0612, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.23336596326448006, |
| "grad_norm": 0.8027564883232117, |
| "learning_rate": 0.0009253979196686502, |
| "loss": 2.054, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.2354311487800949, |
| "grad_norm": 0.7806565165519714, |
| "learning_rate": 0.0009247071468734933, |
| "loss": 2.053, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.2374963342957098, |
| "grad_norm": 0.8598223328590393, |
| "learning_rate": 0.0009240163740783364, |
| "loss": 2.0518, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.23956151981132465, |
| "grad_norm": 0.8221333622932434, |
| "learning_rate": 0.0009233256012831795, |
| "loss": 2.052, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.24162670532693953, |
| "grad_norm": 0.8474496603012085, |
| "learning_rate": 0.0009226348284880227, |
| "loss": 2.0435, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.24369189084255438, |
| "grad_norm": 0.8255507349967957, |
| "learning_rate": 0.0009219440556928659, |
| "loss": 2.045, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.24575707635816926, |
| "grad_norm": 0.7817030549049377, |
| "learning_rate": 0.0009212532828977091, |
| "loss": 2.0472, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.24782226187378412, |
| "grad_norm": 0.7616594433784485, |
| "learning_rate": 0.0009205625101025521, |
| "loss": 2.0424, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.249887447389399, |
| "grad_norm": 0.8131653070449829, |
| "learning_rate": 0.0009198717373073953, |
| "loss": 2.0438, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.25195263290501385, |
| "grad_norm": 0.7939597368240356, |
| "learning_rate": 0.0009191809645122384, |
| "loss": 2.0392, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.2540178184206287, |
| "grad_norm": 0.823221743106842, |
| "learning_rate": 0.0009184901917170816, |
| "loss": 2.0409, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.2560830039362436, |
| "grad_norm": 0.8100286722183228, |
| "learning_rate": 0.0009177994189219248, |
| "loss": 2.0352, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.25814818945185847, |
| "grad_norm": 0.84886634349823, |
| "learning_rate": 0.0009171086461267679, |
| "loss": 2.0395, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.2602133749674733, |
| "grad_norm": 0.8171844482421875, |
| "learning_rate": 0.000916417873331611, |
| "loss": 2.0374, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.2622785604830882, |
| "grad_norm": 0.8373914957046509, |
| "learning_rate": 0.0009157271005364541, |
| "loss": 2.0302, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.2643437459987031, |
| "grad_norm": 0.8553788065910339, |
| "learning_rate": 0.0009150363277412974, |
| "loss": 2.0346, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.26640893151431794, |
| "grad_norm": 0.8569718599319458, |
| "learning_rate": 0.0009143455549461405, |
| "loss": 2.0347, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.2684741170299328, |
| "grad_norm": 0.8263908624649048, |
| "learning_rate": 0.0009136547821509836, |
| "loss": 2.0306, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.27053930254554764, |
| "grad_norm": 0.8501819372177124, |
| "learning_rate": 0.0009129640093558268, |
| "loss": 2.0271, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.27260448806116255, |
| "grad_norm": 0.8343943357467651, |
| "learning_rate": 0.0009122732365606698, |
| "loss": 2.0296, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.2746696735767774, |
| "grad_norm": 0.8072646856307983, |
| "learning_rate": 0.000911582463765513, |
| "loss": 2.0238, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.27673485909239226, |
| "grad_norm": 0.8142940998077393, |
| "learning_rate": 0.0009108916909703562, |
| "loss": 2.0289, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.2788000446080071, |
| "grad_norm": 0.7751716375350952, |
| "learning_rate": 0.0009102009181751994, |
| "loss": 2.0262, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.280865230123622, |
| "grad_norm": 0.7758037447929382, |
| "learning_rate": 0.0009095101453800425, |
| "loss": 2.0202, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.2829304156392369, |
| "grad_norm": 0.8752540349960327, |
| "learning_rate": 0.0009088193725848856, |
| "loss": 2.0195, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.28499560115485173, |
| "grad_norm": 0.8347713351249695, |
| "learning_rate": 0.0009081285997897288, |
| "loss": 2.0187, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.2870607866704666, |
| "grad_norm": 0.8156507015228271, |
| "learning_rate": 0.0009074378269945719, |
| "loss": 2.0157, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.2891259721860815, |
| "grad_norm": 0.7821555137634277, |
| "learning_rate": 0.0009067470541994151, |
| "loss": 2.0152, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.29119115770169635, |
| "grad_norm": 0.84757399559021, |
| "learning_rate": 0.0009060562814042582, |
| "loss": 2.0157, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.2932563432173112, |
| "grad_norm": 0.8818306922912598, |
| "learning_rate": 0.0009053655086091014, |
| "loss": 2.0121, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.29532152873292605, |
| "grad_norm": 0.8257991671562195, |
| "learning_rate": 0.0009046747358139446, |
| "loss": 2.009, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.29738671424854096, |
| "grad_norm": 0.821416437625885, |
| "learning_rate": 0.0009039839630187876, |
| "loss": 2.0094, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.2994518997641558, |
| "grad_norm": 0.7886099815368652, |
| "learning_rate": 0.0009032931902236308, |
| "loss": 2.0067, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.30151708527977067, |
| "grad_norm": 0.8650347590446472, |
| "learning_rate": 0.0009026024174284739, |
| "loss": 2.0046, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.3035822707953855, |
| "grad_norm": 0.8249508738517761, |
| "learning_rate": 0.0009019116446333171, |
| "loss": 2.0044, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.30564745631100043, |
| "grad_norm": 0.8648396730422974, |
| "learning_rate": 0.0009012208718381603, |
| "loss": 2.0061, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.3077126418266153, |
| "grad_norm": 0.8078823089599609, |
| "learning_rate": 0.0009005300990430035, |
| "loss": 2.001, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.30977782734223014, |
| "grad_norm": 0.8452419638633728, |
| "learning_rate": 0.0008998393262478465, |
| "loss": 1.9992, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.31184301285784505, |
| "grad_norm": 0.7989551424980164, |
| "learning_rate": 0.0008991485534526896, |
| "loss": 2.007, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.3139081983734599, |
| "grad_norm": 0.8734456300735474, |
| "learning_rate": 0.0008984577806575328, |
| "loss": 2.0004, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.31597338388907475, |
| "grad_norm": 0.8965834975242615, |
| "learning_rate": 0.000897767007862376, |
| "loss": 2.0034, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.3180385694046896, |
| "grad_norm": 0.7855513691902161, |
| "learning_rate": 0.0008970762350672192, |
| "loss": 1.9932, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.3201037549203045, |
| "grad_norm": 0.825775682926178, |
| "learning_rate": 0.0008963854622720623, |
| "loss": 1.9979, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.32216894043591937, |
| "grad_norm": 0.7757362127304077, |
| "learning_rate": 0.0008956946894769054, |
| "loss": 1.9974, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.3242341259515342, |
| "grad_norm": 0.8657450675964355, |
| "learning_rate": 0.0008950039166817485, |
| "loss": 1.9976, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.3262993114671491, |
| "grad_norm": 0.8072881102561951, |
| "learning_rate": 0.0008943131438865916, |
| "loss": 1.9934, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.328364496982764, |
| "grad_norm": 0.7893191576004028, |
| "learning_rate": 0.0008936223710914349, |
| "loss": 1.9945, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.33042968249837884, |
| "grad_norm": 0.8834479451179504, |
| "learning_rate": 0.000892931598296278, |
| "loss": 1.9914, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.3324948680139937, |
| "grad_norm": 0.8713655471801758, |
| "learning_rate": 0.0008922408255011212, |
| "loss": 1.9946, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.33456005352960855, |
| "grad_norm": 0.8255290389060974, |
| "learning_rate": 0.0008915500527059643, |
| "loss": 1.9914, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.33662523904522346, |
| "grad_norm": 0.8153598308563232, |
| "learning_rate": 0.0008908592799108073, |
| "loss": 1.9861, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.3386904245608383, |
| "grad_norm": 0.8533855080604553, |
| "learning_rate": 0.0008901685071156506, |
| "loss": 1.9887, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.34075561007645316, |
| "grad_norm": 0.912350594997406, |
| "learning_rate": 0.0008894777343204937, |
| "loss": 1.9923, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.342820795592068, |
| "grad_norm": 0.8206115365028381, |
| "learning_rate": 0.0008887869615253369, |
| "loss": 1.9869, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.3448859811076829, |
| "grad_norm": 0.8313278555870056, |
| "learning_rate": 0.00088809618873018, |
| "loss": 1.9863, |
| "step": 83500 |
| }, |
| { |
| "epoch": 0.3469511666232978, |
| "grad_norm": 0.9152906537055969, |
| "learning_rate": 0.0008874054159350233, |
| "loss": 1.9786, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.34901635213891263, |
| "grad_norm": 0.8398587107658386, |
| "learning_rate": 0.0008867146431398663, |
| "loss": 1.986, |
| "step": 84500 |
| }, |
| { |
| "epoch": 0.3510815376545275, |
| "grad_norm": 0.8084604144096375, |
| "learning_rate": 0.0008860238703447094, |
| "loss": 1.9837, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.3531467231701424, |
| "grad_norm": 0.7918562889099121, |
| "learning_rate": 0.0008853330975495526, |
| "loss": 1.9789, |
| "step": 85500 |
| }, |
| { |
| "epoch": 0.35521190868575725, |
| "grad_norm": 0.8110492825508118, |
| "learning_rate": 0.0008846423247543957, |
| "loss": 1.9754, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.3572770942013721, |
| "grad_norm": 0.7786925435066223, |
| "learning_rate": 0.000883951551959239, |
| "loss": 1.9817, |
| "step": 86500 |
| }, |
| { |
| "epoch": 0.35934227971698696, |
| "grad_norm": 0.8928225636482239, |
| "learning_rate": 0.0008832607791640821, |
| "loss": 1.9809, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.36140746523260187, |
| "grad_norm": 0.821860134601593, |
| "learning_rate": 0.0008825700063689252, |
| "loss": 1.9759, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.3634726507482167, |
| "grad_norm": 0.8514395952224731, |
| "learning_rate": 0.0008818792335737683, |
| "loss": 1.9739, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.3655378362638316, |
| "grad_norm": 0.8256642818450928, |
| "learning_rate": 0.0008811884607786114, |
| "loss": 1.9743, |
| "step": 88500 |
| }, |
| { |
| "epoch": 0.3676030217794464, |
| "grad_norm": 0.8043322563171387, |
| "learning_rate": 0.0008804976879834547, |
| "loss": 1.975, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.36966820729506134, |
| "grad_norm": 0.8065923452377319, |
| "learning_rate": 0.0008798069151882978, |
| "loss": 1.9712, |
| "step": 89500 |
| }, |
| { |
| "epoch": 0.3717333928106762, |
| "grad_norm": 0.8350073099136353, |
| "learning_rate": 0.000879116142393141, |
| "loss": 1.9741, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.37379857832629104, |
| "grad_norm": 0.8081244230270386, |
| "learning_rate": 0.000878425369597984, |
| "loss": 1.977, |
| "step": 90500 |
| }, |
| { |
| "epoch": 0.3758637638419059, |
| "grad_norm": 0.7285000681877136, |
| "learning_rate": 0.0008777345968028272, |
| "loss": 1.9688, |
| "step": 91000 |
| }, |
| { |
| "epoch": 0.3779289493575208, |
| "grad_norm": 0.8110142350196838, |
| "learning_rate": 0.0008770438240076704, |
| "loss": 1.9675, |
| "step": 91500 |
| }, |
| { |
| "epoch": 0.37999413487313566, |
| "grad_norm": 0.8193402886390686, |
| "learning_rate": 0.0008763530512125135, |
| "loss": 1.9682, |
| "step": 92000 |
| }, |
| { |
| "epoch": 0.3820593203887505, |
| "grad_norm": 0.8382455110549927, |
| "learning_rate": 0.0008756622784173567, |
| "loss": 1.9652, |
| "step": 92500 |
| }, |
| { |
| "epoch": 0.38412450590436537, |
| "grad_norm": 0.7900645732879639, |
| "learning_rate": 0.0008749715056221998, |
| "loss": 1.9654, |
| "step": 93000 |
| }, |
| { |
| "epoch": 0.3861896914199803, |
| "grad_norm": 0.7835169434547424, |
| "learning_rate": 0.0008742807328270429, |
| "loss": 1.9676, |
| "step": 93500 |
| }, |
| { |
| "epoch": 0.38825487693559513, |
| "grad_norm": 0.8066137433052063, |
| "learning_rate": 0.000873589960031886, |
| "loss": 1.9668, |
| "step": 94000 |
| }, |
| { |
| "epoch": 0.39032006245121, |
| "grad_norm": 0.8150152564048767, |
| "learning_rate": 0.0008728991872367293, |
| "loss": 1.9683, |
| "step": 94500 |
| }, |
| { |
| "epoch": 0.39238524796682483, |
| "grad_norm": 1.05111825466156, |
| "learning_rate": 0.0008722084144415724, |
| "loss": 1.9613, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.39445043348243974, |
| "grad_norm": 0.8422666788101196, |
| "learning_rate": 0.0008715176416464155, |
| "loss": 1.9625, |
| "step": 95500 |
| }, |
| { |
| "epoch": 0.3965156189980546, |
| "grad_norm": 0.8087729215621948, |
| "learning_rate": 0.0008708268688512587, |
| "loss": 1.9657, |
| "step": 96000 |
| }, |
| { |
| "epoch": 0.39858080451366945, |
| "grad_norm": 0.8095026612281799, |
| "learning_rate": 0.0008701360960561017, |
| "loss": 1.9636, |
| "step": 96500 |
| }, |
| { |
| "epoch": 0.4006459900292843, |
| "grad_norm": 0.7824914455413818, |
| "learning_rate": 0.000869445323260945, |
| "loss": 1.9655, |
| "step": 97000 |
| }, |
| { |
| "epoch": 0.4027111755448992, |
| "grad_norm": 0.8077009320259094, |
| "learning_rate": 0.0008687545504657881, |
| "loss": 1.9577, |
| "step": 97500 |
| }, |
| { |
| "epoch": 0.40477636106051407, |
| "grad_norm": 0.7984289526939392, |
| "learning_rate": 0.0008680637776706313, |
| "loss": 1.9575, |
| "step": 98000 |
| }, |
| { |
| "epoch": 0.4068415465761289, |
| "grad_norm": 0.8378064036369324, |
| "learning_rate": 0.0008673730048754744, |
| "loss": 1.9587, |
| "step": 98500 |
| }, |
| { |
| "epoch": 0.4089067320917438, |
| "grad_norm": 0.7952322959899902, |
| "learning_rate": 0.0008666822320803175, |
| "loss": 1.9572, |
| "step": 99000 |
| }, |
| { |
| "epoch": 0.4109719176073587, |
| "grad_norm": 0.9045737385749817, |
| "learning_rate": 0.0008659914592851607, |
| "loss": 1.957, |
| "step": 99500 |
| }, |
| { |
| "epoch": 0.41303710312297354, |
| "grad_norm": 0.8450877666473389, |
| "learning_rate": 0.0008653006864900038, |
| "loss": 1.9573, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.4151022886385884, |
| "grad_norm": 0.8580604791641235, |
| "learning_rate": 0.000864609913694847, |
| "loss": 1.9561, |
| "step": 100500 |
| }, |
| { |
| "epoch": 0.4171674741542033, |
| "grad_norm": 0.8783984780311584, |
| "learning_rate": 0.0008639191408996901, |
| "loss": 1.9551, |
| "step": 101000 |
| }, |
| { |
| "epoch": 0.41923265966981815, |
| "grad_norm": 0.7707995772361755, |
| "learning_rate": 0.0008632283681045334, |
| "loss": 1.9551, |
| "step": 101500 |
| }, |
| { |
| "epoch": 0.421297845185433, |
| "grad_norm": 0.7902424931526184, |
| "learning_rate": 0.0008625375953093765, |
| "loss": 1.9544, |
| "step": 102000 |
| }, |
| { |
| "epoch": 0.42336303070104786, |
| "grad_norm": 0.850943922996521, |
| "learning_rate": 0.0008618468225142195, |
| "loss": 1.9556, |
| "step": 102500 |
| }, |
| { |
| "epoch": 0.42542821621666277, |
| "grad_norm": 0.918465793132782, |
| "learning_rate": 0.0008611560497190627, |
| "loss": 1.9504, |
| "step": 103000 |
| }, |
| { |
| "epoch": 0.4274934017322776, |
| "grad_norm": 0.8017387390136719, |
| "learning_rate": 0.0008604652769239058, |
| "loss": 1.9467, |
| "step": 103500 |
| }, |
| { |
| "epoch": 0.4295585872478925, |
| "grad_norm": 0.8548043370246887, |
| "learning_rate": 0.000859774504128749, |
| "loss": 1.9488, |
| "step": 104000 |
| }, |
| { |
| "epoch": 0.43162377276350733, |
| "grad_norm": 0.8529847264289856, |
| "learning_rate": 0.0008590837313335922, |
| "loss": 1.9472, |
| "step": 104500 |
| }, |
| { |
| "epoch": 0.43368895827912224, |
| "grad_norm": 0.9331560730934143, |
| "learning_rate": 0.0008583929585384354, |
| "loss": 1.9444, |
| "step": 105000 |
| }, |
| { |
| "epoch": 0.4357541437947371, |
| "grad_norm": 0.7767966985702515, |
| "learning_rate": 0.0008577021857432784, |
| "loss": 1.9437, |
| "step": 105500 |
| }, |
| { |
| "epoch": 0.43781932931035195, |
| "grad_norm": 0.8031103610992432, |
| "learning_rate": 0.0008570114129481215, |
| "loss": 1.9481, |
| "step": 106000 |
| }, |
| { |
| "epoch": 0.4398845148259668, |
| "grad_norm": 0.8879848122596741, |
| "learning_rate": 0.0008563206401529647, |
| "loss": 1.9439, |
| "step": 106500 |
| }, |
| { |
| "epoch": 0.4419497003415817, |
| "grad_norm": 0.8233328461647034, |
| "learning_rate": 0.0008556298673578079, |
| "loss": 1.9485, |
| "step": 107000 |
| }, |
| { |
| "epoch": 0.44401488585719656, |
| "grad_norm": 0.8277767300605774, |
| "learning_rate": 0.0008549390945626511, |
| "loss": 1.9475, |
| "step": 107500 |
| }, |
| { |
| "epoch": 0.4460800713728114, |
| "grad_norm": 0.8291540741920471, |
| "learning_rate": 0.0008542483217674942, |
| "loss": 1.9503, |
| "step": 108000 |
| }, |
| { |
| "epoch": 0.44814525688842627, |
| "grad_norm": 0.8007998466491699, |
| "learning_rate": 0.0008535575489723373, |
| "loss": 1.9441, |
| "step": 108500 |
| }, |
| { |
| "epoch": 0.4502104424040412, |
| "grad_norm": 0.7802460193634033, |
| "learning_rate": 0.0008528667761771804, |
| "loss": 1.9422, |
| "step": 109000 |
| }, |
| { |
| "epoch": 0.45227562791965603, |
| "grad_norm": 0.7900969982147217, |
| "learning_rate": 0.0008521760033820236, |
| "loss": 1.9418, |
| "step": 109500 |
| }, |
| { |
| "epoch": 0.4543408134352709, |
| "grad_norm": 0.958767294883728, |
| "learning_rate": 0.0008514852305868668, |
| "loss": 1.9431, |
| "step": 110000 |
| }, |
| { |
| "epoch": 0.45640599895088574, |
| "grad_norm": 0.8186129331588745, |
| "learning_rate": 0.0008507944577917099, |
| "loss": 1.9368, |
| "step": 110500 |
| }, |
| { |
| "epoch": 0.45847118446650065, |
| "grad_norm": 0.7958455085754395, |
| "learning_rate": 0.0008501036849965531, |
| "loss": 1.9368, |
| "step": 111000 |
| }, |
| { |
| "epoch": 0.4605363699821155, |
| "grad_norm": 1.3525958061218262, |
| "learning_rate": 0.0008494129122013961, |
| "loss": 1.936, |
| "step": 111500 |
| }, |
| { |
| "epoch": 0.46260155549773035, |
| "grad_norm": 0.8453717827796936, |
| "learning_rate": 0.0008487221394062394, |
| "loss": 1.9438, |
| "step": 112000 |
| }, |
| { |
| "epoch": 0.4646667410133452, |
| "grad_norm": 0.8021391034126282, |
| "learning_rate": 0.0008480313666110825, |
| "loss": 1.9428, |
| "step": 112500 |
| }, |
| { |
| "epoch": 0.4667319265289601, |
| "grad_norm": 0.8905833959579468, |
| "learning_rate": 0.0008473405938159256, |
| "loss": 1.9416, |
| "step": 113000 |
| }, |
| { |
| "epoch": 0.46879711204457497, |
| "grad_norm": 0.789579451084137, |
| "learning_rate": 0.0008466498210207688, |
| "loss": 1.9406, |
| "step": 113500 |
| }, |
| { |
| "epoch": 0.4708622975601898, |
| "grad_norm": 0.8398124575614929, |
| "learning_rate": 0.000845959048225612, |
| "loss": 1.935, |
| "step": 114000 |
| }, |
| { |
| "epoch": 0.4729274830758047, |
| "grad_norm": 0.8189172148704529, |
| "learning_rate": 0.0008452682754304551, |
| "loss": 1.9367, |
| "step": 114500 |
| }, |
| { |
| "epoch": 0.4749926685914196, |
| "grad_norm": 0.7979219555854797, |
| "learning_rate": 0.0008445775026352982, |
| "loss": 1.9309, |
| "step": 115000 |
| }, |
| { |
| "epoch": 0.47705785410703444, |
| "grad_norm": 0.9062512516975403, |
| "learning_rate": 0.0008438867298401414, |
| "loss": 1.9389, |
| "step": 115500 |
| }, |
| { |
| "epoch": 0.4791230396226493, |
| "grad_norm": 0.9431639909744263, |
| "learning_rate": 0.0008431959570449845, |
| "loss": 1.9307, |
| "step": 116000 |
| }, |
| { |
| "epoch": 0.48118822513826415, |
| "grad_norm": 0.8639684319496155, |
| "learning_rate": 0.0008425051842498276, |
| "loss": 1.9325, |
| "step": 116500 |
| }, |
| { |
| "epoch": 0.48325341065387906, |
| "grad_norm": 0.8229732513427734, |
| "learning_rate": 0.0008418144114546709, |
| "loss": 1.9323, |
| "step": 117000 |
| }, |
| { |
| "epoch": 0.4853185961694939, |
| "grad_norm": 0.789789080619812, |
| "learning_rate": 0.0008411236386595139, |
| "loss": 1.9317, |
| "step": 117500 |
| }, |
| { |
| "epoch": 0.48738378168510876, |
| "grad_norm": 0.8473231196403503, |
| "learning_rate": 0.0008404328658643571, |
| "loss": 1.9332, |
| "step": 118000 |
| }, |
| { |
| "epoch": 0.4894489672007236, |
| "grad_norm": 0.9255551099777222, |
| "learning_rate": 0.0008397420930692002, |
| "loss": 1.9309, |
| "step": 118500 |
| }, |
| { |
| "epoch": 0.4915141527163385, |
| "grad_norm": 0.7924582958221436, |
| "learning_rate": 0.0008390513202740435, |
| "loss": 1.9277, |
| "step": 119000 |
| }, |
| { |
| "epoch": 0.4935793382319534, |
| "grad_norm": 0.8907535672187805, |
| "learning_rate": 0.0008383605474788866, |
| "loss": 1.9294, |
| "step": 119500 |
| }, |
| { |
| "epoch": 0.49564452374756823, |
| "grad_norm": 0.8191530108451843, |
| "learning_rate": 0.0008376697746837297, |
| "loss": 1.9306, |
| "step": 120000 |
| }, |
| { |
| "epoch": 0.4977097092631831, |
| "grad_norm": 0.8925333023071289, |
| "learning_rate": 0.0008369790018885728, |
| "loss": 1.9229, |
| "step": 120500 |
| }, |
| { |
| "epoch": 0.499774894778798, |
| "grad_norm": 0.8087531924247742, |
| "learning_rate": 0.0008362882290934159, |
| "loss": 1.9243, |
| "step": 121000 |
| }, |
| { |
| "epoch": 0.5018400802944128, |
| "grad_norm": 0.8658357858657837, |
| "learning_rate": 0.0008355974562982591, |
| "loss": 1.9296, |
| "step": 121500 |
| }, |
| { |
| "epoch": 0.5039052658100277, |
| "grad_norm": 0.8883163332939148, |
| "learning_rate": 0.0008349066835031023, |
| "loss": 1.9228, |
| "step": 122000 |
| }, |
| { |
| "epoch": 0.5059704513256426, |
| "grad_norm": 0.9020292162895203, |
| "learning_rate": 0.0008342159107079455, |
| "loss": 1.9242, |
| "step": 122500 |
| }, |
| { |
| "epoch": 0.5080356368412574, |
| "grad_norm": 0.7825981974601746, |
| "learning_rate": 0.0008335251379127886, |
| "loss": 1.9244, |
| "step": 123000 |
| }, |
| { |
| "epoch": 0.5101008223568723, |
| "grad_norm": 0.7903372645378113, |
| "learning_rate": 0.0008328343651176316, |
| "loss": 1.9263, |
| "step": 123500 |
| }, |
| { |
| "epoch": 0.5121660078724872, |
| "grad_norm": 0.8415020108222961, |
| "learning_rate": 0.0008321435923224748, |
| "loss": 1.9243, |
| "step": 124000 |
| }, |
| { |
| "epoch": 0.5142311933881021, |
| "grad_norm": 0.8838851451873779, |
| "learning_rate": 0.000831452819527318, |
| "loss": 1.9229, |
| "step": 124500 |
| }, |
| { |
| "epoch": 0.5162963789037169, |
| "grad_norm": 0.8412485718727112, |
| "learning_rate": 0.0008307620467321612, |
| "loss": 1.9198, |
| "step": 125000 |
| }, |
| { |
| "epoch": 0.5183615644193318, |
| "grad_norm": 0.8944464921951294, |
| "learning_rate": 0.0008300712739370043, |
| "loss": 1.9242, |
| "step": 125500 |
| }, |
| { |
| "epoch": 0.5204267499349466, |
| "grad_norm": 0.8970022797584534, |
| "learning_rate": 0.0008293805011418475, |
| "loss": 1.9157, |
| "step": 126000 |
| }, |
| { |
| "epoch": 0.5224919354505615, |
| "grad_norm": 0.7767829895019531, |
| "learning_rate": 0.0008286897283466905, |
| "loss": 1.9146, |
| "step": 126500 |
| }, |
| { |
| "epoch": 0.5245571209661763, |
| "grad_norm": 0.9366709589958191, |
| "learning_rate": 0.0008279989555515337, |
| "loss": 1.9219, |
| "step": 127000 |
| }, |
| { |
| "epoch": 0.5266223064817913, |
| "grad_norm": 0.813752293586731, |
| "learning_rate": 0.0008273081827563769, |
| "loss": 1.9131, |
| "step": 127500 |
| }, |
| { |
| "epoch": 0.5286874919974062, |
| "grad_norm": 0.7913943529129028, |
| "learning_rate": 0.00082661740996122, |
| "loss": 1.9151, |
| "step": 128000 |
| }, |
| { |
| "epoch": 0.530752677513021, |
| "grad_norm": 0.7573590278625488, |
| "learning_rate": 0.0008259266371660632, |
| "loss": 1.9141, |
| "step": 128500 |
| }, |
| { |
| "epoch": 0.5328178630286359, |
| "grad_norm": 0.8860184550285339, |
| "learning_rate": 0.0008252358643709063, |
| "loss": 1.9157, |
| "step": 129000 |
| }, |
| { |
| "epoch": 0.5348830485442507, |
| "grad_norm": 0.7423400282859802, |
| "learning_rate": 0.0008245450915757495, |
| "loss": 1.9123, |
| "step": 129500 |
| }, |
| { |
| "epoch": 0.5369482340598656, |
| "grad_norm": 0.7855700254440308, |
| "learning_rate": 0.0008238543187805926, |
| "loss": 1.9156, |
| "step": 130000 |
| }, |
| { |
| "epoch": 0.5390134195754804, |
| "grad_norm": 0.7748924493789673, |
| "learning_rate": 0.0008231635459854357, |
| "loss": 1.9125, |
| "step": 130500 |
| }, |
| { |
| "epoch": 0.5410786050910953, |
| "grad_norm": 0.823998212814331, |
| "learning_rate": 0.0008224727731902789, |
| "loss": 1.914, |
| "step": 131000 |
| }, |
| { |
| "epoch": 0.5431437906067103, |
| "grad_norm": 0.837291955947876, |
| "learning_rate": 0.000821782000395122, |
| "loss": 1.9135, |
| "step": 131500 |
| }, |
| { |
| "epoch": 0.5452089761223251, |
| "grad_norm": 0.8040900230407715, |
| "learning_rate": 0.0008210912275999653, |
| "loss": 1.9167, |
| "step": 132000 |
| }, |
| { |
| "epoch": 0.54727416163794, |
| "grad_norm": 0.8205652236938477, |
| "learning_rate": 0.0008204004548048083, |
| "loss": 1.9111, |
| "step": 132500 |
| }, |
| { |
| "epoch": 0.5493393471535548, |
| "grad_norm": 0.9085518717765808, |
| "learning_rate": 0.0008197096820096515, |
| "loss": 1.9082, |
| "step": 133000 |
| }, |
| { |
| "epoch": 0.5514045326691697, |
| "grad_norm": 0.9547085165977478, |
| "learning_rate": 0.0008190189092144946, |
| "loss": 1.9066, |
| "step": 133500 |
| }, |
| { |
| "epoch": 0.5534697181847845, |
| "grad_norm": 0.8351136445999146, |
| "learning_rate": 0.0008183281364193377, |
| "loss": 1.9152, |
| "step": 134000 |
| }, |
| { |
| "epoch": 0.5555349037003994, |
| "grad_norm": 0.814534068107605, |
| "learning_rate": 0.000817637363624181, |
| "loss": 1.9093, |
| "step": 134500 |
| }, |
| { |
| "epoch": 0.5576000892160142, |
| "grad_norm": 0.8208035826683044, |
| "learning_rate": 0.0008169465908290241, |
| "loss": 1.9107, |
| "step": 135000 |
| }, |
| { |
| "epoch": 0.5596652747316292, |
| "grad_norm": 0.8544581532478333, |
| "learning_rate": 0.0008162558180338672, |
| "loss": 1.9092, |
| "step": 135500 |
| }, |
| { |
| "epoch": 0.561730460247244, |
| "grad_norm": 0.8623299598693848, |
| "learning_rate": 0.0008155650452387103, |
| "loss": 1.9097, |
| "step": 136000 |
| }, |
| { |
| "epoch": 0.5637956457628589, |
| "grad_norm": 0.8688506484031677, |
| "learning_rate": 0.0008148742724435535, |
| "loss": 1.9103, |
| "step": 136500 |
| }, |
| { |
| "epoch": 0.5658608312784738, |
| "grad_norm": 0.8412228226661682, |
| "learning_rate": 0.0008141834996483967, |
| "loss": 1.9074, |
| "step": 137000 |
| }, |
| { |
| "epoch": 0.5679260167940886, |
| "grad_norm": 0.8734971880912781, |
| "learning_rate": 0.0008134927268532398, |
| "loss": 1.906, |
| "step": 137500 |
| }, |
| { |
| "epoch": 0.5699912023097035, |
| "grad_norm": 0.8894969820976257, |
| "learning_rate": 0.000812801954058083, |
| "loss": 1.9072, |
| "step": 138000 |
| }, |
| { |
| "epoch": 0.5720563878253183, |
| "grad_norm": 0.7939966320991516, |
| "learning_rate": 0.0008121111812629261, |
| "loss": 1.908, |
| "step": 138500 |
| }, |
| { |
| "epoch": 0.5741215733409332, |
| "grad_norm": 0.8480666875839233, |
| "learning_rate": 0.0008114204084677692, |
| "loss": 1.9031, |
| "step": 139000 |
| }, |
| { |
| "epoch": 0.5761867588565481, |
| "grad_norm": 0.7555306553840637, |
| "learning_rate": 0.0008107296356726124, |
| "loss": 1.902, |
| "step": 139500 |
| }, |
| { |
| "epoch": 0.578251944372163, |
| "grad_norm": 0.8896836638450623, |
| "learning_rate": 0.0008100388628774556, |
| "loss": 1.9075, |
| "step": 140000 |
| }, |
| { |
| "epoch": 0.5803171298877778, |
| "grad_norm": 0.9097606539726257, |
| "learning_rate": 0.0008093480900822987, |
| "loss": 1.9072, |
| "step": 140500 |
| }, |
| { |
| "epoch": 0.5823823154033927, |
| "grad_norm": 0.9234623312950134, |
| "learning_rate": 0.0008086573172871418, |
| "loss": 1.9053, |
| "step": 141000 |
| }, |
| { |
| "epoch": 0.5844475009190075, |
| "grad_norm": 0.9804132580757141, |
| "learning_rate": 0.000807966544491985, |
| "loss": 1.8995, |
| "step": 141500 |
| }, |
| { |
| "epoch": 0.5865126864346224, |
| "grad_norm": 0.8714466691017151, |
| "learning_rate": 0.0008072757716968281, |
| "loss": 1.9036, |
| "step": 142000 |
| }, |
| { |
| "epoch": 0.5885778719502373, |
| "grad_norm": 0.8345698714256287, |
| "learning_rate": 0.0008065849989016713, |
| "loss": 1.8996, |
| "step": 142500 |
| }, |
| { |
| "epoch": 0.5906430574658521, |
| "grad_norm": 0.8244128227233887, |
| "learning_rate": 0.0008058942261065144, |
| "loss": 1.9, |
| "step": 143000 |
| }, |
| { |
| "epoch": 0.5927082429814671, |
| "grad_norm": 0.8433374166488647, |
| "learning_rate": 0.0008052034533113576, |
| "loss": 1.8983, |
| "step": 143500 |
| }, |
| { |
| "epoch": 0.5947734284970819, |
| "grad_norm": 0.9245389699935913, |
| "learning_rate": 0.0008045126805162007, |
| "loss": 1.8985, |
| "step": 144000 |
| }, |
| { |
| "epoch": 0.5968386140126968, |
| "grad_norm": 0.8123714923858643, |
| "learning_rate": 0.0008038219077210439, |
| "loss": 1.8955, |
| "step": 144500 |
| }, |
| { |
| "epoch": 0.5989037995283116, |
| "grad_norm": 0.834078848361969, |
| "learning_rate": 0.000803131134925887, |
| "loss": 1.8985, |
| "step": 145000 |
| }, |
| { |
| "epoch": 0.6009689850439265, |
| "grad_norm": 0.8230902552604675, |
| "learning_rate": 0.0008024403621307301, |
| "loss": 1.8993, |
| "step": 145500 |
| }, |
| { |
| "epoch": 0.6030341705595413, |
| "grad_norm": 0.7516800761222839, |
| "learning_rate": 0.0008017495893355733, |
| "loss": 1.8954, |
| "step": 146000 |
| }, |
| { |
| "epoch": 0.6050993560751562, |
| "grad_norm": 0.8156014084815979, |
| "learning_rate": 0.0008010588165404164, |
| "loss": 1.8934, |
| "step": 146500 |
| }, |
| { |
| "epoch": 0.607164541590771, |
| "grad_norm": 0.8357443809509277, |
| "learning_rate": 0.0008003680437452597, |
| "loss": 1.8985, |
| "step": 147000 |
| }, |
| { |
| "epoch": 0.609229727106386, |
| "grad_norm": 0.8833040595054626, |
| "learning_rate": 0.0007996772709501028, |
| "loss": 1.8951, |
| "step": 147500 |
| }, |
| { |
| "epoch": 0.6112949126220009, |
| "grad_norm": 0.9052265286445618, |
| "learning_rate": 0.0007989864981549458, |
| "loss": 1.8919, |
| "step": 148000 |
| }, |
| { |
| "epoch": 0.6133600981376157, |
| "grad_norm": 0.7939783334732056, |
| "learning_rate": 0.000798295725359789, |
| "loss": 1.8982, |
| "step": 148500 |
| }, |
| { |
| "epoch": 0.6154252836532306, |
| "grad_norm": 0.8598021864891052, |
| "learning_rate": 0.0007976049525646321, |
| "loss": 1.8955, |
| "step": 149000 |
| }, |
| { |
| "epoch": 0.6174904691688454, |
| "grad_norm": 0.7993877530097961, |
| "learning_rate": 0.0007969141797694754, |
| "loss": 1.894, |
| "step": 149500 |
| }, |
| { |
| "epoch": 0.6195556546844603, |
| "grad_norm": 0.8220402002334595, |
| "learning_rate": 0.0007962234069743185, |
| "loss": 1.8963, |
| "step": 150000 |
| }, |
| { |
| "epoch": 0.6216208402000751, |
| "grad_norm": 0.9298192262649536, |
| "learning_rate": 0.0007955326341791617, |
| "loss": 1.8902, |
| "step": 150500 |
| }, |
| { |
| "epoch": 0.6236860257156901, |
| "grad_norm": 0.7912063002586365, |
| "learning_rate": 0.0007948418613840047, |
| "loss": 1.8918, |
| "step": 151000 |
| }, |
| { |
| "epoch": 0.625751211231305, |
| "grad_norm": 0.907156765460968, |
| "learning_rate": 0.0007941510885888478, |
| "loss": 1.8929, |
| "step": 151500 |
| }, |
| { |
| "epoch": 0.6278163967469198, |
| "grad_norm": 0.8619490265846252, |
| "learning_rate": 0.0007934603157936911, |
| "loss": 1.8881, |
| "step": 152000 |
| }, |
| { |
| "epoch": 0.6298815822625347, |
| "grad_norm": 0.8170045018196106, |
| "learning_rate": 0.0007927695429985342, |
| "loss": 1.8879, |
| "step": 152500 |
| }, |
| { |
| "epoch": 0.6319467677781495, |
| "grad_norm": 0.7822418212890625, |
| "learning_rate": 0.0007920787702033774, |
| "loss": 1.8956, |
| "step": 153000 |
| }, |
| { |
| "epoch": 0.6340119532937644, |
| "grad_norm": 0.878753125667572, |
| "learning_rate": 0.0007913879974082205, |
| "loss": 1.8914, |
| "step": 153500 |
| }, |
| { |
| "epoch": 0.6360771388093792, |
| "grad_norm": 0.8338424563407898, |
| "learning_rate": 0.0007906972246130636, |
| "loss": 1.8911, |
| "step": 154000 |
| }, |
| { |
| "epoch": 0.6381423243249941, |
| "grad_norm": 0.8565462827682495, |
| "learning_rate": 0.0007900064518179068, |
| "loss": 1.8881, |
| "step": 154500 |
| }, |
| { |
| "epoch": 0.640207509840609, |
| "grad_norm": 0.82133948802948, |
| "learning_rate": 0.0007893156790227499, |
| "loss": 1.8884, |
| "step": 155000 |
| }, |
| { |
| "epoch": 0.6422726953562239, |
| "grad_norm": 0.9342901706695557, |
| "learning_rate": 0.0007886249062275931, |
| "loss": 1.8865, |
| "step": 155500 |
| }, |
| { |
| "epoch": 0.6443378808718387, |
| "grad_norm": 0.8597960472106934, |
| "learning_rate": 0.0007879341334324362, |
| "loss": 1.8893, |
| "step": 156000 |
| }, |
| { |
| "epoch": 0.6464030663874536, |
| "grad_norm": 0.816633939743042, |
| "learning_rate": 0.0007872433606372795, |
| "loss": 1.8831, |
| "step": 156500 |
| }, |
| { |
| "epoch": 0.6484682519030684, |
| "grad_norm": 0.8402358293533325, |
| "learning_rate": 0.0007865525878421225, |
| "loss": 1.8844, |
| "step": 157000 |
| }, |
| { |
| "epoch": 0.6505334374186833, |
| "grad_norm": 0.8066496253013611, |
| "learning_rate": 0.0007858618150469657, |
| "loss": 1.8879, |
| "step": 157500 |
| }, |
| { |
| "epoch": 0.6525986229342982, |
| "grad_norm": 0.7855266332626343, |
| "learning_rate": 0.0007851710422518088, |
| "loss": 1.8883, |
| "step": 158000 |
| }, |
| { |
| "epoch": 0.654663808449913, |
| "grad_norm": 0.8272327184677124, |
| "learning_rate": 0.0007844802694566519, |
| "loss": 1.8823, |
| "step": 158500 |
| }, |
| { |
| "epoch": 0.656728993965528, |
| "grad_norm": 0.7959176898002625, |
| "learning_rate": 0.0007837894966614951, |
| "loss": 1.8862, |
| "step": 159000 |
| }, |
| { |
| "epoch": 0.6587941794811428, |
| "grad_norm": 0.8315137028694153, |
| "learning_rate": 0.0007830987238663383, |
| "loss": 1.8839, |
| "step": 159500 |
| }, |
| { |
| "epoch": 0.6608593649967577, |
| "grad_norm": 0.8382706046104431, |
| "learning_rate": 0.0007824079510711814, |
| "loss": 1.8838, |
| "step": 160000 |
| }, |
| { |
| "epoch": 0.6629245505123725, |
| "grad_norm": 0.7986578941345215, |
| "learning_rate": 0.0007817171782760245, |
| "loss": 1.8825, |
| "step": 160500 |
| }, |
| { |
| "epoch": 0.6649897360279874, |
| "grad_norm": 0.8452582359313965, |
| "learning_rate": 0.0007810264054808677, |
| "loss": 1.884, |
| "step": 161000 |
| }, |
| { |
| "epoch": 0.6670549215436022, |
| "grad_norm": 0.86090487241745, |
| "learning_rate": 0.0007803356326857108, |
| "loss": 1.8837, |
| "step": 161500 |
| }, |
| { |
| "epoch": 0.6691201070592171, |
| "grad_norm": 0.8608242273330688, |
| "learning_rate": 0.000779644859890554, |
| "loss": 1.8791, |
| "step": 162000 |
| }, |
| { |
| "epoch": 0.671185292574832, |
| "grad_norm": 0.8503440618515015, |
| "learning_rate": 0.0007789540870953972, |
| "loss": 1.8771, |
| "step": 162500 |
| }, |
| { |
| "epoch": 0.6732504780904469, |
| "grad_norm": 0.7802348136901855, |
| "learning_rate": 0.0007782633143002402, |
| "loss": 1.8848, |
| "step": 163000 |
| }, |
| { |
| "epoch": 0.6753156636060618, |
| "grad_norm": 0.9252862930297852, |
| "learning_rate": 0.0007775725415050834, |
| "loss": 1.8791, |
| "step": 163500 |
| }, |
| { |
| "epoch": 0.6773808491216766, |
| "grad_norm": 0.8752533793449402, |
| "learning_rate": 0.0007768817687099265, |
| "loss": 1.881, |
| "step": 164000 |
| }, |
| { |
| "epoch": 0.6794460346372915, |
| "grad_norm": 0.9123765826225281, |
| "learning_rate": 0.0007761909959147698, |
| "loss": 1.8809, |
| "step": 164500 |
| }, |
| { |
| "epoch": 0.6815112201529063, |
| "grad_norm": 0.8338991403579712, |
| "learning_rate": 0.0007755002231196129, |
| "loss": 1.8786, |
| "step": 165000 |
| }, |
| { |
| "epoch": 0.6835764056685212, |
| "grad_norm": 0.8287580609321594, |
| "learning_rate": 0.000774809450324456, |
| "loss": 1.8797, |
| "step": 165500 |
| }, |
| { |
| "epoch": 0.685641591184136, |
| "grad_norm": 0.8854800462722778, |
| "learning_rate": 0.0007741186775292991, |
| "loss": 1.8799, |
| "step": 166000 |
| }, |
| { |
| "epoch": 0.6877067766997509, |
| "grad_norm": 0.8071344494819641, |
| "learning_rate": 0.0007734279047341422, |
| "loss": 1.8738, |
| "step": 166500 |
| }, |
| { |
| "epoch": 0.6897719622153659, |
| "grad_norm": 0.8015414476394653, |
| "learning_rate": 0.0007727371319389855, |
| "loss": 1.8764, |
| "step": 167000 |
| }, |
| { |
| "epoch": 0.6918371477309807, |
| "grad_norm": 0.8209612965583801, |
| "learning_rate": 0.0007720463591438286, |
| "loss": 1.8802, |
| "step": 167500 |
| }, |
| { |
| "epoch": 0.6939023332465956, |
| "grad_norm": 0.8554903268814087, |
| "learning_rate": 0.0007713555863486718, |
| "loss": 1.8742, |
| "step": 168000 |
| }, |
| { |
| "epoch": 0.6959675187622104, |
| "grad_norm": 0.8712402582168579, |
| "learning_rate": 0.0007706648135535149, |
| "loss": 1.8761, |
| "step": 168500 |
| }, |
| { |
| "epoch": 0.6980327042778253, |
| "grad_norm": 0.8566715121269226, |
| "learning_rate": 0.0007699740407583579, |
| "loss": 1.8742, |
| "step": 169000 |
| }, |
| { |
| "epoch": 0.7000978897934401, |
| "grad_norm": 0.8078393340110779, |
| "learning_rate": 0.0007692832679632012, |
| "loss": 1.8723, |
| "step": 169500 |
| }, |
| { |
| "epoch": 0.702163075309055, |
| "grad_norm": 0.8996677994728088, |
| "learning_rate": 0.0007685924951680443, |
| "loss": 1.8729, |
| "step": 170000 |
| }, |
| { |
| "epoch": 0.7042282608246698, |
| "grad_norm": 0.8081231117248535, |
| "learning_rate": 0.0007679017223728875, |
| "loss": 1.8749, |
| "step": 170500 |
| }, |
| { |
| "epoch": 0.7062934463402848, |
| "grad_norm": 0.8042668104171753, |
| "learning_rate": 0.0007672109495777306, |
| "loss": 1.869, |
| "step": 171000 |
| }, |
| { |
| "epoch": 0.7083586318558996, |
| "grad_norm": 0.8018625378608704, |
| "learning_rate": 0.0007665201767825738, |
| "loss": 1.87, |
| "step": 171500 |
| }, |
| { |
| "epoch": 0.7104238173715145, |
| "grad_norm": 0.8580893874168396, |
| "learning_rate": 0.0007658294039874169, |
| "loss": 1.874, |
| "step": 172000 |
| }, |
| { |
| "epoch": 0.7124890028871294, |
| "grad_norm": 0.8910616636276245, |
| "learning_rate": 0.00076513863119226, |
| "loss": 1.8749, |
| "step": 172500 |
| }, |
| { |
| "epoch": 0.7145541884027442, |
| "grad_norm": 0.9036041498184204, |
| "learning_rate": 0.0007644478583971032, |
| "loss": 1.874, |
| "step": 173000 |
| }, |
| { |
| "epoch": 0.7166193739183591, |
| "grad_norm": 0.8568321466445923, |
| "learning_rate": 0.0007637570856019463, |
| "loss": 1.8683, |
| "step": 173500 |
| }, |
| { |
| "epoch": 0.7186845594339739, |
| "grad_norm": 0.7695671916007996, |
| "learning_rate": 0.0007630663128067895, |
| "loss": 1.8677, |
| "step": 174000 |
| }, |
| { |
| "epoch": 0.7207497449495888, |
| "grad_norm": 0.9139420390129089, |
| "learning_rate": 0.0007623755400116327, |
| "loss": 1.8694, |
| "step": 174500 |
| }, |
| { |
| "epoch": 0.7228149304652037, |
| "grad_norm": 0.8462100625038147, |
| "learning_rate": 0.0007616847672164757, |
| "loss": 1.8714, |
| "step": 175000 |
| }, |
| { |
| "epoch": 0.7248801159808186, |
| "grad_norm": 0.8447960615158081, |
| "learning_rate": 0.0007609939944213189, |
| "loss": 1.8669, |
| "step": 175500 |
| }, |
| { |
| "epoch": 0.7269453014964334, |
| "grad_norm": 0.810688316822052, |
| "learning_rate": 0.000760303221626162, |
| "loss": 1.8637, |
| "step": 176000 |
| }, |
| { |
| "epoch": 0.7290104870120483, |
| "grad_norm": 0.7696130871772766, |
| "learning_rate": 0.0007596124488310052, |
| "loss": 1.8658, |
| "step": 176500 |
| }, |
| { |
| "epoch": 0.7310756725276631, |
| "grad_norm": 0.8709802031517029, |
| "learning_rate": 0.0007589216760358484, |
| "loss": 1.8664, |
| "step": 177000 |
| }, |
| { |
| "epoch": 0.733140858043278, |
| "grad_norm": 0.8365340828895569, |
| "learning_rate": 0.0007582309032406916, |
| "loss": 1.8656, |
| "step": 177500 |
| }, |
| { |
| "epoch": 0.7352060435588929, |
| "grad_norm": 1.0223950147628784, |
| "learning_rate": 0.0007575401304455346, |
| "loss": 1.8694, |
| "step": 178000 |
| }, |
| { |
| "epoch": 0.7372712290745078, |
| "grad_norm": 0.829572319984436, |
| "learning_rate": 0.0007568493576503777, |
| "loss": 1.8708, |
| "step": 178500 |
| }, |
| { |
| "epoch": 0.7393364145901227, |
| "grad_norm": 0.80058354139328, |
| "learning_rate": 0.0007561585848552209, |
| "loss": 1.8687, |
| "step": 179000 |
| }, |
| { |
| "epoch": 0.7414016001057375, |
| "grad_norm": 0.8351449370384216, |
| "learning_rate": 0.0007554678120600641, |
| "loss": 1.8639, |
| "step": 179500 |
| }, |
| { |
| "epoch": 0.7434667856213524, |
| "grad_norm": 0.905135989189148, |
| "learning_rate": 0.0007547770392649073, |
| "loss": 1.8645, |
| "step": 180000 |
| }, |
| { |
| "epoch": 0.7455319711369672, |
| "grad_norm": 0.8477722406387329, |
| "learning_rate": 0.0007540862664697504, |
| "loss": 1.8615, |
| "step": 180500 |
| }, |
| { |
| "epoch": 0.7475971566525821, |
| "grad_norm": 0.9718809723854065, |
| "learning_rate": 0.0007533954936745935, |
| "loss": 1.8575, |
| "step": 181000 |
| }, |
| { |
| "epoch": 0.7496623421681969, |
| "grad_norm": 0.9097675681114197, |
| "learning_rate": 0.0007527047208794366, |
| "loss": 1.8632, |
| "step": 181500 |
| }, |
| { |
| "epoch": 0.7517275276838118, |
| "grad_norm": 0.9181948900222778, |
| "learning_rate": 0.0007520139480842798, |
| "loss": 1.864, |
| "step": 182000 |
| }, |
| { |
| "epoch": 0.7537927131994268, |
| "grad_norm": 0.8058791160583496, |
| "learning_rate": 0.000751323175289123, |
| "loss": 1.8585, |
| "step": 182500 |
| }, |
| { |
| "epoch": 0.7558578987150416, |
| "grad_norm": 0.818452775478363, |
| "learning_rate": 0.0007506324024939661, |
| "loss": 1.8621, |
| "step": 183000 |
| }, |
| { |
| "epoch": 0.7579230842306565, |
| "grad_norm": 0.8789253830909729, |
| "learning_rate": 0.0007499416296988093, |
| "loss": 1.8567, |
| "step": 183500 |
| }, |
| { |
| "epoch": 0.7599882697462713, |
| "grad_norm": 0.8578682541847229, |
| "learning_rate": 0.0007492508569036523, |
| "loss": 1.8615, |
| "step": 184000 |
| }, |
| { |
| "epoch": 0.7620534552618862, |
| "grad_norm": 0.8071935176849365, |
| "learning_rate": 0.0007485600841084956, |
| "loss": 1.8589, |
| "step": 184500 |
| }, |
| { |
| "epoch": 0.764118640777501, |
| "grad_norm": 0.8300654888153076, |
| "learning_rate": 0.0007478693113133387, |
| "loss": 1.8588, |
| "step": 185000 |
| }, |
| { |
| "epoch": 0.7661838262931159, |
| "grad_norm": 0.8175327181816101, |
| "learning_rate": 0.0007471785385181818, |
| "loss": 1.8617, |
| "step": 185500 |
| }, |
| { |
| "epoch": 0.7682490118087307, |
| "grad_norm": 0.8610235452651978, |
| "learning_rate": 0.000746487765723025, |
| "loss": 1.8561, |
| "step": 186000 |
| }, |
| { |
| "epoch": 0.7703141973243457, |
| "grad_norm": 0.857377290725708, |
| "learning_rate": 0.0007457969929278681, |
| "loss": 1.856, |
| "step": 186500 |
| }, |
| { |
| "epoch": 0.7723793828399605, |
| "grad_norm": 0.8002254366874695, |
| "learning_rate": 0.0007451062201327113, |
| "loss": 1.8609, |
| "step": 187000 |
| }, |
| { |
| "epoch": 0.7744445683555754, |
| "grad_norm": 0.9635730385780334, |
| "learning_rate": 0.0007444154473375544, |
| "loss": 1.8592, |
| "step": 187500 |
| }, |
| { |
| "epoch": 0.7765097538711903, |
| "grad_norm": 0.8251007795333862, |
| "learning_rate": 0.0007437246745423976, |
| "loss": 1.8623, |
| "step": 188000 |
| }, |
| { |
| "epoch": 0.7785749393868051, |
| "grad_norm": 0.8280484676361084, |
| "learning_rate": 0.0007430339017472407, |
| "loss": 1.855, |
| "step": 188500 |
| }, |
| { |
| "epoch": 0.78064012490242, |
| "grad_norm": 0.7635123133659363, |
| "learning_rate": 0.0007423431289520838, |
| "loss": 1.8559, |
| "step": 189000 |
| }, |
| { |
| "epoch": 0.7827053104180348, |
| "grad_norm": 0.7589561939239502, |
| "learning_rate": 0.0007416523561569271, |
| "loss": 1.8576, |
| "step": 189500 |
| }, |
| { |
| "epoch": 0.7847704959336497, |
| "grad_norm": 0.876846969127655, |
| "learning_rate": 0.0007409615833617701, |
| "loss": 1.8571, |
| "step": 190000 |
| }, |
| { |
| "epoch": 0.7868356814492646, |
| "grad_norm": 0.9164223074913025, |
| "learning_rate": 0.0007402708105666133, |
| "loss": 1.8542, |
| "step": 190500 |
| }, |
| { |
| "epoch": 0.7889008669648795, |
| "grad_norm": 0.9645445346832275, |
| "learning_rate": 0.0007395800377714564, |
| "loss": 1.8584, |
| "step": 191000 |
| }, |
| { |
| "epoch": 0.7909660524804943, |
| "grad_norm": 0.8780491948127747, |
| "learning_rate": 0.0007388892649762996, |
| "loss": 1.862, |
| "step": 191500 |
| }, |
| { |
| "epoch": 0.7930312379961092, |
| "grad_norm": 0.8747962117195129, |
| "learning_rate": 0.0007381984921811428, |
| "loss": 1.8544, |
| "step": 192000 |
| }, |
| { |
| "epoch": 0.795096423511724, |
| "grad_norm": 0.8750070333480835, |
| "learning_rate": 0.0007375077193859859, |
| "loss": 1.8562, |
| "step": 192500 |
| }, |
| { |
| "epoch": 0.7971616090273389, |
| "grad_norm": 0.7979694604873657, |
| "learning_rate": 0.000736816946590829, |
| "loss": 1.8559, |
| "step": 193000 |
| }, |
| { |
| "epoch": 0.7992267945429538, |
| "grad_norm": 0.8153182864189148, |
| "learning_rate": 0.0007361261737956721, |
| "loss": 1.8497, |
| "step": 193500 |
| }, |
| { |
| "epoch": 0.8012919800585686, |
| "grad_norm": 0.9015457034111023, |
| "learning_rate": 0.0007354354010005153, |
| "loss": 1.85, |
| "step": 194000 |
| }, |
| { |
| "epoch": 0.8033571655741836, |
| "grad_norm": 0.845658540725708, |
| "learning_rate": 0.0007347446282053585, |
| "loss": 1.853, |
| "step": 194500 |
| }, |
| { |
| "epoch": 0.8054223510897984, |
| "grad_norm": 0.839846670627594, |
| "learning_rate": 0.0007340538554102017, |
| "loss": 1.8511, |
| "step": 195000 |
| }, |
| { |
| "epoch": 0.8074875366054133, |
| "grad_norm": 0.8285427689552307, |
| "learning_rate": 0.0007333630826150448, |
| "loss": 1.8512, |
| "step": 195500 |
| }, |
| { |
| "epoch": 0.8095527221210281, |
| "grad_norm": 0.8489523530006409, |
| "learning_rate": 0.0007326723098198878, |
| "loss": 1.8538, |
| "step": 196000 |
| }, |
| { |
| "epoch": 0.811617907636643, |
| "grad_norm": 0.8332532644271851, |
| "learning_rate": 0.000731981537024731, |
| "loss": 1.8528, |
| "step": 196500 |
| }, |
| { |
| "epoch": 0.8136830931522578, |
| "grad_norm": 0.8185180425643921, |
| "learning_rate": 0.0007312907642295742, |
| "loss": 1.8472, |
| "step": 197000 |
| }, |
| { |
| "epoch": 0.8157482786678727, |
| "grad_norm": 0.8716513514518738, |
| "learning_rate": 0.0007305999914344174, |
| "loss": 1.8487, |
| "step": 197500 |
| }, |
| { |
| "epoch": 0.8178134641834875, |
| "grad_norm": 0.8488348126411438, |
| "learning_rate": 0.0007299092186392605, |
| "loss": 1.8487, |
| "step": 198000 |
| }, |
| { |
| "epoch": 0.8198786496991025, |
| "grad_norm": 0.7853295207023621, |
| "learning_rate": 0.0007292184458441037, |
| "loss": 1.8467, |
| "step": 198500 |
| }, |
| { |
| "epoch": 0.8219438352147174, |
| "grad_norm": 0.8092118501663208, |
| "learning_rate": 0.0007285276730489468, |
| "loss": 1.8454, |
| "step": 199000 |
| }, |
| { |
| "epoch": 0.8240090207303322, |
| "grad_norm": 0.8414338231086731, |
| "learning_rate": 0.0007278369002537899, |
| "loss": 1.8456, |
| "step": 199500 |
| }, |
| { |
| "epoch": 0.8260742062459471, |
| "grad_norm": 0.7936431765556335, |
| "learning_rate": 0.0007271461274586331, |
| "loss": 1.8455, |
| "step": 200000 |
| }, |
| { |
| "epoch": 0.8281393917615619, |
| "grad_norm": 0.8624149560928345, |
| "learning_rate": 0.0007264553546634762, |
| "loss": 1.8417, |
| "step": 200500 |
| }, |
| { |
| "epoch": 0.8302045772771768, |
| "grad_norm": 0.7787384986877441, |
| "learning_rate": 0.0007257645818683194, |
| "loss": 1.8469, |
| "step": 201000 |
| }, |
| { |
| "epoch": 0.8322697627927916, |
| "grad_norm": 0.7881982922554016, |
| "learning_rate": 0.0007250738090731625, |
| "loss": 1.8469, |
| "step": 201500 |
| }, |
| { |
| "epoch": 0.8343349483084066, |
| "grad_norm": 0.8017438650131226, |
| "learning_rate": 0.0007243830362780058, |
| "loss": 1.8477, |
| "step": 202000 |
| }, |
| { |
| "epoch": 0.8364001338240215, |
| "grad_norm": 0.8839012980461121, |
| "learning_rate": 0.0007236922634828488, |
| "loss": 1.8433, |
| "step": 202500 |
| }, |
| { |
| "epoch": 0.8384653193396363, |
| "grad_norm": 0.8032566905021667, |
| "learning_rate": 0.0007230014906876919, |
| "loss": 1.845, |
| "step": 203000 |
| }, |
| { |
| "epoch": 0.8405305048552512, |
| "grad_norm": 0.9038089513778687, |
| "learning_rate": 0.0007223107178925351, |
| "loss": 1.8425, |
| "step": 203500 |
| }, |
| { |
| "epoch": 0.842595690370866, |
| "grad_norm": 0.9411084651947021, |
| "learning_rate": 0.0007216199450973782, |
| "loss": 1.8414, |
| "step": 204000 |
| }, |
| { |
| "epoch": 0.8446608758864809, |
| "grad_norm": 0.8129530549049377, |
| "learning_rate": 0.0007209291723022215, |
| "loss": 1.8413, |
| "step": 204500 |
| }, |
| { |
| "epoch": 0.8467260614020957, |
| "grad_norm": 0.7938794493675232, |
| "learning_rate": 0.0007202383995070646, |
| "loss": 1.8419, |
| "step": 205000 |
| }, |
| { |
| "epoch": 0.8487912469177106, |
| "grad_norm": 0.9388673305511475, |
| "learning_rate": 0.0007195476267119077, |
| "loss": 1.8433, |
| "step": 205500 |
| }, |
| { |
| "epoch": 0.8508564324333255, |
| "grad_norm": 0.9263845682144165, |
| "learning_rate": 0.0007188568539167508, |
| "loss": 1.8407, |
| "step": 206000 |
| }, |
| { |
| "epoch": 0.8529216179489404, |
| "grad_norm": 0.7958715558052063, |
| "learning_rate": 0.0007181660811215939, |
| "loss": 1.8464, |
| "step": 206500 |
| }, |
| { |
| "epoch": 0.8549868034645552, |
| "grad_norm": 0.7796356081962585, |
| "learning_rate": 0.0007174753083264372, |
| "loss": 1.8444, |
| "step": 207000 |
| }, |
| { |
| "epoch": 0.8570519889801701, |
| "grad_norm": 0.8039528727531433, |
| "learning_rate": 0.0007167845355312803, |
| "loss": 1.8474, |
| "step": 207500 |
| }, |
| { |
| "epoch": 0.859117174495785, |
| "grad_norm": 0.8445290327072144, |
| "learning_rate": 0.0007160937627361235, |
| "loss": 1.8405, |
| "step": 208000 |
| }, |
| { |
| "epoch": 0.8611823600113998, |
| "grad_norm": 0.8098761439323425, |
| "learning_rate": 0.0007154029899409665, |
| "loss": 1.8395, |
| "step": 208500 |
| }, |
| { |
| "epoch": 0.8632475455270147, |
| "grad_norm": 0.8343963027000427, |
| "learning_rate": 0.0007147122171458097, |
| "loss": 1.8362, |
| "step": 209000 |
| }, |
| { |
| "epoch": 0.8653127310426295, |
| "grad_norm": 0.8452053666114807, |
| "learning_rate": 0.0007140214443506529, |
| "loss": 1.8431, |
| "step": 209500 |
| }, |
| { |
| "epoch": 0.8673779165582445, |
| "grad_norm": 0.8454539179801941, |
| "learning_rate": 0.000713330671555496, |
| "loss": 1.8327, |
| "step": 210000 |
| }, |
| { |
| "epoch": 0.8694431020738593, |
| "grad_norm": 0.7599641680717468, |
| "learning_rate": 0.0007126398987603392, |
| "loss": 1.8335, |
| "step": 210500 |
| }, |
| { |
| "epoch": 0.8715082875894742, |
| "grad_norm": 0.8617073893547058, |
| "learning_rate": 0.0007119491259651823, |
| "loss": 1.8381, |
| "step": 211000 |
| }, |
| { |
| "epoch": 0.873573473105089, |
| "grad_norm": 0.8182563781738281, |
| "learning_rate": 0.0007112583531700254, |
| "loss": 1.8359, |
| "step": 211500 |
| }, |
| { |
| "epoch": 0.8756386586207039, |
| "grad_norm": 0.8188121318817139, |
| "learning_rate": 0.0007105675803748686, |
| "loss": 1.8362, |
| "step": 212000 |
| }, |
| { |
| "epoch": 0.8777038441363187, |
| "grad_norm": 0.7888435125350952, |
| "learning_rate": 0.0007098768075797118, |
| "loss": 1.8389, |
| "step": 212500 |
| }, |
| { |
| "epoch": 0.8797690296519336, |
| "grad_norm": 0.8186080455780029, |
| "learning_rate": 0.0007091860347845549, |
| "loss": 1.8356, |
| "step": 213000 |
| }, |
| { |
| "epoch": 0.8818342151675485, |
| "grad_norm": 0.7884934544563293, |
| "learning_rate": 0.000708495261989398, |
| "loss": 1.8356, |
| "step": 213500 |
| }, |
| { |
| "epoch": 0.8838994006831634, |
| "grad_norm": 0.8210222125053406, |
| "learning_rate": 0.0007078044891942412, |
| "loss": 1.8348, |
| "step": 214000 |
| }, |
| { |
| "epoch": 0.8859645861987783, |
| "grad_norm": 0.868903398513794, |
| "learning_rate": 0.0007071137163990843, |
| "loss": 1.8349, |
| "step": 214500 |
| }, |
| { |
| "epoch": 0.8880297717143931, |
| "grad_norm": 0.8679877519607544, |
| "learning_rate": 0.0007064229436039275, |
| "loss": 1.834, |
| "step": 215000 |
| }, |
| { |
| "epoch": 0.890094957230008, |
| "grad_norm": 0.8414639234542847, |
| "learning_rate": 0.0007057321708087706, |
| "loss": 1.8348, |
| "step": 215500 |
| }, |
| { |
| "epoch": 0.8921601427456228, |
| "grad_norm": 0.8036888241767883, |
| "learning_rate": 0.0007050413980136138, |
| "loss": 1.8331, |
| "step": 216000 |
| }, |
| { |
| "epoch": 0.8942253282612377, |
| "grad_norm": 0.833270251750946, |
| "learning_rate": 0.0007043506252184569, |
| "loss": 1.8317, |
| "step": 216500 |
| }, |
| { |
| "epoch": 0.8962905137768525, |
| "grad_norm": 0.7350865602493286, |
| "learning_rate": 0.0007036598524233001, |
| "loss": 1.8305, |
| "step": 217000 |
| }, |
| { |
| "epoch": 0.8983556992924674, |
| "grad_norm": 0.8501140475273132, |
| "learning_rate": 0.0007029690796281432, |
| "loss": 1.8278, |
| "step": 217500 |
| }, |
| { |
| "epoch": 0.9004208848080824, |
| "grad_norm": 0.82123202085495, |
| "learning_rate": 0.0007022783068329863, |
| "loss": 1.8285, |
| "step": 218000 |
| }, |
| { |
| "epoch": 0.9024860703236972, |
| "grad_norm": 0.8079880475997925, |
| "learning_rate": 0.0007015875340378295, |
| "loss": 1.833, |
| "step": 218500 |
| }, |
| { |
| "epoch": 0.9045512558393121, |
| "grad_norm": 0.7871448397636414, |
| "learning_rate": 0.0007008967612426726, |
| "loss": 1.8338, |
| "step": 219000 |
| }, |
| { |
| "epoch": 0.9066164413549269, |
| "grad_norm": 0.8511725664138794, |
| "learning_rate": 0.0007002059884475159, |
| "loss": 1.8325, |
| "step": 219500 |
| }, |
| { |
| "epoch": 0.9086816268705418, |
| "grad_norm": 0.9022111296653748, |
| "learning_rate": 0.000699515215652359, |
| "loss": 1.8292, |
| "step": 220000 |
| }, |
| { |
| "epoch": 0.9107468123861566, |
| "grad_norm": 0.8371003270149231, |
| "learning_rate": 0.000698824442857202, |
| "loss": 1.8287, |
| "step": 220500 |
| }, |
| { |
| "epoch": 0.9128119979017715, |
| "grad_norm": 0.824407160282135, |
| "learning_rate": 0.0006981336700620452, |
| "loss": 1.8315, |
| "step": 221000 |
| }, |
| { |
| "epoch": 0.9148771834173863, |
| "grad_norm": 0.847411572933197, |
| "learning_rate": 0.0006974428972668883, |
| "loss": 1.8333, |
| "step": 221500 |
| }, |
| { |
| "epoch": 0.9169423689330013, |
| "grad_norm": 0.8592170476913452, |
| "learning_rate": 0.0006967521244717316, |
| "loss": 1.8234, |
| "step": 222000 |
| }, |
| { |
| "epoch": 0.9190075544486161, |
| "grad_norm": 0.7863643169403076, |
| "learning_rate": 0.0006960613516765747, |
| "loss": 1.8364, |
| "step": 222500 |
| }, |
| { |
| "epoch": 0.921072739964231, |
| "grad_norm": 0.7801703214645386, |
| "learning_rate": 0.0006953705788814179, |
| "loss": 1.8243, |
| "step": 223000 |
| }, |
| { |
| "epoch": 0.9231379254798459, |
| "grad_norm": 0.8160432577133179, |
| "learning_rate": 0.0006946798060862609, |
| "loss": 1.8236, |
| "step": 223500 |
| }, |
| { |
| "epoch": 0.9252031109954607, |
| "grad_norm": 0.825862467288971, |
| "learning_rate": 0.000693989033291104, |
| "loss": 1.827, |
| "step": 224000 |
| }, |
| { |
| "epoch": 0.9272682965110756, |
| "grad_norm": 0.8575713634490967, |
| "learning_rate": 0.0006932982604959473, |
| "loss": 1.8225, |
| "step": 224500 |
| }, |
| { |
| "epoch": 0.9293334820266904, |
| "grad_norm": 0.7798600792884827, |
| "learning_rate": 0.0006926074877007904, |
| "loss": 1.8276, |
| "step": 225000 |
| }, |
| { |
| "epoch": 0.9313986675423054, |
| "grad_norm": 0.8042396903038025, |
| "learning_rate": 0.0006919167149056336, |
| "loss": 1.8274, |
| "step": 225500 |
| }, |
| { |
| "epoch": 0.9334638530579202, |
| "grad_norm": 0.8900014758110046, |
| "learning_rate": 0.0006912259421104767, |
| "loss": 1.8242, |
| "step": 226000 |
| }, |
| { |
| "epoch": 0.9355290385735351, |
| "grad_norm": 0.8842340111732483, |
| "learning_rate": 0.0006905351693153198, |
| "loss": 1.8222, |
| "step": 226500 |
| }, |
| { |
| "epoch": 0.9375942240891499, |
| "grad_norm": 0.8076005578041077, |
| "learning_rate": 0.000689844396520163, |
| "loss": 1.824, |
| "step": 227000 |
| }, |
| { |
| "epoch": 0.9396594096047648, |
| "grad_norm": 0.8478308916091919, |
| "learning_rate": 0.0006891536237250061, |
| "loss": 1.8213, |
| "step": 227500 |
| }, |
| { |
| "epoch": 0.9417245951203796, |
| "grad_norm": 0.8478752374649048, |
| "learning_rate": 0.0006884628509298493, |
| "loss": 1.8271, |
| "step": 228000 |
| }, |
| { |
| "epoch": 0.9437897806359945, |
| "grad_norm": 0.8306804299354553, |
| "learning_rate": 0.0006877720781346924, |
| "loss": 1.8257, |
| "step": 228500 |
| }, |
| { |
| "epoch": 0.9458549661516094, |
| "grad_norm": 0.8503381013870239, |
| "learning_rate": 0.0006870813053395356, |
| "loss": 1.8196, |
| "step": 229000 |
| }, |
| { |
| "epoch": 0.9479201516672243, |
| "grad_norm": 0.7972338795661926, |
| "learning_rate": 0.0006863905325443787, |
| "loss": 1.8219, |
| "step": 229500 |
| }, |
| { |
| "epoch": 0.9499853371828392, |
| "grad_norm": 0.8305501341819763, |
| "learning_rate": 0.0006856997597492219, |
| "loss": 1.8204, |
| "step": 230000 |
| }, |
| { |
| "epoch": 0.952050522698454, |
| "grad_norm": 0.8877650499343872, |
| "learning_rate": 0.000685008986954065, |
| "loss": 1.8227, |
| "step": 230500 |
| }, |
| { |
| "epoch": 0.9541157082140689, |
| "grad_norm": 0.8762148022651672, |
| "learning_rate": 0.0006843182141589081, |
| "loss": 1.8224, |
| "step": 231000 |
| }, |
| { |
| "epoch": 0.9561808937296837, |
| "grad_norm": 0.7984791398048401, |
| "learning_rate": 0.0006836274413637513, |
| "loss": 1.8188, |
| "step": 231500 |
| }, |
| { |
| "epoch": 0.9582460792452986, |
| "grad_norm": 0.8119187355041504, |
| "learning_rate": 0.0006829366685685945, |
| "loss": 1.8212, |
| "step": 232000 |
| }, |
| { |
| "epoch": 0.9603112647609134, |
| "grad_norm": 0.8037796020507812, |
| "learning_rate": 0.0006822458957734376, |
| "loss": 1.8187, |
| "step": 232500 |
| }, |
| { |
| "epoch": 0.9623764502765283, |
| "grad_norm": 0.8950905799865723, |
| "learning_rate": 0.0006815551229782807, |
| "loss": 1.816, |
| "step": 233000 |
| }, |
| { |
| "epoch": 0.9644416357921433, |
| "grad_norm": 0.8347873091697693, |
| "learning_rate": 0.0006808643501831239, |
| "loss": 1.8218, |
| "step": 233500 |
| }, |
| { |
| "epoch": 0.9665068213077581, |
| "grad_norm": 0.8473377823829651, |
| "learning_rate": 0.000680173577387967, |
| "loss": 1.8195, |
| "step": 234000 |
| }, |
| { |
| "epoch": 0.968572006823373, |
| "grad_norm": 0.7937746644020081, |
| "learning_rate": 0.0006794828045928102, |
| "loss": 1.8171, |
| "step": 234500 |
| }, |
| { |
| "epoch": 0.9706371923389878, |
| "grad_norm": 0.7679367065429688, |
| "learning_rate": 0.0006787920317976534, |
| "loss": 1.8146, |
| "step": 235000 |
| }, |
| { |
| "epoch": 0.9727023778546027, |
| "grad_norm": 0.8515623807907104, |
| "learning_rate": 0.0006781012590024964, |
| "loss": 1.8173, |
| "step": 235500 |
| }, |
| { |
| "epoch": 0.9747675633702175, |
| "grad_norm": 0.8188038468360901, |
| "learning_rate": 0.0006774104862073396, |
| "loss": 1.817, |
| "step": 236000 |
| }, |
| { |
| "epoch": 0.9768327488858324, |
| "grad_norm": 0.7979288697242737, |
| "learning_rate": 0.0006767197134121827, |
| "loss": 1.819, |
| "step": 236500 |
| }, |
| { |
| "epoch": 0.9788979344014472, |
| "grad_norm": 0.8797492980957031, |
| "learning_rate": 0.000676028940617026, |
| "loss": 1.8148, |
| "step": 237000 |
| }, |
| { |
| "epoch": 0.9809631199170622, |
| "grad_norm": 0.8223576545715332, |
| "learning_rate": 0.0006753381678218691, |
| "loss": 1.815, |
| "step": 237500 |
| }, |
| { |
| "epoch": 0.983028305432677, |
| "grad_norm": 0.8249248266220093, |
| "learning_rate": 0.0006746473950267122, |
| "loss": 1.8174, |
| "step": 238000 |
| }, |
| { |
| "epoch": 0.9850934909482919, |
| "grad_norm": 0.8677356243133545, |
| "learning_rate": 0.0006739566222315553, |
| "loss": 1.8209, |
| "step": 238500 |
| }, |
| { |
| "epoch": 0.9871586764639068, |
| "grad_norm": 0.829744815826416, |
| "learning_rate": 0.0006732658494363984, |
| "loss": 1.8132, |
| "step": 239000 |
| }, |
| { |
| "epoch": 0.9892238619795216, |
| "grad_norm": 0.8238321542739868, |
| "learning_rate": 0.0006725750766412417, |
| "loss": 1.8104, |
| "step": 239500 |
| }, |
| { |
| "epoch": 0.9912890474951365, |
| "grad_norm": 0.8242679834365845, |
| "learning_rate": 0.0006718843038460848, |
| "loss": 1.8129, |
| "step": 240000 |
| }, |
| { |
| "epoch": 0.9933542330107513, |
| "grad_norm": 0.7887668013572693, |
| "learning_rate": 0.000671193531050928, |
| "loss": 1.8156, |
| "step": 240500 |
| }, |
| { |
| "epoch": 0.9954194185263662, |
| "grad_norm": 0.7950047850608826, |
| "learning_rate": 0.0006705027582557711, |
| "loss": 1.8158, |
| "step": 241000 |
| }, |
| { |
| "epoch": 0.9974846040419811, |
| "grad_norm": 0.7892596125602722, |
| "learning_rate": 0.0006698119854606141, |
| "loss": 1.8122, |
| "step": 241500 |
| }, |
| { |
| "epoch": 0.999549789557596, |
| "grad_norm": 0.8291964530944824, |
| "learning_rate": 0.0006691212126654574, |
| "loss": 1.8176, |
| "step": 242000 |
| }, |
| { |
| "epoch": 1.0016149750732108, |
| "grad_norm": 0.7552099227905273, |
| "learning_rate": 0.0006684304398703005, |
| "loss": 1.8117, |
| "step": 242500 |
| }, |
| { |
| "epoch": 1.0036801605888257, |
| "grad_norm": 0.9016017317771912, |
| "learning_rate": 0.0006677396670751437, |
| "loss": 1.8141, |
| "step": 243000 |
| }, |
| { |
| "epoch": 1.0057453461044406, |
| "grad_norm": 0.7623195648193359, |
| "learning_rate": 0.0006670488942799868, |
| "loss": 1.8104, |
| "step": 243500 |
| }, |
| { |
| "epoch": 1.0078105316200554, |
| "grad_norm": 0.8782749772071838, |
| "learning_rate": 0.00066635812148483, |
| "loss": 1.813, |
| "step": 244000 |
| }, |
| { |
| "epoch": 1.0098757171356703, |
| "grad_norm": 0.800456702709198, |
| "learning_rate": 0.000665667348689673, |
| "loss": 1.8105, |
| "step": 244500 |
| }, |
| { |
| "epoch": 1.0119409026512851, |
| "grad_norm": 0.855076014995575, |
| "learning_rate": 0.0006649765758945162, |
| "loss": 1.8081, |
| "step": 245000 |
| }, |
| { |
| "epoch": 1.0140060881669, |
| "grad_norm": 0.8036173582077026, |
| "learning_rate": 0.0006642858030993594, |
| "loss": 1.8112, |
| "step": 245500 |
| }, |
| { |
| "epoch": 1.0160712736825148, |
| "grad_norm": 0.8001554012298584, |
| "learning_rate": 0.0006635950303042025, |
| "loss": 1.8079, |
| "step": 246000 |
| }, |
| { |
| "epoch": 1.0181364591981297, |
| "grad_norm": 0.8144285082817078, |
| "learning_rate": 0.0006629042575090457, |
| "loss": 1.8076, |
| "step": 246500 |
| }, |
| { |
| "epoch": 1.0202016447137445, |
| "grad_norm": 0.8857467174530029, |
| "learning_rate": 0.0006622134847138889, |
| "loss": 1.8108, |
| "step": 247000 |
| }, |
| { |
| "epoch": 1.0222668302293596, |
| "grad_norm": 0.7909874320030212, |
| "learning_rate": 0.000661522711918732, |
| "loss": 1.8068, |
| "step": 247500 |
| }, |
| { |
| "epoch": 1.0243320157449745, |
| "grad_norm": 0.8089008331298828, |
| "learning_rate": 0.0006608319391235751, |
| "loss": 1.8093, |
| "step": 248000 |
| }, |
| { |
| "epoch": 1.0263972012605893, |
| "grad_norm": 0.8550245761871338, |
| "learning_rate": 0.0006601411663284182, |
| "loss": 1.8087, |
| "step": 248500 |
| }, |
| { |
| "epoch": 1.0284623867762042, |
| "grad_norm": 0.8594583868980408, |
| "learning_rate": 0.0006594503935332614, |
| "loss": 1.8062, |
| "step": 249000 |
| }, |
| { |
| "epoch": 1.030527572291819, |
| "grad_norm": 0.8355042338371277, |
| "learning_rate": 0.0006587596207381046, |
| "loss": 1.809, |
| "step": 249500 |
| }, |
| { |
| "epoch": 1.0325927578074339, |
| "grad_norm": 0.8276521563529968, |
| "learning_rate": 0.0006580688479429478, |
| "loss": 1.8138, |
| "step": 250000 |
| }, |
| { |
| "epoch": 1.0346579433230487, |
| "grad_norm": 0.8123018741607666, |
| "learning_rate": 0.0006573780751477908, |
| "loss": 1.8017, |
| "step": 250500 |
| }, |
| { |
| "epoch": 1.0367231288386636, |
| "grad_norm": 0.7968121767044067, |
| "learning_rate": 0.000656687302352634, |
| "loss": 1.8076, |
| "step": 251000 |
| }, |
| { |
| "epoch": 1.0387883143542784, |
| "grad_norm": 0.941233217716217, |
| "learning_rate": 0.0006559965295574771, |
| "loss": 1.8059, |
| "step": 251500 |
| }, |
| { |
| "epoch": 1.0408534998698933, |
| "grad_norm": 0.8153935074806213, |
| "learning_rate": 0.0006553057567623202, |
| "loss": 1.8065, |
| "step": 252000 |
| }, |
| { |
| "epoch": 1.0429186853855081, |
| "grad_norm": 0.7739303112030029, |
| "learning_rate": 0.0006546149839671635, |
| "loss": 1.8043, |
| "step": 252500 |
| }, |
| { |
| "epoch": 1.044983870901123, |
| "grad_norm": 0.8117313385009766, |
| "learning_rate": 0.0006539242111720066, |
| "loss": 1.8004, |
| "step": 253000 |
| }, |
| { |
| "epoch": 1.0470490564167378, |
| "grad_norm": 0.8029870390892029, |
| "learning_rate": 0.0006532334383768497, |
| "loss": 1.8046, |
| "step": 253500 |
| }, |
| { |
| "epoch": 1.0491142419323527, |
| "grad_norm": 0.8172849416732788, |
| "learning_rate": 0.0006525426655816928, |
| "loss": 1.8059, |
| "step": 254000 |
| }, |
| { |
| "epoch": 1.0511794274479676, |
| "grad_norm": 0.7874976992607117, |
| "learning_rate": 0.0006518518927865361, |
| "loss": 1.8054, |
| "step": 254500 |
| }, |
| { |
| "epoch": 1.0532446129635824, |
| "grad_norm": 0.7877236604690552, |
| "learning_rate": 0.0006511611199913792, |
| "loss": 1.8026, |
| "step": 255000 |
| }, |
| { |
| "epoch": 1.0553097984791975, |
| "grad_norm": 0.8584260940551758, |
| "learning_rate": 0.0006504703471962223, |
| "loss": 1.8007, |
| "step": 255500 |
| }, |
| { |
| "epoch": 1.0573749839948123, |
| "grad_norm": 0.8094419240951538, |
| "learning_rate": 0.0006497795744010655, |
| "loss": 1.7978, |
| "step": 256000 |
| }, |
| { |
| "epoch": 1.0594401695104272, |
| "grad_norm": 0.7890325784683228, |
| "learning_rate": 0.0006490888016059086, |
| "loss": 1.8, |
| "step": 256500 |
| }, |
| { |
| "epoch": 1.061505355026042, |
| "grad_norm": 0.8764976263046265, |
| "learning_rate": 0.0006483980288107518, |
| "loss": 1.8046, |
| "step": 257000 |
| }, |
| { |
| "epoch": 1.063570540541657, |
| "grad_norm": 0.8675107359886169, |
| "learning_rate": 0.0006477072560155949, |
| "loss": 1.8036, |
| "step": 257500 |
| }, |
| { |
| "epoch": 1.0656357260572717, |
| "grad_norm": 0.7634553909301758, |
| "learning_rate": 0.0006470164832204381, |
| "loss": 1.8018, |
| "step": 258000 |
| }, |
| { |
| "epoch": 1.0677009115728866, |
| "grad_norm": 0.7381558418273926, |
| "learning_rate": 0.0006463257104252812, |
| "loss": 1.7998, |
| "step": 258500 |
| }, |
| { |
| "epoch": 1.0697660970885015, |
| "grad_norm": 0.9076355695724487, |
| "learning_rate": 0.0006456349376301243, |
| "loss": 1.7948, |
| "step": 259000 |
| }, |
| { |
| "epoch": 1.0718312826041163, |
| "grad_norm": 0.9148507118225098, |
| "learning_rate": 0.0006449441648349676, |
| "loss": 1.8018, |
| "step": 259500 |
| }, |
| { |
| "epoch": 1.0738964681197312, |
| "grad_norm": 0.7980071902275085, |
| "learning_rate": 0.0006442533920398106, |
| "loss": 1.8023, |
| "step": 260000 |
| }, |
| { |
| "epoch": 1.075961653635346, |
| "grad_norm": 0.8765013217926025, |
| "learning_rate": 0.0006435626192446538, |
| "loss": 1.8003, |
| "step": 260500 |
| }, |
| { |
| "epoch": 1.0780268391509609, |
| "grad_norm": 0.8422530293464661, |
| "learning_rate": 0.0006428718464494969, |
| "loss": 1.802, |
| "step": 261000 |
| }, |
| { |
| "epoch": 1.0800920246665757, |
| "grad_norm": 0.8139374852180481, |
| "learning_rate": 0.0006421810736543401, |
| "loss": 1.8038, |
| "step": 261500 |
| }, |
| { |
| "epoch": 1.0821572101821906, |
| "grad_norm": 0.7891634702682495, |
| "learning_rate": 0.0006414903008591833, |
| "loss": 1.8047, |
| "step": 262000 |
| }, |
| { |
| "epoch": 1.0842223956978054, |
| "grad_norm": 0.822912335395813, |
| "learning_rate": 0.0006407995280640264, |
| "loss": 1.7979, |
| "step": 262500 |
| }, |
| { |
| "epoch": 1.0862875812134205, |
| "grad_norm": 0.8557060360908508, |
| "learning_rate": 0.0006401087552688695, |
| "loss": 1.7974, |
| "step": 263000 |
| }, |
| { |
| "epoch": 1.0883527667290354, |
| "grad_norm": 0.8948346972465515, |
| "learning_rate": 0.0006394179824737126, |
| "loss": 1.8004, |
| "step": 263500 |
| }, |
| { |
| "epoch": 1.0904179522446502, |
| "grad_norm": 0.795589029788971, |
| "learning_rate": 0.0006387272096785558, |
| "loss": 1.7988, |
| "step": 264000 |
| }, |
| { |
| "epoch": 1.092483137760265, |
| "grad_norm": 0.7854675054550171, |
| "learning_rate": 0.000638036436883399, |
| "loss": 1.7999, |
| "step": 264500 |
| }, |
| { |
| "epoch": 1.09454832327588, |
| "grad_norm": 0.8320429921150208, |
| "learning_rate": 0.0006373456640882422, |
| "loss": 1.7988, |
| "step": 265000 |
| }, |
| { |
| "epoch": 1.0966135087914948, |
| "grad_norm": 0.7923471331596375, |
| "learning_rate": 0.0006366548912930853, |
| "loss": 1.7947, |
| "step": 265500 |
| }, |
| { |
| "epoch": 1.0986786943071096, |
| "grad_norm": 0.7947016358375549, |
| "learning_rate": 0.0006359641184979283, |
| "loss": 1.7949, |
| "step": 266000 |
| }, |
| { |
| "epoch": 1.1007438798227245, |
| "grad_norm": 0.8632909655570984, |
| "learning_rate": 0.0006352733457027715, |
| "loss": 1.7917, |
| "step": 266500 |
| }, |
| { |
| "epoch": 1.1028090653383393, |
| "grad_norm": 0.8080165982246399, |
| "learning_rate": 0.0006345825729076146, |
| "loss": 1.7925, |
| "step": 267000 |
| }, |
| { |
| "epoch": 1.1048742508539542, |
| "grad_norm": 0.8370658159255981, |
| "learning_rate": 0.0006338918001124579, |
| "loss": 1.7926, |
| "step": 267500 |
| }, |
| { |
| "epoch": 1.106939436369569, |
| "grad_norm": 0.7986084818840027, |
| "learning_rate": 0.000633201027317301, |
| "loss": 1.7918, |
| "step": 268000 |
| }, |
| { |
| "epoch": 1.109004621885184, |
| "grad_norm": 0.8623395562171936, |
| "learning_rate": 0.0006325102545221442, |
| "loss": 1.7916, |
| "step": 268500 |
| }, |
| { |
| "epoch": 1.1110698074007987, |
| "grad_norm": 0.8417394757270813, |
| "learning_rate": 0.0006318194817269872, |
| "loss": 1.7924, |
| "step": 269000 |
| }, |
| { |
| "epoch": 1.1131349929164136, |
| "grad_norm": 0.8161811232566833, |
| "learning_rate": 0.0006311287089318303, |
| "loss": 1.7914, |
| "step": 269500 |
| }, |
| { |
| "epoch": 1.1152001784320285, |
| "grad_norm": 0.8812907338142395, |
| "learning_rate": 0.0006304379361366736, |
| "loss": 1.7906, |
| "step": 270000 |
| }, |
| { |
| "epoch": 1.1172653639476433, |
| "grad_norm": 0.7669122219085693, |
| "learning_rate": 0.0006297471633415167, |
| "loss": 1.7917, |
| "step": 270500 |
| }, |
| { |
| "epoch": 1.1193305494632584, |
| "grad_norm": 0.792958676815033, |
| "learning_rate": 0.0006290563905463599, |
| "loss": 1.7916, |
| "step": 271000 |
| }, |
| { |
| "epoch": 1.1213957349788732, |
| "grad_norm": 0.8431819677352905, |
| "learning_rate": 0.000628365617751203, |
| "loss": 1.7913, |
| "step": 271500 |
| }, |
| { |
| "epoch": 1.123460920494488, |
| "grad_norm": 0.8096106648445129, |
| "learning_rate": 0.0006276748449560462, |
| "loss": 1.7937, |
| "step": 272000 |
| }, |
| { |
| "epoch": 1.125526106010103, |
| "grad_norm": 0.8288501501083374, |
| "learning_rate": 0.0006269840721608893, |
| "loss": 1.7928, |
| "step": 272500 |
| }, |
| { |
| "epoch": 1.1275912915257178, |
| "grad_norm": 0.8212178349494934, |
| "learning_rate": 0.0006262932993657324, |
| "loss": 1.7892, |
| "step": 273000 |
| }, |
| { |
| "epoch": 1.1296564770413327, |
| "grad_norm": 0.7889783978462219, |
| "learning_rate": 0.0006256025265705756, |
| "loss": 1.7865, |
| "step": 273500 |
| }, |
| { |
| "epoch": 1.1317216625569475, |
| "grad_norm": 0.8126891255378723, |
| "learning_rate": 0.0006249117537754187, |
| "loss": 1.7884, |
| "step": 274000 |
| }, |
| { |
| "epoch": 1.1337868480725624, |
| "grad_norm": 0.8215599060058594, |
| "learning_rate": 0.000624220980980262, |
| "loss": 1.788, |
| "step": 274500 |
| }, |
| { |
| "epoch": 1.1358520335881772, |
| "grad_norm": 0.7967174053192139, |
| "learning_rate": 0.000623530208185105, |
| "loss": 1.7886, |
| "step": 275000 |
| }, |
| { |
| "epoch": 1.137917219103792, |
| "grad_norm": 0.8354322910308838, |
| "learning_rate": 0.0006228394353899482, |
| "loss": 1.7841, |
| "step": 275500 |
| }, |
| { |
| "epoch": 1.139982404619407, |
| "grad_norm": 0.7985238432884216, |
| "learning_rate": 0.0006221486625947913, |
| "loss": 1.7886, |
| "step": 276000 |
| }, |
| { |
| "epoch": 1.1420475901350218, |
| "grad_norm": 0.8069713115692139, |
| "learning_rate": 0.0006214578897996344, |
| "loss": 1.7893, |
| "step": 276500 |
| }, |
| { |
| "epoch": 1.1441127756506366, |
| "grad_norm": 0.8244253396987915, |
| "learning_rate": 0.0006207671170044777, |
| "loss": 1.7846, |
| "step": 277000 |
| }, |
| { |
| "epoch": 1.1461779611662515, |
| "grad_norm": 0.7911844253540039, |
| "learning_rate": 0.0006200763442093208, |
| "loss": 1.782, |
| "step": 277500 |
| }, |
| { |
| "epoch": 1.1482431466818666, |
| "grad_norm": 0.8204144239425659, |
| "learning_rate": 0.0006193855714141639, |
| "loss": 1.7877, |
| "step": 278000 |
| }, |
| { |
| "epoch": 1.1503083321974814, |
| "grad_norm": 0.7840794920921326, |
| "learning_rate": 0.000618694798619007, |
| "loss": 1.7855, |
| "step": 278500 |
| }, |
| { |
| "epoch": 1.1523735177130963, |
| "grad_norm": 0.7567317485809326, |
| "learning_rate": 0.0006180040258238502, |
| "loss": 1.7815, |
| "step": 279000 |
| }, |
| { |
| "epoch": 1.1544387032287111, |
| "grad_norm": 0.8889859914779663, |
| "learning_rate": 0.0006173132530286934, |
| "loss": 1.7844, |
| "step": 279500 |
| }, |
| { |
| "epoch": 1.156503888744326, |
| "grad_norm": 0.7965997457504272, |
| "learning_rate": 0.0006166224802335365, |
| "loss": 1.7823, |
| "step": 280000 |
| }, |
| { |
| "epoch": 1.1585690742599408, |
| "grad_norm": 0.7915734052658081, |
| "learning_rate": 0.0006159317074383797, |
| "loss": 1.7829, |
| "step": 280500 |
| }, |
| { |
| "epoch": 1.1606342597755557, |
| "grad_norm": 0.8453460335731506, |
| "learning_rate": 0.0006152409346432227, |
| "loss": 1.7854, |
| "step": 281000 |
| }, |
| { |
| "epoch": 1.1626994452911705, |
| "grad_norm": 0.8347840905189514, |
| "learning_rate": 0.0006145501618480659, |
| "loss": 1.7777, |
| "step": 281500 |
| }, |
| { |
| "epoch": 1.1647646308067854, |
| "grad_norm": 0.806870698928833, |
| "learning_rate": 0.000613859389052909, |
| "loss": 1.7814, |
| "step": 282000 |
| }, |
| { |
| "epoch": 1.1668298163224002, |
| "grad_norm": 0.7722708582878113, |
| "learning_rate": 0.0006131686162577523, |
| "loss": 1.7816, |
| "step": 282500 |
| }, |
| { |
| "epoch": 1.168895001838015, |
| "grad_norm": 0.7900815606117249, |
| "learning_rate": 0.0006124778434625954, |
| "loss": 1.78, |
| "step": 283000 |
| }, |
| { |
| "epoch": 1.17096018735363, |
| "grad_norm": 0.8070388436317444, |
| "learning_rate": 0.0006117870706674385, |
| "loss": 1.7794, |
| "step": 283500 |
| }, |
| { |
| "epoch": 1.1730253728692448, |
| "grad_norm": 0.8343568444252014, |
| "learning_rate": 0.0006110962978722816, |
| "loss": 1.7817, |
| "step": 284000 |
| }, |
| { |
| "epoch": 1.1750905583848597, |
| "grad_norm": 0.7810460329055786, |
| "learning_rate": 0.0006104055250771247, |
| "loss": 1.7822, |
| "step": 284500 |
| }, |
| { |
| "epoch": 1.1771557439004745, |
| "grad_norm": 0.8281691670417786, |
| "learning_rate": 0.000609714752281968, |
| "loss": 1.7773, |
| "step": 285000 |
| }, |
| { |
| "epoch": 1.1792209294160894, |
| "grad_norm": 0.7959678769111633, |
| "learning_rate": 0.0006090239794868111, |
| "loss": 1.7755, |
| "step": 285500 |
| }, |
| { |
| "epoch": 1.1812861149317042, |
| "grad_norm": 0.7893877625465393, |
| "learning_rate": 0.0006083332066916543, |
| "loss": 1.7788, |
| "step": 286000 |
| }, |
| { |
| "epoch": 1.183351300447319, |
| "grad_norm": 0.794282078742981, |
| "learning_rate": 0.0006076424338964974, |
| "loss": 1.7785, |
| "step": 286500 |
| }, |
| { |
| "epoch": 1.1854164859629341, |
| "grad_norm": 0.833561360836029, |
| "learning_rate": 0.0006069516611013404, |
| "loss": 1.778, |
| "step": 287000 |
| }, |
| { |
| "epoch": 1.187481671478549, |
| "grad_norm": 0.7725043296813965, |
| "learning_rate": 0.0006062608883061837, |
| "loss": 1.7763, |
| "step": 287500 |
| }, |
| { |
| "epoch": 1.1895468569941638, |
| "grad_norm": 0.8378251194953918, |
| "learning_rate": 0.0006055701155110268, |
| "loss": 1.7785, |
| "step": 288000 |
| }, |
| { |
| "epoch": 1.1916120425097787, |
| "grad_norm": 0.8435170650482178, |
| "learning_rate": 0.00060487934271587, |
| "loss": 1.7808, |
| "step": 288500 |
| }, |
| { |
| "epoch": 1.1936772280253936, |
| "grad_norm": 0.7910299301147461, |
| "learning_rate": 0.0006041885699207131, |
| "loss": 1.7791, |
| "step": 289000 |
| }, |
| { |
| "epoch": 1.1957424135410084, |
| "grad_norm": 0.7965072989463806, |
| "learning_rate": 0.0006034977971255562, |
| "loss": 1.7762, |
| "step": 289500 |
| }, |
| { |
| "epoch": 1.1978075990566233, |
| "grad_norm": 0.7592757344245911, |
| "learning_rate": 0.0006028070243303994, |
| "loss": 1.7724, |
| "step": 290000 |
| }, |
| { |
| "epoch": 1.1998727845722381, |
| "grad_norm": 0.7980614304542542, |
| "learning_rate": 0.0006021162515352425, |
| "loss": 1.7773, |
| "step": 290500 |
| }, |
| { |
| "epoch": 1.201937970087853, |
| "grad_norm": 0.8618481755256653, |
| "learning_rate": 0.0006014254787400857, |
| "loss": 1.7773, |
| "step": 291000 |
| }, |
| { |
| "epoch": 1.2040031556034678, |
| "grad_norm": 0.7855138778686523, |
| "learning_rate": 0.0006007347059449288, |
| "loss": 1.7775, |
| "step": 291500 |
| }, |
| { |
| "epoch": 1.2060683411190827, |
| "grad_norm": 0.9088487029075623, |
| "learning_rate": 0.000600043933149772, |
| "loss": 1.7732, |
| "step": 292000 |
| }, |
| { |
| "epoch": 1.2081335266346975, |
| "grad_norm": 0.7684744000434875, |
| "learning_rate": 0.0005993531603546152, |
| "loss": 1.7712, |
| "step": 292500 |
| }, |
| { |
| "epoch": 1.2101987121503124, |
| "grad_norm": 0.8111701607704163, |
| "learning_rate": 0.0005986623875594582, |
| "loss": 1.7728, |
| "step": 293000 |
| }, |
| { |
| "epoch": 1.2122638976659272, |
| "grad_norm": 0.7883111834526062, |
| "learning_rate": 0.0005979716147643014, |
| "loss": 1.7744, |
| "step": 293500 |
| }, |
| { |
| "epoch": 1.2143290831815423, |
| "grad_norm": 0.888268768787384, |
| "learning_rate": 0.0005972808419691445, |
| "loss": 1.7758, |
| "step": 294000 |
| }, |
| { |
| "epoch": 1.2163942686971572, |
| "grad_norm": 1.260141372680664, |
| "learning_rate": 0.0005965900691739878, |
| "loss": 1.7733, |
| "step": 294500 |
| }, |
| { |
| "epoch": 1.218459454212772, |
| "grad_norm": 0.7965800166130066, |
| "learning_rate": 0.0005958992963788309, |
| "loss": 1.7681, |
| "step": 295000 |
| }, |
| { |
| "epoch": 1.2205246397283869, |
| "grad_norm": 0.8069186806678772, |
| "learning_rate": 0.0005952085235836741, |
| "loss": 1.7709, |
| "step": 295500 |
| }, |
| { |
| "epoch": 1.2225898252440017, |
| "grad_norm": 0.7815278172492981, |
| "learning_rate": 0.0005945177507885171, |
| "loss": 1.7746, |
| "step": 296000 |
| }, |
| { |
| "epoch": 1.2246550107596166, |
| "grad_norm": 0.8087014555931091, |
| "learning_rate": 0.0005938269779933602, |
| "loss": 1.7769, |
| "step": 296500 |
| }, |
| { |
| "epoch": 1.2267201962752314, |
| "grad_norm": 0.8358011245727539, |
| "learning_rate": 0.0005931362051982034, |
| "loss": 1.7748, |
| "step": 297000 |
| }, |
| { |
| "epoch": 1.2287853817908463, |
| "grad_norm": 0.7773184776306152, |
| "learning_rate": 0.0005924454324030466, |
| "loss": 1.7687, |
| "step": 297500 |
| }, |
| { |
| "epoch": 1.2308505673064611, |
| "grad_norm": 0.8666139245033264, |
| "learning_rate": 0.0005917546596078898, |
| "loss": 1.771, |
| "step": 298000 |
| }, |
| { |
| "epoch": 1.232915752822076, |
| "grad_norm": 0.7656075954437256, |
| "learning_rate": 0.0005910638868127329, |
| "loss": 1.7668, |
| "step": 298500 |
| }, |
| { |
| "epoch": 1.2349809383376908, |
| "grad_norm": 0.78355473279953, |
| "learning_rate": 0.000590373114017576, |
| "loss": 1.7773, |
| "step": 299000 |
| }, |
| { |
| "epoch": 1.2370461238533057, |
| "grad_norm": 0.7689515948295593, |
| "learning_rate": 0.0005896823412224191, |
| "loss": 1.7722, |
| "step": 299500 |
| }, |
| { |
| "epoch": 1.2391113093689206, |
| "grad_norm": 0.7110136151313782, |
| "learning_rate": 0.0005889915684272623, |
| "loss": 1.7741, |
| "step": 300000 |
| }, |
| { |
| "epoch": 1.2411764948845354, |
| "grad_norm": 0.7835440635681152, |
| "learning_rate": 0.0005883007956321055, |
| "loss": 1.7699, |
| "step": 300500 |
| }, |
| { |
| "epoch": 1.2432416804001503, |
| "grad_norm": 0.8212382793426514, |
| "learning_rate": 0.0005876100228369486, |
| "loss": 1.7723, |
| "step": 301000 |
| }, |
| { |
| "epoch": 1.2453068659157651, |
| "grad_norm": 1.0025386810302734, |
| "learning_rate": 0.0005869192500417918, |
| "loss": 1.774, |
| "step": 301500 |
| }, |
| { |
| "epoch": 1.24737205143138, |
| "grad_norm": 0.8511669635772705, |
| "learning_rate": 0.0005862284772466348, |
| "loss": 1.769, |
| "step": 302000 |
| }, |
| { |
| "epoch": 1.249437236946995, |
| "grad_norm": 0.8234755992889404, |
| "learning_rate": 0.0005855377044514781, |
| "loss": 1.7718, |
| "step": 302500 |
| }, |
| { |
| "epoch": 1.25150242246261, |
| "grad_norm": 0.7945066094398499, |
| "learning_rate": 0.0005848469316563212, |
| "loss": 1.7678, |
| "step": 303000 |
| }, |
| { |
| "epoch": 1.2535676079782248, |
| "grad_norm": 0.7915132641792297, |
| "learning_rate": 0.0005841561588611643, |
| "loss": 1.7661, |
| "step": 303500 |
| }, |
| { |
| "epoch": 1.2556327934938396, |
| "grad_norm": 0.7837565541267395, |
| "learning_rate": 0.0005834653860660075, |
| "loss": 1.7655, |
| "step": 304000 |
| }, |
| { |
| "epoch": 1.2576979790094545, |
| "grad_norm": 0.8219539523124695, |
| "learning_rate": 0.0005827746132708506, |
| "loss": 1.768, |
| "step": 304500 |
| }, |
| { |
| "epoch": 1.2597631645250693, |
| "grad_norm": 0.7561802268028259, |
| "learning_rate": 0.0005820838404756938, |
| "loss": 1.7652, |
| "step": 305000 |
| }, |
| { |
| "epoch": 1.2618283500406842, |
| "grad_norm": 0.7804844975471497, |
| "learning_rate": 0.0005813930676805369, |
| "loss": 1.767, |
| "step": 305500 |
| }, |
| { |
| "epoch": 1.263893535556299, |
| "grad_norm": 0.7776834964752197, |
| "learning_rate": 0.0005807022948853801, |
| "loss": 1.7619, |
| "step": 306000 |
| }, |
| { |
| "epoch": 1.2659587210719139, |
| "grad_norm": 0.7807164192199707, |
| "learning_rate": 0.0005800115220902232, |
| "loss": 1.7677, |
| "step": 306500 |
| }, |
| { |
| "epoch": 1.2680239065875287, |
| "grad_norm": 0.7830272316932678, |
| "learning_rate": 0.0005793207492950663, |
| "loss": 1.7638, |
| "step": 307000 |
| }, |
| { |
| "epoch": 1.2700890921031436, |
| "grad_norm": 0.7787231206893921, |
| "learning_rate": 0.0005786299764999096, |
| "loss": 1.7605, |
| "step": 307500 |
| }, |
| { |
| "epoch": 1.2721542776187584, |
| "grad_norm": 0.7798328399658203, |
| "learning_rate": 0.0005779392037047526, |
| "loss": 1.7617, |
| "step": 308000 |
| }, |
| { |
| "epoch": 1.2742194631343733, |
| "grad_norm": 0.8115136027336121, |
| "learning_rate": 0.0005772484309095958, |
| "loss": 1.7629, |
| "step": 308500 |
| }, |
| { |
| "epoch": 1.2762846486499884, |
| "grad_norm": 0.7370808720588684, |
| "learning_rate": 0.0005765576581144389, |
| "loss": 1.7584, |
| "step": 309000 |
| }, |
| { |
| "epoch": 1.2783498341656032, |
| "grad_norm": 0.7543977499008179, |
| "learning_rate": 0.0005758668853192822, |
| "loss": 1.7643, |
| "step": 309500 |
| }, |
| { |
| "epoch": 1.280415019681218, |
| "grad_norm": 0.8601275086402893, |
| "learning_rate": 0.0005751761125241253, |
| "loss": 1.7606, |
| "step": 310000 |
| }, |
| { |
| "epoch": 1.282480205196833, |
| "grad_norm": 0.7618570327758789, |
| "learning_rate": 0.0005744853397289684, |
| "loss": 1.7604, |
| "step": 310500 |
| }, |
| { |
| "epoch": 1.2845453907124478, |
| "grad_norm": 0.7939981818199158, |
| "learning_rate": 0.0005737945669338115, |
| "loss": 1.7632, |
| "step": 311000 |
| }, |
| { |
| "epoch": 1.2866105762280626, |
| "grad_norm": 0.7793112397193909, |
| "learning_rate": 0.0005731037941386546, |
| "loss": 1.7631, |
| "step": 311500 |
| }, |
| { |
| "epoch": 1.2886757617436775, |
| "grad_norm": 0.7892638444900513, |
| "learning_rate": 0.0005724130213434978, |
| "loss": 1.7576, |
| "step": 312000 |
| }, |
| { |
| "epoch": 1.2907409472592923, |
| "grad_norm": 0.8487522602081299, |
| "learning_rate": 0.000571722248548341, |
| "loss": 1.761, |
| "step": 312500 |
| }, |
| { |
| "epoch": 1.2928061327749072, |
| "grad_norm": 0.7883718609809875, |
| "learning_rate": 0.0005710314757531842, |
| "loss": 1.7571, |
| "step": 313000 |
| }, |
| { |
| "epoch": 1.294871318290522, |
| "grad_norm": 0.7739648818969727, |
| "learning_rate": 0.0005703407029580273, |
| "loss": 1.7595, |
| "step": 313500 |
| }, |
| { |
| "epoch": 1.296936503806137, |
| "grad_norm": 0.7544906139373779, |
| "learning_rate": 0.0005696499301628703, |
| "loss": 1.7563, |
| "step": 314000 |
| }, |
| { |
| "epoch": 1.2990016893217518, |
| "grad_norm": 0.7876557111740112, |
| "learning_rate": 0.0005689591573677135, |
| "loss": 1.7623, |
| "step": 314500 |
| }, |
| { |
| "epoch": 1.3010668748373666, |
| "grad_norm": 0.8152881860733032, |
| "learning_rate": 0.0005682683845725567, |
| "loss": 1.7561, |
| "step": 315000 |
| }, |
| { |
| "epoch": 1.3031320603529815, |
| "grad_norm": 0.8232436180114746, |
| "learning_rate": 0.0005675776117773999, |
| "loss": 1.7547, |
| "step": 315500 |
| }, |
| { |
| "epoch": 1.3051972458685963, |
| "grad_norm": 0.8717594742774963, |
| "learning_rate": 0.000566886838982243, |
| "loss": 1.7589, |
| "step": 316000 |
| }, |
| { |
| "epoch": 1.3072624313842112, |
| "grad_norm": 0.8005387187004089, |
| "learning_rate": 0.0005661960661870862, |
| "loss": 1.7531, |
| "step": 316500 |
| }, |
| { |
| "epoch": 1.309327616899826, |
| "grad_norm": 0.7955303192138672, |
| "learning_rate": 0.0005655052933919294, |
| "loss": 1.7581, |
| "step": 317000 |
| }, |
| { |
| "epoch": 1.3113928024154409, |
| "grad_norm": 0.785753071308136, |
| "learning_rate": 0.0005648145205967724, |
| "loss": 1.7584, |
| "step": 317500 |
| }, |
| { |
| "epoch": 1.3134579879310557, |
| "grad_norm": 0.775891125202179, |
| "learning_rate": 0.0005641237478016156, |
| "loss": 1.755, |
| "step": 318000 |
| }, |
| { |
| "epoch": 1.3155231734466706, |
| "grad_norm": 0.8091910481452942, |
| "learning_rate": 0.0005634329750064587, |
| "loss": 1.7562, |
| "step": 318500 |
| }, |
| { |
| "epoch": 1.3175883589622857, |
| "grad_norm": 0.7912936806678772, |
| "learning_rate": 0.0005627422022113019, |
| "loss": 1.7593, |
| "step": 319000 |
| }, |
| { |
| "epoch": 1.3196535444779005, |
| "grad_norm": 0.7949129343032837, |
| "learning_rate": 0.000562051429416145, |
| "loss": 1.7555, |
| "step": 319500 |
| }, |
| { |
| "epoch": 1.3217187299935154, |
| "grad_norm": 0.784271240234375, |
| "learning_rate": 0.0005613606566209883, |
| "loss": 1.7578, |
| "step": 320000 |
| }, |
| { |
| "epoch": 1.3237839155091302, |
| "grad_norm": 0.8425039052963257, |
| "learning_rate": 0.0005606698838258313, |
| "loss": 1.7578, |
| "step": 320500 |
| }, |
| { |
| "epoch": 1.325849101024745, |
| "grad_norm": 0.9427282214164734, |
| "learning_rate": 0.0005599791110306744, |
| "loss": 1.7553, |
| "step": 321000 |
| }, |
| { |
| "epoch": 1.32791428654036, |
| "grad_norm": 2.0400569438934326, |
| "learning_rate": 0.0005592883382355176, |
| "loss": 1.7582, |
| "step": 321500 |
| }, |
| { |
| "epoch": 1.3299794720559748, |
| "grad_norm": 0.8407232761383057, |
| "learning_rate": 0.0005585975654403607, |
| "loss": 1.7528, |
| "step": 322000 |
| }, |
| { |
| "epoch": 1.3320446575715896, |
| "grad_norm": 0.8112275004386902, |
| "learning_rate": 0.000557906792645204, |
| "loss": 1.7532, |
| "step": 322500 |
| }, |
| { |
| "epoch": 1.3341098430872045, |
| "grad_norm": 0.7587988376617432, |
| "learning_rate": 0.0005572160198500471, |
| "loss": 1.7578, |
| "step": 323000 |
| }, |
| { |
| "epoch": 1.3361750286028193, |
| "grad_norm": 0.7508676052093506, |
| "learning_rate": 0.0005565252470548902, |
| "loss": 1.7538, |
| "step": 323500 |
| }, |
| { |
| "epoch": 1.3382402141184342, |
| "grad_norm": 0.7507205009460449, |
| "learning_rate": 0.0005558344742597333, |
| "loss": 1.7516, |
| "step": 324000 |
| }, |
| { |
| "epoch": 1.340305399634049, |
| "grad_norm": 0.8090864419937134, |
| "learning_rate": 0.0005551437014645764, |
| "loss": 1.7537, |
| "step": 324500 |
| }, |
| { |
| "epoch": 1.3423705851496641, |
| "grad_norm": 0.7598680853843689, |
| "learning_rate": 0.0005544529286694197, |
| "loss": 1.7528, |
| "step": 325000 |
| }, |
| { |
| "epoch": 1.344435770665279, |
| "grad_norm": 0.795917272567749, |
| "learning_rate": 0.0005537621558742628, |
| "loss": 1.7503, |
| "step": 325500 |
| }, |
| { |
| "epoch": 1.3465009561808938, |
| "grad_norm": 0.8306310772895813, |
| "learning_rate": 0.000553071383079106, |
| "loss": 1.7519, |
| "step": 326000 |
| }, |
| { |
| "epoch": 1.3485661416965087, |
| "grad_norm": 0.7626925110816956, |
| "learning_rate": 0.000552380610283949, |
| "loss": 1.7504, |
| "step": 326500 |
| }, |
| { |
| "epoch": 1.3506313272121235, |
| "grad_norm": 0.8428457975387573, |
| "learning_rate": 0.0005516898374887922, |
| "loss": 1.75, |
| "step": 327000 |
| }, |
| { |
| "epoch": 1.3526965127277384, |
| "grad_norm": 0.7658423185348511, |
| "learning_rate": 0.0005509990646936354, |
| "loss": 1.7544, |
| "step": 327500 |
| }, |
| { |
| "epoch": 1.3547616982433532, |
| "grad_norm": 0.7995271682739258, |
| "learning_rate": 0.0005503082918984785, |
| "loss": 1.7523, |
| "step": 328000 |
| }, |
| { |
| "epoch": 1.356826883758968, |
| "grad_norm": 0.7842050790786743, |
| "learning_rate": 0.0005496175191033217, |
| "loss": 1.75, |
| "step": 328500 |
| }, |
| { |
| "epoch": 1.358892069274583, |
| "grad_norm": 0.7905313968658447, |
| "learning_rate": 0.0005489267463081648, |
| "loss": 1.7459, |
| "step": 329000 |
| }, |
| { |
| "epoch": 1.3609572547901978, |
| "grad_norm": 0.7925072312355042, |
| "learning_rate": 0.0005482359735130079, |
| "loss": 1.7489, |
| "step": 329500 |
| }, |
| { |
| "epoch": 1.3630224403058127, |
| "grad_norm": 0.786297082901001, |
| "learning_rate": 0.0005475452007178511, |
| "loss": 1.7511, |
| "step": 330000 |
| }, |
| { |
| "epoch": 1.3650876258214275, |
| "grad_norm": 0.8276360630989075, |
| "learning_rate": 0.0005468544279226943, |
| "loss": 1.7501, |
| "step": 330500 |
| }, |
| { |
| "epoch": 1.3671528113370424, |
| "grad_norm": 0.7593994736671448, |
| "learning_rate": 0.0005461636551275374, |
| "loss": 1.7495, |
| "step": 331000 |
| }, |
| { |
| "epoch": 1.3692179968526572, |
| "grad_norm": 0.7812342047691345, |
| "learning_rate": 0.0005454728823323805, |
| "loss": 1.7444, |
| "step": 331500 |
| }, |
| { |
| "epoch": 1.371283182368272, |
| "grad_norm": 0.8850775957107544, |
| "learning_rate": 0.0005447821095372237, |
| "loss": 1.7459, |
| "step": 332000 |
| }, |
| { |
| "epoch": 1.373348367883887, |
| "grad_norm": 0.7758823037147522, |
| "learning_rate": 0.0005440913367420668, |
| "loss": 1.7466, |
| "step": 332500 |
| }, |
| { |
| "epoch": 1.3754135533995018, |
| "grad_norm": 0.7858127951622009, |
| "learning_rate": 0.00054340056394691, |
| "loss": 1.7484, |
| "step": 333000 |
| }, |
| { |
| "epoch": 1.3774787389151166, |
| "grad_norm": 0.7900636792182922, |
| "learning_rate": 0.0005427097911517531, |
| "loss": 1.7463, |
| "step": 333500 |
| }, |
| { |
| "epoch": 1.3795439244307315, |
| "grad_norm": 0.8080687522888184, |
| "learning_rate": 0.0005420190183565963, |
| "loss": 1.7465, |
| "step": 334000 |
| }, |
| { |
| "epoch": 1.3816091099463466, |
| "grad_norm": 0.8253558278083801, |
| "learning_rate": 0.0005413282455614394, |
| "loss": 1.7429, |
| "step": 334500 |
| }, |
| { |
| "epoch": 1.3836742954619614, |
| "grad_norm": 0.8424259424209595, |
| "learning_rate": 0.0005406374727662826, |
| "loss": 1.748, |
| "step": 335000 |
| }, |
| { |
| "epoch": 1.3857394809775763, |
| "grad_norm": 0.7918978929519653, |
| "learning_rate": 0.0005399466999711257, |
| "loss": 1.7424, |
| "step": 335500 |
| }, |
| { |
| "epoch": 1.3878046664931911, |
| "grad_norm": 0.7710541486740112, |
| "learning_rate": 0.0005392559271759688, |
| "loss": 1.7429, |
| "step": 336000 |
| }, |
| { |
| "epoch": 1.389869852008806, |
| "grad_norm": 0.8952863216400146, |
| "learning_rate": 0.000538565154380812, |
| "loss": 1.7379, |
| "step": 336500 |
| }, |
| { |
| "epoch": 1.3919350375244208, |
| "grad_norm": 0.7974414229393005, |
| "learning_rate": 0.0005378743815856551, |
| "loss": 1.7442, |
| "step": 337000 |
| }, |
| { |
| "epoch": 1.3940002230400357, |
| "grad_norm": 0.8121086955070496, |
| "learning_rate": 0.0005371836087904984, |
| "loss": 1.7417, |
| "step": 337500 |
| }, |
| { |
| "epoch": 1.3960654085556505, |
| "grad_norm": 0.7721625566482544, |
| "learning_rate": 0.0005364928359953415, |
| "loss": 1.7399, |
| "step": 338000 |
| }, |
| { |
| "epoch": 1.3981305940712654, |
| "grad_norm": 0.7423729300498962, |
| "learning_rate": 0.0005358020632001845, |
| "loss": 1.7451, |
| "step": 338500 |
| }, |
| { |
| "epoch": 1.4001957795868802, |
| "grad_norm": 0.8138153553009033, |
| "learning_rate": 0.0005351112904050277, |
| "loss": 1.7405, |
| "step": 339000 |
| }, |
| { |
| "epoch": 1.402260965102495, |
| "grad_norm": 0.8146694302558899, |
| "learning_rate": 0.0005344205176098708, |
| "loss": 1.7431, |
| "step": 339500 |
| }, |
| { |
| "epoch": 1.40432615061811, |
| "grad_norm": 0.826502799987793, |
| "learning_rate": 0.0005337297448147141, |
| "loss": 1.7389, |
| "step": 340000 |
| }, |
| { |
| "epoch": 1.406391336133725, |
| "grad_norm": 0.7904347777366638, |
| "learning_rate": 0.0005330389720195572, |
| "loss": 1.7387, |
| "step": 340500 |
| }, |
| { |
| "epoch": 1.4084565216493399, |
| "grad_norm": 0.7897937297821045, |
| "learning_rate": 0.0005323481992244004, |
| "loss": 1.7428, |
| "step": 341000 |
| }, |
| { |
| "epoch": 1.4105217071649547, |
| "grad_norm": 0.8036805391311646, |
| "learning_rate": 0.0005316574264292434, |
| "loss": 1.7417, |
| "step": 341500 |
| }, |
| { |
| "epoch": 1.4125868926805696, |
| "grad_norm": 0.7628007531166077, |
| "learning_rate": 0.0005309666536340865, |
| "loss": 1.7404, |
| "step": 342000 |
| }, |
| { |
| "epoch": 1.4146520781961844, |
| "grad_norm": 0.8156040906906128, |
| "learning_rate": 0.0005302758808389298, |
| "loss": 1.7374, |
| "step": 342500 |
| }, |
| { |
| "epoch": 1.4167172637117993, |
| "grad_norm": 0.8283891081809998, |
| "learning_rate": 0.0005295851080437729, |
| "loss": 1.7373, |
| "step": 343000 |
| }, |
| { |
| "epoch": 1.4187824492274141, |
| "grad_norm": 0.8151522278785706, |
| "learning_rate": 0.0005288943352486161, |
| "loss": 1.7356, |
| "step": 343500 |
| }, |
| { |
| "epoch": 1.420847634743029, |
| "grad_norm": 0.8706732988357544, |
| "learning_rate": 0.0005282035624534592, |
| "loss": 1.7403, |
| "step": 344000 |
| }, |
| { |
| "epoch": 1.4229128202586439, |
| "grad_norm": 0.8165752291679382, |
| "learning_rate": 0.0005275127896583023, |
| "loss": 1.7405, |
| "step": 344500 |
| }, |
| { |
| "epoch": 1.4249780057742587, |
| "grad_norm": 0.8452313542366028, |
| "learning_rate": 0.0005268220168631455, |
| "loss": 1.7385, |
| "step": 345000 |
| }, |
| { |
| "epoch": 1.4270431912898736, |
| "grad_norm": 0.8291791081428528, |
| "learning_rate": 0.0005261312440679886, |
| "loss": 1.7381, |
| "step": 345500 |
| }, |
| { |
| "epoch": 1.4291083768054884, |
| "grad_norm": 0.8294808864593506, |
| "learning_rate": 0.0005254404712728318, |
| "loss": 1.7398, |
| "step": 346000 |
| }, |
| { |
| "epoch": 1.4311735623211033, |
| "grad_norm": 0.8119639754295349, |
| "learning_rate": 0.0005247496984776749, |
| "loss": 1.7386, |
| "step": 346500 |
| }, |
| { |
| "epoch": 1.4332387478367181, |
| "grad_norm": 0.7947481870651245, |
| "learning_rate": 0.0005240589256825181, |
| "loss": 1.7361, |
| "step": 347000 |
| }, |
| { |
| "epoch": 1.435303933352333, |
| "grad_norm": 0.8204724192619324, |
| "learning_rate": 0.0005233681528873612, |
| "loss": 1.7405, |
| "step": 347500 |
| }, |
| { |
| "epoch": 1.4373691188679478, |
| "grad_norm": 0.7718450427055359, |
| "learning_rate": 0.0005226773800922044, |
| "loss": 1.7398, |
| "step": 348000 |
| }, |
| { |
| "epoch": 1.4394343043835627, |
| "grad_norm": 0.7919915318489075, |
| "learning_rate": 0.0005219866072970475, |
| "loss": 1.7334, |
| "step": 348500 |
| }, |
| { |
| "epoch": 1.4414994898991775, |
| "grad_norm": 0.8244622945785522, |
| "learning_rate": 0.0005212958345018906, |
| "loss": 1.7363, |
| "step": 349000 |
| }, |
| { |
| "epoch": 1.4435646754147924, |
| "grad_norm": 0.8124867677688599, |
| "learning_rate": 0.0005206050617067338, |
| "loss": 1.7368, |
| "step": 349500 |
| }, |
| { |
| "epoch": 1.4456298609304072, |
| "grad_norm": 0.8139218091964722, |
| "learning_rate": 0.000519914288911577, |
| "loss": 1.7344, |
| "step": 350000 |
| }, |
| { |
| "epoch": 1.4476950464460223, |
| "grad_norm": 0.7997359037399292, |
| "learning_rate": 0.0005192235161164201, |
| "loss": 1.7296, |
| "step": 350500 |
| }, |
| { |
| "epoch": 1.4497602319616372, |
| "grad_norm": 0.8655456304550171, |
| "learning_rate": 0.0005185327433212632, |
| "loss": 1.7353, |
| "step": 351000 |
| }, |
| { |
| "epoch": 1.451825417477252, |
| "grad_norm": 0.8099657893180847, |
| "learning_rate": 0.0005178419705261064, |
| "loss": 1.7356, |
| "step": 351500 |
| }, |
| { |
| "epoch": 1.4538906029928669, |
| "grad_norm": 0.7905128002166748, |
| "learning_rate": 0.0005171511977309495, |
| "loss": 1.7331, |
| "step": 352000 |
| }, |
| { |
| "epoch": 1.4559557885084817, |
| "grad_norm": 0.7679085731506348, |
| "learning_rate": 0.0005164604249357927, |
| "loss": 1.7347, |
| "step": 352500 |
| }, |
| { |
| "epoch": 1.4580209740240966, |
| "grad_norm": 0.7984927892684937, |
| "learning_rate": 0.0005157696521406359, |
| "loss": 1.7331, |
| "step": 353000 |
| }, |
| { |
| "epoch": 1.4600861595397114, |
| "grad_norm": 0.8509982824325562, |
| "learning_rate": 0.0005150788793454789, |
| "loss": 1.7281, |
| "step": 353500 |
| }, |
| { |
| "epoch": 1.4621513450553263, |
| "grad_norm": 0.756581723690033, |
| "learning_rate": 0.0005143881065503221, |
| "loss": 1.7272, |
| "step": 354000 |
| }, |
| { |
| "epoch": 1.4642165305709411, |
| "grad_norm": 0.808980405330658, |
| "learning_rate": 0.0005136973337551652, |
| "loss": 1.7321, |
| "step": 354500 |
| }, |
| { |
| "epoch": 1.466281716086556, |
| "grad_norm": 0.8383910059928894, |
| "learning_rate": 0.0005130065609600085, |
| "loss": 1.7337, |
| "step": 355000 |
| }, |
| { |
| "epoch": 1.4683469016021709, |
| "grad_norm": 0.7818363308906555, |
| "learning_rate": 0.0005123157881648516, |
| "loss": 1.7327, |
| "step": 355500 |
| }, |
| { |
| "epoch": 1.470412087117786, |
| "grad_norm": 0.7779876589775085, |
| "learning_rate": 0.0005116250153696947, |
| "loss": 1.7317, |
| "step": 356000 |
| }, |
| { |
| "epoch": 1.4724772726334008, |
| "grad_norm": 0.7729701399803162, |
| "learning_rate": 0.0005109342425745378, |
| "loss": 1.7382, |
| "step": 356500 |
| }, |
| { |
| "epoch": 1.4745424581490156, |
| "grad_norm": 0.7971392273902893, |
| "learning_rate": 0.0005102434697793809, |
| "loss": 1.7274, |
| "step": 357000 |
| }, |
| { |
| "epoch": 1.4766076436646305, |
| "grad_norm": 0.8112899661064148, |
| "learning_rate": 0.0005095526969842242, |
| "loss": 1.7297, |
| "step": 357500 |
| }, |
| { |
| "epoch": 1.4786728291802453, |
| "grad_norm": 0.7968249917030334, |
| "learning_rate": 0.0005088619241890673, |
| "loss": 1.7293, |
| "step": 358000 |
| }, |
| { |
| "epoch": 1.4807380146958602, |
| "grad_norm": 0.7626878619194031, |
| "learning_rate": 0.0005081711513939105, |
| "loss": 1.7308, |
| "step": 358500 |
| }, |
| { |
| "epoch": 1.482803200211475, |
| "grad_norm": 0.7603055834770203, |
| "learning_rate": 0.0005074803785987536, |
| "loss": 1.7285, |
| "step": 359000 |
| }, |
| { |
| "epoch": 1.48486838572709, |
| "grad_norm": 0.7844238877296448, |
| "learning_rate": 0.0005067896058035966, |
| "loss": 1.7296, |
| "step": 359500 |
| }, |
| { |
| "epoch": 1.4869335712427048, |
| "grad_norm": 0.7728045582771301, |
| "learning_rate": 0.0005060988330084399, |
| "loss": 1.7287, |
| "step": 360000 |
| }, |
| { |
| "epoch": 1.4889987567583196, |
| "grad_norm": 0.7845308184623718, |
| "learning_rate": 0.000505408060213283, |
| "loss": 1.7249, |
| "step": 360500 |
| }, |
| { |
| "epoch": 1.4910639422739345, |
| "grad_norm": 0.8352622985839844, |
| "learning_rate": 0.0005047172874181262, |
| "loss": 1.7259, |
| "step": 361000 |
| }, |
| { |
| "epoch": 1.4931291277895493, |
| "grad_norm": 0.8270286917686462, |
| "learning_rate": 0.0005040265146229693, |
| "loss": 1.7291, |
| "step": 361500 |
| }, |
| { |
| "epoch": 1.4951943133051642, |
| "grad_norm": 0.7802717089653015, |
| "learning_rate": 0.0005033357418278125, |
| "loss": 1.7243, |
| "step": 362000 |
| }, |
| { |
| "epoch": 1.497259498820779, |
| "grad_norm": 0.7886295914649963, |
| "learning_rate": 0.0005026449690326556, |
| "loss": 1.7298, |
| "step": 362500 |
| }, |
| { |
| "epoch": 1.4993246843363939, |
| "grad_norm": 0.8236453533172607, |
| "learning_rate": 0.0005019541962374987, |
| "loss": 1.725, |
| "step": 363000 |
| }, |
| { |
| "epoch": 1.5013898698520087, |
| "grad_norm": 0.7767708897590637, |
| "learning_rate": 0.0005012634234423419, |
| "loss": 1.7302, |
| "step": 363500 |
| }, |
| { |
| "epoch": 1.5034550553676236, |
| "grad_norm": 0.7658302783966064, |
| "learning_rate": 0.000500572650647185, |
| "loss": 1.7227, |
| "step": 364000 |
| }, |
| { |
| "epoch": 1.5055202408832384, |
| "grad_norm": 0.7607765793800354, |
| "learning_rate": 0.0004998818778520282, |
| "loss": 1.7292, |
| "step": 364500 |
| }, |
| { |
| "epoch": 1.5075854263988533, |
| "grad_norm": 0.7231427431106567, |
| "learning_rate": 0.0004991911050568714, |
| "loss": 1.7257, |
| "step": 365000 |
| }, |
| { |
| "epoch": 1.5096506119144681, |
| "grad_norm": 0.965886116027832, |
| "learning_rate": 0.0004985003322617145, |
| "loss": 1.7238, |
| "step": 365500 |
| }, |
| { |
| "epoch": 1.511715797430083, |
| "grad_norm": 0.8113678097724915, |
| "learning_rate": 0.0004978095594665576, |
| "loss": 1.7213, |
| "step": 366000 |
| }, |
| { |
| "epoch": 1.5137809829456979, |
| "grad_norm": 0.8098276853561401, |
| "learning_rate": 0.0004971187866714007, |
| "loss": 1.7289, |
| "step": 366500 |
| }, |
| { |
| "epoch": 1.515846168461313, |
| "grad_norm": 1.9579529762268066, |
| "learning_rate": 0.0004964280138762439, |
| "loss": 1.7273, |
| "step": 367000 |
| }, |
| { |
| "epoch": 1.5179113539769278, |
| "grad_norm": 0.8172611594200134, |
| "learning_rate": 0.0004957372410810871, |
| "loss": 1.728, |
| "step": 367500 |
| }, |
| { |
| "epoch": 1.5199765394925426, |
| "grad_norm": 0.7897489666938782, |
| "learning_rate": 0.0004950464682859302, |
| "loss": 1.722, |
| "step": 368000 |
| }, |
| { |
| "epoch": 1.5220417250081575, |
| "grad_norm": 0.804604172706604, |
| "learning_rate": 0.0004943556954907734, |
| "loss": 1.7228, |
| "step": 368500 |
| }, |
| { |
| "epoch": 1.5241069105237723, |
| "grad_norm": 0.8362699151039124, |
| "learning_rate": 0.0004936649226956165, |
| "loss": 1.7204, |
| "step": 369000 |
| }, |
| { |
| "epoch": 1.5261720960393872, |
| "grad_norm": 0.7928584814071655, |
| "learning_rate": 0.0004929741499004596, |
| "loss": 1.7232, |
| "step": 369500 |
| }, |
| { |
| "epoch": 1.528237281555002, |
| "grad_norm": 0.8171131610870361, |
| "learning_rate": 0.0004922833771053028, |
| "loss": 1.721, |
| "step": 370000 |
| }, |
| { |
| "epoch": 1.530302467070617, |
| "grad_norm": 0.7682649493217468, |
| "learning_rate": 0.000491592604310146, |
| "loss": 1.7181, |
| "step": 370500 |
| }, |
| { |
| "epoch": 1.532367652586232, |
| "grad_norm": 0.7589514255523682, |
| "learning_rate": 0.0004909018315149891, |
| "loss": 1.7207, |
| "step": 371000 |
| }, |
| { |
| "epoch": 1.5344328381018468, |
| "grad_norm": 0.7927723526954651, |
| "learning_rate": 0.0004902110587198322, |
| "loss": 1.7172, |
| "step": 371500 |
| }, |
| { |
| "epoch": 1.5364980236174617, |
| "grad_norm": 0.7993720173835754, |
| "learning_rate": 0.0004895202859246754, |
| "loss": 1.7239, |
| "step": 372000 |
| }, |
| { |
| "epoch": 1.5385632091330765, |
| "grad_norm": 0.75545734167099, |
| "learning_rate": 0.0004888295131295186, |
| "loss": 1.7193, |
| "step": 372500 |
| }, |
| { |
| "epoch": 1.5406283946486914, |
| "grad_norm": 0.8410167694091797, |
| "learning_rate": 0.0004881387403343617, |
| "loss": 1.7212, |
| "step": 373000 |
| }, |
| { |
| "epoch": 1.5426935801643062, |
| "grad_norm": 0.7745389938354492, |
| "learning_rate": 0.00048744796753920485, |
| "loss": 1.7178, |
| "step": 373500 |
| }, |
| { |
| "epoch": 1.544758765679921, |
| "grad_norm": 1.1876429319381714, |
| "learning_rate": 0.00048675719474404797, |
| "loss": 1.7166, |
| "step": 374000 |
| }, |
| { |
| "epoch": 1.546823951195536, |
| "grad_norm": 0.8236234188079834, |
| "learning_rate": 0.0004860664219488911, |
| "loss": 1.7244, |
| "step": 374500 |
| }, |
| { |
| "epoch": 1.5488891367111508, |
| "grad_norm": 0.8301746845245361, |
| "learning_rate": 0.00048537564915373426, |
| "loss": 1.7154, |
| "step": 375000 |
| }, |
| { |
| "epoch": 1.5509543222267657, |
| "grad_norm": 0.8341511487960815, |
| "learning_rate": 0.0004846848763585774, |
| "loss": 1.7201, |
| "step": 375500 |
| }, |
| { |
| "epoch": 1.5530195077423805, |
| "grad_norm": 0.772774338722229, |
| "learning_rate": 0.00048399410356342055, |
| "loss": 1.7194, |
| "step": 376000 |
| }, |
| { |
| "epoch": 1.5550846932579954, |
| "grad_norm": 0.7931101322174072, |
| "learning_rate": 0.0004833033307682637, |
| "loss": 1.7218, |
| "step": 376500 |
| }, |
| { |
| "epoch": 1.5571498787736102, |
| "grad_norm": 0.7973618507385254, |
| "learning_rate": 0.00048261255797310684, |
| "loss": 1.7132, |
| "step": 377000 |
| }, |
| { |
| "epoch": 1.559215064289225, |
| "grad_norm": 0.7944709062576294, |
| "learning_rate": 0.00048192178517795, |
| "loss": 1.7152, |
| "step": 377500 |
| }, |
| { |
| "epoch": 1.56128024980484, |
| "grad_norm": 0.7873803377151489, |
| "learning_rate": 0.00048123101238279313, |
| "loss": 1.7136, |
| "step": 378000 |
| }, |
| { |
| "epoch": 1.5633454353204548, |
| "grad_norm": 0.8176526427268982, |
| "learning_rate": 0.00048054023958763625, |
| "loss": 1.7146, |
| "step": 378500 |
| }, |
| { |
| "epoch": 1.5654106208360696, |
| "grad_norm": 0.80870121717453, |
| "learning_rate": 0.0004798494667924794, |
| "loss": 1.7152, |
| "step": 379000 |
| }, |
| { |
| "epoch": 1.5674758063516845, |
| "grad_norm": 0.8075997829437256, |
| "learning_rate": 0.0004791586939973226, |
| "loss": 1.7156, |
| "step": 379500 |
| }, |
| { |
| "epoch": 1.5695409918672993, |
| "grad_norm": 0.7649165391921997, |
| "learning_rate": 0.0004784679212021657, |
| "loss": 1.7128, |
| "step": 380000 |
| }, |
| { |
| "epoch": 1.5716061773829142, |
| "grad_norm": 0.7735922932624817, |
| "learning_rate": 0.0004777771484070089, |
| "loss": 1.7127, |
| "step": 380500 |
| }, |
| { |
| "epoch": 1.573671362898529, |
| "grad_norm": 0.8073831796646118, |
| "learning_rate": 0.00047708637561185205, |
| "loss": 1.7157, |
| "step": 381000 |
| }, |
| { |
| "epoch": 1.575736548414144, |
| "grad_norm": 0.7841485738754272, |
| "learning_rate": 0.0004763956028166951, |
| "loss": 1.7164, |
| "step": 381500 |
| }, |
| { |
| "epoch": 1.5778017339297588, |
| "grad_norm": 0.7511780261993408, |
| "learning_rate": 0.0004757048300215383, |
| "loss": 1.7146, |
| "step": 382000 |
| }, |
| { |
| "epoch": 1.5798669194453738, |
| "grad_norm": 0.7717761993408203, |
| "learning_rate": 0.00047501405722638146, |
| "loss": 1.7141, |
| "step": 382500 |
| }, |
| { |
| "epoch": 1.5819321049609887, |
| "grad_norm": 0.8251765966415405, |
| "learning_rate": 0.0004743232844312246, |
| "loss": 1.7146, |
| "step": 383000 |
| }, |
| { |
| "epoch": 1.5839972904766035, |
| "grad_norm": 0.8129590749740601, |
| "learning_rate": 0.00047363251163606775, |
| "loss": 1.7151, |
| "step": 383500 |
| }, |
| { |
| "epoch": 1.5860624759922184, |
| "grad_norm": 0.7942067384719849, |
| "learning_rate": 0.0004729417388409109, |
| "loss": 1.7145, |
| "step": 384000 |
| }, |
| { |
| "epoch": 1.5881276615078332, |
| "grad_norm": 0.8091747760772705, |
| "learning_rate": 0.00047225096604575404, |
| "loss": 1.7102, |
| "step": 384500 |
| }, |
| { |
| "epoch": 1.590192847023448, |
| "grad_norm": 0.8157942295074463, |
| "learning_rate": 0.00047156019325059716, |
| "loss": 1.7139, |
| "step": 385000 |
| }, |
| { |
| "epoch": 1.592258032539063, |
| "grad_norm": 0.7791504859924316, |
| "learning_rate": 0.00047086942045544033, |
| "loss": 1.7138, |
| "step": 385500 |
| }, |
| { |
| "epoch": 1.5943232180546778, |
| "grad_norm": 0.8184142708778381, |
| "learning_rate": 0.00047017864766028345, |
| "loss": 1.7119, |
| "step": 386000 |
| }, |
| { |
| "epoch": 1.5963884035702929, |
| "grad_norm": 0.7216043472290039, |
| "learning_rate": 0.0004694878748651266, |
| "loss": 1.7062, |
| "step": 386500 |
| }, |
| { |
| "epoch": 1.5984535890859077, |
| "grad_norm": 0.7634962797164917, |
| "learning_rate": 0.0004687971020699698, |
| "loss": 1.7109, |
| "step": 387000 |
| }, |
| { |
| "epoch": 1.6005187746015226, |
| "grad_norm": 0.7682668566703796, |
| "learning_rate": 0.0004681063292748129, |
| "loss": 1.7175, |
| "step": 387500 |
| }, |
| { |
| "epoch": 1.6025839601171374, |
| "grad_norm": 0.7891648411750793, |
| "learning_rate": 0.0004674155564796561, |
| "loss": 1.7076, |
| "step": 388000 |
| }, |
| { |
| "epoch": 1.6046491456327523, |
| "grad_norm": 0.7735166549682617, |
| "learning_rate": 0.0004667247836844992, |
| "loss": 1.7122, |
| "step": 388500 |
| }, |
| { |
| "epoch": 1.6067143311483671, |
| "grad_norm": 0.7342345714569092, |
| "learning_rate": 0.0004660340108893423, |
| "loss": 1.7093, |
| "step": 389000 |
| }, |
| { |
| "epoch": 1.608779516663982, |
| "grad_norm": 0.7745596170425415, |
| "learning_rate": 0.0004653432380941855, |
| "loss": 1.7072, |
| "step": 389500 |
| }, |
| { |
| "epoch": 1.6108447021795969, |
| "grad_norm": 0.7656903266906738, |
| "learning_rate": 0.00046465246529902866, |
| "loss": 1.7114, |
| "step": 390000 |
| }, |
| { |
| "epoch": 1.6129098876952117, |
| "grad_norm": 0.807043731212616, |
| "learning_rate": 0.0004639616925038718, |
| "loss": 1.7085, |
| "step": 390500 |
| }, |
| { |
| "epoch": 1.6149750732108266, |
| "grad_norm": 0.7980780601501465, |
| "learning_rate": 0.00046327091970871495, |
| "loss": 1.7054, |
| "step": 391000 |
| }, |
| { |
| "epoch": 1.6170402587264414, |
| "grad_norm": 0.7772185802459717, |
| "learning_rate": 0.0004625801469135581, |
| "loss": 1.7077, |
| "step": 391500 |
| }, |
| { |
| "epoch": 1.6191054442420563, |
| "grad_norm": 0.7955535054206848, |
| "learning_rate": 0.0004618893741184012, |
| "loss": 1.7059, |
| "step": 392000 |
| }, |
| { |
| "epoch": 1.6211706297576711, |
| "grad_norm": 0.7842792868614197, |
| "learning_rate": 0.00046119860132324436, |
| "loss": 1.7032, |
| "step": 392500 |
| }, |
| { |
| "epoch": 1.623235815273286, |
| "grad_norm": 0.7722345590591431, |
| "learning_rate": 0.00046050782852808753, |
| "loss": 1.7076, |
| "step": 393000 |
| }, |
| { |
| "epoch": 1.6253010007889008, |
| "grad_norm": 0.7836925983428955, |
| "learning_rate": 0.00045981705573293065, |
| "loss": 1.701, |
| "step": 393500 |
| }, |
| { |
| "epoch": 1.6273661863045157, |
| "grad_norm": 0.8407610058784485, |
| "learning_rate": 0.0004591262829377738, |
| "loss": 1.7054, |
| "step": 394000 |
| }, |
| { |
| "epoch": 1.6294313718201305, |
| "grad_norm": 0.7842757701873779, |
| "learning_rate": 0.000458435510142617, |
| "loss": 1.7085, |
| "step": 394500 |
| }, |
| { |
| "epoch": 1.6314965573357454, |
| "grad_norm": 0.7749829292297363, |
| "learning_rate": 0.0004577447373474601, |
| "loss": 1.7082, |
| "step": 395000 |
| }, |
| { |
| "epoch": 1.6335617428513602, |
| "grad_norm": 0.7778738141059875, |
| "learning_rate": 0.0004570539645523032, |
| "loss": 1.7071, |
| "step": 395500 |
| }, |
| { |
| "epoch": 1.635626928366975, |
| "grad_norm": 0.7654650211334229, |
| "learning_rate": 0.0004563631917571464, |
| "loss": 1.7093, |
| "step": 396000 |
| }, |
| { |
| "epoch": 1.63769211388259, |
| "grad_norm": 0.7864561676979065, |
| "learning_rate": 0.0004556724189619895, |
| "loss": 1.7035, |
| "step": 396500 |
| }, |
| { |
| "epoch": 1.6397572993982048, |
| "grad_norm": 0.7672191262245178, |
| "learning_rate": 0.0004549816461668327, |
| "loss": 1.7052, |
| "step": 397000 |
| }, |
| { |
| "epoch": 1.6418224849138197, |
| "grad_norm": 0.7847920656204224, |
| "learning_rate": 0.00045429087337167586, |
| "loss": 1.7033, |
| "step": 397500 |
| }, |
| { |
| "epoch": 1.6438876704294345, |
| "grad_norm": 0.7824931144714355, |
| "learning_rate": 0.000453600100576519, |
| "loss": 1.7042, |
| "step": 398000 |
| }, |
| { |
| "epoch": 1.6459528559450496, |
| "grad_norm": 0.7992446422576904, |
| "learning_rate": 0.00045290932778136215, |
| "loss": 1.7051, |
| "step": 398500 |
| }, |
| { |
| "epoch": 1.6480180414606644, |
| "grad_norm": 0.7504148483276367, |
| "learning_rate": 0.00045221855498620527, |
| "loss": 1.7036, |
| "step": 399000 |
| }, |
| { |
| "epoch": 1.6500832269762793, |
| "grad_norm": 0.8227455019950867, |
| "learning_rate": 0.0004515277821910484, |
| "loss": 1.6998, |
| "step": 399500 |
| }, |
| { |
| "epoch": 1.6521484124918941, |
| "grad_norm": 0.7897786498069763, |
| "learning_rate": 0.00045083700939589156, |
| "loss": 1.7047, |
| "step": 400000 |
| }, |
| { |
| "epoch": 1.654213598007509, |
| "grad_norm": 0.7825984954833984, |
| "learning_rate": 0.00045014623660073473, |
| "loss": 1.7043, |
| "step": 400500 |
| }, |
| { |
| "epoch": 1.6562787835231239, |
| "grad_norm": 0.8071085214614868, |
| "learning_rate": 0.00044945546380557785, |
| "loss": 1.7035, |
| "step": 401000 |
| }, |
| { |
| "epoch": 1.6583439690387387, |
| "grad_norm": 0.9406007528305054, |
| "learning_rate": 0.000448764691010421, |
| "loss": 1.7011, |
| "step": 401500 |
| }, |
| { |
| "epoch": 1.6604091545543538, |
| "grad_norm": 0.7797788381576538, |
| "learning_rate": 0.0004480739182152642, |
| "loss": 1.7024, |
| "step": 402000 |
| }, |
| { |
| "epoch": 1.6624743400699686, |
| "grad_norm": 0.8257543444633484, |
| "learning_rate": 0.00044738314542010725, |
| "loss": 1.6958, |
| "step": 402500 |
| }, |
| { |
| "epoch": 1.6645395255855835, |
| "grad_norm": 0.8066025972366333, |
| "learning_rate": 0.0004466923726249504, |
| "loss": 1.6998, |
| "step": 403000 |
| }, |
| { |
| "epoch": 1.6666047111011983, |
| "grad_norm": 0.8041613698005676, |
| "learning_rate": 0.0004460015998297936, |
| "loss": 1.7053, |
| "step": 403500 |
| }, |
| { |
| "epoch": 1.6686698966168132, |
| "grad_norm": 0.8146698474884033, |
| "learning_rate": 0.0004453108270346367, |
| "loss": 1.6969, |
| "step": 404000 |
| }, |
| { |
| "epoch": 1.670735082132428, |
| "grad_norm": 0.7349113821983337, |
| "learning_rate": 0.0004446200542394799, |
| "loss": 1.6997, |
| "step": 404500 |
| }, |
| { |
| "epoch": 1.672800267648043, |
| "grad_norm": 0.7776924967765808, |
| "learning_rate": 0.00044392928144432306, |
| "loss": 1.6976, |
| "step": 405000 |
| }, |
| { |
| "epoch": 1.6748654531636578, |
| "grad_norm": 0.8118670582771301, |
| "learning_rate": 0.0004432385086491662, |
| "loss": 1.7039, |
| "step": 405500 |
| }, |
| { |
| "epoch": 1.6769306386792726, |
| "grad_norm": 0.7525516152381897, |
| "learning_rate": 0.0004425477358540093, |
| "loss": 1.7017, |
| "step": 406000 |
| }, |
| { |
| "epoch": 1.6789958241948875, |
| "grad_norm": 0.7723379135131836, |
| "learning_rate": 0.00044185696305885247, |
| "loss": 1.6997, |
| "step": 406500 |
| }, |
| { |
| "epoch": 1.6810610097105023, |
| "grad_norm": 0.7601300477981567, |
| "learning_rate": 0.00044116619026369564, |
| "loss": 1.6931, |
| "step": 407000 |
| }, |
| { |
| "epoch": 1.6831261952261172, |
| "grad_norm": 0.788893461227417, |
| "learning_rate": 0.00044047541746853876, |
| "loss": 1.6941, |
| "step": 407500 |
| }, |
| { |
| "epoch": 1.685191380741732, |
| "grad_norm": 0.8101310133934021, |
| "learning_rate": 0.00043978464467338193, |
| "loss": 1.697, |
| "step": 408000 |
| }, |
| { |
| "epoch": 1.6872565662573469, |
| "grad_norm": 0.7839348912239075, |
| "learning_rate": 0.0004390938718782251, |
| "loss": 1.7037, |
| "step": 408500 |
| }, |
| { |
| "epoch": 1.6893217517729617, |
| "grad_norm": 1.0003387928009033, |
| "learning_rate": 0.0004384030990830682, |
| "loss": 1.6995, |
| "step": 409000 |
| }, |
| { |
| "epoch": 1.6913869372885766, |
| "grad_norm": 0.7542647123336792, |
| "learning_rate": 0.00043771232628791134, |
| "loss": 1.6982, |
| "step": 409500 |
| }, |
| { |
| "epoch": 1.6934521228041914, |
| "grad_norm": 0.8054424524307251, |
| "learning_rate": 0.0004370215534927545, |
| "loss": 1.6971, |
| "step": 410000 |
| }, |
| { |
| "epoch": 1.6955173083198063, |
| "grad_norm": 0.7765061259269714, |
| "learning_rate": 0.0004363307806975976, |
| "loss": 1.6951, |
| "step": 410500 |
| }, |
| { |
| "epoch": 1.6975824938354211, |
| "grad_norm": 0.7774503231048584, |
| "learning_rate": 0.0004356400079024408, |
| "loss": 1.6947, |
| "step": 411000 |
| }, |
| { |
| "epoch": 1.699647679351036, |
| "grad_norm": 0.8000075817108154, |
| "learning_rate": 0.00043494923510728397, |
| "loss": 1.6937, |
| "step": 411500 |
| }, |
| { |
| "epoch": 1.7017128648666509, |
| "grad_norm": 0.8032427430152893, |
| "learning_rate": 0.0004342584623121271, |
| "loss": 1.6921, |
| "step": 412000 |
| }, |
| { |
| "epoch": 1.7037780503822657, |
| "grad_norm": 0.7988405227661133, |
| "learning_rate": 0.00043356768951697026, |
| "loss": 1.6968, |
| "step": 412500 |
| }, |
| { |
| "epoch": 1.7058432358978806, |
| "grad_norm": 0.7719324231147766, |
| "learning_rate": 0.0004328769167218134, |
| "loss": 1.6912, |
| "step": 413000 |
| }, |
| { |
| "epoch": 1.7079084214134954, |
| "grad_norm": 0.7580344080924988, |
| "learning_rate": 0.0004321861439266565, |
| "loss": 1.6951, |
| "step": 413500 |
| }, |
| { |
| "epoch": 1.7099736069291105, |
| "grad_norm": 0.8045200705528259, |
| "learning_rate": 0.00043149537113149967, |
| "loss": 1.6921, |
| "step": 414000 |
| }, |
| { |
| "epoch": 1.7120387924447253, |
| "grad_norm": 0.7698059678077698, |
| "learning_rate": 0.00043080459833634284, |
| "loss": 1.6929, |
| "step": 414500 |
| }, |
| { |
| "epoch": 1.7141039779603402, |
| "grad_norm": 0.8124533891677856, |
| "learning_rate": 0.00043011382554118596, |
| "loss": 1.6918, |
| "step": 415000 |
| }, |
| { |
| "epoch": 1.716169163475955, |
| "grad_norm": 0.7770412564277649, |
| "learning_rate": 0.00042942305274602913, |
| "loss": 1.6903, |
| "step": 415500 |
| }, |
| { |
| "epoch": 1.71823434899157, |
| "grad_norm": 0.7901027202606201, |
| "learning_rate": 0.0004287322799508723, |
| "loss": 1.6932, |
| "step": 416000 |
| }, |
| { |
| "epoch": 1.7202995345071848, |
| "grad_norm": 0.7586656212806702, |
| "learning_rate": 0.00042804150715571536, |
| "loss": 1.6932, |
| "step": 416500 |
| }, |
| { |
| "epoch": 1.7223647200227996, |
| "grad_norm": 0.7596163153648376, |
| "learning_rate": 0.00042735073436055853, |
| "loss": 1.6979, |
| "step": 417000 |
| }, |
| { |
| "epoch": 1.7244299055384145, |
| "grad_norm": 0.7645015716552734, |
| "learning_rate": 0.0004266599615654017, |
| "loss": 1.6929, |
| "step": 417500 |
| }, |
| { |
| "epoch": 1.7264950910540295, |
| "grad_norm": 0.8256881237030029, |
| "learning_rate": 0.0004259691887702448, |
| "loss": 1.691, |
| "step": 418000 |
| }, |
| { |
| "epoch": 1.7285602765696444, |
| "grad_norm": 0.78524249792099, |
| "learning_rate": 0.000425278415975088, |
| "loss": 1.69, |
| "step": 418500 |
| }, |
| { |
| "epoch": 1.7306254620852592, |
| "grad_norm": 0.814737856388092, |
| "learning_rate": 0.00042458764317993117, |
| "loss": 1.6926, |
| "step": 419000 |
| }, |
| { |
| "epoch": 1.732690647600874, |
| "grad_norm": 0.7561067342758179, |
| "learning_rate": 0.00042389687038477423, |
| "loss": 1.6928, |
| "step": 419500 |
| }, |
| { |
| "epoch": 1.734755833116489, |
| "grad_norm": 0.7771661281585693, |
| "learning_rate": 0.0004232060975896174, |
| "loss": 1.6863, |
| "step": 420000 |
| }, |
| { |
| "epoch": 1.7368210186321038, |
| "grad_norm": 0.7177093625068665, |
| "learning_rate": 0.0004225153247944606, |
| "loss": 1.6931, |
| "step": 420500 |
| }, |
| { |
| "epoch": 1.7388862041477187, |
| "grad_norm": 0.8142688870429993, |
| "learning_rate": 0.0004218245519993037, |
| "loss": 1.6895, |
| "step": 421000 |
| }, |
| { |
| "epoch": 1.7409513896633335, |
| "grad_norm": 0.8166112899780273, |
| "learning_rate": 0.00042113377920414687, |
| "loss": 1.69, |
| "step": 421500 |
| }, |
| { |
| "epoch": 1.7430165751789484, |
| "grad_norm": 0.7927871942520142, |
| "learning_rate": 0.00042044300640899004, |
| "loss": 1.69, |
| "step": 422000 |
| }, |
| { |
| "epoch": 1.7450817606945632, |
| "grad_norm": 0.8192989230155945, |
| "learning_rate": 0.00041975223361383316, |
| "loss": 1.6834, |
| "step": 422500 |
| }, |
| { |
| "epoch": 1.747146946210178, |
| "grad_norm": 0.825117290019989, |
| "learning_rate": 0.00041906146081867627, |
| "loss": 1.6875, |
| "step": 423000 |
| }, |
| { |
| "epoch": 1.749212131725793, |
| "grad_norm": 0.8357008695602417, |
| "learning_rate": 0.00041837068802351944, |
| "loss": 1.6869, |
| "step": 423500 |
| }, |
| { |
| "epoch": 1.7512773172414078, |
| "grad_norm": 0.8047915101051331, |
| "learning_rate": 0.00041767991522836256, |
| "loss": 1.6864, |
| "step": 424000 |
| }, |
| { |
| "epoch": 1.7533425027570226, |
| "grad_norm": 0.8068717122077942, |
| "learning_rate": 0.00041698914243320573, |
| "loss": 1.6871, |
| "step": 424500 |
| }, |
| { |
| "epoch": 1.7554076882726375, |
| "grad_norm": 0.7879107594490051, |
| "learning_rate": 0.0004162983696380489, |
| "loss": 1.6826, |
| "step": 425000 |
| }, |
| { |
| "epoch": 1.7574728737882523, |
| "grad_norm": 0.7748578190803528, |
| "learning_rate": 0.000415607596842892, |
| "loss": 1.6831, |
| "step": 425500 |
| }, |
| { |
| "epoch": 1.7595380593038672, |
| "grad_norm": 0.7206512093544006, |
| "learning_rate": 0.0004149168240477352, |
| "loss": 1.6884, |
| "step": 426000 |
| }, |
| { |
| "epoch": 1.761603244819482, |
| "grad_norm": 0.7805559039115906, |
| "learning_rate": 0.0004142260512525783, |
| "loss": 1.6863, |
| "step": 426500 |
| }, |
| { |
| "epoch": 1.763668430335097, |
| "grad_norm": 0.8146787285804749, |
| "learning_rate": 0.00041353527845742143, |
| "loss": 1.6838, |
| "step": 427000 |
| }, |
| { |
| "epoch": 1.7657336158507118, |
| "grad_norm": 0.7216916680335999, |
| "learning_rate": 0.0004128445056622646, |
| "loss": 1.6863, |
| "step": 427500 |
| }, |
| { |
| "epoch": 1.7677988013663266, |
| "grad_norm": 0.7865545153617859, |
| "learning_rate": 0.0004121537328671078, |
| "loss": 1.6838, |
| "step": 428000 |
| }, |
| { |
| "epoch": 1.7698639868819415, |
| "grad_norm": 0.7617883682250977, |
| "learning_rate": 0.0004114629600719509, |
| "loss": 1.684, |
| "step": 428500 |
| }, |
| { |
| "epoch": 1.7719291723975563, |
| "grad_norm": 0.8186792135238647, |
| "learning_rate": 0.00041077218727679407, |
| "loss": 1.6828, |
| "step": 429000 |
| }, |
| { |
| "epoch": 1.7739943579131714, |
| "grad_norm": 0.7898605465888977, |
| "learning_rate": 0.00041008141448163724, |
| "loss": 1.68, |
| "step": 429500 |
| }, |
| { |
| "epoch": 1.7760595434287862, |
| "grad_norm": 0.7490332126617432, |
| "learning_rate": 0.0004093906416864803, |
| "loss": 1.6822, |
| "step": 430000 |
| }, |
| { |
| "epoch": 1.778124728944401, |
| "grad_norm": 0.7616461515426636, |
| "learning_rate": 0.00040869986889132347, |
| "loss": 1.6866, |
| "step": 430500 |
| }, |
| { |
| "epoch": 1.780189914460016, |
| "grad_norm": 0.7681095004081726, |
| "learning_rate": 0.00040800909609616664, |
| "loss": 1.6811, |
| "step": 431000 |
| }, |
| { |
| "epoch": 1.7822550999756308, |
| "grad_norm": 0.7684192657470703, |
| "learning_rate": 0.00040731832330100976, |
| "loss": 1.6862, |
| "step": 431500 |
| }, |
| { |
| "epoch": 1.7843202854912457, |
| "grad_norm": 0.7826496362686157, |
| "learning_rate": 0.00040662755050585293, |
| "loss": 1.6859, |
| "step": 432000 |
| }, |
| { |
| "epoch": 1.7863854710068605, |
| "grad_norm": 0.7974809408187866, |
| "learning_rate": 0.0004059367777106961, |
| "loss": 1.6833, |
| "step": 432500 |
| }, |
| { |
| "epoch": 1.7884506565224754, |
| "grad_norm": 0.8294712901115417, |
| "learning_rate": 0.0004052460049155392, |
| "loss": 1.6832, |
| "step": 433000 |
| }, |
| { |
| "epoch": 1.7905158420380904, |
| "grad_norm": 0.8153785467147827, |
| "learning_rate": 0.00040455523212038234, |
| "loss": 1.6811, |
| "step": 433500 |
| }, |
| { |
| "epoch": 1.7925810275537053, |
| "grad_norm": 0.80795818567276, |
| "learning_rate": 0.0004038644593252255, |
| "loss": 1.6788, |
| "step": 434000 |
| }, |
| { |
| "epoch": 1.7946462130693202, |
| "grad_norm": 0.7648016214370728, |
| "learning_rate": 0.00040317368653006863, |
| "loss": 1.686, |
| "step": 434500 |
| }, |
| { |
| "epoch": 1.796711398584935, |
| "grad_norm": 0.7882753610610962, |
| "learning_rate": 0.0004024829137349118, |
| "loss": 1.6792, |
| "step": 435000 |
| }, |
| { |
| "epoch": 1.7987765841005499, |
| "grad_norm": 0.7577452659606934, |
| "learning_rate": 0.000401792140939755, |
| "loss": 1.6803, |
| "step": 435500 |
| }, |
| { |
| "epoch": 1.8008417696161647, |
| "grad_norm": 0.7712865471839905, |
| "learning_rate": 0.0004011013681445981, |
| "loss": 1.6813, |
| "step": 436000 |
| }, |
| { |
| "epoch": 1.8029069551317796, |
| "grad_norm": 0.7820202708244324, |
| "learning_rate": 0.00040041059534944126, |
| "loss": 1.6819, |
| "step": 436500 |
| }, |
| { |
| "epoch": 1.8049721406473944, |
| "grad_norm": 0.7566621899604797, |
| "learning_rate": 0.0003997198225542844, |
| "loss": 1.68, |
| "step": 437000 |
| }, |
| { |
| "epoch": 1.8070373261630093, |
| "grad_norm": 0.7587839365005493, |
| "learning_rate": 0.0003990290497591275, |
| "loss": 1.6833, |
| "step": 437500 |
| }, |
| { |
| "epoch": 1.8091025116786241, |
| "grad_norm": 0.798997700214386, |
| "learning_rate": 0.00039833827696397067, |
| "loss": 1.6797, |
| "step": 438000 |
| }, |
| { |
| "epoch": 1.811167697194239, |
| "grad_norm": 0.7913112044334412, |
| "learning_rate": 0.00039764750416881384, |
| "loss": 1.6792, |
| "step": 438500 |
| }, |
| { |
| "epoch": 1.8132328827098538, |
| "grad_norm": 0.7663547992706299, |
| "learning_rate": 0.00039695673137365696, |
| "loss": 1.6807, |
| "step": 439000 |
| }, |
| { |
| "epoch": 1.8152980682254687, |
| "grad_norm": 0.77425217628479, |
| "learning_rate": 0.00039626595857850013, |
| "loss": 1.6759, |
| "step": 439500 |
| }, |
| { |
| "epoch": 1.8173632537410835, |
| "grad_norm": 0.807633101940155, |
| "learning_rate": 0.0003955751857833433, |
| "loss": 1.6777, |
| "step": 440000 |
| }, |
| { |
| "epoch": 1.8194284392566984, |
| "grad_norm": 0.7748910188674927, |
| "learning_rate": 0.00039488441298818637, |
| "loss": 1.6794, |
| "step": 440500 |
| }, |
| { |
| "epoch": 1.8214936247723132, |
| "grad_norm": 0.8132478594779968, |
| "learning_rate": 0.00039419364019302954, |
| "loss": 1.6777, |
| "step": 441000 |
| }, |
| { |
| "epoch": 1.823558810287928, |
| "grad_norm": 0.7609587907791138, |
| "learning_rate": 0.0003935028673978727, |
| "loss": 1.6775, |
| "step": 441500 |
| }, |
| { |
| "epoch": 1.825623995803543, |
| "grad_norm": 0.8203696608543396, |
| "learning_rate": 0.00039281209460271583, |
| "loss": 1.675, |
| "step": 442000 |
| }, |
| { |
| "epoch": 1.8276891813191578, |
| "grad_norm": 0.7865495681762695, |
| "learning_rate": 0.000392121321807559, |
| "loss": 1.6783, |
| "step": 442500 |
| }, |
| { |
| "epoch": 1.8297543668347727, |
| "grad_norm": 0.7632693648338318, |
| "learning_rate": 0.0003914305490124022, |
| "loss": 1.6771, |
| "step": 443000 |
| }, |
| { |
| "epoch": 1.8318195523503875, |
| "grad_norm": 0.790891706943512, |
| "learning_rate": 0.0003907397762172453, |
| "loss": 1.6787, |
| "step": 443500 |
| }, |
| { |
| "epoch": 1.8338847378660024, |
| "grad_norm": 0.7918925881385803, |
| "learning_rate": 0.0003900490034220884, |
| "loss": 1.6749, |
| "step": 444000 |
| }, |
| { |
| "epoch": 1.8359499233816172, |
| "grad_norm": 0.8381515741348267, |
| "learning_rate": 0.0003893582306269316, |
| "loss": 1.6741, |
| "step": 444500 |
| }, |
| { |
| "epoch": 1.838015108897232, |
| "grad_norm": 0.8085419535636902, |
| "learning_rate": 0.0003886674578317747, |
| "loss": 1.6697, |
| "step": 445000 |
| }, |
| { |
| "epoch": 1.8400802944128472, |
| "grad_norm": 0.7606683969497681, |
| "learning_rate": 0.00038797668503661787, |
| "loss": 1.6745, |
| "step": 445500 |
| }, |
| { |
| "epoch": 1.842145479928462, |
| "grad_norm": 0.7622495889663696, |
| "learning_rate": 0.00038728591224146104, |
| "loss": 1.6722, |
| "step": 446000 |
| }, |
| { |
| "epoch": 1.8442106654440769, |
| "grad_norm": 0.8180463910102844, |
| "learning_rate": 0.00038659513944630416, |
| "loss": 1.6708, |
| "step": 446500 |
| }, |
| { |
| "epoch": 1.8462758509596917, |
| "grad_norm": 0.7783413529396057, |
| "learning_rate": 0.00038590436665114733, |
| "loss": 1.6741, |
| "step": 447000 |
| }, |
| { |
| "epoch": 1.8483410364753066, |
| "grad_norm": 0.7698727250099182, |
| "learning_rate": 0.00038521359385599045, |
| "loss": 1.6744, |
| "step": 447500 |
| }, |
| { |
| "epoch": 1.8504062219909214, |
| "grad_norm": 0.7889679670333862, |
| "learning_rate": 0.00038452282106083357, |
| "loss": 1.6744, |
| "step": 448000 |
| }, |
| { |
| "epoch": 1.8524714075065363, |
| "grad_norm": 0.8463781476020813, |
| "learning_rate": 0.00038383204826567674, |
| "loss": 1.6693, |
| "step": 448500 |
| }, |
| { |
| "epoch": 1.8545365930221511, |
| "grad_norm": 0.7730614542961121, |
| "learning_rate": 0.0003831412754705199, |
| "loss": 1.6748, |
| "step": 449000 |
| }, |
| { |
| "epoch": 1.8566017785377662, |
| "grad_norm": 0.7694717049598694, |
| "learning_rate": 0.00038245050267536303, |
| "loss": 1.6723, |
| "step": 449500 |
| }, |
| { |
| "epoch": 1.858666964053381, |
| "grad_norm": 0.7720078229904175, |
| "learning_rate": 0.0003817597298802062, |
| "loss": 1.6712, |
| "step": 450000 |
| }, |
| { |
| "epoch": 1.860732149568996, |
| "grad_norm": 0.7817273139953613, |
| "learning_rate": 0.0003810689570850494, |
| "loss": 1.6703, |
| "step": 450500 |
| }, |
| { |
| "epoch": 1.8627973350846108, |
| "grad_norm": 0.7825304269790649, |
| "learning_rate": 0.00038037818428989244, |
| "loss": 1.6688, |
| "step": 451000 |
| }, |
| { |
| "epoch": 1.8648625206002256, |
| "grad_norm": 0.7758463621139526, |
| "learning_rate": 0.0003796874114947356, |
| "loss": 1.6657, |
| "step": 451500 |
| }, |
| { |
| "epoch": 1.8669277061158405, |
| "grad_norm": 0.7757241129875183, |
| "learning_rate": 0.0003789966386995788, |
| "loss": 1.6734, |
| "step": 452000 |
| }, |
| { |
| "epoch": 1.8689928916314553, |
| "grad_norm": 0.8086944222450256, |
| "learning_rate": 0.0003783058659044219, |
| "loss": 1.669, |
| "step": 452500 |
| }, |
| { |
| "epoch": 1.8710580771470702, |
| "grad_norm": 0.7736507058143616, |
| "learning_rate": 0.00037761509310926507, |
| "loss": 1.6729, |
| "step": 453000 |
| }, |
| { |
| "epoch": 1.873123262662685, |
| "grad_norm": 0.7895172834396362, |
| "learning_rate": 0.00037692432031410824, |
| "loss": 1.6681, |
| "step": 453500 |
| }, |
| { |
| "epoch": 1.8751884481782999, |
| "grad_norm": 0.7610639929771423, |
| "learning_rate": 0.00037623354751895136, |
| "loss": 1.6624, |
| "step": 454000 |
| }, |
| { |
| "epoch": 1.8772536336939147, |
| "grad_norm": 0.7881196737289429, |
| "learning_rate": 0.0003755427747237945, |
| "loss": 1.6697, |
| "step": 454500 |
| }, |
| { |
| "epoch": 1.8793188192095296, |
| "grad_norm": 0.7839071154594421, |
| "learning_rate": 0.00037485200192863765, |
| "loss": 1.6713, |
| "step": 455000 |
| }, |
| { |
| "epoch": 1.8813840047251444, |
| "grad_norm": 0.7790060043334961, |
| "learning_rate": 0.00037416122913348077, |
| "loss": 1.6683, |
| "step": 455500 |
| }, |
| { |
| "epoch": 1.8834491902407593, |
| "grad_norm": 0.757612943649292, |
| "learning_rate": 0.00037347045633832394, |
| "loss": 1.6662, |
| "step": 456000 |
| }, |
| { |
| "epoch": 1.8855143757563742, |
| "grad_norm": 0.7868499755859375, |
| "learning_rate": 0.0003727796835431671, |
| "loss": 1.6666, |
| "step": 456500 |
| }, |
| { |
| "epoch": 1.887579561271989, |
| "grad_norm": 0.8040853142738342, |
| "learning_rate": 0.00037208891074801023, |
| "loss": 1.6637, |
| "step": 457000 |
| }, |
| { |
| "epoch": 1.8896447467876039, |
| "grad_norm": 0.7756462693214417, |
| "learning_rate": 0.0003713981379528534, |
| "loss": 1.6678, |
| "step": 457500 |
| }, |
| { |
| "epoch": 1.8917099323032187, |
| "grad_norm": 0.781300961971283, |
| "learning_rate": 0.0003707073651576965, |
| "loss": 1.6656, |
| "step": 458000 |
| }, |
| { |
| "epoch": 1.8937751178188336, |
| "grad_norm": 0.7810469269752502, |
| "learning_rate": 0.00037001659236253964, |
| "loss": 1.6617, |
| "step": 458500 |
| }, |
| { |
| "epoch": 1.8958403033344484, |
| "grad_norm": 0.7562840580940247, |
| "learning_rate": 0.0003693258195673828, |
| "loss": 1.6635, |
| "step": 459000 |
| }, |
| { |
| "epoch": 1.8979054888500633, |
| "grad_norm": 0.7803590893745422, |
| "learning_rate": 0.000368635046772226, |
| "loss": 1.6689, |
| "step": 459500 |
| }, |
| { |
| "epoch": 1.8999706743656781, |
| "grad_norm": 0.8209202885627747, |
| "learning_rate": 0.0003679442739770691, |
| "loss": 1.6632, |
| "step": 460000 |
| }, |
| { |
| "epoch": 1.902035859881293, |
| "grad_norm": 0.7608214020729065, |
| "learning_rate": 0.00036725350118191227, |
| "loss": 1.6607, |
| "step": 460500 |
| }, |
| { |
| "epoch": 1.904101045396908, |
| "grad_norm": 0.796277642250061, |
| "learning_rate": 0.00036656272838675544, |
| "loss": 1.6653, |
| "step": 461000 |
| }, |
| { |
| "epoch": 1.906166230912523, |
| "grad_norm": 0.796653687953949, |
| "learning_rate": 0.0003658719555915985, |
| "loss": 1.6601, |
| "step": 461500 |
| }, |
| { |
| "epoch": 1.9082314164281378, |
| "grad_norm": 0.7833842039108276, |
| "learning_rate": 0.0003651811827964417, |
| "loss": 1.662, |
| "step": 462000 |
| }, |
| { |
| "epoch": 1.9102966019437526, |
| "grad_norm": 0.7710606455802917, |
| "learning_rate": 0.00036449041000128485, |
| "loss": 1.6616, |
| "step": 462500 |
| }, |
| { |
| "epoch": 1.9123617874593675, |
| "grad_norm": 0.7609611749649048, |
| "learning_rate": 0.00036379963720612797, |
| "loss": 1.6612, |
| "step": 463000 |
| }, |
| { |
| "epoch": 1.9144269729749823, |
| "grad_norm": 0.7709171175956726, |
| "learning_rate": 0.00036310886441097114, |
| "loss": 1.6657, |
| "step": 463500 |
| }, |
| { |
| "epoch": 1.9164921584905972, |
| "grad_norm": 0.7778812646865845, |
| "learning_rate": 0.0003624180916158143, |
| "loss": 1.6663, |
| "step": 464000 |
| }, |
| { |
| "epoch": 1.918557344006212, |
| "grad_norm": 0.7948848605155945, |
| "learning_rate": 0.0003617273188206574, |
| "loss": 1.6607, |
| "step": 464500 |
| }, |
| { |
| "epoch": 1.920622529521827, |
| "grad_norm": 0.7880497574806213, |
| "learning_rate": 0.00036103654602550055, |
| "loss": 1.6615, |
| "step": 465000 |
| }, |
| { |
| "epoch": 1.922687715037442, |
| "grad_norm": 0.7933222055435181, |
| "learning_rate": 0.0003603457732303437, |
| "loss": 1.6622, |
| "step": 465500 |
| }, |
| { |
| "epoch": 1.9247529005530568, |
| "grad_norm": 0.7489884495735168, |
| "learning_rate": 0.00035965500043518684, |
| "loss": 1.6594, |
| "step": 466000 |
| }, |
| { |
| "epoch": 1.9268180860686717, |
| "grad_norm": 0.7909550666809082, |
| "learning_rate": 0.00035896422764003, |
| "loss": 1.6606, |
| "step": 466500 |
| }, |
| { |
| "epoch": 1.9288832715842865, |
| "grad_norm": 0.8264633417129517, |
| "learning_rate": 0.0003582734548448732, |
| "loss": 1.6586, |
| "step": 467000 |
| }, |
| { |
| "epoch": 1.9309484570999014, |
| "grad_norm": 0.8184587359428406, |
| "learning_rate": 0.00035758268204971635, |
| "loss": 1.6621, |
| "step": 467500 |
| }, |
| { |
| "epoch": 1.9330136426155162, |
| "grad_norm": 0.78268963098526, |
| "learning_rate": 0.0003568919092545594, |
| "loss": 1.6603, |
| "step": 468000 |
| }, |
| { |
| "epoch": 1.935078828131131, |
| "grad_norm": 0.7832273244857788, |
| "learning_rate": 0.0003562011364594026, |
| "loss": 1.661, |
| "step": 468500 |
| }, |
| { |
| "epoch": 1.937144013646746, |
| "grad_norm": 0.7547221183776855, |
| "learning_rate": 0.00035551036366424576, |
| "loss": 1.6607, |
| "step": 469000 |
| }, |
| { |
| "epoch": 1.9392091991623608, |
| "grad_norm": 0.8514434099197388, |
| "learning_rate": 0.0003548195908690889, |
| "loss": 1.6612, |
| "step": 469500 |
| }, |
| { |
| "epoch": 1.9412743846779756, |
| "grad_norm": 0.7895204424858093, |
| "learning_rate": 0.00035412881807393205, |
| "loss": 1.6577, |
| "step": 470000 |
| }, |
| { |
| "epoch": 1.9433395701935905, |
| "grad_norm": 0.8596895933151245, |
| "learning_rate": 0.0003534380452787752, |
| "loss": 1.6572, |
| "step": 470500 |
| }, |
| { |
| "epoch": 1.9454047557092053, |
| "grad_norm": 0.7693920731544495, |
| "learning_rate": 0.00035274727248361834, |
| "loss": 1.6587, |
| "step": 471000 |
| }, |
| { |
| "epoch": 1.9474699412248202, |
| "grad_norm": 0.8171895742416382, |
| "learning_rate": 0.00035205649968846146, |
| "loss": 1.6628, |
| "step": 471500 |
| }, |
| { |
| "epoch": 1.949535126740435, |
| "grad_norm": 0.7534123659133911, |
| "learning_rate": 0.00035136572689330463, |
| "loss": 1.6561, |
| "step": 472000 |
| }, |
| { |
| "epoch": 1.95160031225605, |
| "grad_norm": 0.7739940881729126, |
| "learning_rate": 0.00035067495409814775, |
| "loss": 1.6567, |
| "step": 472500 |
| }, |
| { |
| "epoch": 1.9536654977716648, |
| "grad_norm": 0.7825185656547546, |
| "learning_rate": 0.0003499841813029909, |
| "loss": 1.6544, |
| "step": 473000 |
| }, |
| { |
| "epoch": 1.9557306832872796, |
| "grad_norm": 0.7564761638641357, |
| "learning_rate": 0.0003492934085078341, |
| "loss": 1.6577, |
| "step": 473500 |
| }, |
| { |
| "epoch": 1.9577958688028945, |
| "grad_norm": 0.808772087097168, |
| "learning_rate": 0.0003486026357126772, |
| "loss": 1.655, |
| "step": 474000 |
| }, |
| { |
| "epoch": 1.9598610543185093, |
| "grad_norm": 0.8151499629020691, |
| "learning_rate": 0.0003479118629175204, |
| "loss": 1.6579, |
| "step": 474500 |
| }, |
| { |
| "epoch": 1.9619262398341242, |
| "grad_norm": 0.8655403852462769, |
| "learning_rate": 0.0003472210901223635, |
| "loss": 1.6532, |
| "step": 475000 |
| }, |
| { |
| "epoch": 1.963991425349739, |
| "grad_norm": 0.7786942720413208, |
| "learning_rate": 0.0003465303173272066, |
| "loss": 1.653, |
| "step": 475500 |
| }, |
| { |
| "epoch": 1.9660566108653539, |
| "grad_norm": 0.8005113005638123, |
| "learning_rate": 0.0003458395445320498, |
| "loss": 1.6538, |
| "step": 476000 |
| }, |
| { |
| "epoch": 1.968121796380969, |
| "grad_norm": 0.7797335386276245, |
| "learning_rate": 0.00034514877173689296, |
| "loss": 1.6567, |
| "step": 476500 |
| }, |
| { |
| "epoch": 1.9701869818965838, |
| "grad_norm": 0.7935357689857483, |
| "learning_rate": 0.0003444579989417361, |
| "loss": 1.6551, |
| "step": 477000 |
| }, |
| { |
| "epoch": 1.9722521674121987, |
| "grad_norm": 0.7659555077552795, |
| "learning_rate": 0.00034376722614657925, |
| "loss": 1.654, |
| "step": 477500 |
| }, |
| { |
| "epoch": 1.9743173529278135, |
| "grad_norm": 0.7984480857849121, |
| "learning_rate": 0.0003430764533514224, |
| "loss": 1.6513, |
| "step": 478000 |
| }, |
| { |
| "epoch": 1.9763825384434284, |
| "grad_norm": 0.7759101986885071, |
| "learning_rate": 0.0003423856805562655, |
| "loss": 1.6517, |
| "step": 478500 |
| }, |
| { |
| "epoch": 1.9784477239590432, |
| "grad_norm": 0.7922109961509705, |
| "learning_rate": 0.00034169490776110866, |
| "loss": 1.6551, |
| "step": 479000 |
| }, |
| { |
| "epoch": 1.980512909474658, |
| "grad_norm": 0.7864669561386108, |
| "learning_rate": 0.00034100413496595183, |
| "loss": 1.6521, |
| "step": 479500 |
| }, |
| { |
| "epoch": 1.982578094990273, |
| "grad_norm": 0.7987329959869385, |
| "learning_rate": 0.00034031336217079495, |
| "loss": 1.6531, |
| "step": 480000 |
| }, |
| { |
| "epoch": 1.984643280505888, |
| "grad_norm": 0.7777888774871826, |
| "learning_rate": 0.0003396225893756381, |
| "loss": 1.6509, |
| "step": 480500 |
| }, |
| { |
| "epoch": 1.9867084660215029, |
| "grad_norm": 0.7795775532722473, |
| "learning_rate": 0.0003389318165804813, |
| "loss": 1.6518, |
| "step": 481000 |
| }, |
| { |
| "epoch": 1.9887736515371177, |
| "grad_norm": 0.7711332440376282, |
| "learning_rate": 0.0003382410437853244, |
| "loss": 1.6519, |
| "step": 481500 |
| }, |
| { |
| "epoch": 1.9908388370527326, |
| "grad_norm": 0.8026793003082275, |
| "learning_rate": 0.0003375502709901675, |
| "loss": 1.6509, |
| "step": 482000 |
| }, |
| { |
| "epoch": 1.9929040225683474, |
| "grad_norm": 0.7959824204444885, |
| "learning_rate": 0.0003368594981950107, |
| "loss": 1.6511, |
| "step": 482500 |
| }, |
| { |
| "epoch": 1.9949692080839623, |
| "grad_norm": 0.7960503697395325, |
| "learning_rate": 0.0003361687253998538, |
| "loss": 1.6534, |
| "step": 483000 |
| }, |
| { |
| "epoch": 1.9970343935995771, |
| "grad_norm": 0.8475084900856018, |
| "learning_rate": 0.000335477952604697, |
| "loss": 1.6517, |
| "step": 483500 |
| }, |
| { |
| "epoch": 1.999099579115192, |
| "grad_norm": 0.7885191440582275, |
| "learning_rate": 0.00033478717980954016, |
| "loss": 1.6531, |
| "step": 484000 |
| }, |
| { |
| "epoch": 2.001164764630807, |
| "grad_norm": 0.7712221145629883, |
| "learning_rate": 0.0003340964070143833, |
| "loss": 1.6502, |
| "step": 484500 |
| }, |
| { |
| "epoch": 2.0032299501464217, |
| "grad_norm": 0.7648369669914246, |
| "learning_rate": 0.00033340563421922645, |
| "loss": 1.6441, |
| "step": 485000 |
| }, |
| { |
| "epoch": 2.0052951356620365, |
| "grad_norm": 0.8174281120300293, |
| "learning_rate": 0.00033271486142406957, |
| "loss": 1.6481, |
| "step": 485500 |
| }, |
| { |
| "epoch": 2.0073603211776514, |
| "grad_norm": 0.7810222506523132, |
| "learning_rate": 0.0003320240886289127, |
| "loss": 1.6484, |
| "step": 486000 |
| }, |
| { |
| "epoch": 2.0094255066932663, |
| "grad_norm": 0.8257454633712769, |
| "learning_rate": 0.00033133331583375586, |
| "loss": 1.6465, |
| "step": 486500 |
| }, |
| { |
| "epoch": 2.011490692208881, |
| "grad_norm": 0.7819936871528625, |
| "learning_rate": 0.00033064254303859903, |
| "loss": 1.6459, |
| "step": 487000 |
| }, |
| { |
| "epoch": 2.013555877724496, |
| "grad_norm": 0.7899196743965149, |
| "learning_rate": 0.00032995177024344215, |
| "loss": 1.6459, |
| "step": 487500 |
| }, |
| { |
| "epoch": 2.015621063240111, |
| "grad_norm": 0.8132250905036926, |
| "learning_rate": 0.0003292609974482853, |
| "loss": 1.6488, |
| "step": 488000 |
| }, |
| { |
| "epoch": 2.0176862487557257, |
| "grad_norm": 0.803816020488739, |
| "learning_rate": 0.0003285702246531285, |
| "loss": 1.6426, |
| "step": 488500 |
| }, |
| { |
| "epoch": 2.0197514342713405, |
| "grad_norm": 0.7602670788764954, |
| "learning_rate": 0.00032787945185797155, |
| "loss": 1.6462, |
| "step": 489000 |
| }, |
| { |
| "epoch": 2.0218166197869554, |
| "grad_norm": 0.7873088121414185, |
| "learning_rate": 0.0003271886790628147, |
| "loss": 1.6463, |
| "step": 489500 |
| }, |
| { |
| "epoch": 2.0238818053025702, |
| "grad_norm": 0.81231290102005, |
| "learning_rate": 0.0003264979062676579, |
| "loss": 1.6477, |
| "step": 490000 |
| }, |
| { |
| "epoch": 2.025946990818185, |
| "grad_norm": 0.8037064671516418, |
| "learning_rate": 0.000325807133472501, |
| "loss": 1.6454, |
| "step": 490500 |
| }, |
| { |
| "epoch": 2.0280121763338, |
| "grad_norm": 0.8113204836845398, |
| "learning_rate": 0.0003251163606773442, |
| "loss": 1.6449, |
| "step": 491000 |
| }, |
| { |
| "epoch": 2.030077361849415, |
| "grad_norm": 0.7967438101768494, |
| "learning_rate": 0.00032442558788218736, |
| "loss": 1.6413, |
| "step": 491500 |
| }, |
| { |
| "epoch": 2.0321425473650296, |
| "grad_norm": 0.7982317805290222, |
| "learning_rate": 0.0003237348150870305, |
| "loss": 1.6461, |
| "step": 492000 |
| }, |
| { |
| "epoch": 2.0342077328806445, |
| "grad_norm": 0.786389172077179, |
| "learning_rate": 0.0003230440422918736, |
| "loss": 1.6492, |
| "step": 492500 |
| }, |
| { |
| "epoch": 2.0362729183962593, |
| "grad_norm": 0.8528838157653809, |
| "learning_rate": 0.00032235326949671677, |
| "loss": 1.6433, |
| "step": 493000 |
| }, |
| { |
| "epoch": 2.038338103911874, |
| "grad_norm": 0.7775473594665527, |
| "learning_rate": 0.0003216624967015599, |
| "loss": 1.6445, |
| "step": 493500 |
| }, |
| { |
| "epoch": 2.040403289427489, |
| "grad_norm": 0.7589669227600098, |
| "learning_rate": 0.00032097172390640306, |
| "loss": 1.6424, |
| "step": 494000 |
| }, |
| { |
| "epoch": 2.0424684749431044, |
| "grad_norm": 0.7403915524482727, |
| "learning_rate": 0.00032028095111124623, |
| "loss": 1.6414, |
| "step": 494500 |
| }, |
| { |
| "epoch": 2.044533660458719, |
| "grad_norm": 0.7815344333648682, |
| "learning_rate": 0.00031959017831608935, |
| "loss": 1.6398, |
| "step": 495000 |
| }, |
| { |
| "epoch": 2.046598845974334, |
| "grad_norm": 0.7826516628265381, |
| "learning_rate": 0.0003188994055209325, |
| "loss": 1.6404, |
| "step": 495500 |
| }, |
| { |
| "epoch": 2.048664031489949, |
| "grad_norm": 0.8382503986358643, |
| "learning_rate": 0.00031820863272577564, |
| "loss": 1.6477, |
| "step": 496000 |
| }, |
| { |
| "epoch": 2.0507292170055638, |
| "grad_norm": 0.8345251679420471, |
| "learning_rate": 0.00031751785993061875, |
| "loss": 1.6395, |
| "step": 496500 |
| }, |
| { |
| "epoch": 2.0527944025211786, |
| "grad_norm": 0.7702645659446716, |
| "learning_rate": 0.0003168270871354619, |
| "loss": 1.6393, |
| "step": 497000 |
| }, |
| { |
| "epoch": 2.0548595880367935, |
| "grad_norm": 0.7861506938934326, |
| "learning_rate": 0.0003161363143403051, |
| "loss": 1.6431, |
| "step": 497500 |
| }, |
| { |
| "epoch": 2.0569247735524083, |
| "grad_norm": 0.8483462929725647, |
| "learning_rate": 0.0003154455415451482, |
| "loss": 1.638, |
| "step": 498000 |
| }, |
| { |
| "epoch": 2.058989959068023, |
| "grad_norm": 0.7427666783332825, |
| "learning_rate": 0.0003147547687499914, |
| "loss": 1.6398, |
| "step": 498500 |
| }, |
| { |
| "epoch": 2.061055144583638, |
| "grad_norm": 0.8200947046279907, |
| "learning_rate": 0.00031406399595483456, |
| "loss": 1.6442, |
| "step": 499000 |
| }, |
| { |
| "epoch": 2.063120330099253, |
| "grad_norm": 0.7826699018478394, |
| "learning_rate": 0.0003133732231596776, |
| "loss": 1.6373, |
| "step": 499500 |
| }, |
| { |
| "epoch": 2.0651855156148677, |
| "grad_norm": 0.8340067267417908, |
| "learning_rate": 0.0003126824503645208, |
| "loss": 1.6423, |
| "step": 500000 |
| }, |
| { |
| "epoch": 2.0672507011304826, |
| "grad_norm": 0.8408244252204895, |
| "learning_rate": 0.00031199167756936397, |
| "loss": 1.6385, |
| "step": 500500 |
| }, |
| { |
| "epoch": 2.0693158866460974, |
| "grad_norm": 0.7903205752372742, |
| "learning_rate": 0.0003113009047742071, |
| "loss": 1.64, |
| "step": 501000 |
| }, |
| { |
| "epoch": 2.0713810721617123, |
| "grad_norm": 0.8002933859825134, |
| "learning_rate": 0.00031061013197905026, |
| "loss": 1.6406, |
| "step": 501500 |
| }, |
| { |
| "epoch": 2.073446257677327, |
| "grad_norm": 0.7864850759506226, |
| "learning_rate": 0.00030991935918389343, |
| "loss": 1.6357, |
| "step": 502000 |
| }, |
| { |
| "epoch": 2.075511443192942, |
| "grad_norm": 0.8161391615867615, |
| "learning_rate": 0.00030922858638873655, |
| "loss": 1.6361, |
| "step": 502500 |
| }, |
| { |
| "epoch": 2.077576628708557, |
| "grad_norm": 0.8277705311775208, |
| "learning_rate": 0.00030853781359357966, |
| "loss": 1.6375, |
| "step": 503000 |
| }, |
| { |
| "epoch": 2.0796418142241717, |
| "grad_norm": 0.7988829016685486, |
| "learning_rate": 0.00030784704079842284, |
| "loss": 1.6327, |
| "step": 503500 |
| }, |
| { |
| "epoch": 2.0817069997397866, |
| "grad_norm": 0.7771642208099365, |
| "learning_rate": 0.00030715626800326595, |
| "loss": 1.6331, |
| "step": 504000 |
| }, |
| { |
| "epoch": 2.0837721852554014, |
| "grad_norm": 0.7471011281013489, |
| "learning_rate": 0.0003064654952081091, |
| "loss": 1.6352, |
| "step": 504500 |
| }, |
| { |
| "epoch": 2.0858373707710163, |
| "grad_norm": 0.7738475203514099, |
| "learning_rate": 0.0003057747224129523, |
| "loss": 1.6401, |
| "step": 505000 |
| }, |
| { |
| "epoch": 2.087902556286631, |
| "grad_norm": 0.7593071460723877, |
| "learning_rate": 0.0003050839496177954, |
| "loss": 1.6394, |
| "step": 505500 |
| }, |
| { |
| "epoch": 2.089967741802246, |
| "grad_norm": 0.7778981328010559, |
| "learning_rate": 0.0003043931768226386, |
| "loss": 1.6379, |
| "step": 506000 |
| }, |
| { |
| "epoch": 2.092032927317861, |
| "grad_norm": 0.7668618559837341, |
| "learning_rate": 0.0003037024040274817, |
| "loss": 1.6374, |
| "step": 506500 |
| }, |
| { |
| "epoch": 2.0940981128334757, |
| "grad_norm": 0.7854458689689636, |
| "learning_rate": 0.0003030116312323248, |
| "loss": 1.6314, |
| "step": 507000 |
| }, |
| { |
| "epoch": 2.0961632983490905, |
| "grad_norm": 0.7660508751869202, |
| "learning_rate": 0.000302320858437168, |
| "loss": 1.6326, |
| "step": 507500 |
| }, |
| { |
| "epoch": 2.0982284838647054, |
| "grad_norm": 0.759593665599823, |
| "learning_rate": 0.00030163008564201117, |
| "loss": 1.6352, |
| "step": 508000 |
| }, |
| { |
| "epoch": 2.1002936693803202, |
| "grad_norm": 0.7907975912094116, |
| "learning_rate": 0.0003009393128468543, |
| "loss": 1.6301, |
| "step": 508500 |
| }, |
| { |
| "epoch": 2.102358854895935, |
| "grad_norm": 0.8606127500534058, |
| "learning_rate": 0.00030024854005169746, |
| "loss": 1.6329, |
| "step": 509000 |
| }, |
| { |
| "epoch": 2.10442404041155, |
| "grad_norm": 0.788470447063446, |
| "learning_rate": 0.00029955776725654063, |
| "loss": 1.6336, |
| "step": 509500 |
| }, |
| { |
| "epoch": 2.106489225927165, |
| "grad_norm": 0.7975521087646484, |
| "learning_rate": 0.0002988669944613837, |
| "loss": 1.6354, |
| "step": 510000 |
| }, |
| { |
| "epoch": 2.10855441144278, |
| "grad_norm": 0.8134068250656128, |
| "learning_rate": 0.00029817622166622686, |
| "loss": 1.6354, |
| "step": 510500 |
| }, |
| { |
| "epoch": 2.110619596958395, |
| "grad_norm": 0.8084931373596191, |
| "learning_rate": 0.00029748544887107004, |
| "loss": 1.6348, |
| "step": 511000 |
| }, |
| { |
| "epoch": 2.11268478247401, |
| "grad_norm": 0.8037887811660767, |
| "learning_rate": 0.00029679467607591315, |
| "loss": 1.6315, |
| "step": 511500 |
| }, |
| { |
| "epoch": 2.1147499679896247, |
| "grad_norm": 0.7990454435348511, |
| "learning_rate": 0.0002961039032807563, |
| "loss": 1.6297, |
| "step": 512000 |
| }, |
| { |
| "epoch": 2.1168151535052395, |
| "grad_norm": 0.7971472144126892, |
| "learning_rate": 0.0002954131304855995, |
| "loss": 1.6312, |
| "step": 512500 |
| }, |
| { |
| "epoch": 2.1188803390208544, |
| "grad_norm": 0.8105595707893372, |
| "learning_rate": 0.0002947223576904426, |
| "loss": 1.6291, |
| "step": 513000 |
| }, |
| { |
| "epoch": 2.1209455245364692, |
| "grad_norm": 0.8046666979789734, |
| "learning_rate": 0.00029403158489528573, |
| "loss": 1.6323, |
| "step": 513500 |
| }, |
| { |
| "epoch": 2.123010710052084, |
| "grad_norm": 0.8228232860565186, |
| "learning_rate": 0.0002933408121001289, |
| "loss": 1.6309, |
| "step": 514000 |
| }, |
| { |
| "epoch": 2.125075895567699, |
| "grad_norm": 0.7555162906646729, |
| "learning_rate": 0.000292650039304972, |
| "loss": 1.6305, |
| "step": 514500 |
| }, |
| { |
| "epoch": 2.127141081083314, |
| "grad_norm": 0.7698606848716736, |
| "learning_rate": 0.0002919592665098152, |
| "loss": 1.6326, |
| "step": 515000 |
| }, |
| { |
| "epoch": 2.1292062665989286, |
| "grad_norm": 0.7718132138252258, |
| "learning_rate": 0.00029126849371465837, |
| "loss": 1.6291, |
| "step": 515500 |
| }, |
| { |
| "epoch": 2.1312714521145435, |
| "grad_norm": 0.7855656147003174, |
| "learning_rate": 0.0002905777209195015, |
| "loss": 1.6283, |
| "step": 516000 |
| }, |
| { |
| "epoch": 2.1333366376301584, |
| "grad_norm": 0.8064797520637512, |
| "learning_rate": 0.00028988694812434466, |
| "loss": 1.628, |
| "step": 516500 |
| }, |
| { |
| "epoch": 2.135401823145773, |
| "grad_norm": 0.7986974716186523, |
| "learning_rate": 0.0002891961753291878, |
| "loss": 1.6288, |
| "step": 517000 |
| }, |
| { |
| "epoch": 2.137467008661388, |
| "grad_norm": 0.8309503197669983, |
| "learning_rate": 0.0002885054025340309, |
| "loss": 1.6316, |
| "step": 517500 |
| }, |
| { |
| "epoch": 2.139532194177003, |
| "grad_norm": 0.8169652819633484, |
| "learning_rate": 0.00028781462973887406, |
| "loss": 1.6266, |
| "step": 518000 |
| }, |
| { |
| "epoch": 2.1415973796926178, |
| "grad_norm": 0.7754685282707214, |
| "learning_rate": 0.00028712385694371724, |
| "loss": 1.6307, |
| "step": 518500 |
| }, |
| { |
| "epoch": 2.1436625652082326, |
| "grad_norm": 0.7740616798400879, |
| "learning_rate": 0.00028643308414856035, |
| "loss": 1.6287, |
| "step": 519000 |
| }, |
| { |
| "epoch": 2.1457277507238475, |
| "grad_norm": 0.7874515056610107, |
| "learning_rate": 0.0002857423113534035, |
| "loss": 1.6254, |
| "step": 519500 |
| }, |
| { |
| "epoch": 2.1477929362394623, |
| "grad_norm": 0.8042634725570679, |
| "learning_rate": 0.0002850515385582467, |
| "loss": 1.6248, |
| "step": 520000 |
| }, |
| { |
| "epoch": 2.149858121755077, |
| "grad_norm": 0.8339025974273682, |
| "learning_rate": 0.00028436076576308976, |
| "loss": 1.6293, |
| "step": 520500 |
| }, |
| { |
| "epoch": 2.151923307270692, |
| "grad_norm": 0.822348415851593, |
| "learning_rate": 0.00028366999296793293, |
| "loss": 1.6297, |
| "step": 521000 |
| }, |
| { |
| "epoch": 2.153988492786307, |
| "grad_norm": 0.7726020812988281, |
| "learning_rate": 0.0002829792201727761, |
| "loss": 1.6291, |
| "step": 521500 |
| }, |
| { |
| "epoch": 2.1560536783019217, |
| "grad_norm": 0.7853801846504211, |
| "learning_rate": 0.0002822884473776192, |
| "loss": 1.6255, |
| "step": 522000 |
| }, |
| { |
| "epoch": 2.1581188638175366, |
| "grad_norm": 0.7884477376937866, |
| "learning_rate": 0.0002815976745824624, |
| "loss": 1.627, |
| "step": 522500 |
| }, |
| { |
| "epoch": 2.1601840493331514, |
| "grad_norm": 0.8253931999206543, |
| "learning_rate": 0.00028090690178730557, |
| "loss": 1.6259, |
| "step": 523000 |
| }, |
| { |
| "epoch": 2.1622492348487663, |
| "grad_norm": 0.7904614210128784, |
| "learning_rate": 0.00028021612899214863, |
| "loss": 1.6269, |
| "step": 523500 |
| }, |
| { |
| "epoch": 2.164314420364381, |
| "grad_norm": 0.7632104158401489, |
| "learning_rate": 0.0002795253561969918, |
| "loss": 1.6276, |
| "step": 524000 |
| }, |
| { |
| "epoch": 2.166379605879996, |
| "grad_norm": 0.8433115482330322, |
| "learning_rate": 0.000278834583401835, |
| "loss": 1.624, |
| "step": 524500 |
| }, |
| { |
| "epoch": 2.168444791395611, |
| "grad_norm": 0.8692212104797363, |
| "learning_rate": 0.0002781438106066781, |
| "loss": 1.6217, |
| "step": 525000 |
| }, |
| { |
| "epoch": 2.1705099769112257, |
| "grad_norm": 0.7796012759208679, |
| "learning_rate": 0.00027745303781152126, |
| "loss": 1.622, |
| "step": 525500 |
| }, |
| { |
| "epoch": 2.172575162426841, |
| "grad_norm": 0.8118318319320679, |
| "learning_rate": 0.00027676226501636443, |
| "loss": 1.6234, |
| "step": 526000 |
| }, |
| { |
| "epoch": 2.174640347942456, |
| "grad_norm": 0.7940993309020996, |
| "learning_rate": 0.00027607149222120755, |
| "loss": 1.626, |
| "step": 526500 |
| }, |
| { |
| "epoch": 2.1767055334580707, |
| "grad_norm": 0.797366738319397, |
| "learning_rate": 0.00027538071942605067, |
| "loss": 1.6189, |
| "step": 527000 |
| }, |
| { |
| "epoch": 2.1787707189736856, |
| "grad_norm": 0.7807763814926147, |
| "learning_rate": 0.00027468994663089384, |
| "loss": 1.6204, |
| "step": 527500 |
| }, |
| { |
| "epoch": 2.1808359044893004, |
| "grad_norm": 0.8015199303627014, |
| "learning_rate": 0.000273999173835737, |
| "loss": 1.6214, |
| "step": 528000 |
| }, |
| { |
| "epoch": 2.1829010900049153, |
| "grad_norm": 0.8279714584350586, |
| "learning_rate": 0.00027330840104058013, |
| "loss": 1.6238, |
| "step": 528500 |
| }, |
| { |
| "epoch": 2.18496627552053, |
| "grad_norm": 0.7654675245285034, |
| "learning_rate": 0.0002726176282454233, |
| "loss": 1.6285, |
| "step": 529000 |
| }, |
| { |
| "epoch": 2.187031461036145, |
| "grad_norm": 0.7837437391281128, |
| "learning_rate": 0.0002719268554502665, |
| "loss": 1.6214, |
| "step": 529500 |
| }, |
| { |
| "epoch": 2.18909664655176, |
| "grad_norm": 1.069981336593628, |
| "learning_rate": 0.0002712360826551096, |
| "loss": 1.6179, |
| "step": 530000 |
| }, |
| { |
| "epoch": 2.1911618320673747, |
| "grad_norm": 0.7750839591026306, |
| "learning_rate": 0.0002705453098599527, |
| "loss": 1.6207, |
| "step": 530500 |
| }, |
| { |
| "epoch": 2.1932270175829895, |
| "grad_norm": 0.7411586046218872, |
| "learning_rate": 0.0002698545370647959, |
| "loss": 1.6213, |
| "step": 531000 |
| }, |
| { |
| "epoch": 2.1952922030986044, |
| "grad_norm": 0.8239914774894714, |
| "learning_rate": 0.000269163764269639, |
| "loss": 1.6162, |
| "step": 531500 |
| }, |
| { |
| "epoch": 2.1973573886142193, |
| "grad_norm": 0.7895837426185608, |
| "learning_rate": 0.0002684729914744822, |
| "loss": 1.6175, |
| "step": 532000 |
| }, |
| { |
| "epoch": 2.199422574129834, |
| "grad_norm": 0.7678940892219543, |
| "learning_rate": 0.00026778221867932534, |
| "loss": 1.6182, |
| "step": 532500 |
| }, |
| { |
| "epoch": 2.201487759645449, |
| "grad_norm": 0.7663738131523132, |
| "learning_rate": 0.00026709144588416846, |
| "loss": 1.6161, |
| "step": 533000 |
| }, |
| { |
| "epoch": 2.203552945161064, |
| "grad_norm": 0.8070668578147888, |
| "learning_rate": 0.00026640067308901163, |
| "loss": 1.6155, |
| "step": 533500 |
| }, |
| { |
| "epoch": 2.2056181306766787, |
| "grad_norm": 0.8500379323959351, |
| "learning_rate": 0.00026570990029385475, |
| "loss": 1.6189, |
| "step": 534000 |
| }, |
| { |
| "epoch": 2.2076833161922935, |
| "grad_norm": 0.8292637467384338, |
| "learning_rate": 0.00026501912749869787, |
| "loss": 1.6153, |
| "step": 534500 |
| }, |
| { |
| "epoch": 2.2097485017079084, |
| "grad_norm": 0.7907617688179016, |
| "learning_rate": 0.00026432835470354104, |
| "loss": 1.6174, |
| "step": 535000 |
| }, |
| { |
| "epoch": 2.2118136872235232, |
| "grad_norm": 0.7643933892250061, |
| "learning_rate": 0.0002636375819083842, |
| "loss": 1.6175, |
| "step": 535500 |
| }, |
| { |
| "epoch": 2.213878872739138, |
| "grad_norm": 0.7963258624076843, |
| "learning_rate": 0.00026294680911322733, |
| "loss": 1.6184, |
| "step": 536000 |
| }, |
| { |
| "epoch": 2.215944058254753, |
| "grad_norm": 0.7595391273498535, |
| "learning_rate": 0.0002622560363180705, |
| "loss": 1.6167, |
| "step": 536500 |
| }, |
| { |
| "epoch": 2.218009243770368, |
| "grad_norm": 0.8099820613861084, |
| "learning_rate": 0.0002615652635229137, |
| "loss": 1.6172, |
| "step": 537000 |
| }, |
| { |
| "epoch": 2.2200744292859826, |
| "grad_norm": 0.82416170835495, |
| "learning_rate": 0.00026087449072775674, |
| "loss": 1.6141, |
| "step": 537500 |
| }, |
| { |
| "epoch": 2.2221396148015975, |
| "grad_norm": 0.8243468999862671, |
| "learning_rate": 0.0002601837179325999, |
| "loss": 1.616, |
| "step": 538000 |
| }, |
| { |
| "epoch": 2.2242048003172123, |
| "grad_norm": 0.8235235214233398, |
| "learning_rate": 0.0002594929451374431, |
| "loss": 1.6142, |
| "step": 538500 |
| }, |
| { |
| "epoch": 2.226269985832827, |
| "grad_norm": 0.8147215843200684, |
| "learning_rate": 0.0002588021723422862, |
| "loss": 1.6113, |
| "step": 539000 |
| }, |
| { |
| "epoch": 2.228335171348442, |
| "grad_norm": 0.8038352131843567, |
| "learning_rate": 0.00025811139954712937, |
| "loss": 1.6198, |
| "step": 539500 |
| }, |
| { |
| "epoch": 2.230400356864057, |
| "grad_norm": 0.7971067428588867, |
| "learning_rate": 0.00025742062675197254, |
| "loss": 1.6171, |
| "step": 540000 |
| }, |
| { |
| "epoch": 2.2324655423796718, |
| "grad_norm": 0.8829773664474487, |
| "learning_rate": 0.00025672985395681566, |
| "loss": 1.6124, |
| "step": 540500 |
| }, |
| { |
| "epoch": 2.2345307278952866, |
| "grad_norm": 0.8199840784072876, |
| "learning_rate": 0.0002560390811616588, |
| "loss": 1.6113, |
| "step": 541000 |
| }, |
| { |
| "epoch": 2.236595913410902, |
| "grad_norm": 0.8040071725845337, |
| "learning_rate": 0.00025534830836650195, |
| "loss": 1.617, |
| "step": 541500 |
| }, |
| { |
| "epoch": 2.2386610989265168, |
| "grad_norm": 0.7963501811027527, |
| "learning_rate": 0.00025465753557134507, |
| "loss": 1.6092, |
| "step": 542000 |
| }, |
| { |
| "epoch": 2.2407262844421316, |
| "grad_norm": 0.8022527694702148, |
| "learning_rate": 0.00025396676277618824, |
| "loss": 1.6121, |
| "step": 542500 |
| }, |
| { |
| "epoch": 2.2427914699577465, |
| "grad_norm": 0.7954930663108826, |
| "learning_rate": 0.0002532759899810314, |
| "loss": 1.6151, |
| "step": 543000 |
| }, |
| { |
| "epoch": 2.2448566554733613, |
| "grad_norm": 0.7801050543785095, |
| "learning_rate": 0.00025258521718587453, |
| "loss": 1.6093, |
| "step": 543500 |
| }, |
| { |
| "epoch": 2.246921840988976, |
| "grad_norm": 0.7806600332260132, |
| "learning_rate": 0.0002518944443907177, |
| "loss": 1.6089, |
| "step": 544000 |
| }, |
| { |
| "epoch": 2.248987026504591, |
| "grad_norm": 0.7561779022216797, |
| "learning_rate": 0.0002512036715955608, |
| "loss": 1.6132, |
| "step": 544500 |
| }, |
| { |
| "epoch": 2.251052212020206, |
| "grad_norm": 0.8682865500450134, |
| "learning_rate": 0.00025051289880040394, |
| "loss": 1.6124, |
| "step": 545000 |
| }, |
| { |
| "epoch": 2.2531173975358207, |
| "grad_norm": 0.7335362434387207, |
| "learning_rate": 0.0002498221260052471, |
| "loss": 1.6115, |
| "step": 545500 |
| }, |
| { |
| "epoch": 2.2551825830514356, |
| "grad_norm": 0.8360188603401184, |
| "learning_rate": 0.0002491313532100903, |
| "loss": 1.6157, |
| "step": 546000 |
| }, |
| { |
| "epoch": 2.2572477685670504, |
| "grad_norm": 0.8048787713050842, |
| "learning_rate": 0.0002484405804149334, |
| "loss": 1.6138, |
| "step": 546500 |
| }, |
| { |
| "epoch": 2.2593129540826653, |
| "grad_norm": 0.7759965658187866, |
| "learning_rate": 0.00024774980761977657, |
| "loss": 1.6112, |
| "step": 547000 |
| }, |
| { |
| "epoch": 2.26137813959828, |
| "grad_norm": 0.8284432888031006, |
| "learning_rate": 0.0002470590348246197, |
| "loss": 1.6077, |
| "step": 547500 |
| }, |
| { |
| "epoch": 2.263443325113895, |
| "grad_norm": 0.8492142558097839, |
| "learning_rate": 0.00024636826202946286, |
| "loss": 1.6059, |
| "step": 548000 |
| }, |
| { |
| "epoch": 2.26550851062951, |
| "grad_norm": 0.921442449092865, |
| "learning_rate": 0.000245677489234306, |
| "loss": 1.6078, |
| "step": 548500 |
| }, |
| { |
| "epoch": 2.2675736961451247, |
| "grad_norm": 0.7907894253730774, |
| "learning_rate": 0.00024498671643914915, |
| "loss": 1.6118, |
| "step": 549000 |
| }, |
| { |
| "epoch": 2.2696388816607396, |
| "grad_norm": 0.7716451287269592, |
| "learning_rate": 0.0002442959436439923, |
| "loss": 1.6066, |
| "step": 549500 |
| }, |
| { |
| "epoch": 2.2717040671763544, |
| "grad_norm": 0.757423460483551, |
| "learning_rate": 0.00024360517084883544, |
| "loss": 1.6098, |
| "step": 550000 |
| }, |
| { |
| "epoch": 2.2737692526919693, |
| "grad_norm": 0.8127204179763794, |
| "learning_rate": 0.00024291439805367856, |
| "loss": 1.6089, |
| "step": 550500 |
| }, |
| { |
| "epoch": 2.275834438207584, |
| "grad_norm": 0.7709484100341797, |
| "learning_rate": 0.00024222362525852173, |
| "loss": 1.6081, |
| "step": 551000 |
| }, |
| { |
| "epoch": 2.277899623723199, |
| "grad_norm": 0.7922874093055725, |
| "learning_rate": 0.00024153285246336488, |
| "loss": 1.6064, |
| "step": 551500 |
| }, |
| { |
| "epoch": 2.279964809238814, |
| "grad_norm": 0.789162814617157, |
| "learning_rate": 0.00024084207966820802, |
| "loss": 1.6052, |
| "step": 552000 |
| }, |
| { |
| "epoch": 2.2820299947544287, |
| "grad_norm": 0.8289847373962402, |
| "learning_rate": 0.00024015130687305117, |
| "loss": 1.604, |
| "step": 552500 |
| }, |
| { |
| "epoch": 2.2840951802700435, |
| "grad_norm": 0.8505263924598694, |
| "learning_rate": 0.0002394605340778943, |
| "loss": 1.6046, |
| "step": 553000 |
| }, |
| { |
| "epoch": 2.2861603657856584, |
| "grad_norm": 0.7633844614028931, |
| "learning_rate": 0.00023876976128273746, |
| "loss": 1.6098, |
| "step": 553500 |
| }, |
| { |
| "epoch": 2.2882255513012733, |
| "grad_norm": 0.775978147983551, |
| "learning_rate": 0.0002380789884875806, |
| "loss": 1.606, |
| "step": 554000 |
| }, |
| { |
| "epoch": 2.290290736816888, |
| "grad_norm": 0.8002934455871582, |
| "learning_rate": 0.00023738821569242374, |
| "loss": 1.5989, |
| "step": 554500 |
| }, |
| { |
| "epoch": 2.292355922332503, |
| "grad_norm": 0.8203332424163818, |
| "learning_rate": 0.0002366974428972669, |
| "loss": 1.6076, |
| "step": 555000 |
| }, |
| { |
| "epoch": 2.294421107848118, |
| "grad_norm": 0.7718694806098938, |
| "learning_rate": 0.00023600667010211006, |
| "loss": 1.6023, |
| "step": 555500 |
| }, |
| { |
| "epoch": 2.296486293363733, |
| "grad_norm": 0.8252015709877014, |
| "learning_rate": 0.00023531589730695318, |
| "loss": 1.6046, |
| "step": 556000 |
| }, |
| { |
| "epoch": 2.2985514788793475, |
| "grad_norm": 0.868835985660553, |
| "learning_rate": 0.00023462512451179632, |
| "loss": 1.6013, |
| "step": 556500 |
| }, |
| { |
| "epoch": 2.300616664394963, |
| "grad_norm": 0.8472076058387756, |
| "learning_rate": 0.0002339343517166395, |
| "loss": 1.6006, |
| "step": 557000 |
| }, |
| { |
| "epoch": 2.3026818499105772, |
| "grad_norm": 0.7968847751617432, |
| "learning_rate": 0.00023324357892148261, |
| "loss": 1.6055, |
| "step": 557500 |
| }, |
| { |
| "epoch": 2.3047470354261925, |
| "grad_norm": 0.8021098375320435, |
| "learning_rate": 0.00023255280612632576, |
| "loss": 1.6016, |
| "step": 558000 |
| }, |
| { |
| "epoch": 2.3068122209418074, |
| "grad_norm": 0.852824866771698, |
| "learning_rate": 0.00023186203333116893, |
| "loss": 1.6028, |
| "step": 558500 |
| }, |
| { |
| "epoch": 2.3088774064574222, |
| "grad_norm": 0.8099557161331177, |
| "learning_rate": 0.00023117126053601208, |
| "loss": 1.6022, |
| "step": 559000 |
| }, |
| { |
| "epoch": 2.310942591973037, |
| "grad_norm": 0.7851099371910095, |
| "learning_rate": 0.0002304804877408552, |
| "loss": 1.6012, |
| "step": 559500 |
| }, |
| { |
| "epoch": 2.313007777488652, |
| "grad_norm": 0.7841119170188904, |
| "learning_rate": 0.00022978971494569837, |
| "loss": 1.6055, |
| "step": 560000 |
| }, |
| { |
| "epoch": 2.315072963004267, |
| "grad_norm": 0.7725875973701477, |
| "learning_rate": 0.0002290989421505415, |
| "loss": 1.6012, |
| "step": 560500 |
| }, |
| { |
| "epoch": 2.3171381485198816, |
| "grad_norm": 0.8066521286964417, |
| "learning_rate": 0.00022840816935538463, |
| "loss": 1.6021, |
| "step": 561000 |
| }, |
| { |
| "epoch": 2.3192033340354965, |
| "grad_norm": 0.804887056350708, |
| "learning_rate": 0.0002277173965602278, |
| "loss": 1.6006, |
| "step": 561500 |
| }, |
| { |
| "epoch": 2.3212685195511114, |
| "grad_norm": 0.7885397672653198, |
| "learning_rate": 0.00022702662376507094, |
| "loss": 1.6035, |
| "step": 562000 |
| }, |
| { |
| "epoch": 2.323333705066726, |
| "grad_norm": 0.7402700781822205, |
| "learning_rate": 0.00022633585096991406, |
| "loss": 1.5988, |
| "step": 562500 |
| }, |
| { |
| "epoch": 2.325398890582341, |
| "grad_norm": 0.801807701587677, |
| "learning_rate": 0.00022564507817475723, |
| "loss": 1.6025, |
| "step": 563000 |
| }, |
| { |
| "epoch": 2.327464076097956, |
| "grad_norm": 0.7947646379470825, |
| "learning_rate": 0.00022495430537960038, |
| "loss": 1.6002, |
| "step": 563500 |
| }, |
| { |
| "epoch": 2.3295292616135708, |
| "grad_norm": 0.8268435001373291, |
| "learning_rate": 0.00022426353258444352, |
| "loss": 1.5977, |
| "step": 564000 |
| }, |
| { |
| "epoch": 2.3315944471291856, |
| "grad_norm": 0.8092913627624512, |
| "learning_rate": 0.00022357275978928667, |
| "loss": 1.5996, |
| "step": 564500 |
| }, |
| { |
| "epoch": 2.3336596326448005, |
| "grad_norm": 0.7848919630050659, |
| "learning_rate": 0.00022288198699412981, |
| "loss": 1.5995, |
| "step": 565000 |
| }, |
| { |
| "epoch": 2.3357248181604153, |
| "grad_norm": 0.802832305431366, |
| "learning_rate": 0.00022219121419897296, |
| "loss": 1.5979, |
| "step": 565500 |
| }, |
| { |
| "epoch": 2.33779000367603, |
| "grad_norm": 0.8020511865615845, |
| "learning_rate": 0.0002215004414038161, |
| "loss": 1.5981, |
| "step": 566000 |
| }, |
| { |
| "epoch": 2.339855189191645, |
| "grad_norm": 0.8132838606834412, |
| "learning_rate": 0.00022080966860865925, |
| "loss": 1.5959, |
| "step": 566500 |
| }, |
| { |
| "epoch": 2.34192037470726, |
| "grad_norm": 0.8069867491722107, |
| "learning_rate": 0.0002201188958135024, |
| "loss": 1.599, |
| "step": 567000 |
| }, |
| { |
| "epoch": 2.3439855602228747, |
| "grad_norm": 0.8337593078613281, |
| "learning_rate": 0.00021942812301834556, |
| "loss": 1.5981, |
| "step": 567500 |
| }, |
| { |
| "epoch": 2.3460507457384896, |
| "grad_norm": 0.7885046601295471, |
| "learning_rate": 0.00021873735022318868, |
| "loss": 1.597, |
| "step": 568000 |
| }, |
| { |
| "epoch": 2.3481159312541044, |
| "grad_norm": 0.8003047108650208, |
| "learning_rate": 0.00021804657742803183, |
| "loss": 1.5969, |
| "step": 568500 |
| }, |
| { |
| "epoch": 2.3501811167697193, |
| "grad_norm": 0.7714529037475586, |
| "learning_rate": 0.000217355804632875, |
| "loss": 1.5951, |
| "step": 569000 |
| }, |
| { |
| "epoch": 2.352246302285334, |
| "grad_norm": 0.8057835102081299, |
| "learning_rate": 0.00021666503183771812, |
| "loss": 1.598, |
| "step": 569500 |
| }, |
| { |
| "epoch": 2.354311487800949, |
| "grad_norm": 0.830685019493103, |
| "learning_rate": 0.00021597425904256126, |
| "loss": 1.5947, |
| "step": 570000 |
| }, |
| { |
| "epoch": 2.356376673316564, |
| "grad_norm": 0.7966949939727783, |
| "learning_rate": 0.00021528348624740443, |
| "loss": 1.5933, |
| "step": 570500 |
| }, |
| { |
| "epoch": 2.3584418588321787, |
| "grad_norm": 0.8312224745750427, |
| "learning_rate": 0.00021459271345224758, |
| "loss": 1.5941, |
| "step": 571000 |
| }, |
| { |
| "epoch": 2.3605070443477936, |
| "grad_norm": 0.8126243948936462, |
| "learning_rate": 0.0002139019406570907, |
| "loss": 1.595, |
| "step": 571500 |
| }, |
| { |
| "epoch": 2.3625722298634084, |
| "grad_norm": 0.7867225408554077, |
| "learning_rate": 0.00021321116786193387, |
| "loss": 1.5941, |
| "step": 572000 |
| }, |
| { |
| "epoch": 2.3646374153790237, |
| "grad_norm": 0.8437660336494446, |
| "learning_rate": 0.000212520395066777, |
| "loss": 1.5937, |
| "step": 572500 |
| }, |
| { |
| "epoch": 2.366702600894638, |
| "grad_norm": 0.7851312160491943, |
| "learning_rate": 0.00021182962227162016, |
| "loss": 1.5942, |
| "step": 573000 |
| }, |
| { |
| "epoch": 2.3687677864102534, |
| "grad_norm": 0.8472355008125305, |
| "learning_rate": 0.0002111388494764633, |
| "loss": 1.5937, |
| "step": 573500 |
| }, |
| { |
| "epoch": 2.3708329719258683, |
| "grad_norm": 0.7966650128364563, |
| "learning_rate": 0.00021044807668130645, |
| "loss": 1.5916, |
| "step": 574000 |
| }, |
| { |
| "epoch": 2.372898157441483, |
| "grad_norm": 0.8345617651939392, |
| "learning_rate": 0.00020975730388614962, |
| "loss": 1.5868, |
| "step": 574500 |
| }, |
| { |
| "epoch": 2.374963342957098, |
| "grad_norm": 0.82713383436203, |
| "learning_rate": 0.00020906653109099274, |
| "loss": 1.5982, |
| "step": 575000 |
| }, |
| { |
| "epoch": 2.377028528472713, |
| "grad_norm": 0.8211519718170166, |
| "learning_rate": 0.00020837575829583588, |
| "loss": 1.5888, |
| "step": 575500 |
| }, |
| { |
| "epoch": 2.3790937139883277, |
| "grad_norm": 0.8414788842201233, |
| "learning_rate": 0.00020768498550067905, |
| "loss": 1.5898, |
| "step": 576000 |
| }, |
| { |
| "epoch": 2.3811588995039425, |
| "grad_norm": 0.7635331749916077, |
| "learning_rate": 0.00020699421270552217, |
| "loss": 1.5873, |
| "step": 576500 |
| }, |
| { |
| "epoch": 2.3832240850195574, |
| "grad_norm": 0.8028623461723328, |
| "learning_rate": 0.00020630343991036532, |
| "loss": 1.5903, |
| "step": 577000 |
| }, |
| { |
| "epoch": 2.3852892705351723, |
| "grad_norm": 0.8185293674468994, |
| "learning_rate": 0.0002056126671152085, |
| "loss": 1.5917, |
| "step": 577500 |
| }, |
| { |
| "epoch": 2.387354456050787, |
| "grad_norm": 0.800356924533844, |
| "learning_rate": 0.00020492189432005163, |
| "loss": 1.5915, |
| "step": 578000 |
| }, |
| { |
| "epoch": 2.389419641566402, |
| "grad_norm": 0.7916369438171387, |
| "learning_rate": 0.00020423112152489475, |
| "loss": 1.5909, |
| "step": 578500 |
| }, |
| { |
| "epoch": 2.391484827082017, |
| "grad_norm": 0.830033540725708, |
| "learning_rate": 0.00020354034872973792, |
| "loss": 1.5881, |
| "step": 579000 |
| }, |
| { |
| "epoch": 2.3935500125976317, |
| "grad_norm": 0.7948420643806458, |
| "learning_rate": 0.00020284957593458107, |
| "loss": 1.5897, |
| "step": 579500 |
| }, |
| { |
| "epoch": 2.3956151981132465, |
| "grad_norm": 0.818466067314148, |
| "learning_rate": 0.00020215880313942419, |
| "loss": 1.5884, |
| "step": 580000 |
| }, |
| { |
| "epoch": 2.3976803836288614, |
| "grad_norm": 0.8161965608596802, |
| "learning_rate": 0.00020146803034426736, |
| "loss": 1.5906, |
| "step": 580500 |
| }, |
| { |
| "epoch": 2.3997455691444762, |
| "grad_norm": 0.8100621104240417, |
| "learning_rate": 0.0002007772575491105, |
| "loss": 1.5867, |
| "step": 581000 |
| }, |
| { |
| "epoch": 2.401810754660091, |
| "grad_norm": 0.8225206136703491, |
| "learning_rate": 0.00020008648475395365, |
| "loss": 1.5912, |
| "step": 581500 |
| }, |
| { |
| "epoch": 2.403875940175706, |
| "grad_norm": 0.8299617767333984, |
| "learning_rate": 0.0001993957119587968, |
| "loss": 1.592, |
| "step": 582000 |
| }, |
| { |
| "epoch": 2.405941125691321, |
| "grad_norm": 0.7852752208709717, |
| "learning_rate": 0.00019870493916363994, |
| "loss": 1.5859, |
| "step": 582500 |
| }, |
| { |
| "epoch": 2.4080063112069356, |
| "grad_norm": 0.8510515689849854, |
| "learning_rate": 0.00019801416636848308, |
| "loss": 1.5868, |
| "step": 583000 |
| }, |
| { |
| "epoch": 2.4100714967225505, |
| "grad_norm": 0.8003944158554077, |
| "learning_rate": 0.00019732339357332623, |
| "loss": 1.5839, |
| "step": 583500 |
| }, |
| { |
| "epoch": 2.4121366822381654, |
| "grad_norm": 0.8351225852966309, |
| "learning_rate": 0.00019663262077816937, |
| "loss": 1.5853, |
| "step": 584000 |
| }, |
| { |
| "epoch": 2.41420186775378, |
| "grad_norm": 0.8417115211486816, |
| "learning_rate": 0.00019594184798301252, |
| "loss": 1.5808, |
| "step": 584500 |
| }, |
| { |
| "epoch": 2.416267053269395, |
| "grad_norm": 0.822975754737854, |
| "learning_rate": 0.0001952510751878557, |
| "loss": 1.5828, |
| "step": 585000 |
| }, |
| { |
| "epoch": 2.41833223878501, |
| "grad_norm": 0.8236469030380249, |
| "learning_rate": 0.0001945603023926988, |
| "loss": 1.5831, |
| "step": 585500 |
| }, |
| { |
| "epoch": 2.4203974243006248, |
| "grad_norm": 0.8697351217269897, |
| "learning_rate": 0.00019386952959754195, |
| "loss": 1.5833, |
| "step": 586000 |
| }, |
| { |
| "epoch": 2.4224626098162396, |
| "grad_norm": 0.7966268658638, |
| "learning_rate": 0.00019317875680238512, |
| "loss": 1.5835, |
| "step": 586500 |
| }, |
| { |
| "epoch": 2.4245277953318545, |
| "grad_norm": 0.8148783445358276, |
| "learning_rate": 0.00019248798400722824, |
| "loss": 1.5855, |
| "step": 587000 |
| }, |
| { |
| "epoch": 2.4265929808474693, |
| "grad_norm": 0.8134833574295044, |
| "learning_rate": 0.00019179721121207139, |
| "loss": 1.5843, |
| "step": 587500 |
| }, |
| { |
| "epoch": 2.4286581663630846, |
| "grad_norm": 0.7940511703491211, |
| "learning_rate": 0.00019110643841691456, |
| "loss": 1.5831, |
| "step": 588000 |
| }, |
| { |
| "epoch": 2.430723351878699, |
| "grad_norm": 0.7859951257705688, |
| "learning_rate": 0.00019041566562175768, |
| "loss": 1.5884, |
| "step": 588500 |
| }, |
| { |
| "epoch": 2.4327885373943143, |
| "grad_norm": 0.7890865802764893, |
| "learning_rate": 0.00018972489282660082, |
| "loss": 1.5807, |
| "step": 589000 |
| }, |
| { |
| "epoch": 2.434853722909929, |
| "grad_norm": 0.7785663604736328, |
| "learning_rate": 0.000189034120031444, |
| "loss": 1.581, |
| "step": 589500 |
| }, |
| { |
| "epoch": 2.436918908425544, |
| "grad_norm": 0.8008002638816833, |
| "learning_rate": 0.00018834334723628714, |
| "loss": 1.5823, |
| "step": 590000 |
| }, |
| { |
| "epoch": 2.438984093941159, |
| "grad_norm": 0.8359131813049316, |
| "learning_rate": 0.00018765257444113025, |
| "loss": 1.5773, |
| "step": 590500 |
| }, |
| { |
| "epoch": 2.4410492794567737, |
| "grad_norm": 0.8443474173545837, |
| "learning_rate": 0.00018696180164597343, |
| "loss": 1.5841, |
| "step": 591000 |
| }, |
| { |
| "epoch": 2.4431144649723886, |
| "grad_norm": 0.7927765846252441, |
| "learning_rate": 0.00018627102885081657, |
| "loss": 1.5777, |
| "step": 591500 |
| }, |
| { |
| "epoch": 2.4451796504880035, |
| "grad_norm": 0.7933915853500366, |
| "learning_rate": 0.0001855802560556597, |
| "loss": 1.5801, |
| "step": 592000 |
| }, |
| { |
| "epoch": 2.4472448360036183, |
| "grad_norm": 0.798565149307251, |
| "learning_rate": 0.00018488948326050286, |
| "loss": 1.5797, |
| "step": 592500 |
| }, |
| { |
| "epoch": 2.449310021519233, |
| "grad_norm": 0.8151854276657104, |
| "learning_rate": 0.000184198710465346, |
| "loss": 1.5776, |
| "step": 593000 |
| }, |
| { |
| "epoch": 2.451375207034848, |
| "grad_norm": 0.7885642051696777, |
| "learning_rate": 0.00018350793767018915, |
| "loss": 1.5798, |
| "step": 593500 |
| }, |
| { |
| "epoch": 2.453440392550463, |
| "grad_norm": 0.8265528082847595, |
| "learning_rate": 0.0001828171648750323, |
| "loss": 1.5794, |
| "step": 594000 |
| }, |
| { |
| "epoch": 2.4555055780660777, |
| "grad_norm": 0.8994278311729431, |
| "learning_rate": 0.00018212639207987544, |
| "loss": 1.5758, |
| "step": 594500 |
| }, |
| { |
| "epoch": 2.4575707635816926, |
| "grad_norm": 0.7827315330505371, |
| "learning_rate": 0.00018143561928471859, |
| "loss": 1.5732, |
| "step": 595000 |
| }, |
| { |
| "epoch": 2.4596359490973074, |
| "grad_norm": 0.7778897285461426, |
| "learning_rate": 0.00018074484648956173, |
| "loss": 1.5796, |
| "step": 595500 |
| }, |
| { |
| "epoch": 2.4617011346129223, |
| "grad_norm": 0.7877337336540222, |
| "learning_rate": 0.00018005407369440487, |
| "loss": 1.5756, |
| "step": 596000 |
| }, |
| { |
| "epoch": 2.463766320128537, |
| "grad_norm": 0.7807685136795044, |
| "learning_rate": 0.00017936330089924802, |
| "loss": 1.5787, |
| "step": 596500 |
| }, |
| { |
| "epoch": 2.465831505644152, |
| "grad_norm": 0.825579047203064, |
| "learning_rate": 0.0001786725281040912, |
| "loss": 1.5794, |
| "step": 597000 |
| }, |
| { |
| "epoch": 2.467896691159767, |
| "grad_norm": 0.8047968149185181, |
| "learning_rate": 0.0001779817553089343, |
| "loss": 1.5809, |
| "step": 597500 |
| }, |
| { |
| "epoch": 2.4699618766753817, |
| "grad_norm": 0.8542481660842896, |
| "learning_rate": 0.00017729098251377745, |
| "loss": 1.5746, |
| "step": 598000 |
| }, |
| { |
| "epoch": 2.4720270621909965, |
| "grad_norm": 0.8317158222198486, |
| "learning_rate": 0.00017660020971862063, |
| "loss": 1.5809, |
| "step": 598500 |
| }, |
| { |
| "epoch": 2.4740922477066114, |
| "grad_norm": 0.8227892518043518, |
| "learning_rate": 0.00017590943692346374, |
| "loss": 1.5785, |
| "step": 599000 |
| }, |
| { |
| "epoch": 2.4761574332222263, |
| "grad_norm": 0.8336827158927917, |
| "learning_rate": 0.0001752186641283069, |
| "loss": 1.5746, |
| "step": 599500 |
| }, |
| { |
| "epoch": 2.478222618737841, |
| "grad_norm": 0.809407651424408, |
| "learning_rate": 0.00017452789133315006, |
| "loss": 1.5778, |
| "step": 600000 |
| }, |
| { |
| "epoch": 2.480287804253456, |
| "grad_norm": 0.799867570400238, |
| "learning_rate": 0.0001738371185379932, |
| "loss": 1.5762, |
| "step": 600500 |
| }, |
| { |
| "epoch": 2.482352989769071, |
| "grad_norm": 0.826615571975708, |
| "learning_rate": 0.00017314634574283632, |
| "loss": 1.5717, |
| "step": 601000 |
| }, |
| { |
| "epoch": 2.4844181752846857, |
| "grad_norm": 0.7937526702880859, |
| "learning_rate": 0.0001724555729476795, |
| "loss": 1.579, |
| "step": 601500 |
| }, |
| { |
| "epoch": 2.4864833608003005, |
| "grad_norm": 0.8167052865028381, |
| "learning_rate": 0.00017176480015252264, |
| "loss": 1.5727, |
| "step": 602000 |
| }, |
| { |
| "epoch": 2.4885485463159154, |
| "grad_norm": 0.8457524180412292, |
| "learning_rate": 0.00017107402735736579, |
| "loss": 1.5684, |
| "step": 602500 |
| }, |
| { |
| "epoch": 2.4906137318315302, |
| "grad_norm": 0.8600340485572815, |
| "learning_rate": 0.00017038325456220893, |
| "loss": 1.5767, |
| "step": 603000 |
| }, |
| { |
| "epoch": 2.4926789173471455, |
| "grad_norm": 0.786114513874054, |
| "learning_rate": 0.00016969248176705207, |
| "loss": 1.5696, |
| "step": 603500 |
| }, |
| { |
| "epoch": 2.49474410286276, |
| "grad_norm": 0.8081954717636108, |
| "learning_rate": 0.00016900170897189525, |
| "loss": 1.5735, |
| "step": 604000 |
| }, |
| { |
| "epoch": 2.4968092883783752, |
| "grad_norm": 0.8113991618156433, |
| "learning_rate": 0.00016831093617673836, |
| "loss": 1.5746, |
| "step": 604500 |
| }, |
| { |
| "epoch": 2.49887447389399, |
| "grad_norm": 0.8515011668205261, |
| "learning_rate": 0.0001676201633815815, |
| "loss": 1.568, |
| "step": 605000 |
| }, |
| { |
| "epoch": 2.500939659409605, |
| "grad_norm": 0.7948423624038696, |
| "learning_rate": 0.00016692939058642468, |
| "loss": 1.5727, |
| "step": 605500 |
| }, |
| { |
| "epoch": 2.50300484492522, |
| "grad_norm": 0.8115394711494446, |
| "learning_rate": 0.0001662386177912678, |
| "loss": 1.5704, |
| "step": 606000 |
| }, |
| { |
| "epoch": 2.5050700304408346, |
| "grad_norm": 0.8036853671073914, |
| "learning_rate": 0.00016554784499611094, |
| "loss": 1.5684, |
| "step": 606500 |
| }, |
| { |
| "epoch": 2.5071352159564495, |
| "grad_norm": 0.7892432808876038, |
| "learning_rate": 0.00016485707220095412, |
| "loss": 1.569, |
| "step": 607000 |
| }, |
| { |
| "epoch": 2.5092004014720644, |
| "grad_norm": 0.7984645366668701, |
| "learning_rate": 0.00016416629940579726, |
| "loss": 1.5679, |
| "step": 607500 |
| }, |
| { |
| "epoch": 2.511265586987679, |
| "grad_norm": 0.7996472120285034, |
| "learning_rate": 0.00016347552661064038, |
| "loss": 1.5691, |
| "step": 608000 |
| }, |
| { |
| "epoch": 2.513330772503294, |
| "grad_norm": 0.8775748610496521, |
| "learning_rate": 0.00016278475381548355, |
| "loss": 1.5707, |
| "step": 608500 |
| }, |
| { |
| "epoch": 2.515395958018909, |
| "grad_norm": 0.8051262497901917, |
| "learning_rate": 0.0001620939810203267, |
| "loss": 1.5739, |
| "step": 609000 |
| }, |
| { |
| "epoch": 2.5174611435345238, |
| "grad_norm": 0.8654427528381348, |
| "learning_rate": 0.0001614032082251698, |
| "loss": 1.5697, |
| "step": 609500 |
| }, |
| { |
| "epoch": 2.5195263290501386, |
| "grad_norm": 0.8159758448600769, |
| "learning_rate": 0.00016071243543001298, |
| "loss": 1.57, |
| "step": 610000 |
| }, |
| { |
| "epoch": 2.5215915145657535, |
| "grad_norm": 0.8165413737297058, |
| "learning_rate": 0.00016002166263485613, |
| "loss": 1.569, |
| "step": 610500 |
| }, |
| { |
| "epoch": 2.5236567000813683, |
| "grad_norm": 0.7978746891021729, |
| "learning_rate": 0.00015933088983969927, |
| "loss": 1.5659, |
| "step": 611000 |
| }, |
| { |
| "epoch": 2.525721885596983, |
| "grad_norm": 0.781399130821228, |
| "learning_rate": 0.00015864011704454242, |
| "loss": 1.5707, |
| "step": 611500 |
| }, |
| { |
| "epoch": 2.527787071112598, |
| "grad_norm": 0.8478353023529053, |
| "learning_rate": 0.00015794934424938556, |
| "loss": 1.5704, |
| "step": 612000 |
| }, |
| { |
| "epoch": 2.529852256628213, |
| "grad_norm": 0.846371054649353, |
| "learning_rate": 0.0001572585714542287, |
| "loss": 1.5638, |
| "step": 612500 |
| }, |
| { |
| "epoch": 2.5319174421438277, |
| "grad_norm": 0.8290744423866272, |
| "learning_rate": 0.00015656779865907185, |
| "loss": 1.5702, |
| "step": 613000 |
| }, |
| { |
| "epoch": 2.5339826276594426, |
| "grad_norm": 0.8195119500160217, |
| "learning_rate": 0.000155877025863915, |
| "loss": 1.5677, |
| "step": 613500 |
| }, |
| { |
| "epoch": 2.5360478131750575, |
| "grad_norm": 0.8459944128990173, |
| "learning_rate": 0.00015518625306875814, |
| "loss": 1.5662, |
| "step": 614000 |
| }, |
| { |
| "epoch": 2.5381129986906723, |
| "grad_norm": 0.7994758486747742, |
| "learning_rate": 0.00015449548027360132, |
| "loss": 1.5676, |
| "step": 614500 |
| }, |
| { |
| "epoch": 2.540178184206287, |
| "grad_norm": 0.7963876724243164, |
| "learning_rate": 0.00015380470747844443, |
| "loss": 1.5661, |
| "step": 615000 |
| }, |
| { |
| "epoch": 2.542243369721902, |
| "grad_norm": 0.8234278559684753, |
| "learning_rate": 0.00015311393468328758, |
| "loss": 1.5635, |
| "step": 615500 |
| }, |
| { |
| "epoch": 2.544308555237517, |
| "grad_norm": 0.7948046922683716, |
| "learning_rate": 0.00015242316188813075, |
| "loss": 1.5631, |
| "step": 616000 |
| }, |
| { |
| "epoch": 2.5463737407531317, |
| "grad_norm": 0.7982361912727356, |
| "learning_rate": 0.00015173238909297387, |
| "loss": 1.5685, |
| "step": 616500 |
| }, |
| { |
| "epoch": 2.5484389262687466, |
| "grad_norm": 0.7927718758583069, |
| "learning_rate": 0.000151041616297817, |
| "loss": 1.5661, |
| "step": 617000 |
| }, |
| { |
| "epoch": 2.5505041117843614, |
| "grad_norm": 0.8640558123588562, |
| "learning_rate": 0.00015035084350266018, |
| "loss": 1.5673, |
| "step": 617500 |
| }, |
| { |
| "epoch": 2.5525692972999767, |
| "grad_norm": 0.8167000412940979, |
| "learning_rate": 0.0001496600707075033, |
| "loss": 1.5666, |
| "step": 618000 |
| }, |
| { |
| "epoch": 2.554634482815591, |
| "grad_norm": 0.8331367373466492, |
| "learning_rate": 0.00014896929791234645, |
| "loss": 1.5656, |
| "step": 618500 |
| }, |
| { |
| "epoch": 2.5566996683312064, |
| "grad_norm": 0.8466469645500183, |
| "learning_rate": 0.00014827852511718962, |
| "loss": 1.562, |
| "step": 619000 |
| }, |
| { |
| "epoch": 2.558764853846821, |
| "grad_norm": 0.7808212637901306, |
| "learning_rate": 0.00014758775232203276, |
| "loss": 1.5605, |
| "step": 619500 |
| }, |
| { |
| "epoch": 2.560830039362436, |
| "grad_norm": 0.8436982035636902, |
| "learning_rate": 0.00014689697952687588, |
| "loss": 1.5621, |
| "step": 620000 |
| }, |
| { |
| "epoch": 2.5628952248780505, |
| "grad_norm": 0.8526425361633301, |
| "learning_rate": 0.00014620620673171905, |
| "loss": 1.566, |
| "step": 620500 |
| }, |
| { |
| "epoch": 2.564960410393666, |
| "grad_norm": 0.8892133831977844, |
| "learning_rate": 0.0001455154339365622, |
| "loss": 1.5623, |
| "step": 621000 |
| }, |
| { |
| "epoch": 2.5670255959092803, |
| "grad_norm": 0.8048965930938721, |
| "learning_rate": 0.00014482466114140532, |
| "loss": 1.5617, |
| "step": 621500 |
| }, |
| { |
| "epoch": 2.5690907814248956, |
| "grad_norm": 0.8180302977561951, |
| "learning_rate": 0.0001441338883462485, |
| "loss": 1.5605, |
| "step": 622000 |
| }, |
| { |
| "epoch": 2.5711559669405104, |
| "grad_norm": 0.795669674873352, |
| "learning_rate": 0.00014344311555109163, |
| "loss": 1.5615, |
| "step": 622500 |
| }, |
| { |
| "epoch": 2.5732211524561253, |
| "grad_norm": 0.8272981643676758, |
| "learning_rate": 0.00014275234275593478, |
| "loss": 1.5606, |
| "step": 623000 |
| }, |
| { |
| "epoch": 2.57528633797174, |
| "grad_norm": 0.8385244607925415, |
| "learning_rate": 0.00014206156996077792, |
| "loss": 1.5628, |
| "step": 623500 |
| }, |
| { |
| "epoch": 2.577351523487355, |
| "grad_norm": 0.8457437753677368, |
| "learning_rate": 0.00014137079716562107, |
| "loss": 1.5553, |
| "step": 624000 |
| }, |
| { |
| "epoch": 2.57941670900297, |
| "grad_norm": 0.8497530221939087, |
| "learning_rate": 0.0001406800243704642, |
| "loss": 1.5521, |
| "step": 624500 |
| }, |
| { |
| "epoch": 2.5814818945185847, |
| "grad_norm": 0.8231092691421509, |
| "learning_rate": 0.00013998925157530736, |
| "loss": 1.5613, |
| "step": 625000 |
| }, |
| { |
| "epoch": 2.5835470800341995, |
| "grad_norm": 0.783505380153656, |
| "learning_rate": 0.0001392984787801505, |
| "loss": 1.5577, |
| "step": 625500 |
| }, |
| { |
| "epoch": 2.5856122655498144, |
| "grad_norm": 0.8594375848770142, |
| "learning_rate": 0.00013860770598499365, |
| "loss": 1.5603, |
| "step": 626000 |
| }, |
| { |
| "epoch": 2.5876774510654292, |
| "grad_norm": 0.824301540851593, |
| "learning_rate": 0.00013791693318983682, |
| "loss": 1.5592, |
| "step": 626500 |
| }, |
| { |
| "epoch": 2.589742636581044, |
| "grad_norm": 0.7970808744430542, |
| "learning_rate": 0.00013722616039467994, |
| "loss": 1.5577, |
| "step": 627000 |
| }, |
| { |
| "epoch": 2.591807822096659, |
| "grad_norm": 0.7681635022163391, |
| "learning_rate": 0.00013653538759952308, |
| "loss": 1.556, |
| "step": 627500 |
| }, |
| { |
| "epoch": 2.593873007612274, |
| "grad_norm": 0.820792555809021, |
| "learning_rate": 0.00013584461480436625, |
| "loss": 1.5567, |
| "step": 628000 |
| }, |
| { |
| "epoch": 2.5959381931278886, |
| "grad_norm": 0.8436790704727173, |
| "learning_rate": 0.00013515384200920937, |
| "loss": 1.5562, |
| "step": 628500 |
| }, |
| { |
| "epoch": 2.5980033786435035, |
| "grad_norm": 0.806010901927948, |
| "learning_rate": 0.00013446306921405252, |
| "loss": 1.558, |
| "step": 629000 |
| }, |
| { |
| "epoch": 2.6000685641591184, |
| "grad_norm": 0.8049686551094055, |
| "learning_rate": 0.0001337722964188957, |
| "loss": 1.5593, |
| "step": 629500 |
| }, |
| { |
| "epoch": 2.602133749674733, |
| "grad_norm": 0.8346471786499023, |
| "learning_rate": 0.00013308152362373883, |
| "loss": 1.5551, |
| "step": 630000 |
| }, |
| { |
| "epoch": 2.604198935190348, |
| "grad_norm": 0.8366252779960632, |
| "learning_rate": 0.00013239075082858195, |
| "loss": 1.5571, |
| "step": 630500 |
| }, |
| { |
| "epoch": 2.606264120705963, |
| "grad_norm": 0.8249139785766602, |
| "learning_rate": 0.00013169997803342512, |
| "loss": 1.554, |
| "step": 631000 |
| }, |
| { |
| "epoch": 2.6083293062215778, |
| "grad_norm": 0.8431522250175476, |
| "learning_rate": 0.00013100920523826827, |
| "loss": 1.557, |
| "step": 631500 |
| }, |
| { |
| "epoch": 2.6103944917371926, |
| "grad_norm": 0.8180191516876221, |
| "learning_rate": 0.00013031843244311138, |
| "loss": 1.553, |
| "step": 632000 |
| }, |
| { |
| "epoch": 2.6124596772528075, |
| "grad_norm": 0.7824527025222778, |
| "learning_rate": 0.00012962765964795456, |
| "loss": 1.558, |
| "step": 632500 |
| }, |
| { |
| "epoch": 2.6145248627684223, |
| "grad_norm": 0.839433491230011, |
| "learning_rate": 0.0001289368868527977, |
| "loss": 1.5525, |
| "step": 633000 |
| }, |
| { |
| "epoch": 2.6165900482840376, |
| "grad_norm": 0.9019516110420227, |
| "learning_rate": 0.00012824611405764087, |
| "loss": 1.5569, |
| "step": 633500 |
| }, |
| { |
| "epoch": 2.618655233799652, |
| "grad_norm": 0.8029139637947083, |
| "learning_rate": 0.000127555341262484, |
| "loss": 1.5552, |
| "step": 634000 |
| }, |
| { |
| "epoch": 2.6207204193152673, |
| "grad_norm": 0.8322605490684509, |
| "learning_rate": 0.00012686456846732714, |
| "loss": 1.5566, |
| "step": 634500 |
| }, |
| { |
| "epoch": 2.6227856048308817, |
| "grad_norm": 0.8417773842811584, |
| "learning_rate": 0.0001261737956721703, |
| "loss": 1.551, |
| "step": 635000 |
| }, |
| { |
| "epoch": 2.624850790346497, |
| "grad_norm": 0.8202713131904602, |
| "learning_rate": 0.00012548302287701343, |
| "loss": 1.5507, |
| "step": 635500 |
| }, |
| { |
| "epoch": 2.6269159758621115, |
| "grad_norm": 0.839905858039856, |
| "learning_rate": 0.00012479225008185657, |
| "loss": 1.5495, |
| "step": 636000 |
| }, |
| { |
| "epoch": 2.6289811613777267, |
| "grad_norm": 0.8542851805686951, |
| "learning_rate": 0.00012410147728669972, |
| "loss": 1.5504, |
| "step": 636500 |
| }, |
| { |
| "epoch": 2.631046346893341, |
| "grad_norm": 0.8227192163467407, |
| "learning_rate": 0.0001234107044915429, |
| "loss": 1.5531, |
| "step": 637000 |
| }, |
| { |
| "epoch": 2.6331115324089565, |
| "grad_norm": 0.8212194442749023, |
| "learning_rate": 0.000122719931696386, |
| "loss": 1.5523, |
| "step": 637500 |
| }, |
| { |
| "epoch": 2.6351767179245713, |
| "grad_norm": 0.8629603981971741, |
| "learning_rate": 0.00012202915890122916, |
| "loss": 1.5479, |
| "step": 638000 |
| }, |
| { |
| "epoch": 2.637241903440186, |
| "grad_norm": 0.8459728956222534, |
| "learning_rate": 0.00012133838610607231, |
| "loss": 1.5481, |
| "step": 638500 |
| }, |
| { |
| "epoch": 2.639307088955801, |
| "grad_norm": 0.8557335734367371, |
| "learning_rate": 0.00012064761331091545, |
| "loss": 1.5487, |
| "step": 639000 |
| }, |
| { |
| "epoch": 2.641372274471416, |
| "grad_norm": 0.8298543691635132, |
| "learning_rate": 0.0001199568405157586, |
| "loss": 1.5479, |
| "step": 639500 |
| }, |
| { |
| "epoch": 2.6434374599870307, |
| "grad_norm": 0.8238996863365173, |
| "learning_rate": 0.00011926606772060176, |
| "loss": 1.5507, |
| "step": 640000 |
| }, |
| { |
| "epoch": 2.6455026455026456, |
| "grad_norm": 0.7995360493659973, |
| "learning_rate": 0.0001185752949254449, |
| "loss": 1.5443, |
| "step": 640500 |
| }, |
| { |
| "epoch": 2.6475678310182604, |
| "grad_norm": 0.8611718416213989, |
| "learning_rate": 0.00011788452213028803, |
| "loss": 1.5476, |
| "step": 641000 |
| }, |
| { |
| "epoch": 2.6496330165338753, |
| "grad_norm": 0.8229385614395142, |
| "learning_rate": 0.00011719374933513119, |
| "loss": 1.545, |
| "step": 641500 |
| }, |
| { |
| "epoch": 2.65169820204949, |
| "grad_norm": 0.8134409785270691, |
| "learning_rate": 0.00011650297653997434, |
| "loss": 1.5482, |
| "step": 642000 |
| }, |
| { |
| "epoch": 2.653763387565105, |
| "grad_norm": 0.8563694953918457, |
| "learning_rate": 0.00011581220374481748, |
| "loss": 1.5457, |
| "step": 642500 |
| }, |
| { |
| "epoch": 2.65582857308072, |
| "grad_norm": 0.8361693620681763, |
| "learning_rate": 0.00011512143094966063, |
| "loss": 1.5462, |
| "step": 643000 |
| }, |
| { |
| "epoch": 2.6578937585963347, |
| "grad_norm": 0.8493614792823792, |
| "learning_rate": 0.00011443065815450378, |
| "loss": 1.5463, |
| "step": 643500 |
| }, |
| { |
| "epoch": 2.6599589441119496, |
| "grad_norm": 0.7997604012489319, |
| "learning_rate": 0.00011373988535934692, |
| "loss": 1.547, |
| "step": 644000 |
| }, |
| { |
| "epoch": 2.6620241296275644, |
| "grad_norm": 0.8045528531074524, |
| "learning_rate": 0.00011304911256419006, |
| "loss": 1.5491, |
| "step": 644500 |
| }, |
| { |
| "epoch": 2.6640893151431793, |
| "grad_norm": 0.8172311186790466, |
| "learning_rate": 0.00011235833976903322, |
| "loss": 1.5486, |
| "step": 645000 |
| }, |
| { |
| "epoch": 2.666154500658794, |
| "grad_norm": 0.8630313873291016, |
| "learning_rate": 0.00011166756697387635, |
| "loss": 1.5513, |
| "step": 645500 |
| }, |
| { |
| "epoch": 2.668219686174409, |
| "grad_norm": 0.8246090412139893, |
| "learning_rate": 0.00011097679417871951, |
| "loss": 1.5461, |
| "step": 646000 |
| }, |
| { |
| "epoch": 2.670284871690024, |
| "grad_norm": 0.8191748857498169, |
| "learning_rate": 0.00011028602138356265, |
| "loss": 1.545, |
| "step": 646500 |
| }, |
| { |
| "epoch": 2.6723500572056387, |
| "grad_norm": 1.1739202737808228, |
| "learning_rate": 0.0001095952485884058, |
| "loss": 1.5455, |
| "step": 647000 |
| }, |
| { |
| "epoch": 2.6744152427212535, |
| "grad_norm": 0.8145565390586853, |
| "learning_rate": 0.00010890447579324894, |
| "loss": 1.5408, |
| "step": 647500 |
| }, |
| { |
| "epoch": 2.6764804282368684, |
| "grad_norm": 0.8613256216049194, |
| "learning_rate": 0.00010821370299809209, |
| "loss": 1.5439, |
| "step": 648000 |
| }, |
| { |
| "epoch": 2.6785456137524832, |
| "grad_norm": 0.8024303317070007, |
| "learning_rate": 0.00010752293020293523, |
| "loss": 1.5438, |
| "step": 648500 |
| }, |
| { |
| "epoch": 2.680610799268098, |
| "grad_norm": 0.8254972100257874, |
| "learning_rate": 0.00010683215740777838, |
| "loss": 1.5458, |
| "step": 649000 |
| }, |
| { |
| "epoch": 2.682675984783713, |
| "grad_norm": 0.815696120262146, |
| "learning_rate": 0.00010614138461262154, |
| "loss": 1.542, |
| "step": 649500 |
| }, |
| { |
| "epoch": 2.6847411702993282, |
| "grad_norm": 0.8715610504150391, |
| "learning_rate": 0.00010545061181746467, |
| "loss": 1.5415, |
| "step": 650000 |
| }, |
| { |
| "epoch": 2.6868063558149426, |
| "grad_norm": 0.8358045220375061, |
| "learning_rate": 0.00010475983902230781, |
| "loss": 1.5419, |
| "step": 650500 |
| }, |
| { |
| "epoch": 2.688871541330558, |
| "grad_norm": 0.7865080237388611, |
| "learning_rate": 0.00010406906622715097, |
| "loss": 1.5429, |
| "step": 651000 |
| }, |
| { |
| "epoch": 2.6909367268461724, |
| "grad_norm": 0.8054898381233215, |
| "learning_rate": 0.0001033782934319941, |
| "loss": 1.5433, |
| "step": 651500 |
| }, |
| { |
| "epoch": 2.6930019123617877, |
| "grad_norm": 0.8930450081825256, |
| "learning_rate": 0.00010268752063683726, |
| "loss": 1.5352, |
| "step": 652000 |
| }, |
| { |
| "epoch": 2.695067097877402, |
| "grad_norm": 0.8042411208152771, |
| "learning_rate": 0.0001019967478416804, |
| "loss": 1.5413, |
| "step": 652500 |
| }, |
| { |
| "epoch": 2.6971322833930174, |
| "grad_norm": 0.8400362133979797, |
| "learning_rate": 0.00010130597504652355, |
| "loss": 1.5423, |
| "step": 653000 |
| }, |
| { |
| "epoch": 2.699197468908632, |
| "grad_norm": 0.8137294054031372, |
| "learning_rate": 0.0001006152022513667, |
| "loss": 1.5432, |
| "step": 653500 |
| }, |
| { |
| "epoch": 2.701262654424247, |
| "grad_norm": 0.8344128727912903, |
| "learning_rate": 9.992442945620984e-05, |
| "loss": 1.5398, |
| "step": 654000 |
| }, |
| { |
| "epoch": 2.703327839939862, |
| "grad_norm": 0.849104642868042, |
| "learning_rate": 9.923365666105298e-05, |
| "loss": 1.54, |
| "step": 654500 |
| }, |
| { |
| "epoch": 2.7053930254554768, |
| "grad_norm": 0.8286527991294861, |
| "learning_rate": 9.854288386589613e-05, |
| "loss": 1.5412, |
| "step": 655000 |
| }, |
| { |
| "epoch": 2.7074582109710916, |
| "grad_norm": 0.8378123641014099, |
| "learning_rate": 9.785211107073929e-05, |
| "loss": 1.5361, |
| "step": 655500 |
| }, |
| { |
| "epoch": 2.7095233964867065, |
| "grad_norm": 0.8808925151824951, |
| "learning_rate": 9.716133827558243e-05, |
| "loss": 1.5382, |
| "step": 656000 |
| }, |
| { |
| "epoch": 2.7115885820023213, |
| "grad_norm": 0.8783825039863586, |
| "learning_rate": 9.647056548042558e-05, |
| "loss": 1.5361, |
| "step": 656500 |
| }, |
| { |
| "epoch": 2.713653767517936, |
| "grad_norm": 0.8051160573959351, |
| "learning_rate": 9.577979268526872e-05, |
| "loss": 1.5358, |
| "step": 657000 |
| }, |
| { |
| "epoch": 2.715718953033551, |
| "grad_norm": 0.896801233291626, |
| "learning_rate": 9.508901989011187e-05, |
| "loss": 1.5368, |
| "step": 657500 |
| }, |
| { |
| "epoch": 2.717784138549166, |
| "grad_norm": 0.8218420743942261, |
| "learning_rate": 9.439824709495501e-05, |
| "loss": 1.537, |
| "step": 658000 |
| }, |
| { |
| "epoch": 2.7198493240647807, |
| "grad_norm": 0.8470411896705627, |
| "learning_rate": 9.370747429979816e-05, |
| "loss": 1.5322, |
| "step": 658500 |
| }, |
| { |
| "epoch": 2.7219145095803956, |
| "grad_norm": 0.8505502939224243, |
| "learning_rate": 9.301670150464131e-05, |
| "loss": 1.5317, |
| "step": 659000 |
| }, |
| { |
| "epoch": 2.7239796950960105, |
| "grad_norm": 0.8617528080940247, |
| "learning_rate": 9.232592870948445e-05, |
| "loss": 1.5375, |
| "step": 659500 |
| }, |
| { |
| "epoch": 2.7260448806116253, |
| "grad_norm": 0.8441663384437561, |
| "learning_rate": 9.16351559143276e-05, |
| "loss": 1.5366, |
| "step": 660000 |
| }, |
| { |
| "epoch": 2.72811006612724, |
| "grad_norm": 0.8294611573219299, |
| "learning_rate": 9.094438311917075e-05, |
| "loss": 1.5373, |
| "step": 660500 |
| }, |
| { |
| "epoch": 2.730175251642855, |
| "grad_norm": 0.8215169906616211, |
| "learning_rate": 9.025361032401388e-05, |
| "loss": 1.5327, |
| "step": 661000 |
| }, |
| { |
| "epoch": 2.73224043715847, |
| "grad_norm": 0.8766931891441345, |
| "learning_rate": 8.956283752885704e-05, |
| "loss": 1.5339, |
| "step": 661500 |
| }, |
| { |
| "epoch": 2.7343056226740847, |
| "grad_norm": 0.8456342220306396, |
| "learning_rate": 8.887206473370018e-05, |
| "loss": 1.5341, |
| "step": 662000 |
| }, |
| { |
| "epoch": 2.7363708081896996, |
| "grad_norm": 0.8384252786636353, |
| "learning_rate": 8.818129193854333e-05, |
| "loss": 1.5338, |
| "step": 662500 |
| }, |
| { |
| "epoch": 2.7384359937053144, |
| "grad_norm": 0.8584861159324646, |
| "learning_rate": 8.749051914338647e-05, |
| "loss": 1.5301, |
| "step": 663000 |
| }, |
| { |
| "epoch": 2.7405011792209293, |
| "grad_norm": 0.8463834524154663, |
| "learning_rate": 8.679974634822962e-05, |
| "loss": 1.531, |
| "step": 663500 |
| }, |
| { |
| "epoch": 2.742566364736544, |
| "grad_norm": 0.84855055809021, |
| "learning_rate": 8.610897355307276e-05, |
| "loss": 1.5332, |
| "step": 664000 |
| }, |
| { |
| "epoch": 2.744631550252159, |
| "grad_norm": 0.8267730474472046, |
| "learning_rate": 8.541820075791591e-05, |
| "loss": 1.5337, |
| "step": 664500 |
| }, |
| { |
| "epoch": 2.746696735767774, |
| "grad_norm": 0.8398123383522034, |
| "learning_rate": 8.472742796275907e-05, |
| "loss": 1.5327, |
| "step": 665000 |
| }, |
| { |
| "epoch": 2.748761921283389, |
| "grad_norm": 0.8413114547729492, |
| "learning_rate": 8.40366551676022e-05, |
| "loss": 1.5355, |
| "step": 665500 |
| }, |
| { |
| "epoch": 2.7508271067990036, |
| "grad_norm": 0.8241723775863647, |
| "learning_rate": 8.334588237244536e-05, |
| "loss": 1.533, |
| "step": 666000 |
| }, |
| { |
| "epoch": 2.752892292314619, |
| "grad_norm": 0.8695456981658936, |
| "learning_rate": 8.26551095772885e-05, |
| "loss": 1.5347, |
| "step": 666500 |
| }, |
| { |
| "epoch": 2.7549574778302333, |
| "grad_norm": 0.8351263403892517, |
| "learning_rate": 8.196433678213163e-05, |
| "loss": 1.53, |
| "step": 667000 |
| }, |
| { |
| "epoch": 2.7570226633458486, |
| "grad_norm": 0.8227745294570923, |
| "learning_rate": 8.127356398697479e-05, |
| "loss": 1.53, |
| "step": 667500 |
| }, |
| { |
| "epoch": 2.759087848861463, |
| "grad_norm": 0.8654522895812988, |
| "learning_rate": 8.058279119181794e-05, |
| "loss": 1.532, |
| "step": 668000 |
| }, |
| { |
| "epoch": 2.7611530343770783, |
| "grad_norm": 0.819057822227478, |
| "learning_rate": 7.989201839666108e-05, |
| "loss": 1.5297, |
| "step": 668500 |
| }, |
| { |
| "epoch": 2.763218219892693, |
| "grad_norm": 0.8575501441955566, |
| "learning_rate": 7.920124560150422e-05, |
| "loss": 1.5275, |
| "step": 669000 |
| }, |
| { |
| "epoch": 2.765283405408308, |
| "grad_norm": 0.8428553938865662, |
| "learning_rate": 7.851047280634738e-05, |
| "loss": 1.5321, |
| "step": 669500 |
| }, |
| { |
| "epoch": 2.767348590923923, |
| "grad_norm": 0.8702006936073303, |
| "learning_rate": 7.781970001119051e-05, |
| "loss": 1.5291, |
| "step": 670000 |
| }, |
| { |
| "epoch": 2.7694137764395377, |
| "grad_norm": 0.8024266958236694, |
| "learning_rate": 7.712892721603366e-05, |
| "loss": 1.529, |
| "step": 670500 |
| }, |
| { |
| "epoch": 2.7714789619551525, |
| "grad_norm": 0.862339437007904, |
| "learning_rate": 7.643815442087682e-05, |
| "loss": 1.5337, |
| "step": 671000 |
| }, |
| { |
| "epoch": 2.7735441474707674, |
| "grad_norm": 0.8829432725906372, |
| "learning_rate": 7.574738162571996e-05, |
| "loss": 1.5243, |
| "step": 671500 |
| }, |
| { |
| "epoch": 2.7756093329863822, |
| "grad_norm": 0.8032020926475525, |
| "learning_rate": 7.505660883056311e-05, |
| "loss": 1.525, |
| "step": 672000 |
| }, |
| { |
| "epoch": 2.777674518501997, |
| "grad_norm": 0.8329365849494934, |
| "learning_rate": 7.436583603540625e-05, |
| "loss": 1.532, |
| "step": 672500 |
| }, |
| { |
| "epoch": 2.779739704017612, |
| "grad_norm": 0.865728497505188, |
| "learning_rate": 7.367506324024941e-05, |
| "loss": 1.5243, |
| "step": 673000 |
| }, |
| { |
| "epoch": 2.781804889533227, |
| "grad_norm": 0.8427261114120483, |
| "learning_rate": 7.298429044509254e-05, |
| "loss": 1.5197, |
| "step": 673500 |
| }, |
| { |
| "epoch": 2.7838700750488417, |
| "grad_norm": 0.8444133400917053, |
| "learning_rate": 7.229351764993569e-05, |
| "loss": 1.5314, |
| "step": 674000 |
| }, |
| { |
| "epoch": 2.7859352605644565, |
| "grad_norm": 0.8255510330200195, |
| "learning_rate": 7.160274485477885e-05, |
| "loss": 1.5275, |
| "step": 674500 |
| }, |
| { |
| "epoch": 2.7880004460800714, |
| "grad_norm": 0.794021487236023, |
| "learning_rate": 7.091197205962198e-05, |
| "loss": 1.5237, |
| "step": 675000 |
| }, |
| { |
| "epoch": 2.790065631595686, |
| "grad_norm": 0.8648783564567566, |
| "learning_rate": 7.022119926446513e-05, |
| "loss": 1.5221, |
| "step": 675500 |
| }, |
| { |
| "epoch": 2.792130817111301, |
| "grad_norm": 0.8662870526313782, |
| "learning_rate": 6.953042646930828e-05, |
| "loss": 1.5239, |
| "step": 676000 |
| }, |
| { |
| "epoch": 2.794196002626916, |
| "grad_norm": 0.8716167211532593, |
| "learning_rate": 6.883965367415141e-05, |
| "loss": 1.5284, |
| "step": 676500 |
| }, |
| { |
| "epoch": 2.7962611881425308, |
| "grad_norm": 0.8369839191436768, |
| "learning_rate": 6.814888087899457e-05, |
| "loss": 1.5206, |
| "step": 677000 |
| }, |
| { |
| "epoch": 2.7983263736581456, |
| "grad_norm": 0.8716705441474915, |
| "learning_rate": 6.745810808383771e-05, |
| "loss": 1.5179, |
| "step": 677500 |
| }, |
| { |
| "epoch": 2.8003915591737605, |
| "grad_norm": 0.8210489153862, |
| "learning_rate": 6.676733528868086e-05, |
| "loss": 1.5286, |
| "step": 678000 |
| }, |
| { |
| "epoch": 2.8024567446893753, |
| "grad_norm": 0.8834524750709534, |
| "learning_rate": 6.6076562493524e-05, |
| "loss": 1.5271, |
| "step": 678500 |
| }, |
| { |
| "epoch": 2.80452193020499, |
| "grad_norm": 0.858285665512085, |
| "learning_rate": 6.538578969836716e-05, |
| "loss": 1.5232, |
| "step": 679000 |
| }, |
| { |
| "epoch": 2.806587115720605, |
| "grad_norm": 0.8696337342262268, |
| "learning_rate": 6.46950169032103e-05, |
| "loss": 1.524, |
| "step": 679500 |
| }, |
| { |
| "epoch": 2.80865230123622, |
| "grad_norm": 0.8471727967262268, |
| "learning_rate": 6.400424410805344e-05, |
| "loss": 1.523, |
| "step": 680000 |
| }, |
| { |
| "epoch": 2.8107174867518347, |
| "grad_norm": 0.8594076633453369, |
| "learning_rate": 6.33134713128966e-05, |
| "loss": 1.5166, |
| "step": 680500 |
| }, |
| { |
| "epoch": 2.81278267226745, |
| "grad_norm": 0.856606662273407, |
| "learning_rate": 6.262269851773973e-05, |
| "loss": 1.523, |
| "step": 681000 |
| }, |
| { |
| "epoch": 2.8148478577830645, |
| "grad_norm": 0.8609211444854736, |
| "learning_rate": 6.193192572258289e-05, |
| "loss": 1.5209, |
| "step": 681500 |
| }, |
| { |
| "epoch": 2.8169130432986798, |
| "grad_norm": 0.8398802280426025, |
| "learning_rate": 6.124115292742603e-05, |
| "loss": 1.5271, |
| "step": 682000 |
| }, |
| { |
| "epoch": 2.818978228814294, |
| "grad_norm": 0.9304519295692444, |
| "learning_rate": 6.0550380132269176e-05, |
| "loss": 1.5205, |
| "step": 682500 |
| }, |
| { |
| "epoch": 2.8210434143299095, |
| "grad_norm": 0.8197703957557678, |
| "learning_rate": 5.985960733711232e-05, |
| "loss": 1.524, |
| "step": 683000 |
| }, |
| { |
| "epoch": 2.823108599845524, |
| "grad_norm": 0.831089973449707, |
| "learning_rate": 5.916883454195547e-05, |
| "loss": 1.5204, |
| "step": 683500 |
| }, |
| { |
| "epoch": 2.825173785361139, |
| "grad_norm": 0.8130340576171875, |
| "learning_rate": 5.847806174679862e-05, |
| "loss": 1.5151, |
| "step": 684000 |
| }, |
| { |
| "epoch": 2.827238970876754, |
| "grad_norm": 0.8501649498939514, |
| "learning_rate": 5.7787288951641755e-05, |
| "loss": 1.5213, |
| "step": 684500 |
| }, |
| { |
| "epoch": 2.829304156392369, |
| "grad_norm": 0.827510416507721, |
| "learning_rate": 5.709651615648491e-05, |
| "loss": 1.5202, |
| "step": 685000 |
| }, |
| { |
| "epoch": 2.8313693419079837, |
| "grad_norm": 0.8375749588012695, |
| "learning_rate": 5.640574336132805e-05, |
| "loss": 1.5226, |
| "step": 685500 |
| }, |
| { |
| "epoch": 2.8334345274235986, |
| "grad_norm": 0.8179614543914795, |
| "learning_rate": 5.57149705661712e-05, |
| "loss": 1.5174, |
| "step": 686000 |
| }, |
| { |
| "epoch": 2.8354997129392134, |
| "grad_norm": 0.8485569953918457, |
| "learning_rate": 5.502419777101435e-05, |
| "loss": 1.5197, |
| "step": 686500 |
| }, |
| { |
| "epoch": 2.8375648984548283, |
| "grad_norm": 0.8839040398597717, |
| "learning_rate": 5.433342497585749e-05, |
| "loss": 1.5206, |
| "step": 687000 |
| }, |
| { |
| "epoch": 2.839630083970443, |
| "grad_norm": 0.8560023307800293, |
| "learning_rate": 5.364265218070064e-05, |
| "loss": 1.5177, |
| "step": 687500 |
| }, |
| { |
| "epoch": 2.841695269486058, |
| "grad_norm": 0.8139906525611877, |
| "learning_rate": 5.295187938554378e-05, |
| "loss": 1.5154, |
| "step": 688000 |
| }, |
| { |
| "epoch": 2.843760455001673, |
| "grad_norm": 0.9361693859100342, |
| "learning_rate": 5.226110659038693e-05, |
| "loss": 1.5131, |
| "step": 688500 |
| }, |
| { |
| "epoch": 2.8458256405172877, |
| "grad_norm": 0.8294958472251892, |
| "learning_rate": 5.157033379523008e-05, |
| "loss": 1.5194, |
| "step": 689000 |
| }, |
| { |
| "epoch": 2.8478908260329026, |
| "grad_norm": 0.8591476082801819, |
| "learning_rate": 5.0879561000073224e-05, |
| "loss": 1.5179, |
| "step": 689500 |
| }, |
| { |
| "epoch": 2.8499560115485174, |
| "grad_norm": 0.8466942310333252, |
| "learning_rate": 5.018878820491637e-05, |
| "loss": 1.5122, |
| "step": 690000 |
| }, |
| { |
| "epoch": 2.8520211970641323, |
| "grad_norm": 0.9315714240074158, |
| "learning_rate": 4.949801540975952e-05, |
| "loss": 1.5193, |
| "step": 690500 |
| }, |
| { |
| "epoch": 2.854086382579747, |
| "grad_norm": 0.8646622896194458, |
| "learning_rate": 4.8807242614602665e-05, |
| "loss": 1.5155, |
| "step": 691000 |
| }, |
| { |
| "epoch": 2.856151568095362, |
| "grad_norm": 0.8958275318145752, |
| "learning_rate": 4.8116469819445804e-05, |
| "loss": 1.5172, |
| "step": 691500 |
| }, |
| { |
| "epoch": 2.858216753610977, |
| "grad_norm": 0.8623936176300049, |
| "learning_rate": 4.7425697024288955e-05, |
| "loss": 1.5138, |
| "step": 692000 |
| }, |
| { |
| "epoch": 2.8602819391265917, |
| "grad_norm": 0.8689021468162537, |
| "learning_rate": 4.67349242291321e-05, |
| "loss": 1.5144, |
| "step": 692500 |
| }, |
| { |
| "epoch": 2.8623471246422065, |
| "grad_norm": 0.8967764973640442, |
| "learning_rate": 4.6044151433975245e-05, |
| "loss": 1.5156, |
| "step": 693000 |
| }, |
| { |
| "epoch": 2.8644123101578214, |
| "grad_norm": 0.8540061116218567, |
| "learning_rate": 4.5353378638818396e-05, |
| "loss": 1.5161, |
| "step": 693500 |
| }, |
| { |
| "epoch": 2.8664774956734362, |
| "grad_norm": 0.8717928528785706, |
| "learning_rate": 4.466260584366154e-05, |
| "loss": 1.5097, |
| "step": 694000 |
| }, |
| { |
| "epoch": 2.868542681189051, |
| "grad_norm": 0.861867368221283, |
| "learning_rate": 4.397183304850468e-05, |
| "loss": 1.5117, |
| "step": 694500 |
| }, |
| { |
| "epoch": 2.870607866704666, |
| "grad_norm": 0.8746508955955505, |
| "learning_rate": 4.328106025334783e-05, |
| "loss": 1.5116, |
| "step": 695000 |
| }, |
| { |
| "epoch": 2.872673052220281, |
| "grad_norm": 0.856505274772644, |
| "learning_rate": 4.2590287458190976e-05, |
| "loss": 1.5105, |
| "step": 695500 |
| }, |
| { |
| "epoch": 2.8747382377358957, |
| "grad_norm": 0.8690941333770752, |
| "learning_rate": 4.189951466303412e-05, |
| "loss": 1.511, |
| "step": 696000 |
| }, |
| { |
| "epoch": 2.876803423251511, |
| "grad_norm": 0.8394379019737244, |
| "learning_rate": 4.120874186787727e-05, |
| "loss": 1.511, |
| "step": 696500 |
| }, |
| { |
| "epoch": 2.8788686087671254, |
| "grad_norm": 0.847400426864624, |
| "learning_rate": 4.051796907272042e-05, |
| "loss": 1.5152, |
| "step": 697000 |
| }, |
| { |
| "epoch": 2.8809337942827407, |
| "grad_norm": 0.8548203706741333, |
| "learning_rate": 3.982719627756357e-05, |
| "loss": 1.5103, |
| "step": 697500 |
| }, |
| { |
| "epoch": 2.882998979798355, |
| "grad_norm": 0.9266785979270935, |
| "learning_rate": 3.913642348240671e-05, |
| "loss": 1.5172, |
| "step": 698000 |
| }, |
| { |
| "epoch": 2.8850641653139704, |
| "grad_norm": 0.8905568718910217, |
| "learning_rate": 3.844565068724985e-05, |
| "loss": 1.5147, |
| "step": 698500 |
| }, |
| { |
| "epoch": 2.8871293508295848, |
| "grad_norm": 0.8947970271110535, |
| "learning_rate": 3.7754877892093e-05, |
| "loss": 1.5116, |
| "step": 699000 |
| }, |
| { |
| "epoch": 2.8891945363452, |
| "grad_norm": 0.8671281337738037, |
| "learning_rate": 3.706410509693615e-05, |
| "loss": 1.5089, |
| "step": 699500 |
| }, |
| { |
| "epoch": 2.8912597218608145, |
| "grad_norm": 0.8655187487602234, |
| "learning_rate": 3.637333230177929e-05, |
| "loss": 1.5079, |
| "step": 700000 |
| }, |
| { |
| "epoch": 2.8933249073764298, |
| "grad_norm": 0.8781392574310303, |
| "learning_rate": 3.5682559506622444e-05, |
| "loss": 1.5051, |
| "step": 700500 |
| }, |
| { |
| "epoch": 2.8953900928920446, |
| "grad_norm": 0.8239871859550476, |
| "learning_rate": 3.499178671146558e-05, |
| "loss": 1.5135, |
| "step": 701000 |
| }, |
| { |
| "epoch": 2.8974552784076595, |
| "grad_norm": 0.8702250719070435, |
| "learning_rate": 3.430101391630873e-05, |
| "loss": 1.5101, |
| "step": 701500 |
| }, |
| { |
| "epoch": 2.8995204639232743, |
| "grad_norm": 0.8681339621543884, |
| "learning_rate": 3.361024112115188e-05, |
| "loss": 1.5098, |
| "step": 702000 |
| }, |
| { |
| "epoch": 2.901585649438889, |
| "grad_norm": 0.8929154276847839, |
| "learning_rate": 3.2919468325995024e-05, |
| "loss": 1.5115, |
| "step": 702500 |
| }, |
| { |
| "epoch": 2.903650834954504, |
| "grad_norm": 0.8695405125617981, |
| "learning_rate": 3.222869553083817e-05, |
| "loss": 1.5073, |
| "step": 703000 |
| }, |
| { |
| "epoch": 2.905716020470119, |
| "grad_norm": 0.8858229517936707, |
| "learning_rate": 3.153792273568132e-05, |
| "loss": 1.5081, |
| "step": 703500 |
| }, |
| { |
| "epoch": 2.9077812059857338, |
| "grad_norm": 0.8298658132553101, |
| "learning_rate": 3.0847149940524465e-05, |
| "loss": 1.5109, |
| "step": 704000 |
| }, |
| { |
| "epoch": 2.9098463915013486, |
| "grad_norm": 0.9026769399642944, |
| "learning_rate": 3.015637714536761e-05, |
| "loss": 1.5036, |
| "step": 704500 |
| }, |
| { |
| "epoch": 2.9119115770169635, |
| "grad_norm": 0.8433796763420105, |
| "learning_rate": 2.9465604350210755e-05, |
| "loss": 1.5103, |
| "step": 705000 |
| }, |
| { |
| "epoch": 2.9139767625325783, |
| "grad_norm": 0.8475963473320007, |
| "learning_rate": 2.87748315550539e-05, |
| "loss": 1.5084, |
| "step": 705500 |
| }, |
| { |
| "epoch": 2.916041948048193, |
| "grad_norm": 0.8807883262634277, |
| "learning_rate": 2.8084058759897048e-05, |
| "loss": 1.5089, |
| "step": 706000 |
| }, |
| { |
| "epoch": 2.918107133563808, |
| "grad_norm": 0.9054199457168579, |
| "learning_rate": 2.7393285964740193e-05, |
| "loss": 1.5123, |
| "step": 706500 |
| }, |
| { |
| "epoch": 2.920172319079423, |
| "grad_norm": 0.8661481738090515, |
| "learning_rate": 2.670251316958334e-05, |
| "loss": 1.5088, |
| "step": 707000 |
| }, |
| { |
| "epoch": 2.9222375045950377, |
| "grad_norm": 0.8456491231918335, |
| "learning_rate": 2.6011740374426486e-05, |
| "loss": 1.5078, |
| "step": 707500 |
| }, |
| { |
| "epoch": 2.9243026901106526, |
| "grad_norm": 0.8700172305107117, |
| "learning_rate": 2.5320967579269634e-05, |
| "loss": 1.5056, |
| "step": 708000 |
| }, |
| { |
| "epoch": 2.9263678756262674, |
| "grad_norm": 0.882483184337616, |
| "learning_rate": 2.463019478411278e-05, |
| "loss": 1.5118, |
| "step": 708500 |
| }, |
| { |
| "epoch": 2.9284330611418823, |
| "grad_norm": 0.8397735357284546, |
| "learning_rate": 2.3939421988955924e-05, |
| "loss": 1.5078, |
| "step": 709000 |
| }, |
| { |
| "epoch": 2.930498246657497, |
| "grad_norm": 0.8614588379859924, |
| "learning_rate": 2.3248649193799072e-05, |
| "loss": 1.504, |
| "step": 709500 |
| }, |
| { |
| "epoch": 2.932563432173112, |
| "grad_norm": 0.8456758260726929, |
| "learning_rate": 2.2557876398642217e-05, |
| "loss": 1.5068, |
| "step": 710000 |
| }, |
| { |
| "epoch": 2.934628617688727, |
| "grad_norm": 0.8835407495498657, |
| "learning_rate": 2.1867103603485365e-05, |
| "loss": 1.503, |
| "step": 710500 |
| }, |
| { |
| "epoch": 2.9366938032043417, |
| "grad_norm": 0.8269529938697815, |
| "learning_rate": 2.117633080832851e-05, |
| "loss": 1.5039, |
| "step": 711000 |
| }, |
| { |
| "epoch": 2.9387589887199566, |
| "grad_norm": 0.9135294556617737, |
| "learning_rate": 2.0485558013171655e-05, |
| "loss": 1.5067, |
| "step": 711500 |
| }, |
| { |
| "epoch": 2.940824174235572, |
| "grad_norm": 0.8736814856529236, |
| "learning_rate": 1.9794785218014803e-05, |
| "loss": 1.5025, |
| "step": 712000 |
| }, |
| { |
| "epoch": 2.9428893597511863, |
| "grad_norm": 0.8207076191902161, |
| "learning_rate": 1.9104012422857948e-05, |
| "loss": 1.503, |
| "step": 712500 |
| }, |
| { |
| "epoch": 2.9449545452668016, |
| "grad_norm": 0.8992505669593811, |
| "learning_rate": 1.8413239627701093e-05, |
| "loss": 1.5057, |
| "step": 713000 |
| }, |
| { |
| "epoch": 2.947019730782416, |
| "grad_norm": 0.8630014657974243, |
| "learning_rate": 1.772246683254424e-05, |
| "loss": 1.5026, |
| "step": 713500 |
| }, |
| { |
| "epoch": 2.9490849162980313, |
| "grad_norm": 0.8466277122497559, |
| "learning_rate": 1.703169403738739e-05, |
| "loss": 1.5022, |
| "step": 714000 |
| }, |
| { |
| "epoch": 2.9511501018136457, |
| "grad_norm": 0.8246403932571411, |
| "learning_rate": 1.6340921242230534e-05, |
| "loss": 1.5025, |
| "step": 714500 |
| }, |
| { |
| "epoch": 2.953215287329261, |
| "grad_norm": 0.8537036776542664, |
| "learning_rate": 1.565014844707368e-05, |
| "loss": 1.5004, |
| "step": 715000 |
| }, |
| { |
| "epoch": 2.9552804728448754, |
| "grad_norm": 0.8644038438796997, |
| "learning_rate": 1.4959375651916825e-05, |
| "loss": 1.5003, |
| "step": 715500 |
| }, |
| { |
| "epoch": 2.9573456583604907, |
| "grad_norm": 0.8385940790176392, |
| "learning_rate": 1.4268602856759972e-05, |
| "loss": 1.4993, |
| "step": 716000 |
| }, |
| { |
| "epoch": 2.9594108438761055, |
| "grad_norm": 0.8472567796707153, |
| "learning_rate": 1.3577830061603118e-05, |
| "loss": 1.503, |
| "step": 716500 |
| }, |
| { |
| "epoch": 2.9614760293917204, |
| "grad_norm": 0.8817070126533508, |
| "learning_rate": 1.2887057266446265e-05, |
| "loss": 1.5039, |
| "step": 717000 |
| }, |
| { |
| "epoch": 2.9635412149073352, |
| "grad_norm": 0.8786518573760986, |
| "learning_rate": 1.219628447128941e-05, |
| "loss": 1.506, |
| "step": 717500 |
| }, |
| { |
| "epoch": 2.96560640042295, |
| "grad_norm": 0.8719050884246826, |
| "learning_rate": 1.1505511676132556e-05, |
| "loss": 1.5004, |
| "step": 718000 |
| }, |
| { |
| "epoch": 2.967671585938565, |
| "grad_norm": 0.9109290242195129, |
| "learning_rate": 1.0814738880975703e-05, |
| "loss": 1.5021, |
| "step": 718500 |
| }, |
| { |
| "epoch": 2.96973677145418, |
| "grad_norm": 0.8234292268753052, |
| "learning_rate": 1.012396608581885e-05, |
| "loss": 1.5025, |
| "step": 719000 |
| }, |
| { |
| "epoch": 2.9718019569697947, |
| "grad_norm": 0.9141399264335632, |
| "learning_rate": 9.433193290661996e-06, |
| "loss": 1.4982, |
| "step": 719500 |
| }, |
| { |
| "epoch": 2.9738671424854095, |
| "grad_norm": 0.8994991183280945, |
| "learning_rate": 8.74242049550514e-06, |
| "loss": 1.5012, |
| "step": 720000 |
| }, |
| { |
| "epoch": 2.9759323280010244, |
| "grad_norm": 0.8629069328308105, |
| "learning_rate": 8.051647700348289e-06, |
| "loss": 1.5005, |
| "step": 720500 |
| }, |
| { |
| "epoch": 2.977997513516639, |
| "grad_norm": 0.8604488968849182, |
| "learning_rate": 7.360874905191434e-06, |
| "loss": 1.497, |
| "step": 721000 |
| }, |
| { |
| "epoch": 2.980062699032254, |
| "grad_norm": 0.8444788455963135, |
| "learning_rate": 6.67010211003458e-06, |
| "loss": 1.5015, |
| "step": 721500 |
| }, |
| { |
| "epoch": 2.982127884547869, |
| "grad_norm": 0.844616711139679, |
| "learning_rate": 5.979329314877727e-06, |
| "loss": 1.5037, |
| "step": 722000 |
| }, |
| { |
| "epoch": 2.9841930700634838, |
| "grad_norm": 0.8340693712234497, |
| "learning_rate": 5.288556519720873e-06, |
| "loss": 1.5002, |
| "step": 722500 |
| }, |
| { |
| "epoch": 2.9862582555790986, |
| "grad_norm": 0.8410211205482483, |
| "learning_rate": 4.597783724564018e-06, |
| "loss": 1.4972, |
| "step": 723000 |
| }, |
| { |
| "epoch": 2.9883234410947135, |
| "grad_norm": 0.8680119514465332, |
| "learning_rate": 3.907010929407165e-06, |
| "loss": 1.4977, |
| "step": 723500 |
| }, |
| { |
| "epoch": 2.9903886266103283, |
| "grad_norm": 0.8596481084823608, |
| "learning_rate": 3.2162381342503112e-06, |
| "loss": 1.5007, |
| "step": 724000 |
| }, |
| { |
| "epoch": 2.992453812125943, |
| "grad_norm": 0.7909371256828308, |
| "learning_rate": 2.5254653390934573e-06, |
| "loss": 1.4953, |
| "step": 724500 |
| }, |
| { |
| "epoch": 2.994518997641558, |
| "grad_norm": 0.8666454553604126, |
| "learning_rate": 1.8346925439366037e-06, |
| "loss": 1.5011, |
| "step": 725000 |
| }, |
| { |
| "epoch": 2.996584183157173, |
| "grad_norm": 0.8664350509643555, |
| "learning_rate": 1.1439197487797498e-06, |
| "loss": 1.5007, |
| "step": 725500 |
| }, |
| { |
| "epoch": 2.9986493686727878, |
| "grad_norm": 0.8779242634773254, |
| "learning_rate": 4.531469536228961e-07, |
| "loss": 1.4985, |
| "step": 726000 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 726327, |
| "total_flos": 1.546067484574894e+18, |
| "train_loss": 1.7818369276394814, |
| "train_runtime": 122016.1779, |
| "train_samples_per_second": 380.973, |
| "train_steps_per_second": 5.953 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 726327, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.546067484574894e+18, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|