| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8144612112848125, |
| "eval_steps": 500, |
| "global_step": 9000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0009049569014275695, |
| "grad_norm": 159.94659423828125, |
| "learning_rate": 6.024096385542169e-07, |
| "loss": 3.3777, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001809913802855139, |
| "grad_norm": 92.94371795654297, |
| "learning_rate": 1.2048192771084338e-06, |
| "loss": 2.8739, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0027148707042827084, |
| "grad_norm": 40.02893829345703, |
| "learning_rate": 1.8072289156626508e-06, |
| "loss": 2.0823, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.003619827605710278, |
| "grad_norm": 11.130951881408691, |
| "learning_rate": 2.4096385542168676e-06, |
| "loss": 1.3735, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.004524784507137848, |
| "grad_norm": 6.264873027801514, |
| "learning_rate": 3.012048192771085e-06, |
| "loss": 0.77, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.005429741408565417, |
| "grad_norm": 5.753260135650635, |
| "learning_rate": 3.6144578313253016e-06, |
| "loss": 0.5821, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.006334698309992987, |
| "grad_norm": 5.4176836013793945, |
| "learning_rate": 4.216867469879519e-06, |
| "loss": 0.5349, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.007239655211420556, |
| "grad_norm": 5.466179370880127, |
| "learning_rate": 4.819277108433735e-06, |
| "loss": 0.4904, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.008144612112848126, |
| "grad_norm": 5.408243179321289, |
| "learning_rate": 5.421686746987952e-06, |
| "loss": 0.4391, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.009049569014275696, |
| "grad_norm": 4.930578708648682, |
| "learning_rate": 6.02409638554217e-06, |
| "loss": 0.382, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.009954525915703265, |
| "grad_norm": 4.409604072570801, |
| "learning_rate": 6.626506024096386e-06, |
| "loss": 0.3354, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.010859482817130834, |
| "grad_norm": 3.2613117694854736, |
| "learning_rate": 7.228915662650603e-06, |
| "loss": 0.2748, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.011764439718558404, |
| "grad_norm": 3.974195718765259, |
| "learning_rate": 7.83132530120482e-06, |
| "loss": 0.2458, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.012669396619985973, |
| "grad_norm": 2.277785539627075, |
| "learning_rate": 8.433734939759038e-06, |
| "loss": 0.2215, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.013574353521413543, |
| "grad_norm": 1.618218183517456, |
| "learning_rate": 9.036144578313254e-06, |
| "loss": 0.2, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.014479310422841112, |
| "grad_norm": 1.4689830541610718, |
| "learning_rate": 9.63855421686747e-06, |
| "loss": 0.1821, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.015384267324268681, |
| "grad_norm": 1.7365655899047852, |
| "learning_rate": 1.0240963855421688e-05, |
| "loss": 0.1699, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01628922422569625, |
| "grad_norm": 0.9865982532501221, |
| "learning_rate": 1.0843373493975904e-05, |
| "loss": 0.1667, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01719418112712382, |
| "grad_norm": 0.9514006972312927, |
| "learning_rate": 1.1445783132530122e-05, |
| "loss": 0.1651, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.01809913802855139, |
| "grad_norm": 1.1965084075927734, |
| "learning_rate": 1.204819277108434e-05, |
| "loss": 0.1564, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01900409492997896, |
| "grad_norm": 1.2576557397842407, |
| "learning_rate": 1.2650602409638555e-05, |
| "loss": 0.1589, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.01990905183140653, |
| "grad_norm": 0.7250511646270752, |
| "learning_rate": 1.3253012048192772e-05, |
| "loss": 0.1567, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.020814008732834097, |
| "grad_norm": 0.881618320941925, |
| "learning_rate": 1.3855421686746989e-05, |
| "loss": 0.1554, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.021718965634261667, |
| "grad_norm": 0.7235729694366455, |
| "learning_rate": 1.4457831325301207e-05, |
| "loss": 0.1594, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.022623922535689237, |
| "grad_norm": 0.6479834318161011, |
| "learning_rate": 1.5060240963855424e-05, |
| "loss": 0.1554, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.023528879437116807, |
| "grad_norm": 0.8083927035331726, |
| "learning_rate": 1.566265060240964e-05, |
| "loss": 0.1526, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.024433836338544377, |
| "grad_norm": 1.1908012628555298, |
| "learning_rate": 1.6265060240963857e-05, |
| "loss": 0.1525, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.025338793239971947, |
| "grad_norm": 0.944805920124054, |
| "learning_rate": 1.6867469879518076e-05, |
| "loss": 0.1583, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.026243750141399517, |
| "grad_norm": 0.6904934048652649, |
| "learning_rate": 1.746987951807229e-05, |
| "loss": 0.1531, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.027148707042827087, |
| "grad_norm": 1.380239486694336, |
| "learning_rate": 1.807228915662651e-05, |
| "loss": 0.1483, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.028053663944254653, |
| "grad_norm": 0.4651995897293091, |
| "learning_rate": 1.8674698795180725e-05, |
| "loss": 0.1498, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.028958620845682223, |
| "grad_norm": 0.6768488883972168, |
| "learning_rate": 1.927710843373494e-05, |
| "loss": 0.152, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.029863577747109793, |
| "grad_norm": 0.6283469796180725, |
| "learning_rate": 1.987951807228916e-05, |
| "loss": 0.1464, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.030768534648537363, |
| "grad_norm": 0.7869206070899963, |
| "learning_rate": 1.999997250700714e-05, |
| "loss": 0.1454, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.03167349154996493, |
| "grad_norm": 1.3408620357513428, |
| "learning_rate": 1.9999860816982734e-05, |
| "loss": 0.154, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0325784484513925, |
| "grad_norm": 1.0676746368408203, |
| "learning_rate": 1.9999663212573584e-05, |
| "loss": 0.1482, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.03348340535282007, |
| "grad_norm": 0.7311077117919922, |
| "learning_rate": 1.9999379695477417e-05, |
| "loss": 0.1486, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.03438836225424764, |
| "grad_norm": 0.8585270643234253, |
| "learning_rate": 1.999901026813009e-05, |
| "loss": 0.1463, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.03529331915567521, |
| "grad_norm": 0.4578467607498169, |
| "learning_rate": 1.9998554933705552e-05, |
| "loss": 0.1407, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.03619827605710278, |
| "grad_norm": 0.6237673163414001, |
| "learning_rate": 1.9998013696115847e-05, |
| "loss": 0.1463, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.03710323295853035, |
| "grad_norm": 0.49388837814331055, |
| "learning_rate": 1.999738656001104e-05, |
| "loss": 0.1498, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.03800818985995792, |
| "grad_norm": 0.9949780106544495, |
| "learning_rate": 1.999667353077921e-05, |
| "loss": 0.1457, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.03891314676138549, |
| "grad_norm": 0.792772114276886, |
| "learning_rate": 1.9995874614546386e-05, |
| "loss": 0.1567, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.03981810366281306, |
| "grad_norm": 0.45337289571762085, |
| "learning_rate": 1.9994989818176507e-05, |
| "loss": 0.1444, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.040723060564240625, |
| "grad_norm": 0.5384504795074463, |
| "learning_rate": 1.9994019149271357e-05, |
| "loss": 0.1464, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.041628017465668195, |
| "grad_norm": 0.4586544334888458, |
| "learning_rate": 1.9992962616170485e-05, |
| "loss": 0.1366, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.042532974367095765, |
| "grad_norm": 1.124829888343811, |
| "learning_rate": 1.999182022795116e-05, |
| "loss": 0.1441, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.043437931268523335, |
| "grad_norm": 7.702340602874756, |
| "learning_rate": 1.9990591994428278e-05, |
| "loss": 0.1434, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.044342888169950904, |
| "grad_norm": 0.600829541683197, |
| "learning_rate": 1.9989277926154273e-05, |
| "loss": 0.1554, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.045247845071378474, |
| "grad_norm": 0.525310218334198, |
| "learning_rate": 1.9987878034419047e-05, |
| "loss": 0.1524, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.046152801972806044, |
| "grad_norm": 1.6653215885162354, |
| "learning_rate": 1.998639233124985e-05, |
| "loss": 0.1504, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.047057758874233614, |
| "grad_norm": 0.5178505778312683, |
| "learning_rate": 1.998482082941118e-05, |
| "loss": 0.1462, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.047962715775661184, |
| "grad_norm": 0.524719774723053, |
| "learning_rate": 1.9983163542404694e-05, |
| "loss": 0.1482, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.048867672677088754, |
| "grad_norm": 0.4859486520290375, |
| "learning_rate": 1.9981420484469062e-05, |
| "loss": 0.1504, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.049772629578516324, |
| "grad_norm": 0.6776233315467834, |
| "learning_rate": 1.997959167057988e-05, |
| "loss": 0.1494, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.050677586479943894, |
| "grad_norm": 0.6861289143562317, |
| "learning_rate": 1.9977677116449494e-05, |
| "loss": 0.1492, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.051582543381371464, |
| "grad_norm": 0.4854241609573364, |
| "learning_rate": 1.9975676838526914e-05, |
| "loss": 0.1437, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.052487500282799034, |
| "grad_norm": 0.37351515889167786, |
| "learning_rate": 1.9973590853997646e-05, |
| "loss": 0.146, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.053392457184226604, |
| "grad_norm": 0.5376414060592651, |
| "learning_rate": 1.997141918078354e-05, |
| "loss": 0.1445, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.05429741408565417, |
| "grad_norm": 1.200066328048706, |
| "learning_rate": 1.996916183754266e-05, |
| "loss": 0.1407, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.05520237098708174, |
| "grad_norm": 0.5804212689399719, |
| "learning_rate": 1.9966818843669097e-05, |
| "loss": 0.1482, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.056107327888509306, |
| "grad_norm": 0.5233150124549866, |
| "learning_rate": 1.9964390219292823e-05, |
| "loss": 0.1423, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.057012284789936876, |
| "grad_norm": 0.7850412130355835, |
| "learning_rate": 1.9961875985279503e-05, |
| "loss": 0.1436, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.057917241691364446, |
| "grad_norm": 0.46773582696914673, |
| "learning_rate": 1.9959276163230325e-05, |
| "loss": 0.136, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.058822198592792016, |
| "grad_norm": 0.47068318724632263, |
| "learning_rate": 1.9956590775481808e-05, |
| "loss": 0.1477, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.059727155494219586, |
| "grad_norm": 0.7239755988121033, |
| "learning_rate": 1.9953819845105616e-05, |
| "loss": 0.1414, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.060632112395647156, |
| "grad_norm": 0.4646810293197632, |
| "learning_rate": 1.9950963395908368e-05, |
| "loss": 0.1433, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.061537069297074726, |
| "grad_norm": 0.5105672478675842, |
| "learning_rate": 1.99480214524314e-05, |
| "loss": 0.1381, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.062442026198502296, |
| "grad_norm": 0.4857189655303955, |
| "learning_rate": 1.99449940399506e-05, |
| "loss": 0.1355, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.06334698309992987, |
| "grad_norm": 0.7053922414779663, |
| "learning_rate": 1.9941881184476154e-05, |
| "loss": 0.1402, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.06425194000135744, |
| "grad_norm": 0.6095537543296814, |
| "learning_rate": 1.9938682912752343e-05, |
| "loss": 0.1424, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.065156896902785, |
| "grad_norm": 0.8942661285400391, |
| "learning_rate": 1.99353992522573e-05, |
| "loss": 0.1337, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.06606185380421258, |
| "grad_norm": 0.8702712059020996, |
| "learning_rate": 1.9932030231202786e-05, |
| "loss": 0.146, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.06696681070564015, |
| "grad_norm": 0.6549976468086243, |
| "learning_rate": 1.9928575878533946e-05, |
| "loss": 0.1389, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.06787176760706772, |
| "grad_norm": 0.56646329164505, |
| "learning_rate": 1.9925036223929045e-05, |
| "loss": 0.1399, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.06877672450849528, |
| "grad_norm": 0.5389584898948669, |
| "learning_rate": 1.9921411297799233e-05, |
| "loss": 0.1398, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.06968168140992285, |
| "grad_norm": 0.7392621636390686, |
| "learning_rate": 1.9917701131288274e-05, |
| "loss": 0.1436, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.07058663831135042, |
| "grad_norm": 0.5267114639282227, |
| "learning_rate": 1.991390575627228e-05, |
| "loss": 0.1398, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.071491595212778, |
| "grad_norm": 0.543932318687439, |
| "learning_rate": 1.9910025205359434e-05, |
| "loss": 0.1469, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.07239655211420556, |
| "grad_norm": 0.5490307211875916, |
| "learning_rate": 1.990605951188972e-05, |
| "loss": 0.1342, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.07330150901563313, |
| "grad_norm": 0.4569859206676483, |
| "learning_rate": 1.990200870993461e-05, |
| "loss": 0.1432, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0742064659170607, |
| "grad_norm": 0.5565773844718933, |
| "learning_rate": 1.9897872834296816e-05, |
| "loss": 0.1465, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.07511142281848827, |
| "grad_norm": 0.5516616106033325, |
| "learning_rate": 1.989365192050995e-05, |
| "loss": 0.1434, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.07601637971991584, |
| "grad_norm": 0.8122782707214355, |
| "learning_rate": 1.988934600483824e-05, |
| "loss": 0.1424, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.07692133662134341, |
| "grad_norm": 0.4941742420196533, |
| "learning_rate": 1.9884955124276214e-05, |
| "loss": 0.1437, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.07782629352277098, |
| "grad_norm": 0.68223637342453, |
| "learning_rate": 1.9880479316548365e-05, |
| "loss": 0.1366, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.07873125042419855, |
| "grad_norm": 0.7050871253013611, |
| "learning_rate": 1.9875918620108867e-05, |
| "loss": 0.1358, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.07963620732562612, |
| "grad_norm": 0.44282448291778564, |
| "learning_rate": 1.9871273074141197e-05, |
| "loss": 0.1384, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.08054116422705368, |
| "grad_norm": 0.6209523677825928, |
| "learning_rate": 1.9866542718557844e-05, |
| "loss": 0.1389, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.08144612112848125, |
| "grad_norm": 0.49669986963272095, |
| "learning_rate": 1.9861727593999927e-05, |
| "loss": 0.1298, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.08235107802990882, |
| "grad_norm": 0.4034930467605591, |
| "learning_rate": 1.985682774183687e-05, |
| "loss": 0.139, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.08325603493133639, |
| "grad_norm": 0.42605486512184143, |
| "learning_rate": 1.985184320416603e-05, |
| "loss": 0.1359, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.08416099183276396, |
| "grad_norm": 0.5757314562797546, |
| "learning_rate": 1.9846774023812366e-05, |
| "loss": 0.1412, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.08506594873419153, |
| "grad_norm": 0.6896102428436279, |
| "learning_rate": 1.984162024432802e-05, |
| "loss": 0.1322, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.0859709056356191, |
| "grad_norm": 0.4264814257621765, |
| "learning_rate": 1.9836381909992e-05, |
| "loss": 0.1413, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.08687586253704667, |
| "grad_norm": 0.4684106111526489, |
| "learning_rate": 1.9831059065809756e-05, |
| "loss": 0.1373, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.08778081943847424, |
| "grad_norm": 0.5796085000038147, |
| "learning_rate": 1.9825651757512808e-05, |
| "loss": 0.1357, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.08868577633990181, |
| "grad_norm": 0.5410571098327637, |
| "learning_rate": 1.9820160031558365e-05, |
| "loss": 0.1364, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.08959073324132938, |
| "grad_norm": 0.45756787061691284, |
| "learning_rate": 1.9814583935128902e-05, |
| "loss": 0.1425, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.09049569014275695, |
| "grad_norm": 0.9066041111946106, |
| "learning_rate": 1.9808923516131787e-05, |
| "loss": 0.1367, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.09140064704418452, |
| "grad_norm": 0.49664556980133057, |
| "learning_rate": 1.9803178823198826e-05, |
| "loss": 0.1382, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.09230560394561209, |
| "grad_norm": 0.3994297683238983, |
| "learning_rate": 1.979734990568589e-05, |
| "loss": 0.1357, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.09321056084703966, |
| "grad_norm": 0.4870229959487915, |
| "learning_rate": 1.979143681367246e-05, |
| "loss": 0.1387, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.09411551774846723, |
| "grad_norm": 0.5828505754470825, |
| "learning_rate": 1.9785439597961207e-05, |
| "loss": 0.1388, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.0950204746498948, |
| "grad_norm": 0.4454402029514313, |
| "learning_rate": 1.977935831007756e-05, |
| "loss": 0.1394, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.09592543155132237, |
| "grad_norm": 0.6127322912216187, |
| "learning_rate": 1.977319300226926e-05, |
| "loss": 0.1394, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.09683038845274994, |
| "grad_norm": 0.42766839265823364, |
| "learning_rate": 1.97669437275059e-05, |
| "loss": 0.1379, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.09773534535417751, |
| "grad_norm": 0.556703507900238, |
| "learning_rate": 1.9760610539478492e-05, |
| "loss": 0.1336, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.09864030225560508, |
| "grad_norm": 0.4200476408004761, |
| "learning_rate": 1.9754193492598985e-05, |
| "loss": 0.1398, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.09954525915703265, |
| "grad_norm": 0.5154082179069519, |
| "learning_rate": 1.9747692641999815e-05, |
| "loss": 0.1391, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.10045021605846022, |
| "grad_norm": 0.4285774230957031, |
| "learning_rate": 1.9741108043533416e-05, |
| "loss": 0.1405, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.10135517295988779, |
| "grad_norm": 0.37950581312179565, |
| "learning_rate": 1.9734439753771742e-05, |
| "loss": 0.1399, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.10226012986131536, |
| "grad_norm": 0.4357180893421173, |
| "learning_rate": 1.9727687830005795e-05, |
| "loss": 0.1354, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.10316508676274293, |
| "grad_norm": 0.341791570186615, |
| "learning_rate": 1.9720852330245127e-05, |
| "loss": 0.1368, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.1040700436641705, |
| "grad_norm": 0.37262991070747375, |
| "learning_rate": 1.971393331321732e-05, |
| "loss": 0.1406, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.10497500056559807, |
| "grad_norm": 0.5309743881225586, |
| "learning_rate": 1.9706930838367517e-05, |
| "loss": 0.1386, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.10587995746702564, |
| "grad_norm": 0.4703729748725891, |
| "learning_rate": 1.9699844965857884e-05, |
| "loss": 0.1457, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.10678491436845321, |
| "grad_norm": 0.43350547552108765, |
| "learning_rate": 1.969267575656711e-05, |
| "loss": 0.1429, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.10768987126988078, |
| "grad_norm": 0.5514594316482544, |
| "learning_rate": 1.968542327208987e-05, |
| "loss": 0.14, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.10859482817130835, |
| "grad_norm": 0.7761058211326599, |
| "learning_rate": 1.9678087574736305e-05, |
| "loss": 0.1361, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.10949978507273592, |
| "grad_norm": 0.4929139316082001, |
| "learning_rate": 1.9670668727531486e-05, |
| "loss": 0.1382, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.11040474197416349, |
| "grad_norm": 0.5243300199508667, |
| "learning_rate": 1.9663166794214868e-05, |
| "loss": 0.1443, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.11130969887559106, |
| "grad_norm": 0.3967302739620209, |
| "learning_rate": 1.965558183923975e-05, |
| "loss": 0.1359, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.11221465577701861, |
| "grad_norm": 0.6645998954772949, |
| "learning_rate": 1.9647913927772708e-05, |
| "loss": 0.1422, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.11311961267844618, |
| "grad_norm": 0.28883126378059387, |
| "learning_rate": 1.9640163125693053e-05, |
| "loss": 0.1397, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.11402456957987375, |
| "grad_norm": 0.48156359791755676, |
| "learning_rate": 1.9632329499592248e-05, |
| "loss": 0.141, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.11492952648130132, |
| "grad_norm": 0.9770751595497131, |
| "learning_rate": 1.962441311677335e-05, |
| "loss": 0.1343, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.11583448338272889, |
| "grad_norm": 0.675937831401825, |
| "learning_rate": 1.9616414045250417e-05, |
| "loss": 0.143, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.11673944028415646, |
| "grad_norm": 0.5480718016624451, |
| "learning_rate": 1.960833235374794e-05, |
| "loss": 0.1391, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.11764439718558403, |
| "grad_norm": 0.42649492621421814, |
| "learning_rate": 1.960016811170024e-05, |
| "loss": 0.1331, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1185493540870116, |
| "grad_norm": 0.3937451243400574, |
| "learning_rate": 1.9591921389250872e-05, |
| "loss": 0.1406, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.11945431098843917, |
| "grad_norm": 0.5013184547424316, |
| "learning_rate": 1.958359225725204e-05, |
| "loss": 0.1363, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.12035926788986674, |
| "grad_norm": 0.475367933511734, |
| "learning_rate": 1.9575180787263955e-05, |
| "loss": 0.1368, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.12126422479129431, |
| "grad_norm": 0.4039798676967621, |
| "learning_rate": 1.956668705155426e-05, |
| "loss": 0.1398, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.12216918169272188, |
| "grad_norm": 0.5868827104568481, |
| "learning_rate": 1.955811112309737e-05, |
| "loss": 0.1373, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.12307413859414945, |
| "grad_norm": 0.4204873740673065, |
| "learning_rate": 1.9549453075573873e-05, |
| "loss": 0.1385, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.12397909549557702, |
| "grad_norm": 0.5889093279838562, |
| "learning_rate": 1.954071298336989e-05, |
| "loss": 0.1326, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.12488405239700459, |
| "grad_norm": 0.4504663050174713, |
| "learning_rate": 1.9531890921576425e-05, |
| "loss": 0.1371, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.12578900929843218, |
| "grad_norm": 0.40761178731918335, |
| "learning_rate": 1.9522986965988748e-05, |
| "loss": 0.1336, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.12669396619985973, |
| "grad_norm": 0.46256113052368164, |
| "learning_rate": 1.9514001193105693e-05, |
| "loss": 0.1351, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.12759892310128731, |
| "grad_norm": 0.39085566997528076, |
| "learning_rate": 1.9504933680129063e-05, |
| "loss": 0.1347, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.12850388000271487, |
| "grad_norm": 0.6131863594055176, |
| "learning_rate": 1.9495784504962913e-05, |
| "loss": 0.1356, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.12940883690414243, |
| "grad_norm": 0.4904513955116272, |
| "learning_rate": 1.9486553746212915e-05, |
| "loss": 0.1365, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.13031379380557, |
| "grad_norm": 0.2950369417667389, |
| "learning_rate": 1.9477241483185675e-05, |
| "loss": 0.1374, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.13121875070699757, |
| "grad_norm": 0.4332665503025055, |
| "learning_rate": 1.946784779588803e-05, |
| "loss": 0.1361, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.13212370760842515, |
| "grad_norm": 0.3877945840358734, |
| "learning_rate": 1.9458372765026402e-05, |
| "loss": 0.1332, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.1330286645098527, |
| "grad_norm": 0.35281670093536377, |
| "learning_rate": 1.9448816472006057e-05, |
| "loss": 0.1406, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.1339336214112803, |
| "grad_norm": 0.5009306073188782, |
| "learning_rate": 1.943917899893045e-05, |
| "loss": 0.1341, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.13483857831270785, |
| "grad_norm": 0.7299574613571167, |
| "learning_rate": 1.9429460428600485e-05, |
| "loss": 0.1337, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.13574353521413543, |
| "grad_norm": 0.4611435830593109, |
| "learning_rate": 1.9419660844513828e-05, |
| "loss": 0.1438, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.136648492115563, |
| "grad_norm": 0.4176791310310364, |
| "learning_rate": 1.940978033086417e-05, |
| "loss": 0.1399, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.13755344901699057, |
| "grad_norm": 0.3517489731311798, |
| "learning_rate": 1.9399818972540526e-05, |
| "loss": 0.1333, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.13845840591841813, |
| "grad_norm": 0.4304490387439728, |
| "learning_rate": 1.9389776855126472e-05, |
| "loss": 0.1416, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.1393633628198457, |
| "grad_norm": 0.57242751121521, |
| "learning_rate": 1.937965406489945e-05, |
| "loss": 0.1375, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.14026831972127327, |
| "grad_norm": 0.4773651361465454, |
| "learning_rate": 1.936945068883e-05, |
| "loss": 0.1281, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.14117327662270085, |
| "grad_norm": 0.4251551926136017, |
| "learning_rate": 1.9359166814581017e-05, |
| "loss": 0.1368, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.1420782335241284, |
| "grad_norm": 0.7797797918319702, |
| "learning_rate": 1.9348802530507003e-05, |
| "loss": 0.1363, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.142983190425556, |
| "grad_norm": 1.2365995645523071, |
| "learning_rate": 1.9338357925653312e-05, |
| "loss": 0.1344, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.14388814732698355, |
| "grad_norm": 0.6574030518531799, |
| "learning_rate": 1.932783308975537e-05, |
| "loss": 0.1419, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.14479310422841113, |
| "grad_norm": 0.6940555572509766, |
| "learning_rate": 1.9317228113237916e-05, |
| "loss": 0.1427, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.14569806112983869, |
| "grad_norm": 0.4072614014148712, |
| "learning_rate": 1.9306543087214215e-05, |
| "loss": 0.1306, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.14660301803126627, |
| "grad_norm": 0.37100809812545776, |
| "learning_rate": 1.9295778103485297e-05, |
| "loss": 0.1368, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.14750797493269382, |
| "grad_norm": 0.3309537470340729, |
| "learning_rate": 1.9284933254539143e-05, |
| "loss": 0.1319, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.1484129318341214, |
| "grad_norm": 0.5338348746299744, |
| "learning_rate": 1.9274008633549905e-05, |
| "loss": 0.1321, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.14931788873554896, |
| "grad_norm": 0.81573086977005, |
| "learning_rate": 1.9263004334377087e-05, |
| "loss": 0.1332, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.15022284563697655, |
| "grad_norm": 0.5445601344108582, |
| "learning_rate": 1.9251920451564773e-05, |
| "loss": 0.1335, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.1511278025384041, |
| "grad_norm": 0.5735446810722351, |
| "learning_rate": 1.9240757080340787e-05, |
| "loss": 0.1432, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.1520327594398317, |
| "grad_norm": 0.35098642110824585, |
| "learning_rate": 1.9229514316615875e-05, |
| "loss": 0.1397, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.15293771634125924, |
| "grad_norm": 0.5715315341949463, |
| "learning_rate": 1.9218192256982898e-05, |
| "loss": 0.1356, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.15384267324268683, |
| "grad_norm": 0.4406200051307678, |
| "learning_rate": 1.920679099871599e-05, |
| "loss": 0.1348, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.15474763014411438, |
| "grad_norm": 0.3725470304489136, |
| "learning_rate": 1.919531063976972e-05, |
| "loss": 0.1374, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.15565258704554197, |
| "grad_norm": 0.2997819781303406, |
| "learning_rate": 1.918375127877826e-05, |
| "loss": 0.1332, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.15655754394696952, |
| "grad_norm": 0.40308165550231934, |
| "learning_rate": 1.917211301505453e-05, |
| "loss": 0.1331, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.1574625008483971, |
| "grad_norm": 0.437809020280838, |
| "learning_rate": 1.916039594858935e-05, |
| "loss": 0.1398, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.15836745774982466, |
| "grad_norm": 0.8596120476722717, |
| "learning_rate": 1.914860018005058e-05, |
| "loss": 0.1358, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.15927241465125225, |
| "grad_norm": 0.6408957242965698, |
| "learning_rate": 1.913672581078224e-05, |
| "loss": 0.1334, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.1601773715526798, |
| "grad_norm": 0.49583616852760315, |
| "learning_rate": 1.912477294280367e-05, |
| "loss": 0.1404, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.16108232845410736, |
| "grad_norm": 0.5075445175170898, |
| "learning_rate": 1.911274167880863e-05, |
| "loss": 0.1372, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.16198728535553494, |
| "grad_norm": 0.4242574870586395, |
| "learning_rate": 1.9100632122164423e-05, |
| "loss": 0.1377, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.1628922422569625, |
| "grad_norm": 0.5238597989082336, |
| "learning_rate": 1.9088444376911002e-05, |
| "loss": 0.1427, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.16379719915839008, |
| "grad_norm": 0.43989208340644836, |
| "learning_rate": 1.9076178547760095e-05, |
| "loss": 0.1317, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.16470215605981764, |
| "grad_norm": 0.426861047744751, |
| "learning_rate": 1.9063834740094284e-05, |
| "loss": 0.1375, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.16560711296124522, |
| "grad_norm": 0.602488100528717, |
| "learning_rate": 1.90514130599661e-05, |
| "loss": 0.1319, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.16651206986267278, |
| "grad_norm": 0.3982050120830536, |
| "learning_rate": 1.9038913614097142e-05, |
| "loss": 0.1371, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.16741702676410036, |
| "grad_norm": 0.33180946111679077, |
| "learning_rate": 1.902633650987712e-05, |
| "loss": 0.1328, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.16832198366552792, |
| "grad_norm": 0.5042048096656799, |
| "learning_rate": 1.9013681855362952e-05, |
| "loss": 0.1342, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.1692269405669555, |
| "grad_norm": 0.35587260127067566, |
| "learning_rate": 1.9000949759277844e-05, |
| "loss": 0.1436, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.17013189746838306, |
| "grad_norm": 0.4799160361289978, |
| "learning_rate": 1.898814033101033e-05, |
| "loss": 0.1407, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.17103685436981064, |
| "grad_norm": 0.4712836742401123, |
| "learning_rate": 1.897525368061336e-05, |
| "loss": 0.1361, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.1719418112712382, |
| "grad_norm": 0.45217564702033997, |
| "learning_rate": 1.896228991880334e-05, |
| "loss": 0.1399, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.17284676817266578, |
| "grad_norm": 0.4772576689720154, |
| "learning_rate": 1.8949249156959185e-05, |
| "loss": 0.1426, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.17375172507409334, |
| "grad_norm": 0.3817451298236847, |
| "learning_rate": 1.893613150712135e-05, |
| "loss": 0.1332, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.17465668197552092, |
| "grad_norm": 0.35645753145217896, |
| "learning_rate": 1.892293708199089e-05, |
| "loss": 0.1274, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.17556163887694848, |
| "grad_norm": 0.5715720057487488, |
| "learning_rate": 1.8909665994928478e-05, |
| "loss": 0.1354, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.17646659577837606, |
| "grad_norm": 0.4831380248069763, |
| "learning_rate": 1.889631835995342e-05, |
| "loss": 0.1353, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.17737155267980362, |
| "grad_norm": 0.5451020002365112, |
| "learning_rate": 1.8882894291742703e-05, |
| "loss": 0.1407, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.1782765095812312, |
| "grad_norm": 0.5351752638816833, |
| "learning_rate": 1.886939390562999e-05, |
| "loss": 0.1353, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.17918146648265876, |
| "grad_norm": 0.546170175075531, |
| "learning_rate": 1.8855817317604622e-05, |
| "loss": 0.1382, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.18008642338408634, |
| "grad_norm": 0.38834547996520996, |
| "learning_rate": 1.8842164644310657e-05, |
| "loss": 0.1289, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.1809913802855139, |
| "grad_norm": 0.3078169524669647, |
| "learning_rate": 1.882843600304582e-05, |
| "loss": 0.1291, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.18189633718694148, |
| "grad_norm": 0.36554184556007385, |
| "learning_rate": 1.8814631511760535e-05, |
| "loss": 0.1445, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.18280129408836904, |
| "grad_norm": 0.5817809700965881, |
| "learning_rate": 1.8800751289056885e-05, |
| "loss": 0.1396, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.18370625098979662, |
| "grad_norm": 0.873615562915802, |
| "learning_rate": 1.8786795454187615e-05, |
| "loss": 0.1367, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.18461120789122418, |
| "grad_norm": 0.7153456807136536, |
| "learning_rate": 1.8772764127055087e-05, |
| "loss": 0.1344, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.18551616479265176, |
| "grad_norm": 0.28874504566192627, |
| "learning_rate": 1.8758657428210266e-05, |
| "loss": 0.1375, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.18642112169407932, |
| "grad_norm": 0.41073450446128845, |
| "learning_rate": 1.8744475478851667e-05, |
| "loss": 0.1392, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.1873260785955069, |
| "grad_norm": 0.4002520740032196, |
| "learning_rate": 1.8730218400824337e-05, |
| "loss": 0.1334, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.18823103549693446, |
| "grad_norm": 0.49055391550064087, |
| "learning_rate": 1.871588631661879e-05, |
| "loss": 0.1398, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.18913599239836204, |
| "grad_norm": 0.3669786751270294, |
| "learning_rate": 1.8701479349369957e-05, |
| "loss": 0.1309, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.1900409492997896, |
| "grad_norm": 0.4466594159603119, |
| "learning_rate": 1.8686997622856134e-05, |
| "loss": 0.1361, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.19094590620121718, |
| "grad_norm": 0.37340307235717773, |
| "learning_rate": 1.8672441261497915e-05, |
| "loss": 0.1314, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.19185086310264474, |
| "grad_norm": 0.35569268465042114, |
| "learning_rate": 1.8657810390357126e-05, |
| "loss": 0.1385, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.1927558200040723, |
| "grad_norm": 0.8156322240829468, |
| "learning_rate": 1.8643105135135743e-05, |
| "loss": 0.1358, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.19366077690549988, |
| "grad_norm": 0.4480380117893219, |
| "learning_rate": 1.8628325622174818e-05, |
| "loss": 0.1367, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.19456573380692743, |
| "grad_norm": 0.5346203446388245, |
| "learning_rate": 1.86134719784534e-05, |
| "loss": 0.1367, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.19547069070835502, |
| "grad_norm": 0.3813647925853729, |
| "learning_rate": 1.8598544331587427e-05, |
| "loss": 0.1386, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.19637564760978257, |
| "grad_norm": 0.5341768860816956, |
| "learning_rate": 1.858354280982865e-05, |
| "loss": 0.1298, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.19728060451121016, |
| "grad_norm": 0.3788771331310272, |
| "learning_rate": 1.8568467542063505e-05, |
| "loss": 0.1416, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.1981855614126377, |
| "grad_norm": 0.28519588708877563, |
| "learning_rate": 1.8553318657812035e-05, |
| "loss": 0.1336, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.1990905183140653, |
| "grad_norm": 0.38430145382881165, |
| "learning_rate": 1.853809628722676e-05, |
| "loss": 0.1346, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.19999547521549285, |
| "grad_norm": 0.3612518906593323, |
| "learning_rate": 1.8522800561091556e-05, |
| "loss": 0.1344, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.20090043211692044, |
| "grad_norm": 0.36791539192199707, |
| "learning_rate": 1.8507431610820547e-05, |
| "loss": 0.1345, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.201805389018348, |
| "grad_norm": 0.8380405902862549, |
| "learning_rate": 1.8491989568456962e-05, |
| "loss": 0.1343, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.20271034591977558, |
| "grad_norm": 0.4786330461502075, |
| "learning_rate": 1.8476474566671995e-05, |
| "loss": 0.1409, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.20361530282120313, |
| "grad_norm": 0.5574005842208862, |
| "learning_rate": 1.8460886738763698e-05, |
| "loss": 0.1324, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.20452025972263072, |
| "grad_norm": 0.3608987033367157, |
| "learning_rate": 1.8445226218655787e-05, |
| "loss": 0.1429, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.20542521662405827, |
| "grad_norm": 0.3293229639530182, |
| "learning_rate": 1.842949314089654e-05, |
| "loss": 0.1332, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.20633017352548585, |
| "grad_norm": 0.594818651676178, |
| "learning_rate": 1.8413687640657602e-05, |
| "loss": 0.1354, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.2072351304269134, |
| "grad_norm": 0.3503284156322479, |
| "learning_rate": 1.8397809853732846e-05, |
| "loss": 0.1373, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.208140087328341, |
| "grad_norm": 0.6148970127105713, |
| "learning_rate": 1.8381859916537204e-05, |
| "loss": 0.1435, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.20904504422976855, |
| "grad_norm": 0.39734190702438354, |
| "learning_rate": 1.8365837966105486e-05, |
| "loss": 0.1325, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.20995000113119613, |
| "grad_norm": 0.3820249140262604, |
| "learning_rate": 1.8349744140091205e-05, |
| "loss": 0.1307, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.2108549580326237, |
| "grad_norm": 0.2815605700016022, |
| "learning_rate": 1.83335785767654e-05, |
| "loss": 0.1395, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.21175991493405127, |
| "grad_norm": 0.26476067304611206, |
| "learning_rate": 1.831734141501546e-05, |
| "loss": 0.1332, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.21266487183547883, |
| "grad_norm": 0.4209696352481842, |
| "learning_rate": 1.830103279434389e-05, |
| "loss": 0.1373, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.21356982873690641, |
| "grad_norm": 0.29990458488464355, |
| "learning_rate": 1.828465285486716e-05, |
| "loss": 0.1345, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.21447478563833397, |
| "grad_norm": 0.39812973141670227, |
| "learning_rate": 1.826820173731446e-05, |
| "loss": 0.1361, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.21537974253976155, |
| "grad_norm": 0.35514476895332336, |
| "learning_rate": 1.825167958302653e-05, |
| "loss": 0.1353, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.2162846994411891, |
| "grad_norm": 0.3101460039615631, |
| "learning_rate": 1.8235086533954418e-05, |
| "loss": 0.1369, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.2171896563426167, |
| "grad_norm": 0.42703959345817566, |
| "learning_rate": 1.8218422732658263e-05, |
| "loss": 0.1348, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.21809461324404425, |
| "grad_norm": 0.44410404562950134, |
| "learning_rate": 1.820168832230609e-05, |
| "loss": 0.1292, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.21899957014547183, |
| "grad_norm": 0.4274084270000458, |
| "learning_rate": 1.8184883446672545e-05, |
| "loss": 0.1325, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.2199045270468994, |
| "grad_norm": 0.3276132643222809, |
| "learning_rate": 1.81680082501377e-05, |
| "loss": 0.1304, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.22080948394832697, |
| "grad_norm": 0.28649523854255676, |
| "learning_rate": 1.8151062877685785e-05, |
| "loss": 0.1379, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.22171444084975453, |
| "grad_norm": 0.3112781345844269, |
| "learning_rate": 1.813404747490395e-05, |
| "loss": 0.1324, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.2226193977511821, |
| "grad_norm": 0.37147268652915955, |
| "learning_rate": 1.811696218798102e-05, |
| "loss": 0.1362, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.22352435465260967, |
| "grad_norm": 0.30297672748565674, |
| "learning_rate": 1.8099807163706225e-05, |
| "loss": 0.1382, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.22442931155403723, |
| "grad_norm": 0.3513183295726776, |
| "learning_rate": 1.808258254946795e-05, |
| "loss": 0.1313, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.2253342684554648, |
| "grad_norm": 0.2990841865539551, |
| "learning_rate": 1.806528849325248e-05, |
| "loss": 0.1361, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.22623922535689237, |
| "grad_norm": 0.44124889373779297, |
| "learning_rate": 1.8047925143642685e-05, |
| "loss": 0.1348, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.22714418225831995, |
| "grad_norm": 0.5837457776069641, |
| "learning_rate": 1.8030492649816807e-05, |
| "loss": 0.1384, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.2280491391597475, |
| "grad_norm": 0.4553276598453522, |
| "learning_rate": 1.801299116154712e-05, |
| "loss": 0.1345, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.2289540960611751, |
| "grad_norm": 0.5152564644813538, |
| "learning_rate": 1.7995420829198677e-05, |
| "loss": 0.1319, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.22985905296260264, |
| "grad_norm": 0.4430082440376282, |
| "learning_rate": 1.7977781803728012e-05, |
| "loss": 0.1352, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.23076400986403023, |
| "grad_norm": 0.47474417090415955, |
| "learning_rate": 1.7960074236681832e-05, |
| "loss": 0.1387, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.23166896676545778, |
| "grad_norm": 0.3780491054058075, |
| "learning_rate": 1.7942298280195735e-05, |
| "loss": 0.1369, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.23257392366688537, |
| "grad_norm": 0.3033972680568695, |
| "learning_rate": 1.7924454086992874e-05, |
| "loss": 0.1297, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.23347888056831292, |
| "grad_norm": 0.3683450520038605, |
| "learning_rate": 1.7906541810382676e-05, |
| "loss": 0.1318, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.2343838374697405, |
| "grad_norm": 0.3137243092060089, |
| "learning_rate": 1.78885616042595e-05, |
| "loss": 0.1299, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.23528879437116806, |
| "grad_norm": 0.6204285621643066, |
| "learning_rate": 1.787051362310134e-05, |
| "loss": 0.1328, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.23619375127259565, |
| "grad_norm": 0.3297698497772217, |
| "learning_rate": 1.785239802196847e-05, |
| "loss": 0.1266, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.2370987081740232, |
| "grad_norm": 0.27107271552085876, |
| "learning_rate": 1.7834214956502124e-05, |
| "loss": 0.1386, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.2380036650754508, |
| "grad_norm": 0.3303036093711853, |
| "learning_rate": 1.781596458292317e-05, |
| "loss": 0.1314, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.23890862197687834, |
| "grad_norm": 0.2860264480113983, |
| "learning_rate": 1.7797647058030748e-05, |
| "loss": 0.1341, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.23981357887830593, |
| "grad_norm": 0.333578497171402, |
| "learning_rate": 1.7779262539200937e-05, |
| "loss": 0.134, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.24071853577973348, |
| "grad_norm": 0.3807545602321625, |
| "learning_rate": 1.7760811184385406e-05, |
| "loss": 0.1327, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.24162349268116107, |
| "grad_norm": 0.3607015013694763, |
| "learning_rate": 1.7742293152110033e-05, |
| "loss": 0.1356, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.24252844958258862, |
| "grad_norm": 0.4713596701622009, |
| "learning_rate": 1.7723708601473566e-05, |
| "loss": 0.1371, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.2434334064840162, |
| "grad_norm": 0.5265551805496216, |
| "learning_rate": 1.7705057692146258e-05, |
| "loss": 0.1293, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.24433836338544376, |
| "grad_norm": 0.35034945607185364, |
| "learning_rate": 1.768634058436847e-05, |
| "loss": 0.1354, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.24524332028687135, |
| "grad_norm": 0.3199910819530487, |
| "learning_rate": 1.7667557438949328e-05, |
| "loss": 0.1411, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.2461482771882989, |
| "grad_norm": 0.379367858171463, |
| "learning_rate": 1.7648708417265314e-05, |
| "loss": 0.1345, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.2470532340897265, |
| "grad_norm": 0.5331190824508667, |
| "learning_rate": 1.7629793681258892e-05, |
| "loss": 0.133, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.24795819099115404, |
| "grad_norm": 0.29094645380973816, |
| "learning_rate": 1.761081339343711e-05, |
| "loss": 0.1288, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.24886314789258163, |
| "grad_norm": 0.47043576836586, |
| "learning_rate": 1.759176771687022e-05, |
| "loss": 0.1368, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.24976810479400918, |
| "grad_norm": 0.8411908149719238, |
| "learning_rate": 1.7572656815190253e-05, |
| "loss": 0.1326, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.25067306169543674, |
| "grad_norm": 0.39577701687812805, |
| "learning_rate": 1.7553480852589635e-05, |
| "loss": 0.1336, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.25157801859686435, |
| "grad_norm": 0.5741309523582458, |
| "learning_rate": 1.7534239993819758e-05, |
| "loss": 0.1367, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.2524829754982919, |
| "grad_norm": 0.3736680746078491, |
| "learning_rate": 1.7514934404189574e-05, |
| "loss": 0.1259, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.25338793239971946, |
| "grad_norm": 0.4589287042617798, |
| "learning_rate": 1.7495564249564184e-05, |
| "loss": 0.1319, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.254292889301147, |
| "grad_norm": 0.4368409514427185, |
| "learning_rate": 1.7476129696363394e-05, |
| "loss": 0.1282, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.25519784620257463, |
| "grad_norm": 0.3239140212535858, |
| "learning_rate": 1.7456630911560294e-05, |
| "loss": 0.1309, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.2561028031040022, |
| "grad_norm": 0.3988368809223175, |
| "learning_rate": 1.7437068062679827e-05, |
| "loss": 0.1338, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.25700776000542974, |
| "grad_norm": 0.45862647891044617, |
| "learning_rate": 1.7417441317797342e-05, |
| "loss": 0.1344, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.2579127169068573, |
| "grad_norm": 0.3606816232204437, |
| "learning_rate": 1.7397750845537163e-05, |
| "loss": 0.1369, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.25881767380828485, |
| "grad_norm": 0.44180789589881897, |
| "learning_rate": 1.7377996815071122e-05, |
| "loss": 0.1299, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.25972263070971247, |
| "grad_norm": 0.30617383122444153, |
| "learning_rate": 1.7358179396117118e-05, |
| "loss": 0.1334, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.26062758761114, |
| "grad_norm": 0.3464939594268799, |
| "learning_rate": 1.7338298758937656e-05, |
| "loss": 0.1317, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.2615325445125676, |
| "grad_norm": 0.3210926949977875, |
| "learning_rate": 1.7318355074338387e-05, |
| "loss": 0.1334, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.26243750141399513, |
| "grad_norm": 0.309063196182251, |
| "learning_rate": 1.7298348513666632e-05, |
| "loss": 0.1432, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.26334245831542274, |
| "grad_norm": 0.3307989835739136, |
| "learning_rate": 1.727827924880992e-05, |
| "loss": 0.1376, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.2642474152168503, |
| "grad_norm": 0.2714211046695709, |
| "learning_rate": 1.725814745219451e-05, |
| "loss": 0.1338, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.26515237211827786, |
| "grad_norm": 0.4215063750743866, |
| "learning_rate": 1.723795329678389e-05, |
| "loss": 0.1346, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.2660573290197054, |
| "grad_norm": 0.36363697052001953, |
| "learning_rate": 1.721769695607733e-05, |
| "loss": 0.1323, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.266962285921133, |
| "grad_norm": 0.3916791081428528, |
| "learning_rate": 1.7197378604108352e-05, |
| "loss": 0.1299, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.2678672428225606, |
| "grad_norm": 0.3785913288593292, |
| "learning_rate": 1.7176998415443256e-05, |
| "loss": 0.1328, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.26877219972398814, |
| "grad_norm": 0.329843670129776, |
| "learning_rate": 1.7156556565179618e-05, |
| "loss": 0.1316, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.2696771566254157, |
| "grad_norm": 0.30012860894203186, |
| "learning_rate": 1.713605322894478e-05, |
| "loss": 0.1344, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.2705821135268433, |
| "grad_norm": 0.33648693561553955, |
| "learning_rate": 1.7115488582894345e-05, |
| "loss": 0.1238, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.27148707042827086, |
| "grad_norm": 0.4068211317062378, |
| "learning_rate": 1.7094862803710665e-05, |
| "loss": 0.1414, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.2723920273296984, |
| "grad_norm": 0.34052276611328125, |
| "learning_rate": 1.7074176068601318e-05, |
| "loss": 0.1314, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.273296984231126, |
| "grad_norm": 0.4234228730201721, |
| "learning_rate": 1.705342855529759e-05, |
| "loss": 0.1344, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.2742019411325536, |
| "grad_norm": 0.3007555603981018, |
| "learning_rate": 1.7032620442052948e-05, |
| "loss": 0.1403, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.27510689803398114, |
| "grad_norm": 0.3519713878631592, |
| "learning_rate": 1.70117519076415e-05, |
| "loss": 0.1326, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.2760118549354087, |
| "grad_norm": 0.6432228088378906, |
| "learning_rate": 1.699082313135648e-05, |
| "loss": 0.1339, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.27691681183683625, |
| "grad_norm": 0.39598795771598816, |
| "learning_rate": 1.6969834293008674e-05, |
| "loss": 0.133, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.27782176873826386, |
| "grad_norm": 0.3361396789550781, |
| "learning_rate": 1.6948785572924912e-05, |
| "loss": 0.1258, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.2787267256396914, |
| "grad_norm": 0.3960329592227936, |
| "learning_rate": 1.692767715194649e-05, |
| "loss": 0.1315, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.279631682541119, |
| "grad_norm": 0.3047797381877899, |
| "learning_rate": 1.6906509211427633e-05, |
| "loss": 0.1321, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.28053663944254653, |
| "grad_norm": 0.3368310332298279, |
| "learning_rate": 1.6885281933233936e-05, |
| "loss": 0.1421, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.28144159634397414, |
| "grad_norm": 0.5269041061401367, |
| "learning_rate": 1.6863995499740785e-05, |
| "loss": 0.1342, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.2823465532454017, |
| "grad_norm": 0.3757074773311615, |
| "learning_rate": 1.6842650093831817e-05, |
| "loss": 0.1347, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.28325151014682926, |
| "grad_norm": 0.4319840669631958, |
| "learning_rate": 1.6821245898897317e-05, |
| "loss": 0.1368, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.2841564670482568, |
| "grad_norm": 0.2823663651943207, |
| "learning_rate": 1.6799783098832677e-05, |
| "loss": 0.1318, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.2850614239496844, |
| "grad_norm": 0.3386325538158417, |
| "learning_rate": 1.6778261878036784e-05, |
| "loss": 0.1335, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.285966380851112, |
| "grad_norm": 0.44977718591690063, |
| "learning_rate": 1.6756682421410454e-05, |
| "loss": 0.1342, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.28687133775253953, |
| "grad_norm": 0.43069374561309814, |
| "learning_rate": 1.6735044914354853e-05, |
| "loss": 0.1316, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.2877762946539671, |
| "grad_norm": 0.47004443407058716, |
| "learning_rate": 1.6713349542769865e-05, |
| "loss": 0.1353, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.28868125155539465, |
| "grad_norm": 0.5096125602722168, |
| "learning_rate": 1.6691596493052543e-05, |
| "loss": 0.1346, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.28958620845682226, |
| "grad_norm": 0.4015505611896515, |
| "learning_rate": 1.6669785952095468e-05, |
| "loss": 0.1334, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.2904911653582498, |
| "grad_norm": 0.37036389112472534, |
| "learning_rate": 1.6647918107285182e-05, |
| "loss": 0.127, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.29139612225967737, |
| "grad_norm": 0.39770999550819397, |
| "learning_rate": 1.6625993146500536e-05, |
| "loss": 0.1355, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.2923010791611049, |
| "grad_norm": 0.43057796359062195, |
| "learning_rate": 1.6604011258111097e-05, |
| "loss": 0.1358, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.29320603606253254, |
| "grad_norm": 0.3283129930496216, |
| "learning_rate": 1.658197263097555e-05, |
| "loss": 0.1317, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.2941109929639601, |
| "grad_norm": 0.28569671511650085, |
| "learning_rate": 1.6559877454440025e-05, |
| "loss": 0.1351, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.29501594986538765, |
| "grad_norm": 0.29221346974372864, |
| "learning_rate": 1.6537725918336524e-05, |
| "loss": 0.135, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.2959209067668152, |
| "grad_norm": 3.769948959350586, |
| "learning_rate": 1.6515518212981248e-05, |
| "loss": 0.1562, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.2968258636682428, |
| "grad_norm": 0.6297938823699951, |
| "learning_rate": 1.6493254529172996e-05, |
| "loss": 0.1332, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.2977308205696704, |
| "grad_norm": 0.4224908649921417, |
| "learning_rate": 1.647093505819149e-05, |
| "loss": 0.1362, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.29863577747109793, |
| "grad_norm": 0.60915607213974, |
| "learning_rate": 1.6448559991795762e-05, |
| "loss": 0.1331, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.2995407343725255, |
| "grad_norm": 0.4966517686843872, |
| "learning_rate": 1.64261295222225e-05, |
| "loss": 0.1264, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.3004456912739531, |
| "grad_norm": 0.8263148069381714, |
| "learning_rate": 1.6403643842184383e-05, |
| "loss": 0.1284, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.30135064817538065, |
| "grad_norm": 0.2521939277648926, |
| "learning_rate": 1.6381103144868434e-05, |
| "loss": 0.1321, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.3022556050768082, |
| "grad_norm": 0.3264107406139374, |
| "learning_rate": 1.6358507623934368e-05, |
| "loss": 0.1338, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.30316056197823577, |
| "grad_norm": 0.3555287718772888, |
| "learning_rate": 1.6335857473512908e-05, |
| "loss": 0.1318, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.3040655188796634, |
| "grad_norm": 0.3218032717704773, |
| "learning_rate": 1.6313152888204143e-05, |
| "loss": 0.1244, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.30497047578109093, |
| "grad_norm": 0.37903180718421936, |
| "learning_rate": 1.629039406307583e-05, |
| "loss": 0.1315, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.3058754326825185, |
| "grad_norm": 0.32837244868278503, |
| "learning_rate": 1.626758119366174e-05, |
| "loss": 0.1299, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.30678038958394604, |
| "grad_norm": 0.5855687856674194, |
| "learning_rate": 1.6244714475959958e-05, |
| "loss": 0.1372, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.30768534648537366, |
| "grad_norm": 0.30140820145606995, |
| "learning_rate": 1.622179410643123e-05, |
| "loss": 0.1303, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.3085903033868012, |
| "grad_norm": 0.32345208525657654, |
| "learning_rate": 1.619882028199723e-05, |
| "loss": 0.1318, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.30949526028822877, |
| "grad_norm": 0.4127529561519623, |
| "learning_rate": 1.617579320003891e-05, |
| "loss": 0.1325, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.3104002171896563, |
| "grad_norm": 0.2518954873085022, |
| "learning_rate": 1.6152713058394778e-05, |
| "loss": 0.1323, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.31130517409108394, |
| "grad_norm": 0.385233610868454, |
| "learning_rate": 1.612958005535921e-05, |
| "loss": 0.1321, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.3122101309925115, |
| "grad_norm": 4.388869285583496, |
| "learning_rate": 1.6106394389680752e-05, |
| "loss": 0.1392, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.31311508789393905, |
| "grad_norm": 0.3289415240287781, |
| "learning_rate": 1.6083156260560387e-05, |
| "loss": 0.1319, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.3140200447953666, |
| "grad_norm": 0.2971116900444031, |
| "learning_rate": 1.605986586764986e-05, |
| "loss": 0.1314, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.3149250016967942, |
| "grad_norm": 0.2933705449104309, |
| "learning_rate": 1.603652341104993e-05, |
| "loss": 0.1331, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.31582995859822177, |
| "grad_norm": 0.2969614863395691, |
| "learning_rate": 1.6013129091308658e-05, |
| "loss": 0.1356, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.3167349154996493, |
| "grad_norm": 0.48023778200149536, |
| "learning_rate": 1.5989683109419717e-05, |
| "loss": 0.1296, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.3176398724010769, |
| "grad_norm": 0.3945216238498688, |
| "learning_rate": 1.5966185666820608e-05, |
| "loss": 0.1393, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.3185448293025045, |
| "grad_norm": 0.34867751598358154, |
| "learning_rate": 1.5942636965390983e-05, |
| "loss": 0.1316, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.31944978620393205, |
| "grad_norm": 0.4952505826950073, |
| "learning_rate": 1.5919037207450873e-05, |
| "loss": 0.1292, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.3203547431053596, |
| "grad_norm": 0.35089966654777527, |
| "learning_rate": 1.589538659575897e-05, |
| "loss": 0.1339, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.32125970000678716, |
| "grad_norm": 0.5478940606117249, |
| "learning_rate": 1.5871685333510873e-05, |
| "loss": 0.1331, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.3221646569082147, |
| "grad_norm": 0.41318562626838684, |
| "learning_rate": 1.584793362433736e-05, |
| "loss": 0.1328, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.32306961380964233, |
| "grad_norm": 0.3964232802391052, |
| "learning_rate": 1.5824131672302608e-05, |
| "loss": 0.1445, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.3239745707110699, |
| "grad_norm": 0.3068403899669647, |
| "learning_rate": 1.5800279681902483e-05, |
| "loss": 0.1275, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.32487952761249744, |
| "grad_norm": 0.2769792079925537, |
| "learning_rate": 1.5776377858062737e-05, |
| "loss": 0.1286, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.325784484513925, |
| "grad_norm": 0.5100188255310059, |
| "learning_rate": 1.5752426406137275e-05, |
| "loss": 0.1344, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.3266894414153526, |
| "grad_norm": 0.3469770550727844, |
| "learning_rate": 1.5728425531906396e-05, |
| "loss": 0.1323, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.32759439831678017, |
| "grad_norm": 0.5681571960449219, |
| "learning_rate": 1.5704375441574996e-05, |
| "loss": 0.1335, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.3284993552182077, |
| "grad_norm": 0.3062500059604645, |
| "learning_rate": 1.568027634177083e-05, |
| "loss": 0.1367, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.3294043121196353, |
| "grad_norm": 0.3792308270931244, |
| "learning_rate": 1.5656128439542704e-05, |
| "loss": 0.1301, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.3303092690210629, |
| "grad_norm": 0.3073217272758484, |
| "learning_rate": 1.5631931942358723e-05, |
| "loss": 0.1304, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.33121422592249045, |
| "grad_norm": 0.3077130615711212, |
| "learning_rate": 1.560768705810451e-05, |
| "loss": 0.1293, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.332119182823918, |
| "grad_norm": 0.34068816900253296, |
| "learning_rate": 1.558339399508138e-05, |
| "loss": 0.138, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.33302413972534556, |
| "grad_norm": 0.366473525762558, |
| "learning_rate": 1.55590529620046e-05, |
| "loss": 0.1243, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.33392909662677317, |
| "grad_norm": 0.40335702896118164, |
| "learning_rate": 1.553466416800157e-05, |
| "loss": 0.1305, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.3348340535282007, |
| "grad_norm": 0.3505435287952423, |
| "learning_rate": 1.551022782261003e-05, |
| "loss": 0.1332, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.3357390104296283, |
| "grad_norm": 0.5044335126876831, |
| "learning_rate": 1.5485744135776258e-05, |
| "loss": 0.1326, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.33664396733105584, |
| "grad_norm": 0.3738217055797577, |
| "learning_rate": 1.546121331785327e-05, |
| "loss": 0.1251, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.33754892423248345, |
| "grad_norm": 0.3503580093383789, |
| "learning_rate": 1.5436635579599014e-05, |
| "loss": 0.1349, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.338453881133911, |
| "grad_norm": 0.3351600766181946, |
| "learning_rate": 1.541201113217456e-05, |
| "loss": 0.1275, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.33935883803533856, |
| "grad_norm": 0.2801141142845154, |
| "learning_rate": 1.538734018714227e-05, |
| "loss": 0.1278, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.3402637949367661, |
| "grad_norm": 0.26787105202674866, |
| "learning_rate": 1.5362622956463998e-05, |
| "loss": 0.1413, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.34116875183819373, |
| "grad_norm": 0.394846111536026, |
| "learning_rate": 1.5337859652499277e-05, |
| "loss": 0.1354, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.3420737087396213, |
| "grad_norm": 0.3125811815261841, |
| "learning_rate": 1.531305048800346e-05, |
| "loss": 0.1324, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.34297866564104884, |
| "grad_norm": 0.3050634264945984, |
| "learning_rate": 1.5288195676125937e-05, |
| "loss": 0.1345, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.3438836225424764, |
| "grad_norm": 0.257493257522583, |
| "learning_rate": 1.5263295430408255e-05, |
| "loss": 0.1311, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.344788579443904, |
| "grad_norm": 0.3533366620540619, |
| "learning_rate": 1.5238349964782325e-05, |
| "loss": 0.1282, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.34569353634533156, |
| "grad_norm": 0.3291451036930084, |
| "learning_rate": 1.5213359493568562e-05, |
| "loss": 0.1218, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.3465984932467591, |
| "grad_norm": 0.3743618130683899, |
| "learning_rate": 1.5188324231474054e-05, |
| "loss": 0.1295, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.3475034501481867, |
| "grad_norm": 0.5372154712677002, |
| "learning_rate": 1.51632443935907e-05, |
| "loss": 0.1331, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.3484084070496143, |
| "grad_norm": 0.3277980387210846, |
| "learning_rate": 1.5138120195393396e-05, |
| "loss": 0.1387, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.34931336395104184, |
| "grad_norm": 0.40068790316581726, |
| "learning_rate": 1.5112951852738138e-05, |
| "loss": 0.1301, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.3502183208524694, |
| "grad_norm": 0.32944217324256897, |
| "learning_rate": 1.5087739581860213e-05, |
| "loss": 0.1312, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.35112327775389696, |
| "grad_norm": 0.3983187675476074, |
| "learning_rate": 1.50624835993723e-05, |
| "loss": 0.1325, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.35202823465532457, |
| "grad_norm": 0.3687704801559448, |
| "learning_rate": 1.5037184122262645e-05, |
| "loss": 0.128, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.3529331915567521, |
| "grad_norm": 0.4385727345943451, |
| "learning_rate": 1.501184136789317e-05, |
| "loss": 0.1329, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.3538381484581797, |
| "grad_norm": 0.29301881790161133, |
| "learning_rate": 1.4986455553997625e-05, |
| "loss": 0.1301, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.35474310535960724, |
| "grad_norm": 0.4024035632610321, |
| "learning_rate": 1.4961026898679703e-05, |
| "loss": 0.1325, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.3556480622610348, |
| "grad_norm": 0.32220858335494995, |
| "learning_rate": 1.4935555620411168e-05, |
| "loss": 0.1361, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.3565530191624624, |
| "grad_norm": 0.2925558388233185, |
| "learning_rate": 1.4910041938029993e-05, |
| "loss": 0.1299, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.35745797606388996, |
| "grad_norm": 0.3419649600982666, |
| "learning_rate": 1.4884486070738457e-05, |
| "loss": 0.1371, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.3583629329653175, |
| "grad_norm": 0.37976405024528503, |
| "learning_rate": 1.4858888238101278e-05, |
| "loss": 0.1381, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.35926788986674507, |
| "grad_norm": 0.3191063106060028, |
| "learning_rate": 1.483324866004372e-05, |
| "loss": 0.1299, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.3601728467681727, |
| "grad_norm": 0.35269689559936523, |
| "learning_rate": 1.4807567556849707e-05, |
| "loss": 0.134, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.36107780366960024, |
| "grad_norm": 0.44557368755340576, |
| "learning_rate": 1.478184514915993e-05, |
| "loss": 0.1333, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.3619827605710278, |
| "grad_norm": 0.26233455538749695, |
| "learning_rate": 1.4756081657969947e-05, |
| "loss": 0.126, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.36288771747245535, |
| "grad_norm": 0.27105700969696045, |
| "learning_rate": 1.4730277304628287e-05, |
| "loss": 0.1338, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.36379267437388296, |
| "grad_norm": 0.45541515946388245, |
| "learning_rate": 1.4704432310834551e-05, |
| "loss": 0.1327, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.3646976312753105, |
| "grad_norm": 0.31574854254722595, |
| "learning_rate": 1.4678546898637502e-05, |
| "loss": 0.1329, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.3656025881767381, |
| "grad_norm": 0.3075743019580841, |
| "learning_rate": 1.4652621290433166e-05, |
| "loss": 0.1297, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.36650754507816563, |
| "grad_norm": 0.32941344380378723, |
| "learning_rate": 1.4626655708962904e-05, |
| "loss": 0.125, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.36741250197959324, |
| "grad_norm": 0.3551495671272278, |
| "learning_rate": 1.4600650377311523e-05, |
| "loss": 0.134, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.3683174588810208, |
| "grad_norm": 0.3904673457145691, |
| "learning_rate": 1.4574605518905336e-05, |
| "loss": 0.131, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.36922241578244835, |
| "grad_norm": 0.3711760342121124, |
| "learning_rate": 1.4548521357510256e-05, |
| "loss": 0.1302, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.3701273726838759, |
| "grad_norm": 0.23497672379016876, |
| "learning_rate": 1.4522398117229874e-05, |
| "loss": 0.1352, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.3710323295853035, |
| "grad_norm": 0.39998266100883484, |
| "learning_rate": 1.4496236022503523e-05, |
| "loss": 0.1326, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.3719372864867311, |
| "grad_norm": 0.38762685656547546, |
| "learning_rate": 1.4470035298104355e-05, |
| "loss": 0.1269, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.37284224338815863, |
| "grad_norm": 0.29660049080848694, |
| "learning_rate": 1.444379616913742e-05, |
| "loss": 0.1314, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.3737472002895862, |
| "grad_norm": 0.3709215223789215, |
| "learning_rate": 1.4417518861037713e-05, |
| "loss": 0.1315, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.3746521571910138, |
| "grad_norm": 0.5628421902656555, |
| "learning_rate": 1.4391203599568257e-05, |
| "loss": 0.1345, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.37555711409244136, |
| "grad_norm": 0.32759326696395874, |
| "learning_rate": 1.4364850610818147e-05, |
| "loss": 0.1369, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.3764620709938689, |
| "grad_norm": 0.33175286650657654, |
| "learning_rate": 1.4338460121200612e-05, |
| "loss": 0.1303, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.37736702789529647, |
| "grad_norm": 0.5355362296104431, |
| "learning_rate": 1.4312032357451084e-05, |
| "loss": 0.1283, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.3782719847967241, |
| "grad_norm": 0.25995931029319763, |
| "learning_rate": 1.428556754662522e-05, |
| "loss": 0.1306, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.37917694169815164, |
| "grad_norm": 0.34952160716056824, |
| "learning_rate": 1.4259065916096983e-05, |
| "loss": 0.1315, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.3800818985995792, |
| "grad_norm": 0.34290722012519836, |
| "learning_rate": 1.4232527693556673e-05, |
| "loss": 0.1349, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.38098685550100675, |
| "grad_norm": 0.2907416522502899, |
| "learning_rate": 1.4205953107008964e-05, |
| "loss": 0.134, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.38189181240243436, |
| "grad_norm": 0.305785208940506, |
| "learning_rate": 1.4179342384770964e-05, |
| "loss": 0.1322, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.3827967693038619, |
| "grad_norm": 0.3259272873401642, |
| "learning_rate": 1.4152695755470235e-05, |
| "loss": 0.124, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.3837017262052895, |
| "grad_norm": 0.4469420611858368, |
| "learning_rate": 1.4126013448042838e-05, |
| "loss": 0.1247, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.38460668310671703, |
| "grad_norm": 0.3158349096775055, |
| "learning_rate": 1.4099295691731374e-05, |
| "loss": 0.1213, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.3855116400081446, |
| "grad_norm": 0.30905407667160034, |
| "learning_rate": 1.4072542716082986e-05, |
| "loss": 0.1334, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.3864165969095722, |
| "grad_norm": 0.29381975531578064, |
| "learning_rate": 1.4045754750947428e-05, |
| "loss": 0.1328, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.38732155381099975, |
| "grad_norm": 0.39547502994537354, |
| "learning_rate": 1.401893202647505e-05, |
| "loss": 0.1269, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.3882265107124273, |
| "grad_norm": 0.3500146269798279, |
| "learning_rate": 1.3992074773114852e-05, |
| "loss": 0.1322, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.38913146761385486, |
| "grad_norm": 0.2726495563983917, |
| "learning_rate": 1.3965183221612484e-05, |
| "loss": 0.1337, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.3900364245152825, |
| "grad_norm": 0.3656439185142517, |
| "learning_rate": 1.393825760300827e-05, |
| "loss": 0.1324, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.39094138141671003, |
| "grad_norm": 0.34188976883888245, |
| "learning_rate": 1.3911298148635224e-05, |
| "loss": 0.1358, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.3918463383181376, |
| "grad_norm": 0.5021139979362488, |
| "learning_rate": 1.3884305090117069e-05, |
| "loss": 0.1365, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.39275129521956514, |
| "grad_norm": 0.32963699102401733, |
| "learning_rate": 1.3857278659366232e-05, |
| "loss": 0.1337, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.39365625212099276, |
| "grad_norm": 0.31096163392066956, |
| "learning_rate": 1.3830219088581856e-05, |
| "loss": 0.1305, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.3945612090224203, |
| "grad_norm": 0.3522721230983734, |
| "learning_rate": 1.380312661024782e-05, |
| "loss": 0.1327, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.39546616592384787, |
| "grad_norm": 0.3698263168334961, |
| "learning_rate": 1.3776001457130725e-05, |
| "loss": 0.1363, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.3963711228252754, |
| "grad_norm": 0.6951903700828552, |
| "learning_rate": 1.37488438622779e-05, |
| "loss": 0.1281, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.39727607972670304, |
| "grad_norm": 0.3918182849884033, |
| "learning_rate": 1.3721654059015393e-05, |
| "loss": 0.1338, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.3981810366281306, |
| "grad_norm": 0.40132880210876465, |
| "learning_rate": 1.3694432280945978e-05, |
| "loss": 0.1258, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.39908599352955815, |
| "grad_norm": 0.36218178272247314, |
| "learning_rate": 1.3667178761947144e-05, |
| "loss": 0.1324, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.3999909504309857, |
| "grad_norm": 0.411044180393219, |
| "learning_rate": 1.3639893736169083e-05, |
| "loss": 0.1332, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.4008959073324133, |
| "grad_norm": 0.3805508017539978, |
| "learning_rate": 1.3612577438032673e-05, |
| "loss": 0.1317, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.40180086423384087, |
| "grad_norm": 0.33358198404312134, |
| "learning_rate": 1.3585230102227478e-05, |
| "loss": 0.1254, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.4027058211352684, |
| "grad_norm": 0.2710610628128052, |
| "learning_rate": 1.355785196370972e-05, |
| "loss": 0.1319, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.403610778036696, |
| "grad_norm": 0.24267303943634033, |
| "learning_rate": 1.3530443257700272e-05, |
| "loss": 0.134, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.4045157349381236, |
| "grad_norm": 0.32278409600257874, |
| "learning_rate": 1.3503004219682611e-05, |
| "loss": 0.1377, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.40542069183955115, |
| "grad_norm": 0.2883910834789276, |
| "learning_rate": 1.3475535085400836e-05, |
| "loss": 0.13, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.4063256487409787, |
| "grad_norm": 0.5005854368209839, |
| "learning_rate": 1.3448036090857601e-05, |
| "loss": 0.1299, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.40723060564240626, |
| "grad_norm": 0.4682227075099945, |
| "learning_rate": 1.3420507472312121e-05, |
| "loss": 0.1365, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.4081355625438339, |
| "grad_norm": 0.3037811815738678, |
| "learning_rate": 1.3392949466278116e-05, |
| "loss": 0.1334, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.40904051944526143, |
| "grad_norm": 0.2893620729446411, |
| "learning_rate": 1.3365362309521794e-05, |
| "loss": 0.1313, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.409945476346689, |
| "grad_norm": 0.2590632736682892, |
| "learning_rate": 1.3337746239059817e-05, |
| "loss": 0.1284, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.41085043324811654, |
| "grad_norm": 0.44863688945770264, |
| "learning_rate": 1.3310101492157256e-05, |
| "loss": 0.128, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.41175539014954415, |
| "grad_norm": 0.3310014009475708, |
| "learning_rate": 1.328242830632556e-05, |
| "loss": 0.1352, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.4126603470509717, |
| "grad_norm": 0.44096454977989197, |
| "learning_rate": 1.3254726919320509e-05, |
| "loss": 0.1302, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.41356530395239927, |
| "grad_norm": 0.30306538939476013, |
| "learning_rate": 1.322699756914018e-05, |
| "loss": 0.1364, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.4144702608538268, |
| "grad_norm": 0.2730850875377655, |
| "learning_rate": 1.3199240494022891e-05, |
| "loss": 0.1393, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.41537521775525443, |
| "grad_norm": 0.21079055964946747, |
| "learning_rate": 1.3171455932445172e-05, |
| "loss": 0.1294, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.416280174656682, |
| "grad_norm": 0.5057855248451233, |
| "learning_rate": 1.3143644123119692e-05, |
| "loss": 0.1338, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.41718513155810955, |
| "grad_norm": 0.3610301613807678, |
| "learning_rate": 1.3115805304993221e-05, |
| "loss": 0.1298, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.4180900884595371, |
| "grad_norm": 0.3835664391517639, |
| "learning_rate": 1.3087939717244591e-05, |
| "loss": 0.1303, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.41899504536096466, |
| "grad_norm": 0.3250782787799835, |
| "learning_rate": 1.306004759928261e-05, |
| "loss": 0.1296, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.41990000226239227, |
| "grad_norm": 0.3284454941749573, |
| "learning_rate": 1.3032129190744032e-05, |
| "loss": 0.1285, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.4208049591638198, |
| "grad_norm": 0.30871763825416565, |
| "learning_rate": 1.3004184731491478e-05, |
| "loss": 0.1331, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.4217099160652474, |
| "grad_norm": 0.2988496720790863, |
| "learning_rate": 1.29762144616114e-05, |
| "loss": 0.125, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.42261487296667494, |
| "grad_norm": 0.30450698733329773, |
| "learning_rate": 1.2948218621411996e-05, |
| "loss": 0.134, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.42351982986810255, |
| "grad_norm": 0.33566537499427795, |
| "learning_rate": 1.2920197451421145e-05, |
| "loss": 0.1439, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.4244247867695301, |
| "grad_norm": 0.4010887145996094, |
| "learning_rate": 1.2892151192384362e-05, |
| "loss": 0.1348, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.42532974367095766, |
| "grad_norm": 0.42090174555778503, |
| "learning_rate": 1.2864080085262702e-05, |
| "loss": 0.1315, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.4262347005723852, |
| "grad_norm": 0.3819758892059326, |
| "learning_rate": 1.2835984371230722e-05, |
| "loss": 0.1264, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.42713965747381283, |
| "grad_norm": 0.6393516659736633, |
| "learning_rate": 1.2807864291674374e-05, |
| "loss": 0.1312, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.4280446143752404, |
| "grad_norm": 0.3352563977241516, |
| "learning_rate": 1.2779720088188954e-05, |
| "loss": 0.1337, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.42894957127666794, |
| "grad_norm": 0.304123193025589, |
| "learning_rate": 1.2751552002577024e-05, |
| "loss": 0.1303, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.4298545281780955, |
| "grad_norm": 0.391735702753067, |
| "learning_rate": 1.2723360276846322e-05, |
| "loss": 0.1349, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.4307594850795231, |
| "grad_norm": 0.2713654935359955, |
| "learning_rate": 1.26951451532077e-05, |
| "loss": 0.1326, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.43166444198095066, |
| "grad_norm": 0.26157835125923157, |
| "learning_rate": 1.2666906874073024e-05, |
| "loss": 0.1296, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.4325693988823782, |
| "grad_norm": 0.36790212988853455, |
| "learning_rate": 1.2638645682053119e-05, |
| "loss": 0.1293, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.4334743557838058, |
| "grad_norm": 0.4308098554611206, |
| "learning_rate": 1.2610361819955647e-05, |
| "loss": 0.1294, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.4343793126852334, |
| "grad_norm": 0.32590603828430176, |
| "learning_rate": 1.2582055530783059e-05, |
| "loss": 0.1292, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.43528426958666094, |
| "grad_norm": 0.41218942403793335, |
| "learning_rate": 1.2553727057730481e-05, |
| "loss": 0.1292, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.4361892264880885, |
| "grad_norm": 0.26972493529319763, |
| "learning_rate": 1.2525376644183625e-05, |
| "loss": 0.1288, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.43709418338951606, |
| "grad_norm": 0.305836021900177, |
| "learning_rate": 1.2497004533716726e-05, |
| "loss": 0.133, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.43799914029094367, |
| "grad_norm": 0.49705770611763, |
| "learning_rate": 1.246861097009041e-05, |
| "loss": 0.1312, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.4389040971923712, |
| "grad_norm": 0.8208497166633606, |
| "learning_rate": 1.2440196197249634e-05, |
| "loss": 0.139, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.4398090540937988, |
| "grad_norm": 0.43357348442077637, |
| "learning_rate": 1.2411760459321562e-05, |
| "loss": 0.1347, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.44071401099522634, |
| "grad_norm": 0.29426875710487366, |
| "learning_rate": 1.238330400061349e-05, |
| "loss": 0.1277, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.44161896789665395, |
| "grad_norm": 0.31841394305229187, |
| "learning_rate": 1.235482706561074e-05, |
| "loss": 0.1275, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.4425239247980815, |
| "grad_norm": 0.3246687650680542, |
| "learning_rate": 1.2326329898974543e-05, |
| "loss": 0.1322, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.44342888169950906, |
| "grad_norm": 0.3303876519203186, |
| "learning_rate": 1.2297812745539968e-05, |
| "loss": 0.1249, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.4443338386009366, |
| "grad_norm": 0.28597357869148254, |
| "learning_rate": 1.2269275850313788e-05, |
| "loss": 0.1251, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.4452387955023642, |
| "grad_norm": 0.4606969356536865, |
| "learning_rate": 1.2240719458472402e-05, |
| "loss": 0.1287, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.4461437524037918, |
| "grad_norm": 0.48796603083610535, |
| "learning_rate": 1.2212143815359702e-05, |
| "loss": 0.1283, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.44704870930521934, |
| "grad_norm": 0.43160954117774963, |
| "learning_rate": 1.2183549166484988e-05, |
| "loss": 0.1262, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.4479536662066469, |
| "grad_norm": 0.43935272097587585, |
| "learning_rate": 1.2154935757520847e-05, |
| "loss": 0.131, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.44885862310807445, |
| "grad_norm": 0.2970845699310303, |
| "learning_rate": 1.212630383430104e-05, |
| "loss": 0.1292, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.44976358000950206, |
| "grad_norm": 0.3177463710308075, |
| "learning_rate": 1.2097653642818404e-05, |
| "loss": 0.1269, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.4506685369109296, |
| "grad_norm": 0.3390902578830719, |
| "learning_rate": 1.2068985429222712e-05, |
| "loss": 0.1258, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.4515734938123572, |
| "grad_norm": 0.32120341062545776, |
| "learning_rate": 1.204029943981859e-05, |
| "loss": 0.1346, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.45247845071378473, |
| "grad_norm": 0.3396281898021698, |
| "learning_rate": 1.2011595921063388e-05, |
| "loss": 0.1313, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.45338340761521234, |
| "grad_norm": 0.45511719584465027, |
| "learning_rate": 1.1982875119565045e-05, |
| "loss": 0.1264, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.4542883645166399, |
| "grad_norm": 0.44824886322021484, |
| "learning_rate": 1.1954137282079999e-05, |
| "loss": 0.1283, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.45519332141806745, |
| "grad_norm": 0.37619948387145996, |
| "learning_rate": 1.1925382655511044e-05, |
| "loss": 0.1266, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.456098278319495, |
| "grad_norm": 0.35430288314819336, |
| "learning_rate": 1.1896611486905232e-05, |
| "loss": 0.1324, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.4570032352209226, |
| "grad_norm": 0.28441110253334045, |
| "learning_rate": 1.1867824023451719e-05, |
| "loss": 0.1291, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.4579081921223502, |
| "grad_norm": 0.41242095828056335, |
| "learning_rate": 1.1839020512479676e-05, |
| "loss": 0.1299, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.45881314902377773, |
| "grad_norm": 0.2977141737937927, |
| "learning_rate": 1.1810201201456134e-05, |
| "loss": 0.1344, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.4597181059252053, |
| "grad_norm": 0.342632919549942, |
| "learning_rate": 1.1781366337983882e-05, |
| "loss": 0.1431, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.4606230628266329, |
| "grad_norm": 0.304565966129303, |
| "learning_rate": 1.175251616979932e-05, |
| "loss": 0.1281, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.46152801972806046, |
| "grad_norm": 0.27322742342948914, |
| "learning_rate": 1.1723650944770343e-05, |
| "loss": 0.1252, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.462432976629488, |
| "grad_norm": 0.2408372461795807, |
| "learning_rate": 1.1694770910894213e-05, |
| "loss": 0.1325, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.46333793353091557, |
| "grad_norm": 0.34792786836624146, |
| "learning_rate": 1.1665876316295408e-05, |
| "loss": 0.1248, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.4642428904323432, |
| "grad_norm": 0.2646757662296295, |
| "learning_rate": 1.1636967409223521e-05, |
| "loss": 0.1219, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.46514784733377074, |
| "grad_norm": 0.30345967411994934, |
| "learning_rate": 1.1608044438051107e-05, |
| "loss": 0.132, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.4660528042351983, |
| "grad_norm": 0.3464104235172272, |
| "learning_rate": 1.1579107651271544e-05, |
| "loss": 0.1248, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.46695776113662585, |
| "grad_norm": 0.3681158423423767, |
| "learning_rate": 1.1550157297496927e-05, |
| "loss": 0.1352, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.46786271803805346, |
| "grad_norm": 0.3495553731918335, |
| "learning_rate": 1.152119362545589e-05, |
| "loss": 0.1405, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.468767674939481, |
| "grad_norm": 0.41019749641418457, |
| "learning_rate": 1.1492216883991512e-05, |
| "loss": 0.1313, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.46967263184090857, |
| "grad_norm": 0.30911651253700256, |
| "learning_rate": 1.1463227322059143e-05, |
| "loss": 0.1239, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.47057758874233613, |
| "grad_norm": 0.32175543904304504, |
| "learning_rate": 1.1434225188724289e-05, |
| "loss": 0.1345, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.47148254564376374, |
| "grad_norm": 0.3402431011199951, |
| "learning_rate": 1.1405210733160463e-05, |
| "loss": 0.1305, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.4723875025451913, |
| "grad_norm": 0.5482098460197449, |
| "learning_rate": 1.1376184204647047e-05, |
| "loss": 0.129, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.47329245944661885, |
| "grad_norm": 0.4262870252132416, |
| "learning_rate": 1.134714585256714e-05, |
| "loss": 0.1312, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.4741974163480464, |
| "grad_norm": 0.28574520349502563, |
| "learning_rate": 1.1318095926405434e-05, |
| "loss": 0.1259, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.475102373249474, |
| "grad_norm": 0.49999454617500305, |
| "learning_rate": 1.1289034675746056e-05, |
| "loss": 0.1265, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.4760073301509016, |
| "grad_norm": 0.2796456515789032, |
| "learning_rate": 1.1259962350270428e-05, |
| "loss": 0.1305, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.47691228705232913, |
| "grad_norm": 0.3368781805038452, |
| "learning_rate": 1.1230879199755118e-05, |
| "loss": 0.1334, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.4778172439537567, |
| "grad_norm": 0.3882569968700409, |
| "learning_rate": 1.1201785474069706e-05, |
| "loss": 0.1379, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.4787222008551843, |
| "grad_norm": 0.3234885036945343, |
| "learning_rate": 1.1172681423174625e-05, |
| "loss": 0.1308, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.47962715775661185, |
| "grad_norm": 0.30723053216934204, |
| "learning_rate": 1.114356729711902e-05, |
| "loss": 0.1291, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.4805321146580394, |
| "grad_norm": 0.34784960746765137, |
| "learning_rate": 1.1114443346038591e-05, |
| "loss": 0.1264, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.48143707155946697, |
| "grad_norm": 0.36275964975357056, |
| "learning_rate": 1.1085309820153456e-05, |
| "loss": 0.1247, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.4823420284608945, |
| "grad_norm": 0.33047980070114136, |
| "learning_rate": 1.1056166969765991e-05, |
| "loss": 0.1277, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.48324698536232213, |
| "grad_norm": 0.3921981155872345, |
| "learning_rate": 1.1027015045258694e-05, |
| "loss": 0.1345, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.4841519422637497, |
| "grad_norm": 0.3603346347808838, |
| "learning_rate": 1.0997854297092011e-05, |
| "loss": 0.1232, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.48505689916517725, |
| "grad_norm": 0.5657104253768921, |
| "learning_rate": 1.0968684975802206e-05, |
| "loss": 0.1316, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.4859618560666048, |
| "grad_norm": 0.28306034207344055, |
| "learning_rate": 1.0939507331999195e-05, |
| "loss": 0.1312, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.4868668129680324, |
| "grad_norm": 0.33064553141593933, |
| "learning_rate": 1.0910321616364397e-05, |
| "loss": 0.1347, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.48777176986945997, |
| "grad_norm": 0.39418330788612366, |
| "learning_rate": 1.0881128079648586e-05, |
| "loss": 0.1263, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.4886767267708875, |
| "grad_norm": 0.3185255527496338, |
| "learning_rate": 1.0851926972669722e-05, |
| "loss": 0.1399, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.4895816836723151, |
| "grad_norm": 0.4073029160499573, |
| "learning_rate": 1.0822718546310816e-05, |
| "loss": 0.1303, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.4904866405737427, |
| "grad_norm": 0.49370768666267395, |
| "learning_rate": 1.0793503051517758e-05, |
| "loss": 0.1249, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.49139159747517025, |
| "grad_norm": 0.2705213725566864, |
| "learning_rate": 1.0764280739297163e-05, |
| "loss": 0.1341, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.4922965543765978, |
| "grad_norm": 0.2689199447631836, |
| "learning_rate": 1.0735051860714231e-05, |
| "loss": 0.1306, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.49320151127802536, |
| "grad_norm": 0.27751344442367554, |
| "learning_rate": 1.0705816666890561e-05, |
| "loss": 0.1285, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.494106468179453, |
| "grad_norm": 0.3316192030906677, |
| "learning_rate": 1.0676575409002024e-05, |
| "loss": 0.1362, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.49501142508088053, |
| "grad_norm": 0.3556595742702484, |
| "learning_rate": 1.064732833827658e-05, |
| "loss": 0.1271, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.4959163819823081, |
| "grad_norm": 0.2501612603664398, |
| "learning_rate": 1.0618075705992138e-05, |
| "loss": 0.1307, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.49682133888373564, |
| "grad_norm": 0.32025519013404846, |
| "learning_rate": 1.0588817763474388e-05, |
| "loss": 0.1271, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.49772629578516325, |
| "grad_norm": 0.3633834421634674, |
| "learning_rate": 1.0559554762094637e-05, |
| "loss": 0.1221, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.4986312526865908, |
| "grad_norm": 0.3367408514022827, |
| "learning_rate": 1.0530286953267665e-05, |
| "loss": 0.1288, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.49953620958801837, |
| "grad_norm": 0.36954525113105774, |
| "learning_rate": 1.050101458844955e-05, |
| "loss": 0.1247, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.5004411664894459, |
| "grad_norm": 0.46406790614128113, |
| "learning_rate": 1.047173791913551e-05, |
| "loss": 0.1312, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.5013461233908735, |
| "grad_norm": 0.32501330971717834, |
| "learning_rate": 1.044245719685775e-05, |
| "loss": 0.1323, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.502251080292301, |
| "grad_norm": 0.31969472765922546, |
| "learning_rate": 1.0413172673183298e-05, |
| "loss": 0.1343, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.5031560371937287, |
| "grad_norm": 0.2659285068511963, |
| "learning_rate": 1.0383884599711838e-05, |
| "loss": 0.1244, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.5040609940951563, |
| "grad_norm": 0.4111124873161316, |
| "learning_rate": 1.035459322807355e-05, |
| "loss": 0.1289, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.5049659509965838, |
| "grad_norm": 0.36066919565200806, |
| "learning_rate": 1.0325298809926962e-05, |
| "loss": 0.1297, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.5058709078980114, |
| "grad_norm": 0.30882957577705383, |
| "learning_rate": 1.029600159695676e-05, |
| "loss": 0.1256, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.5067758647994389, |
| "grad_norm": 0.30170318484306335, |
| "learning_rate": 1.0266701840871657e-05, |
| "loss": 0.1266, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.5076808217008665, |
| "grad_norm": 0.39970946311950684, |
| "learning_rate": 1.0237399793402203e-05, |
| "loss": 0.1334, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.508585778602294, |
| "grad_norm": 0.24716977775096893, |
| "learning_rate": 1.0208095706298643e-05, |
| "loss": 0.1277, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.5094907355037216, |
| "grad_norm": 0.3430007994174957, |
| "learning_rate": 1.017878983132874e-05, |
| "loss": 0.1315, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.5103956924051493, |
| "grad_norm": 0.37827351689338684, |
| "learning_rate": 1.0149482420275623e-05, |
| "loss": 0.1288, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.5113006493065768, |
| "grad_norm": 0.2566516101360321, |
| "learning_rate": 1.0120173724935614e-05, |
| "loss": 0.1331, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.5122056062080044, |
| "grad_norm": 0.327240914106369, |
| "learning_rate": 1.0090863997116066e-05, |
| "loss": 0.1267, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.5131105631094319, |
| "grad_norm": 0.49574097990989685, |
| "learning_rate": 1.0061553488633217e-05, |
| "loss": 0.129, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.5140155200108595, |
| "grad_norm": 0.33093881607055664, |
| "learning_rate": 1.0032242451309996e-05, |
| "loss": 0.1289, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.514920476912287, |
| "grad_norm": 0.30682793259620667, |
| "learning_rate": 1.0002931136973881e-05, |
| "loss": 0.1289, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.5158254338137146, |
| "grad_norm": 0.32405000925064087, |
| "learning_rate": 9.973619797454734e-06, |
| "loss": 0.1298, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.5167303907151422, |
| "grad_norm": 0.34693455696105957, |
| "learning_rate": 9.944308684582627e-06, |
| "loss": 0.129, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.5176353476165697, |
| "grad_norm": 0.44524097442626953, |
| "learning_rate": 9.914998050185693e-06, |
| "loss": 0.1252, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.5185403045179974, |
| "grad_norm": 0.346175879240036, |
| "learning_rate": 9.885688146087945e-06, |
| "loss": 0.1324, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.5194452614194249, |
| "grad_norm": 0.32724258303642273, |
| "learning_rate": 9.856379224107124e-06, |
| "loss": 0.1222, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.5203502183208525, |
| "grad_norm": 0.3304066061973572, |
| "learning_rate": 9.827071536052536e-06, |
| "loss": 0.1297, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.52125517522228, |
| "grad_norm": 0.3203341066837311, |
| "learning_rate": 9.797765333722888e-06, |
| "loss": 0.1315, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.5221601321237076, |
| "grad_norm": 0.33740121126174927, |
| "learning_rate": 9.768460868904112e-06, |
| "loss": 0.1281, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.5230650890251352, |
| "grad_norm": 0.3358958065509796, |
| "learning_rate": 9.739158393367229e-06, |
| "loss": 0.1298, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.5239700459265627, |
| "grad_norm": 0.3097865581512451, |
| "learning_rate": 9.709858158866147e-06, |
| "loss": 0.1219, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.5248750028279903, |
| "grad_norm": 0.3808821141719818, |
| "learning_rate": 9.680560417135538e-06, |
| "loss": 0.1244, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.5257799597294179, |
| "grad_norm": 0.3200414478778839, |
| "learning_rate": 9.651265419888651e-06, |
| "loss": 0.1312, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.5266849166308455, |
| "grad_norm": 0.38478338718414307, |
| "learning_rate": 9.621973418815154e-06, |
| "loss": 0.1324, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.527589873532273, |
| "grad_norm": 0.39911043643951416, |
| "learning_rate": 9.592684665578978e-06, |
| "loss": 0.1246, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.5284948304337006, |
| "grad_norm": 0.328489750623703, |
| "learning_rate": 9.563399411816141e-06, |
| "loss": 0.1276, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.5293997873351282, |
| "grad_norm": 0.4059896171092987, |
| "learning_rate": 9.534117909132606e-06, |
| "loss": 0.136, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.5303047442365557, |
| "grad_norm": 0.3130931854248047, |
| "learning_rate": 9.5048404091021e-06, |
| "loss": 0.1286, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.5312097011379833, |
| "grad_norm": 0.3940030634403229, |
| "learning_rate": 9.475567163263968e-06, |
| "loss": 0.1315, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.5321146580394108, |
| "grad_norm": 0.40726789832115173, |
| "learning_rate": 9.446298423120995e-06, |
| "loss": 0.1289, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.5330196149408385, |
| "grad_norm": 0.5030715465545654, |
| "learning_rate": 9.417034440137264e-06, |
| "loss": 0.1305, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.533924571842266, |
| "grad_norm": 0.3696930706501007, |
| "learning_rate": 9.387775465735987e-06, |
| "loss": 0.1337, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.5348295287436936, |
| "grad_norm": 0.2996613681316376, |
| "learning_rate": 9.358521751297336e-06, |
| "loss": 0.1296, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.5357344856451212, |
| "grad_norm": 0.30213144421577454, |
| "learning_rate": 9.329273548156305e-06, |
| "loss": 0.1284, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.5366394425465487, |
| "grad_norm": 0.35034891963005066, |
| "learning_rate": 9.300031107600519e-06, |
| "loss": 0.1258, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.5375443994479763, |
| "grad_norm": 0.4542511999607086, |
| "learning_rate": 9.270794680868108e-06, |
| "loss": 0.1384, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.5384493563494038, |
| "grad_norm": 0.4177015423774719, |
| "learning_rate": 9.241564519145529e-06, |
| "loss": 0.1308, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.5393543132508314, |
| "grad_norm": 0.3148910701274872, |
| "learning_rate": 9.212340873565417e-06, |
| "loss": 0.1256, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.540259270152259, |
| "grad_norm": 0.3220025300979614, |
| "learning_rate": 9.183123995204419e-06, |
| "loss": 0.1356, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.5411642270536866, |
| "grad_norm": 0.37302547693252563, |
| "learning_rate": 9.153914135081037e-06, |
| "loss": 0.1319, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.5420691839551142, |
| "grad_norm": 0.2666574716567993, |
| "learning_rate": 9.12471154415348e-06, |
| "loss": 0.1271, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.5429741408565417, |
| "grad_norm": 0.333808034658432, |
| "learning_rate": 9.095516473317506e-06, |
| "loss": 0.1249, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.5438790977579693, |
| "grad_norm": 0.29716721177101135, |
| "learning_rate": 9.066329173404267e-06, |
| "loss": 0.1331, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.5447840546593968, |
| "grad_norm": 0.3095798194408417, |
| "learning_rate": 9.037149895178132e-06, |
| "loss": 0.1219, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.5456890115608244, |
| "grad_norm": 0.3963325619697571, |
| "learning_rate": 9.007978889334573e-06, |
| "loss": 0.1281, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.546593968462252, |
| "grad_norm": 0.2934766113758087, |
| "learning_rate": 8.978816406497977e-06, |
| "loss": 0.1254, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.5474989253636795, |
| "grad_norm": 0.3293272852897644, |
| "learning_rate": 8.949662697219507e-06, |
| "loss": 0.132, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.5484038822651072, |
| "grad_norm": 0.47501856088638306, |
| "learning_rate": 8.920518011974955e-06, |
| "loss": 0.134, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.5493088391665347, |
| "grad_norm": 0.38773998618125916, |
| "learning_rate": 8.891382601162571e-06, |
| "loss": 0.1347, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.5502137960679623, |
| "grad_norm": 0.30083420872688293, |
| "learning_rate": 8.862256715100926e-06, |
| "loss": 0.1232, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.5511187529693898, |
| "grad_norm": 0.32377147674560547, |
| "learning_rate": 8.833140604026763e-06, |
| "loss": 0.1244, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.5520237098708174, |
| "grad_norm": 1.7971711158752441, |
| "learning_rate": 8.804034518092846e-06, |
| "loss": 0.1269, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.552928666772245, |
| "grad_norm": 0.28756287693977356, |
| "learning_rate": 8.7749387073658e-06, |
| "loss": 0.1285, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.5538336236736725, |
| "grad_norm": 0.3303268551826477, |
| "learning_rate": 8.745853421823965e-06, |
| "loss": 0.131, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.5547385805751001, |
| "grad_norm": 0.30345699191093445, |
| "learning_rate": 8.716778911355266e-06, |
| "loss": 0.1344, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.5556435374765277, |
| "grad_norm": 0.33741581439971924, |
| "learning_rate": 8.687715425755047e-06, |
| "loss": 0.125, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.5565484943779553, |
| "grad_norm": 0.36916887760162354, |
| "learning_rate": 8.65866321472393e-06, |
| "loss": 0.1391, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.5574534512793828, |
| "grad_norm": 0.2726483941078186, |
| "learning_rate": 8.62962252786568e-06, |
| "loss": 0.1274, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.5583584081808104, |
| "grad_norm": 0.6223147511482239, |
| "learning_rate": 8.600593614685035e-06, |
| "loss": 0.1222, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.559263365082238, |
| "grad_norm": 0.285051554441452, |
| "learning_rate": 8.571576724585589e-06, |
| "loss": 0.1316, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.5601683219836655, |
| "grad_norm": 0.2996697723865509, |
| "learning_rate": 8.542572106867643e-06, |
| "loss": 0.1221, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.5610732788850931, |
| "grad_norm": 0.4449019730091095, |
| "learning_rate": 8.513580010726052e-06, |
| "loss": 0.136, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.5619782357865206, |
| "grad_norm": 0.3610653579235077, |
| "learning_rate": 8.484600685248089e-06, |
| "loss": 0.1349, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.5628831926879483, |
| "grad_norm": 0.23445254564285278, |
| "learning_rate": 8.455634379411314e-06, |
| "loss": 0.1304, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.5637881495893758, |
| "grad_norm": 0.4748559594154358, |
| "learning_rate": 8.426681342081428e-06, |
| "loss": 0.1281, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.5646931064908034, |
| "grad_norm": 0.28520044684410095, |
| "learning_rate": 8.397741822010128e-06, |
| "loss": 0.1318, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.565598063392231, |
| "grad_norm": 0.31549420952796936, |
| "learning_rate": 8.368816067832986e-06, |
| "loss": 0.1259, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.5665030202936585, |
| "grad_norm": 0.3754958510398865, |
| "learning_rate": 8.339904328067289e-06, |
| "loss": 0.1317, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.5674079771950861, |
| "grad_norm": 0.3160054087638855, |
| "learning_rate": 8.311006851109939e-06, |
| "loss": 0.1311, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.5683129340965136, |
| "grad_norm": 0.3136991560459137, |
| "learning_rate": 8.282123885235276e-06, |
| "loss": 0.1349, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.5692178909979412, |
| "grad_norm": 0.38889384269714355, |
| "learning_rate": 8.253255678592985e-06, |
| "loss": 0.1349, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.5701228478993688, |
| "grad_norm": 0.3792050778865814, |
| "learning_rate": 8.224402479205941e-06, |
| "loss": 0.1237, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.5710278048007964, |
| "grad_norm": 0.35518279671669006, |
| "learning_rate": 8.195564534968074e-06, |
| "loss": 0.1232, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.571932761702224, |
| "grad_norm": 0.2767059803009033, |
| "learning_rate": 8.166742093642263e-06, |
| "loss": 0.1265, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.5728377186036515, |
| "grad_norm": 0.37054571509361267, |
| "learning_rate": 8.137935402858182e-06, |
| "loss": 0.1288, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.5737426755050791, |
| "grad_norm": 0.3450683653354645, |
| "learning_rate": 8.10914471011019e-06, |
| "loss": 0.1339, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.5746476324065066, |
| "grad_norm": 0.29977867007255554, |
| "learning_rate": 8.080370262755191e-06, |
| "loss": 0.126, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.5755525893079342, |
| "grad_norm": 0.34314143657684326, |
| "learning_rate": 8.051612308010526e-06, |
| "loss": 0.1283, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.5764575462093617, |
| "grad_norm": 0.37054121494293213, |
| "learning_rate": 8.022871092951827e-06, |
| "loss": 0.1292, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.5773625031107893, |
| "grad_norm": 0.37676891684532166, |
| "learning_rate": 7.994146864510912e-06, |
| "loss": 0.1285, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.578267460012217, |
| "grad_norm": 0.26649826765060425, |
| "learning_rate": 7.965439869473664e-06, |
| "loss": 0.1261, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.5791724169136445, |
| "grad_norm": 0.38938188552856445, |
| "learning_rate": 7.936750354477891e-06, |
| "loss": 0.1272, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.5800773738150721, |
| "grad_norm": 0.32541313767433167, |
| "learning_rate": 7.908078566011227e-06, |
| "loss": 0.1233, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.5809823307164996, |
| "grad_norm": 0.36433953046798706, |
| "learning_rate": 7.879424750409007e-06, |
| "loss": 0.1314, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.5818872876179272, |
| "grad_norm": 0.3940136432647705, |
| "learning_rate": 7.850789153852157e-06, |
| "loss": 0.1373, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.5827922445193547, |
| "grad_norm": 0.3312411904335022, |
| "learning_rate": 7.822172022365059e-06, |
| "loss": 0.1258, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.5836972014207823, |
| "grad_norm": 0.5461381077766418, |
| "learning_rate": 7.793573601813467e-06, |
| "loss": 0.1275, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.5846021583222099, |
| "grad_norm": 0.41021519899368286, |
| "learning_rate": 7.764994137902366e-06, |
| "loss": 0.1305, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.5855071152236375, |
| "grad_norm": 0.5024427175521851, |
| "learning_rate": 7.736433876173879e-06, |
| "loss": 0.1264, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.5864120721250651, |
| "grad_norm": 0.3114100992679596, |
| "learning_rate": 7.70789306200516e-06, |
| "loss": 0.1328, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.5873170290264926, |
| "grad_norm": 0.3421667814254761, |
| "learning_rate": 7.679371940606265e-06, |
| "loss": 0.1336, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.5882219859279202, |
| "grad_norm": 0.4376727044582367, |
| "learning_rate": 7.650870757018061e-06, |
| "loss": 0.1277, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.5891269428293477, |
| "grad_norm": 0.26968899369239807, |
| "learning_rate": 7.622389756110126e-06, |
| "loss": 0.1281, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.5900318997307753, |
| "grad_norm": 0.3418639004230499, |
| "learning_rate": 7.593929182578634e-06, |
| "loss": 0.1321, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.5909368566322029, |
| "grad_norm": 0.3123999536037445, |
| "learning_rate": 7.565489280944256e-06, |
| "loss": 0.1257, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.5918418135336304, |
| "grad_norm": 0.39082077145576477, |
| "learning_rate": 7.537070295550051e-06, |
| "loss": 0.1303, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.5927467704350581, |
| "grad_norm": 0.32185035943984985, |
| "learning_rate": 7.508672470559385e-06, |
| "loss": 0.1278, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.5936517273364856, |
| "grad_norm": 0.6370688080787659, |
| "learning_rate": 7.480296049953823e-06, |
| "loss": 0.132, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.5945566842379132, |
| "grad_norm": 0.44037026166915894, |
| "learning_rate": 7.451941277531025e-06, |
| "loss": 0.1264, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.5954616411393407, |
| "grad_norm": 0.301471471786499, |
| "learning_rate": 7.423608396902673e-06, |
| "loss": 0.1261, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.5963665980407683, |
| "grad_norm": 0.3698810338973999, |
| "learning_rate": 7.395297651492346e-06, |
| "loss": 0.1262, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.5972715549421959, |
| "grad_norm": 0.27682480216026306, |
| "learning_rate": 7.36700928453346e-06, |
| "loss": 0.1301, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.5981765118436234, |
| "grad_norm": 0.3122202455997467, |
| "learning_rate": 7.338743539067163e-06, |
| "loss": 0.1325, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.599081468745051, |
| "grad_norm": 0.3500480651855469, |
| "learning_rate": 7.310500657940253e-06, |
| "loss": 0.1332, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.5999864256464786, |
| "grad_norm": 0.3293434977531433, |
| "learning_rate": 7.282280883803073e-06, |
| "loss": 0.1337, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.6008913825479062, |
| "grad_norm": 0.2992168366909027, |
| "learning_rate": 7.254084459107453e-06, |
| "loss": 0.1336, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.6017963394493338, |
| "grad_norm": 0.3215314745903015, |
| "learning_rate": 7.225911626104621e-06, |
| "loss": 0.1283, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.6027012963507613, |
| "grad_norm": 0.3701172173023224, |
| "learning_rate": 7.1977626268430965e-06, |
| "loss": 0.1219, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.6036062532521889, |
| "grad_norm": 0.3572734594345093, |
| "learning_rate": 7.1696377031666495e-06, |
| "loss": 0.1204, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.6045112101536164, |
| "grad_norm": 0.37793678045272827, |
| "learning_rate": 7.1415370967121896e-06, |
| "loss": 0.1253, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.605416167055044, |
| "grad_norm": 0.2588195204734802, |
| "learning_rate": 7.113461048907711e-06, |
| "loss": 0.1247, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.6063211239564715, |
| "grad_norm": 0.3132305145263672, |
| "learning_rate": 7.085409800970203e-06, |
| "loss": 0.1307, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.6072260808578992, |
| "grad_norm": 0.36036190390586853, |
| "learning_rate": 7.0573835939035974e-06, |
| "loss": 0.1322, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.6081310377593268, |
| "grad_norm": 0.39142096042633057, |
| "learning_rate": 7.029382668496679e-06, |
| "loss": 0.1218, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.6090359946607543, |
| "grad_norm": 0.33356091380119324, |
| "learning_rate": 7.001407265321019e-06, |
| "loss": 0.1268, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.6099409515621819, |
| "grad_norm": 0.3158833086490631, |
| "learning_rate": 6.973457624728922e-06, |
| "loss": 0.1248, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.6108459084636094, |
| "grad_norm": 0.35817354917526245, |
| "learning_rate": 6.945533986851345e-06, |
| "loss": 0.1304, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.611750865365037, |
| "grad_norm": 0.28623586893081665, |
| "learning_rate": 6.917636591595849e-06, |
| "loss": 0.1243, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.6126558222664645, |
| "grad_norm": 0.39012181758880615, |
| "learning_rate": 6.8897656786445166e-06, |
| "loss": 0.1213, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.6135607791678921, |
| "grad_norm": 0.44978418946266174, |
| "learning_rate": 6.861921487451922e-06, |
| "loss": 0.1234, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.6144657360693196, |
| "grad_norm": 0.34891191124916077, |
| "learning_rate": 6.834104257243043e-06, |
| "loss": 0.1275, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.6153706929707473, |
| "grad_norm": 0.29933297634124756, |
| "learning_rate": 6.806314227011235e-06, |
| "loss": 0.1307, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.6162756498721749, |
| "grad_norm": 0.3456212282180786, |
| "learning_rate": 6.778551635516157e-06, |
| "loss": 0.1273, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.6171806067736024, |
| "grad_norm": 0.32694172859191895, |
| "learning_rate": 6.750816721281719e-06, |
| "loss": 0.1278, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.61808556367503, |
| "grad_norm": 0.3350003659725189, |
| "learning_rate": 6.7231097225940475e-06, |
| "loss": 0.1318, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.6189905205764575, |
| "grad_norm": 0.3554823100566864, |
| "learning_rate": 6.695430877499434e-06, |
| "loss": 0.1282, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.6198954774778851, |
| "grad_norm": 0.4120415449142456, |
| "learning_rate": 6.6677804238022806e-06, |
| "loss": 0.1311, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.6208004343793126, |
| "grad_norm": 0.3582000732421875, |
| "learning_rate": 6.640158599063069e-06, |
| "loss": 0.1223, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.6217053912807402, |
| "grad_norm": 0.3458595275878906, |
| "learning_rate": 6.612565640596307e-06, |
| "loss": 0.1294, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.6226103481821679, |
| "grad_norm": 0.2830416262149811, |
| "learning_rate": 6.585001785468497e-06, |
| "loss": 0.1273, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.6235153050835954, |
| "grad_norm": 0.32797959446907043, |
| "learning_rate": 6.5574672704961025e-06, |
| "loss": 0.1284, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.624420261985023, |
| "grad_norm": 0.3323483467102051, |
| "learning_rate": 6.529962332243509e-06, |
| "loss": 0.1258, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.6253252188864505, |
| "grad_norm": 0.2794325649738312, |
| "learning_rate": 6.5024872070209936e-06, |
| "loss": 0.1323, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.6262301757878781, |
| "grad_norm": 0.2866572439670563, |
| "learning_rate": 6.4750421308826795e-06, |
| "loss": 0.1269, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.6271351326893057, |
| "grad_norm": 0.3717053532600403, |
| "learning_rate": 6.447627339624538e-06, |
| "loss": 0.1257, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.6280400895907332, |
| "grad_norm": 0.3445277512073517, |
| "learning_rate": 6.4202430687823416e-06, |
| "loss": 0.133, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.6289450464921608, |
| "grad_norm": 0.4389108419418335, |
| "learning_rate": 6.39288955362964e-06, |
| "loss": 0.1271, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.6298500033935884, |
| "grad_norm": 0.38752782344818115, |
| "learning_rate": 6.365567029175747e-06, |
| "loss": 0.1306, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.630754960295016, |
| "grad_norm": 0.4342532157897949, |
| "learning_rate": 6.338275730163715e-06, |
| "loss": 0.1286, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.6316599171964435, |
| "grad_norm": 0.3530200719833374, |
| "learning_rate": 6.311015891068328e-06, |
| "loss": 0.1239, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.6325648740978711, |
| "grad_norm": 0.33301499485969543, |
| "learning_rate": 6.283787746094077e-06, |
| "loss": 0.1311, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.6334698309992987, |
| "grad_norm": 0.3174525201320648, |
| "learning_rate": 6.256591529173148e-06, |
| "loss": 0.1318, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.6343747879007262, |
| "grad_norm": 0.27925947308540344, |
| "learning_rate": 6.229427473963416e-06, |
| "loss": 0.1291, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.6352797448021538, |
| "grad_norm": 0.36644554138183594, |
| "learning_rate": 6.20229581384644e-06, |
| "loss": 0.119, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.6361847017035813, |
| "grad_norm": 0.4176923930644989, |
| "learning_rate": 6.1751967819254545e-06, |
| "loss": 0.126, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.637089658605009, |
| "grad_norm": 0.30893582105636597, |
| "learning_rate": 6.148130611023361e-06, |
| "loss": 0.1283, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.6379946155064365, |
| "grad_norm": 0.37425440549850464, |
| "learning_rate": 6.121097533680745e-06, |
| "loss": 0.1265, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.6388995724078641, |
| "grad_norm": 0.2834739089012146, |
| "learning_rate": 6.094097782153853e-06, |
| "loss": 0.1311, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.6398045293092917, |
| "grad_norm": 0.3139822781085968, |
| "learning_rate": 6.0671315884126225e-06, |
| "loss": 0.1231, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.6407094862107192, |
| "grad_norm": 0.2844420075416565, |
| "learning_rate": 6.040199184138668e-06, |
| "loss": 0.129, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.6416144431121468, |
| "grad_norm": 0.35503455996513367, |
| "learning_rate": 6.013300800723312e-06, |
| "loss": 0.1311, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.6425194000135743, |
| "grad_norm": 0.35351240634918213, |
| "learning_rate": 5.986436669265568e-06, |
| "loss": 0.1331, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.6434243569150019, |
| "grad_norm": 0.3031436800956726, |
| "learning_rate": 5.959607020570184e-06, |
| "loss": 0.1305, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.6443293138164294, |
| "grad_norm": 0.23189416527748108, |
| "learning_rate": 5.932812085145647e-06, |
| "loss": 0.1235, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.6452342707178571, |
| "grad_norm": 0.2822563946247101, |
| "learning_rate": 5.906052093202199e-06, |
| "loss": 0.1269, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.6461392276192847, |
| "grad_norm": 0.28259310126304626, |
| "learning_rate": 5.879327274649868e-06, |
| "loss": 0.1273, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.6470441845207122, |
| "grad_norm": 0.33720916509628296, |
| "learning_rate": 5.852637859096475e-06, |
| "loss": 0.1345, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.6479491414221398, |
| "grad_norm": 0.3332005441188812, |
| "learning_rate": 5.825984075845691e-06, |
| "loss": 0.1248, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.6488540983235673, |
| "grad_norm": 0.3517606258392334, |
| "learning_rate": 5.799366153895037e-06, |
| "loss": 0.1288, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.6497590552249949, |
| "grad_norm": 0.265109121799469, |
| "learning_rate": 5.772784321933939e-06, |
| "loss": 0.1329, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.6506640121264224, |
| "grad_norm": 0.3004017472267151, |
| "learning_rate": 5.746238808341751e-06, |
| "loss": 0.1252, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.65156896902785, |
| "grad_norm": 0.34260621666908264, |
| "learning_rate": 5.719729841185786e-06, |
| "loss": 0.1267, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.6524739259292777, |
| "grad_norm": 0.24408110976219177, |
| "learning_rate": 5.693257648219379e-06, |
| "loss": 0.1296, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.6533788828307052, |
| "grad_norm": 0.2951405346393585, |
| "learning_rate": 5.666822456879918e-06, |
| "loss": 0.1231, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.6542838397321328, |
| "grad_norm": 0.32168251276016235, |
| "learning_rate": 5.640424494286878e-06, |
| "loss": 0.1298, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.6551887966335603, |
| "grad_norm": 0.2844177186489105, |
| "learning_rate": 5.614063987239885e-06, |
| "loss": 0.1289, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.6560937535349879, |
| "grad_norm": 0.5491533875465393, |
| "learning_rate": 5.587741162216768e-06, |
| "loss": 0.1313, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.6569987104364154, |
| "grad_norm": 0.298951119184494, |
| "learning_rate": 5.561456245371608e-06, |
| "loss": 0.125, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.657903667337843, |
| "grad_norm": 0.3288751542568207, |
| "learning_rate": 5.535209462532792e-06, |
| "loss": 0.1296, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.6588086242392706, |
| "grad_norm": 0.43045946955680847, |
| "learning_rate": 5.509001039201085e-06, |
| "loss": 0.1263, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.6597135811406982, |
| "grad_norm": 0.2680876851081848, |
| "learning_rate": 5.482831200547667e-06, |
| "loss": 0.1324, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.6606185380421258, |
| "grad_norm": 0.3521096110343933, |
| "learning_rate": 5.456700171412231e-06, |
| "loss": 0.1204, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.6615234949435533, |
| "grad_norm": 0.2706452012062073, |
| "learning_rate": 5.430608176301036e-06, |
| "loss": 0.1269, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.6624284518449809, |
| "grad_norm": 0.3557042181491852, |
| "learning_rate": 5.4045554393849635e-06, |
| "loss": 0.132, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.6633334087464084, |
| "grad_norm": 0.3670320510864258, |
| "learning_rate": 5.378542184497623e-06, |
| "loss": 0.1257, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.664238365647836, |
| "grad_norm": 0.31355276703834534, |
| "learning_rate": 5.3525686351333976e-06, |
| "loss": 0.1275, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.6651433225492636, |
| "grad_norm": 0.30157995223999023, |
| "learning_rate": 5.326635014445547e-06, |
| "loss": 0.1291, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.6660482794506911, |
| "grad_norm": 0.2899110019207001, |
| "learning_rate": 5.300741545244279e-06, |
| "loss": 0.1311, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.6669532363521188, |
| "grad_norm": 0.34780648350715637, |
| "learning_rate": 5.274888449994843e-06, |
| "loss": 0.1294, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.6678581932535463, |
| "grad_norm": 0.2876949608325958, |
| "learning_rate": 5.2490759508155975e-06, |
| "loss": 0.1303, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.6687631501549739, |
| "grad_norm": 0.3087066113948822, |
| "learning_rate": 5.223304269476137e-06, |
| "loss": 0.1255, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.6696681070564015, |
| "grad_norm": 0.38678351044654846, |
| "learning_rate": 5.19757362739535e-06, |
| "loss": 0.1288, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.670573063957829, |
| "grad_norm": 0.2943480610847473, |
| "learning_rate": 5.171884245639545e-06, |
| "loss": 0.1284, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.6714780208592566, |
| "grad_norm": 0.28372156620025635, |
| "learning_rate": 5.146236344920542e-06, |
| "loss": 0.1292, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.6723829777606841, |
| "grad_norm": 0.2806905508041382, |
| "learning_rate": 5.12063014559376e-06, |
| "loss": 0.1272, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.6732879346621117, |
| "grad_norm": 0.3286825120449066, |
| "learning_rate": 5.095065867656351e-06, |
| "loss": 0.1205, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.6741928915635392, |
| "grad_norm": 0.33701106905937195, |
| "learning_rate": 5.0695437307452945e-06, |
| "loss": 0.1312, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.6750978484649669, |
| "grad_norm": 0.3478999137878418, |
| "learning_rate": 5.044063954135508e-06, |
| "loss": 0.1284, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.6760028053663945, |
| "grad_norm": 0.28950104117393494, |
| "learning_rate": 5.018626756737979e-06, |
| "loss": 0.1267, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.676907762267822, |
| "grad_norm": 0.3087421655654907, |
| "learning_rate": 4.9932323570978605e-06, |
| "loss": 0.1254, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.6778127191692496, |
| "grad_norm": 0.34977859258651733, |
| "learning_rate": 4.967880973392607e-06, |
| "loss": 0.1293, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.6787176760706771, |
| "grad_norm": 0.3207535147666931, |
| "learning_rate": 4.942572823430107e-06, |
| "loss": 0.1268, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.6796226329721047, |
| "grad_norm": 0.34587785601615906, |
| "learning_rate": 4.917308124646802e-06, |
| "loss": 0.1272, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.6805275898735322, |
| "grad_norm": 0.338029146194458, |
| "learning_rate": 4.892087094105818e-06, |
| "loss": 0.1208, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.6814325467749598, |
| "grad_norm": 0.3155430555343628, |
| "learning_rate": 4.866909948495101e-06, |
| "loss": 0.1234, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.6823375036763875, |
| "grad_norm": 0.27973029017448425, |
| "learning_rate": 4.841776904125559e-06, |
| "loss": 0.1301, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.683242460577815, |
| "grad_norm": 0.3526993989944458, |
| "learning_rate": 4.816688176929207e-06, |
| "loss": 0.1258, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.6841474174792426, |
| "grad_norm": 0.3077426850795746, |
| "learning_rate": 4.791643982457293e-06, |
| "loss": 0.1235, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.6850523743806701, |
| "grad_norm": 0.31144073605537415, |
| "learning_rate": 4.766644535878476e-06, |
| "loss": 0.1226, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.6859573312820977, |
| "grad_norm": 0.32703807950019836, |
| "learning_rate": 4.741690051976946e-06, |
| "loss": 0.1265, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.6868622881835252, |
| "grad_norm": 0.4660681486129761, |
| "learning_rate": 4.716780745150602e-06, |
| "loss": 0.1323, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.6877672450849528, |
| "grad_norm": 0.27197226881980896, |
| "learning_rate": 4.6919168294092e-06, |
| "loss": 0.1319, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.6886722019863804, |
| "grad_norm": 0.3342832624912262, |
| "learning_rate": 4.6670985183725205e-06, |
| "loss": 0.134, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.689577158887808, |
| "grad_norm": 0.33844679594039917, |
| "learning_rate": 4.642326025268514e-06, |
| "loss": 0.1282, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.6904821157892356, |
| "grad_norm": 0.39204105734825134, |
| "learning_rate": 4.6175995629314994e-06, |
| "loss": 0.1236, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.6913870726906631, |
| "grad_norm": 0.38780298829078674, |
| "learning_rate": 4.592919343800315e-06, |
| "loss": 0.1316, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.6922920295920907, |
| "grad_norm": 0.3531728982925415, |
| "learning_rate": 4.568285579916491e-06, |
| "loss": 0.1339, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.6931969864935182, |
| "grad_norm": 0.264414519071579, |
| "learning_rate": 4.543698482922445e-06, |
| "loss": 0.1309, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.6941019433949458, |
| "grad_norm": 0.3809826076030731, |
| "learning_rate": 4.519158264059642e-06, |
| "loss": 0.1302, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.6950069002963734, |
| "grad_norm": 0.3677527904510498, |
| "learning_rate": 4.4946651341668006e-06, |
| "loss": 0.128, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.6959118571978009, |
| "grad_norm": 0.3577388823032379, |
| "learning_rate": 4.470219303678069e-06, |
| "loss": 0.1242, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.6968168140992286, |
| "grad_norm": 0.3218074142932892, |
| "learning_rate": 4.44582098262122e-06, |
| "loss": 0.128, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.6977217710006561, |
| "grad_norm": 0.3113616406917572, |
| "learning_rate": 4.421470380615841e-06, |
| "loss": 0.1246, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.6986267279020837, |
| "grad_norm": 0.4153074622154236, |
| "learning_rate": 4.397167706871546e-06, |
| "loss": 0.1236, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.6995316848035112, |
| "grad_norm": 0.2737235426902771, |
| "learning_rate": 4.37291317018617e-06, |
| "loss": 0.1334, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.7004366417049388, |
| "grad_norm": 0.3413766026496887, |
| "learning_rate": 4.348706978943965e-06, |
| "loss": 0.1248, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.7013415986063664, |
| "grad_norm": 0.44644635915756226, |
| "learning_rate": 4.324549341113839e-06, |
| "loss": 0.131, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.7022465555077939, |
| "grad_norm": 0.3571605384349823, |
| "learning_rate": 4.300440464247528e-06, |
| "loss": 0.1219, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.7031515124092215, |
| "grad_norm": 0.218129500746727, |
| "learning_rate": 4.276380555477855e-06, |
| "loss": 0.1224, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.7040564693106491, |
| "grad_norm": 0.2756895124912262, |
| "learning_rate": 4.25236982151692e-06, |
| "loss": 0.1165, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.7049614262120767, |
| "grad_norm": 0.3429490029811859, |
| "learning_rate": 4.22840846865434e-06, |
| "loss": 0.133, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.7058663831135042, |
| "grad_norm": 0.3609547019004822, |
| "learning_rate": 4.204496702755471e-06, |
| "loss": 0.1229, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.7067713400149318, |
| "grad_norm": 0.31833615899086, |
| "learning_rate": 4.180634729259635e-06, |
| "loss": 0.131, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.7076762969163594, |
| "grad_norm": 0.3234885632991791, |
| "learning_rate": 4.15682275317836e-06, |
| "loss": 0.1242, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.7085812538177869, |
| "grad_norm": 0.331586092710495, |
| "learning_rate": 4.133060979093623e-06, |
| "loss": 0.1238, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.7094862107192145, |
| "grad_norm": 0.29033374786376953, |
| "learning_rate": 4.109349611156088e-06, |
| "loss": 0.1231, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.710391167620642, |
| "grad_norm": 0.4018980860710144, |
| "learning_rate": 4.085688853083346e-06, |
| "loss": 0.1276, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.7112961245220696, |
| "grad_norm": 0.3441830575466156, |
| "learning_rate": 4.062078908158174e-06, |
| "loss": 0.1334, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.7122010814234973, |
| "grad_norm": 0.3075098693370819, |
| "learning_rate": 4.038519979226785e-06, |
| "loss": 0.1223, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.7131060383249248, |
| "grad_norm": 0.4702622890472412, |
| "learning_rate": 4.015012268697085e-06, |
| "loss": 0.1274, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.7140109952263524, |
| "grad_norm": 0.40426555275917053, |
| "learning_rate": 3.991555978536937e-06, |
| "loss": 0.1286, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.7149159521277799, |
| "grad_norm": 0.3282513916492462, |
| "learning_rate": 3.968151310272417e-06, |
| "loss": 0.1286, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.7158209090292075, |
| "grad_norm": 0.31669655442237854, |
| "learning_rate": 3.944798464986086e-06, |
| "loss": 0.1228, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.716725865930635, |
| "grad_norm": 0.3846443295478821, |
| "learning_rate": 3.9214976433152755e-06, |
| "loss": 0.1289, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.7176308228320626, |
| "grad_norm": 0.3068075478076935, |
| "learning_rate": 3.8982490454503455e-06, |
| "loss": 0.1258, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.7185357797334901, |
| "grad_norm": 0.259236216545105, |
| "learning_rate": 3.875052871132979e-06, |
| "loss": 0.126, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.7194407366349178, |
| "grad_norm": 0.32442575693130493, |
| "learning_rate": 3.851909319654448e-06, |
| "loss": 0.1282, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.7203456935363454, |
| "grad_norm": 0.3598691523075104, |
| "learning_rate": 3.82881858985392e-06, |
| "loss": 0.127, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.7212506504377729, |
| "grad_norm": 0.23929624259471893, |
| "learning_rate": 3.8057808801167463e-06, |
| "loss": 0.1243, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.7221556073392005, |
| "grad_norm": 0.31415387988090515, |
| "learning_rate": 3.782796388372739e-06, |
| "loss": 0.1309, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.723060564240628, |
| "grad_norm": 0.3203081786632538, |
| "learning_rate": 3.7598653120945015e-06, |
| "loss": 0.1268, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.7239655211420556, |
| "grad_norm": 0.38148126006126404, |
| "learning_rate": 3.736987848295699e-06, |
| "loss": 0.1196, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.7248704780434831, |
| "grad_norm": 0.2480911761522293, |
| "learning_rate": 3.7141641935293926e-06, |
| "loss": 0.1296, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.7257754349449107, |
| "grad_norm": 0.2723288834095001, |
| "learning_rate": 3.6913945438863397e-06, |
| "loss": 0.127, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.7266803918463384, |
| "grad_norm": 0.3012789785861969, |
| "learning_rate": 3.6686790949933082e-06, |
| "loss": 0.1254, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.7275853487477659, |
| "grad_norm": 0.2832350432872772, |
| "learning_rate": 3.64601804201139e-06, |
| "loss": 0.1158, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.7284903056491935, |
| "grad_norm": 0.30464476346969604, |
| "learning_rate": 3.6234115796343405e-06, |
| "loss": 0.1223, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.729395262550621, |
| "grad_norm": 0.3467896282672882, |
| "learning_rate": 3.6008599020868985e-06, |
| "loss": 0.1326, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.7303002194520486, |
| "grad_norm": 0.35166192054748535, |
| "learning_rate": 3.5783632031231018e-06, |
| "loss": 0.1257, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.7312051763534761, |
| "grad_norm": 0.3181626796722412, |
| "learning_rate": 3.555921676024653e-06, |
| "loss": 0.1269, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.7321101332549037, |
| "grad_norm": 0.30875396728515625, |
| "learning_rate": 3.53353551359923e-06, |
| "loss": 0.1297, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.7330150901563313, |
| "grad_norm": 0.3796384632587433, |
| "learning_rate": 3.511204908178848e-06, |
| "loss": 0.1243, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.7339200470577589, |
| "grad_norm": 0.3839415907859802, |
| "learning_rate": 3.488930051618201e-06, |
| "loss": 0.1265, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.7348250039591865, |
| "grad_norm": 0.34740591049194336, |
| "learning_rate": 3.4667111352930163e-06, |
| "loss": 0.1339, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.735729960860614, |
| "grad_norm": 0.2768769860267639, |
| "learning_rate": 3.4445483500983944e-06, |
| "loss": 0.1238, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.7366349177620416, |
| "grad_norm": 0.3522382378578186, |
| "learning_rate": 3.4224418864471976e-06, |
| "loss": 0.1242, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.7375398746634692, |
| "grad_norm": 0.31364932656288147, |
| "learning_rate": 3.400391934268391e-06, |
| "loss": 0.1261, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.7384448315648967, |
| "grad_norm": 0.3115822374820709, |
| "learning_rate": 3.378398683005416e-06, |
| "loss": 0.1248, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.7393497884663243, |
| "grad_norm": 0.370149165391922, |
| "learning_rate": 3.356462321614573e-06, |
| "loss": 0.1294, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.7402547453677518, |
| "grad_norm": 0.28242307901382446, |
| "learning_rate": 3.334583038563376e-06, |
| "loss": 0.1298, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.7411597022691794, |
| "grad_norm": 0.323049396276474, |
| "learning_rate": 3.3127610218289617e-06, |
| "loss": 0.1228, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.742064659170607, |
| "grad_norm": 0.393040269613266, |
| "learning_rate": 3.2909964588964514e-06, |
| "loss": 0.1276, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.7429696160720346, |
| "grad_norm": 0.36402514576911926, |
| "learning_rate": 3.269289536757352e-06, |
| "loss": 0.1296, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.7438745729734622, |
| "grad_norm": 0.3288993239402771, |
| "learning_rate": 3.2476404419079487e-06, |
| "loss": 0.1245, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.7447795298748897, |
| "grad_norm": 0.40095171332359314, |
| "learning_rate": 3.226049360347694e-06, |
| "loss": 0.1275, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.7456844867763173, |
| "grad_norm": 0.36854180693626404, |
| "learning_rate": 3.2045164775776137e-06, |
| "loss": 0.1254, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.7465894436777448, |
| "grad_norm": 0.35555073618888855, |
| "learning_rate": 3.1830419785987243e-06, |
| "loss": 0.1237, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.7474944005791724, |
| "grad_norm": 0.4191891849040985, |
| "learning_rate": 3.161626047910431e-06, |
| "loss": 0.13, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.7483993574805999, |
| "grad_norm": 0.3702433705329895, |
| "learning_rate": 3.140268869508949e-06, |
| "loss": 0.1317, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.7493043143820276, |
| "grad_norm": 0.27752381563186646, |
| "learning_rate": 3.1189706268857077e-06, |
| "loss": 0.1226, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.7502092712834552, |
| "grad_norm": 0.3350948095321655, |
| "learning_rate": 3.0977315030258002e-06, |
| "loss": 0.1309, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.7511142281848827, |
| "grad_norm": 0.4031756520271301, |
| "learning_rate": 3.0765516804063932e-06, |
| "loss": 0.127, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.7520191850863103, |
| "grad_norm": 0.3240339159965515, |
| "learning_rate": 3.055431340995163e-06, |
| "loss": 0.123, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.7529241419877378, |
| "grad_norm": 0.3321908414363861, |
| "learning_rate": 3.0343706662487306e-06, |
| "loss": 0.1258, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.7538290988891654, |
| "grad_norm": 0.3860868513584137, |
| "learning_rate": 3.013369837111101e-06, |
| "loss": 0.1297, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.7547340557905929, |
| "grad_norm": 0.3689778745174408, |
| "learning_rate": 2.992429034012121e-06, |
| "loss": 0.1253, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.7556390126920205, |
| "grad_norm": 0.27259641885757446, |
| "learning_rate": 2.9715484368659152e-06, |
| "loss": 0.1258, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.7565439695934482, |
| "grad_norm": 0.39256593585014343, |
| "learning_rate": 2.9507282250693514e-06, |
| "loss": 0.119, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.7574489264948757, |
| "grad_norm": 0.33603495359420776, |
| "learning_rate": 2.9299685775004793e-06, |
| "loss": 0.1337, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.7583538833963033, |
| "grad_norm": 0.4902133345603943, |
| "learning_rate": 2.9092696725170212e-06, |
| "loss": 0.1352, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.7592588402977308, |
| "grad_norm": 0.27200329303741455, |
| "learning_rate": 2.8886316879548205e-06, |
| "loss": 0.1231, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.7601637971991584, |
| "grad_norm": 0.27076977491378784, |
| "learning_rate": 2.868054801126321e-06, |
| "loss": 0.1209, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.7610687541005859, |
| "grad_norm": 0.32341551780700684, |
| "learning_rate": 2.8475391888190395e-06, |
| "loss": 0.1346, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.7619737110020135, |
| "grad_norm": 0.29108741879463196, |
| "learning_rate": 2.8270850272940466e-06, |
| "loss": 0.1251, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.762878667903441, |
| "grad_norm": 0.321847528219223, |
| "learning_rate": 2.806692492284461e-06, |
| "loss": 0.1248, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.7637836248048687, |
| "grad_norm": 0.3972260355949402, |
| "learning_rate": 2.786361758993932e-06, |
| "loss": 0.1266, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.7646885817062963, |
| "grad_norm": 0.3229351341724396, |
| "learning_rate": 2.766093002095137e-06, |
| "loss": 0.1253, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.7655935386077238, |
| "grad_norm": 0.32253509759902954, |
| "learning_rate": 2.745886395728271e-06, |
| "loss": 0.124, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.7664984955091514, |
| "grad_norm": 0.34673410654067993, |
| "learning_rate": 2.725742113499571e-06, |
| "loss": 0.124, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.767403452410579, |
| "grad_norm": 0.3298405110836029, |
| "learning_rate": 2.705660328479809e-06, |
| "loss": 0.1259, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.7683084093120065, |
| "grad_norm": 0.454266756772995, |
| "learning_rate": 2.6856412132027997e-06, |
| "loss": 0.1211, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.7692133662134341, |
| "grad_norm": 0.39579710364341736, |
| "learning_rate": 2.6656849396639415e-06, |
| "loss": 0.1302, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.7701183231148616, |
| "grad_norm": 0.2675047814846039, |
| "learning_rate": 2.6457916793187124e-06, |
| "loss": 0.123, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.7710232800162892, |
| "grad_norm": 0.44622719287872314, |
| "learning_rate": 2.6259616030812128e-06, |
| "loss": 0.1238, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.7719282369177168, |
| "grad_norm": 0.40703779458999634, |
| "learning_rate": 2.6061948813226968e-06, |
| "loss": 0.1222, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.7728331938191444, |
| "grad_norm": 0.32759425044059753, |
| "learning_rate": 2.5864916838701016e-06, |
| "loss": 0.1257, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.773738150720572, |
| "grad_norm": 0.2865431308746338, |
| "learning_rate": 2.5668521800045944e-06, |
| "loss": 0.1291, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.7746431076219995, |
| "grad_norm": 0.30442455410957336, |
| "learning_rate": 2.5472765384601074e-06, |
| "loss": 0.1214, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.7755480645234271, |
| "grad_norm": 0.31685060262680054, |
| "learning_rate": 2.5277649274219064e-06, |
| "loss": 0.131, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.7764530214248546, |
| "grad_norm": 0.274147093296051, |
| "learning_rate": 2.508317514525125e-06, |
| "loss": 0.1195, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.7773579783262822, |
| "grad_norm": 0.28254234790802, |
| "learning_rate": 2.4889344668533453e-06, |
| "loss": 0.1313, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.7782629352277097, |
| "grad_norm": 0.3491531014442444, |
| "learning_rate": 2.469615950937142e-06, |
| "loss": 0.1279, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.7791678921291374, |
| "grad_norm": 0.3661726713180542, |
| "learning_rate": 2.4503621327526694e-06, |
| "loss": 0.1252, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.780072849030565, |
| "grad_norm": 0.24949342012405396, |
| "learning_rate": 2.431173177720223e-06, |
| "loss": 0.1209, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.7809778059319925, |
| "grad_norm": 0.24942710995674133, |
| "learning_rate": 2.4120492507028236e-06, |
| "loss": 0.1294, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.7818827628334201, |
| "grad_norm": 0.3253498077392578, |
| "learning_rate": 2.392990516004804e-06, |
| "loss": 0.1313, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.7827877197348476, |
| "grad_norm": 0.31229835748672485, |
| "learning_rate": 2.3739971373703852e-06, |
| "loss": 0.1244, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.7836926766362752, |
| "grad_norm": 0.3631618320941925, |
| "learning_rate": 2.355069277982286e-06, |
| "loss": 0.1266, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.7845976335377027, |
| "grad_norm": 0.2935367822647095, |
| "learning_rate": 2.3362071004603036e-06, |
| "loss": 0.1222, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.7855025904391303, |
| "grad_norm": 0.34212765097618103, |
| "learning_rate": 2.3174107668599366e-06, |
| "loss": 0.126, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.786407547340558, |
| "grad_norm": 0.28019851446151733, |
| "learning_rate": 2.298680438670976e-06, |
| "loss": 0.119, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.7873125042419855, |
| "grad_norm": 0.35425955057144165, |
| "learning_rate": 2.2800162768161204e-06, |
| "loss": 0.1237, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.7882174611434131, |
| "grad_norm": 0.3190707564353943, |
| "learning_rate": 2.2614184416496022e-06, |
| "loss": 0.1206, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.7891224180448406, |
| "grad_norm": 0.40515249967575073, |
| "learning_rate": 2.2428870929558012e-06, |
| "loss": 0.1251, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.7900273749462682, |
| "grad_norm": 0.3554609417915344, |
| "learning_rate": 2.224422389947879e-06, |
| "loss": 0.1268, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.7909323318476957, |
| "grad_norm": 0.37230101227760315, |
| "learning_rate": 2.2060244912663996e-06, |
| "loss": 0.134, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.7918372887491233, |
| "grad_norm": 0.2492271512746811, |
| "learning_rate": 2.1876935549779766e-06, |
| "loss": 0.1247, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.7927422456505508, |
| "grad_norm": 0.3300861418247223, |
| "learning_rate": 2.169429738573915e-06, |
| "loss": 0.1243, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.7936472025519785, |
| "grad_norm": 0.4448375105857849, |
| "learning_rate": 2.151233198968854e-06, |
| "loss": 0.121, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.7945521594534061, |
| "grad_norm": 0.2607729434967041, |
| "learning_rate": 2.1331040924994216e-06, |
| "loss": 0.1194, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.7954571163548336, |
| "grad_norm": 0.3317795991897583, |
| "learning_rate": 2.1150425749228853e-06, |
| "loss": 0.122, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.7963620732562612, |
| "grad_norm": 0.3975413739681244, |
| "learning_rate": 2.097048801415823e-06, |
| "loss": 0.1261, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.7972670301576887, |
| "grad_norm": 0.3129339814186096, |
| "learning_rate": 2.079122926572784e-06, |
| "loss": 0.1264, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.7981719870591163, |
| "grad_norm": 0.32011422514915466, |
| "learning_rate": 2.0612651044049683e-06, |
| "loss": 0.1287, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.7990769439605439, |
| "grad_norm": 0.34052857756614685, |
| "learning_rate": 2.043475488338885e-06, |
| "loss": 0.1217, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.7999819008619714, |
| "grad_norm": 0.2858836054801941, |
| "learning_rate": 2.0257542312150534e-06, |
| "loss": 0.1242, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.800886857763399, |
| "grad_norm": 0.26198819279670715, |
| "learning_rate": 2.0081014852866843e-06, |
| "loss": 0.1288, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.8017918146648266, |
| "grad_norm": 0.3347429633140564, |
| "learning_rate": 1.9905174022183702e-06, |
| "loss": 0.1251, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.8026967715662542, |
| "grad_norm": 0.3790920078754425, |
| "learning_rate": 1.9730021330847838e-06, |
| "loss": 0.1263, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.8036017284676817, |
| "grad_norm": 0.27628254890441895, |
| "learning_rate": 1.955555828369371e-06, |
| "loss": 0.1272, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.8045066853691093, |
| "grad_norm": 0.2933729887008667, |
| "learning_rate": 1.938178637963074e-06, |
| "loss": 0.1332, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.8054116422705369, |
| "grad_norm": 0.2660931348800659, |
| "learning_rate": 1.9208707111630376e-06, |
| "loss": 0.1259, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.8063165991719644, |
| "grad_norm": 0.3294607698917389, |
| "learning_rate": 1.903632196671311e-06, |
| "loss": 0.1293, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.807221556073392, |
| "grad_norm": 0.3121941089630127, |
| "learning_rate": 1.8864632425936015e-06, |
| "loss": 0.1289, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.8081265129748195, |
| "grad_norm": 0.2634756863117218, |
| "learning_rate": 1.8693639964379661e-06, |
| "loss": 0.1291, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.8090314698762472, |
| "grad_norm": 0.3612058162689209, |
| "learning_rate": 1.852334605113576e-06, |
| "loss": 0.1284, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.8099364267776747, |
| "grad_norm": 0.27511492371559143, |
| "learning_rate": 1.8353752149294335e-06, |
| "loss": 0.1255, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.8108413836791023, |
| "grad_norm": 0.5154643058776855, |
| "learning_rate": 1.8184859715931247e-06, |
| "loss": 0.1293, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.8117463405805299, |
| "grad_norm": 0.2842332124710083, |
| "learning_rate": 1.8016670202095677e-06, |
| "loss": 0.1233, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.8126512974819574, |
| "grad_norm": 0.26935648918151855, |
| "learning_rate": 1.7849185052797525e-06, |
| "loss": 0.127, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.813556254383385, |
| "grad_norm": 0.4419654607772827, |
| "learning_rate": 1.7682405706995243e-06, |
| "loss": 0.1255, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.8144612112848125, |
| "grad_norm": 0.27756479382514954, |
| "learning_rate": 1.7516333597583214e-06, |
| "loss": 0.1195, |
| "step": 9000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 11050, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.722768489962275e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|