diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,6929 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.0, - "eval_steps": 500, - "global_step": 9858, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0010144045445323595, - "grad_norm": 76.97936248779297, - "learning_rate": 6.756756756756758e-07, - "loss": 3.6009, - "step": 10 - }, - { - "epoch": 0.002028809089064719, - "grad_norm": 12.27938461303711, - "learning_rate": 1.3513513513513515e-06, - "loss": 2.8161, - "step": 20 - }, - { - "epoch": 0.0030432136335970784, - "grad_norm": 3.859952926635742, - "learning_rate": 2.0270270270270273e-06, - "loss": 2.3945, - "step": 30 - }, - { - "epoch": 0.004057618178129438, - "grad_norm": 2.802656888961792, - "learning_rate": 2.702702702702703e-06, - "loss": 2.2219, - "step": 40 - }, - { - "epoch": 0.005072022722661797, - "grad_norm": 1.5864405632019043, - "learning_rate": 3.3783783783783788e-06, - "loss": 2.1487, - "step": 50 - }, - { - "epoch": 0.006086427267194157, - "grad_norm": 1.3526517152786255, - "learning_rate": 4.0540540540540545e-06, - "loss": 2.107, - "step": 60 - }, - { - "epoch": 0.007100831811726517, - "grad_norm": 1.4246087074279785, - "learning_rate": 4.72972972972973e-06, - "loss": 2.0715, - "step": 70 - }, - { - "epoch": 0.008115236356258876, - "grad_norm": 0.9552904963493347, - "learning_rate": 5.405405405405406e-06, - "loss": 2.0651, - "step": 80 - }, - { - "epoch": 0.009129640900791236, - "grad_norm": 0.9061160683631897, - "learning_rate": 6.081081081081082e-06, - "loss": 2.0321, - "step": 90 - }, - { - "epoch": 0.010144045445323595, - "grad_norm": 1.0343708992004395, - "learning_rate": 6.7567567567567575e-06, - "loss": 2.0102, - "step": 100 - }, - { - "epoch": 0.011158449989855955, - "grad_norm": 0.8597519993782043, - "learning_rate": 7.4324324324324324e-06, - "loss": 2.0017, - "step": 110 - }, - { - "epoch": 0.012172854534388313, - "grad_norm": 2.2696714401245117, - "learning_rate": 8.108108108108109e-06, - "loss": 1.9987, - "step": 120 - }, - { - "epoch": 0.013187259078920674, - "grad_norm": 0.9581915140151978, - "learning_rate": 8.783783783783785e-06, - "loss": 1.9799, - "step": 130 - }, - { - "epoch": 0.014201663623453034, - "grad_norm": 0.9030017256736755, - "learning_rate": 9.45945945945946e-06, - "loss": 1.9771, - "step": 140 - }, - { - "epoch": 0.015216068167985392, - "grad_norm": 1.6578402519226074, - "learning_rate": 1.0135135135135136e-05, - "loss": 1.9661, - "step": 150 - }, - { - "epoch": 0.016230472712517752, - "grad_norm": 2.0955846309661865, - "learning_rate": 1.0810810810810812e-05, - "loss": 1.9601, - "step": 160 - }, - { - "epoch": 0.017244877257050113, - "grad_norm": 2.4251277446746826, - "learning_rate": 1.1486486486486488e-05, - "loss": 1.9476, - "step": 170 - }, - { - "epoch": 0.018259281801582473, - "grad_norm": 2.553368091583252, - "learning_rate": 1.2162162162162164e-05, - "loss": 1.9442, - "step": 180 - }, - { - "epoch": 0.01927368634611483, - "grad_norm": 2.3637475967407227, - "learning_rate": 1.283783783783784e-05, - "loss": 1.9366, - "step": 190 - }, - { - "epoch": 0.02028809089064719, - "grad_norm": 0.9606789946556091, - "learning_rate": 1.3513513513513515e-05, - "loss": 1.9412, - "step": 200 - }, - { - "epoch": 0.02130249543517955, - "grad_norm": 4.336134910583496, - "learning_rate": 1.4189189189189189e-05, - "loss": 1.9227, - "step": 210 - }, - { - "epoch": 0.02231689997971191, - "grad_norm": 1.772627830505371, - "learning_rate": 1.4864864864864865e-05, - "loss": 1.9211, - "step": 220 - }, - { - "epoch": 0.02333130452424427, - "grad_norm": 1.0229802131652832, - "learning_rate": 1.554054054054054e-05, - "loss": 1.9189, - "step": 230 - }, - { - "epoch": 0.024345709068776627, - "grad_norm": 8.90110969543457, - "learning_rate": 1.6216216216216218e-05, - "loss": 1.9259, - "step": 240 - }, - { - "epoch": 0.025360113613308987, - "grad_norm": 2.499330997467041, - "learning_rate": 1.6891891891891896e-05, - "loss": 1.9103, - "step": 250 - }, - { - "epoch": 0.026374518157841347, - "grad_norm": 3.7998156547546387, - "learning_rate": 1.756756756756757e-05, - "loss": 1.906, - "step": 260 - }, - { - "epoch": 0.027388922702373707, - "grad_norm": 1.5446523427963257, - "learning_rate": 1.8243243243243244e-05, - "loss": 1.8939, - "step": 270 - }, - { - "epoch": 0.028403327246906068, - "grad_norm": 2.2270143032073975, - "learning_rate": 1.891891891891892e-05, - "loss": 1.8904, - "step": 280 - }, - { - "epoch": 0.029417731791438424, - "grad_norm": 2.437983751296997, - "learning_rate": 1.9594594594594595e-05, - "loss": 1.8847, - "step": 290 - }, - { - "epoch": 0.030432136335970784, - "grad_norm": 1.0774540901184082, - "learning_rate": 1.9999991364406493e-05, - "loss": 1.8904, - "step": 300 - }, - { - "epoch": 0.031446540880503145, - "grad_norm": 4.156506538391113, - "learning_rate": 1.999989421415082e-05, - "loss": 1.89, - "step": 310 - }, - { - "epoch": 0.032460945425035505, - "grad_norm": 1.4925384521484375, - "learning_rate": 1.9999689120199763e-05, - "loss": 1.8914, - "step": 320 - }, - { - "epoch": 0.033475349969567865, - "grad_norm": 1.881105661392212, - "learning_rate": 1.9999376084767213e-05, - "loss": 1.889, - "step": 330 - }, - { - "epoch": 0.034489754514100225, - "grad_norm": 2.714365243911743, - "learning_rate": 1.9998955111232224e-05, - "loss": 1.8742, - "step": 340 - }, - { - "epoch": 0.035504159058632585, - "grad_norm": 1.9924039840698242, - "learning_rate": 1.999842620413899e-05, - "loss": 1.8762, - "step": 350 - }, - { - "epoch": 0.036518563603164945, - "grad_norm": 2.7798962593078613, - "learning_rate": 1.9997789369196786e-05, - "loss": 1.8684, - "step": 360 - }, - { - "epoch": 0.0375329681476973, - "grad_norm": 1.2144362926483154, - "learning_rate": 1.9997044613279913e-05, - "loss": 1.8691, - "step": 370 - }, - { - "epoch": 0.03854737269222966, - "grad_norm": 0.9883942604064941, - "learning_rate": 1.999619194442764e-05, - "loss": 1.8792, - "step": 380 - }, - { - "epoch": 0.03956177723676202, - "grad_norm": 3.192946672439575, - "learning_rate": 1.9995231371844075e-05, - "loss": 1.8783, - "step": 390 - }, - { - "epoch": 0.04057618178129438, - "grad_norm": 1.4811856746673584, - "learning_rate": 1.9994162905898107e-05, - "loss": 1.8676, - "step": 400 - }, - { - "epoch": 0.04159058632582674, - "grad_norm": 1.255608320236206, - "learning_rate": 1.999298655812327e-05, - "loss": 1.8685, - "step": 410 - }, - { - "epoch": 0.0426049908703591, - "grad_norm": 2.124394178390503, - "learning_rate": 1.9991702341217644e-05, - "loss": 1.8588, - "step": 420 - }, - { - "epoch": 0.04361939541489146, - "grad_norm": 4.389142990112305, - "learning_rate": 1.9990310269043673e-05, - "loss": 1.8618, - "step": 430 - }, - { - "epoch": 0.04463379995942382, - "grad_norm": 3.153005838394165, - "learning_rate": 1.9988810356628065e-05, - "loss": 1.8611, - "step": 440 - }, - { - "epoch": 0.04564820450395618, - "grad_norm": 4.043641090393066, - "learning_rate": 1.99872026201616e-05, - "loss": 1.8579, - "step": 450 - }, - { - "epoch": 0.04666260904848854, - "grad_norm": 1.2698462009429932, - "learning_rate": 1.998548707699896e-05, - "loss": 1.8504, - "step": 460 - }, - { - "epoch": 0.047677013593020894, - "grad_norm": 2.646603584289551, - "learning_rate": 1.998366374565855e-05, - "loss": 1.8474, - "step": 470 - }, - { - "epoch": 0.048691418137553254, - "grad_norm": 1.652426838874817, - "learning_rate": 1.9981732645822286e-05, - "loss": 1.849, - "step": 480 - }, - { - "epoch": 0.049705822682085614, - "grad_norm": 0.9020372629165649, - "learning_rate": 1.99796937983354e-05, - "loss": 1.8547, - "step": 490 - }, - { - "epoch": 0.050720227226617974, - "grad_norm": 1.2602753639221191, - "learning_rate": 1.9977547225206194e-05, - "loss": 1.8405, - "step": 500 - }, - { - "epoch": 0.051734631771150334, - "grad_norm": 2.7725753784179688, - "learning_rate": 1.9975292949605818e-05, - "loss": 1.8612, - "step": 510 - }, - { - "epoch": 0.052749036315682694, - "grad_norm": 5.631360054016113, - "learning_rate": 1.9972930995868015e-05, - "loss": 1.842, - "step": 520 - }, - { - "epoch": 0.053763440860215055, - "grad_norm": 3.98781681060791, - "learning_rate": 1.9970461389488854e-05, - "loss": 1.8503, - "step": 530 - }, - { - "epoch": 0.054777845404747415, - "grad_norm": 2.422701597213745, - "learning_rate": 1.996788415712646e-05, - "loss": 1.8365, - "step": 540 - }, - { - "epoch": 0.055792249949279775, - "grad_norm": 1.3598812818527222, - "learning_rate": 1.996519932660073e-05, - "loss": 1.84, - "step": 550 - }, - { - "epoch": 0.056806654493812135, - "grad_norm": 2.006317377090454, - "learning_rate": 1.996240692689302e-05, - "loss": 1.842, - "step": 560 - }, - { - "epoch": 0.05782105903834449, - "grad_norm": 0.8607438802719116, - "learning_rate": 1.9959506988145842e-05, - "loss": 1.8399, - "step": 570 - }, - { - "epoch": 0.05883546358287685, - "grad_norm": 1.448121428489685, - "learning_rate": 1.995649954166254e-05, - "loss": 1.8402, - "step": 580 - }, - { - "epoch": 0.05984986812740921, - "grad_norm": 2.892017364501953, - "learning_rate": 1.9953384619906945e-05, - "loss": 1.8393, - "step": 590 - }, - { - "epoch": 0.06086427267194157, - "grad_norm": 0.8938267230987549, - "learning_rate": 1.995016225650303e-05, - "loss": 1.837, - "step": 600 - }, - { - "epoch": 0.06187867721647393, - "grad_norm": 2.5937340259552, - "learning_rate": 1.9946832486234545e-05, - "loss": 1.8505, - "step": 610 - }, - { - "epoch": 0.06289308176100629, - "grad_norm": 1.0606496334075928, - "learning_rate": 1.9943395345044644e-05, - "loss": 1.8294, - "step": 620 - }, - { - "epoch": 0.06390748630553865, - "grad_norm": 0.8565366268157959, - "learning_rate": 1.9939850870035484e-05, - "loss": 1.8311, - "step": 630 - }, - { - "epoch": 0.06492189085007101, - "grad_norm": 0.9362851977348328, - "learning_rate": 1.9936199099467846e-05, - "loss": 1.8315, - "step": 640 - }, - { - "epoch": 0.06593629539460337, - "grad_norm": 2.4915812015533447, - "learning_rate": 1.993244007276071e-05, - "loss": 1.847, - "step": 650 - }, - { - "epoch": 0.06695069993913573, - "grad_norm": 1.718644142150879, - "learning_rate": 1.9928573830490828e-05, - "loss": 1.8296, - "step": 660 - }, - { - "epoch": 0.06796510448366809, - "grad_norm": 1.3261979818344116, - "learning_rate": 1.9924600414392284e-05, - "loss": 1.8211, - "step": 670 - }, - { - "epoch": 0.06897950902820045, - "grad_norm": 2.388695478439331, - "learning_rate": 1.992051986735606e-05, - "loss": 1.8341, - "step": 680 - }, - { - "epoch": 0.06999391357273281, - "grad_norm": 2.140284776687622, - "learning_rate": 1.9916332233429556e-05, - "loss": 1.8262, - "step": 690 - }, - { - "epoch": 0.07100831811726517, - "grad_norm": 1.6555594205856323, - "learning_rate": 1.9912037557816112e-05, - "loss": 1.8269, - "step": 700 - }, - { - "epoch": 0.07202272266179753, - "grad_norm": 0.8342475295066833, - "learning_rate": 1.990763588687454e-05, - "loss": 1.8287, - "step": 710 - }, - { - "epoch": 0.07303712720632989, - "grad_norm": 1.015068769454956, - "learning_rate": 1.99031272681186e-05, - "loss": 1.8267, - "step": 720 - }, - { - "epoch": 0.07405153175086224, - "grad_norm": 1.4728447198867798, - "learning_rate": 1.9898511750216505e-05, - "loss": 1.836, - "step": 730 - }, - { - "epoch": 0.0750659362953946, - "grad_norm": 1.2636910676956177, - "learning_rate": 1.9893789382990388e-05, - "loss": 1.8184, - "step": 740 - }, - { - "epoch": 0.07608034083992696, - "grad_norm": 1.2953280210494995, - "learning_rate": 1.9888960217415766e-05, - "loss": 1.819, - "step": 750 - }, - { - "epoch": 0.07709474538445932, - "grad_norm": 1.631601333618164, - "learning_rate": 1.988402430562099e-05, - "loss": 1.8233, - "step": 760 - }, - { - "epoch": 0.07810914992899168, - "grad_norm": 0.8191771507263184, - "learning_rate": 1.9878981700886677e-05, - "loss": 1.8302, - "step": 770 - }, - { - "epoch": 0.07912355447352404, - "grad_norm": 1.2076416015625, - "learning_rate": 1.9873832457645142e-05, - "loss": 1.8226, - "step": 780 - }, - { - "epoch": 0.0801379590180564, - "grad_norm": 1.7032721042633057, - "learning_rate": 1.986857663147981e-05, - "loss": 1.8157, - "step": 790 - }, - { - "epoch": 0.08115236356258876, - "grad_norm": 1.3509644269943237, - "learning_rate": 1.986321427912461e-05, - "loss": 1.8177, - "step": 800 - }, - { - "epoch": 0.08216676810712112, - "grad_norm": 2.7909610271453857, - "learning_rate": 1.9857745458463363e-05, - "loss": 1.8149, - "step": 810 - }, - { - "epoch": 0.08318117265165348, - "grad_norm": 1.1940338611602783, - "learning_rate": 1.9852170228529165e-05, - "loss": 1.8289, - "step": 820 - }, - { - "epoch": 0.08419557719618584, - "grad_norm": 2.0010592937469482, - "learning_rate": 1.9846488649503754e-05, - "loss": 1.8225, - "step": 830 - }, - { - "epoch": 0.0852099817407182, - "grad_norm": 3.111947774887085, - "learning_rate": 1.9840700782716836e-05, - "loss": 1.8233, - "step": 840 - }, - { - "epoch": 0.08622438628525056, - "grad_norm": 2.125370979309082, - "learning_rate": 1.9834806690645442e-05, - "loss": 1.8105, - "step": 850 - }, - { - "epoch": 0.08723879082978292, - "grad_norm": 4.367944717407227, - "learning_rate": 1.9828806436913254e-05, - "loss": 1.8141, - "step": 860 - }, - { - "epoch": 0.08825319537431528, - "grad_norm": 2.4379868507385254, - "learning_rate": 1.9822700086289915e-05, - "loss": 1.8205, - "step": 870 - }, - { - "epoch": 0.08926759991884764, - "grad_norm": 1.4165778160095215, - "learning_rate": 1.981648770469033e-05, - "loss": 1.8124, - "step": 880 - }, - { - "epoch": 0.09028200446338, - "grad_norm": 1.085752010345459, - "learning_rate": 1.981016935917395e-05, - "loss": 1.803, - "step": 890 - }, - { - "epoch": 0.09129640900791236, - "grad_norm": 2.2110702991485596, - "learning_rate": 1.980374511794405e-05, - "loss": 1.8091, - "step": 900 - }, - { - "epoch": 0.09231081355244472, - "grad_norm": 2.586404323577881, - "learning_rate": 1.9797215050346996e-05, - "loss": 1.8194, - "step": 910 - }, - { - "epoch": 0.09332521809697708, - "grad_norm": 1.117167353630066, - "learning_rate": 1.9790579226871506e-05, - "loss": 1.8038, - "step": 920 - }, - { - "epoch": 0.09433962264150944, - "grad_norm": 3.552520751953125, - "learning_rate": 1.9783837719147855e-05, - "loss": 1.807, - "step": 930 - }, - { - "epoch": 0.09535402718604179, - "grad_norm": 1.6371164321899414, - "learning_rate": 1.9776990599947148e-05, - "loss": 1.8105, - "step": 940 - }, - { - "epoch": 0.09636843173057415, - "grad_norm": 1.0059300661087036, - "learning_rate": 1.97700379431805e-05, - "loss": 1.8131, - "step": 950 - }, - { - "epoch": 0.09738283627510651, - "grad_norm": 0.9066600799560547, - "learning_rate": 1.976297982389825e-05, - "loss": 1.8064, - "step": 960 - }, - { - "epoch": 0.09839724081963887, - "grad_norm": 1.1495178937911987, - "learning_rate": 1.975581631828915e-05, - "loss": 1.8145, - "step": 970 - }, - { - "epoch": 0.09941164536417123, - "grad_norm": 0.8007872104644775, - "learning_rate": 1.9748547503679547e-05, - "loss": 1.8033, - "step": 980 - }, - { - "epoch": 0.10042604990870359, - "grad_norm": 1.686836838722229, - "learning_rate": 1.9741173458532544e-05, - "loss": 1.8034, - "step": 990 - }, - { - "epoch": 0.10144045445323595, - "grad_norm": 0.9698904752731323, - "learning_rate": 1.973369426244715e-05, - "loss": 1.8069, - "step": 1000 - }, - { - "epoch": 0.10245485899776831, - "grad_norm": 1.0635056495666504, - "learning_rate": 1.9726109996157423e-05, - "loss": 1.7933, - "step": 1010 - }, - { - "epoch": 0.10346926354230067, - "grad_norm": 2.791302442550659, - "learning_rate": 1.9718420741531604e-05, - "loss": 1.8006, - "step": 1020 - }, - { - "epoch": 0.10448366808683303, - "grad_norm": 1.191218614578247, - "learning_rate": 1.9710626581571228e-05, - "loss": 1.8072, - "step": 1030 - }, - { - "epoch": 0.10549807263136539, - "grad_norm": 3.768235206604004, - "learning_rate": 1.9702727600410222e-05, - "loss": 1.8044, - "step": 1040 - }, - { - "epoch": 0.10651247717589775, - "grad_norm": 1.0058181285858154, - "learning_rate": 1.9694723883314017e-05, - "loss": 1.8022, - "step": 1050 - }, - { - "epoch": 0.10752688172043011, - "grad_norm": 1.088187336921692, - "learning_rate": 1.9686615516678605e-05, - "loss": 1.8034, - "step": 1060 - }, - { - "epoch": 0.10854128626496247, - "grad_norm": 0.9366543889045715, - "learning_rate": 1.9678402588029618e-05, - "loss": 1.8067, - "step": 1070 - }, - { - "epoch": 0.10955569080949483, - "grad_norm": 0.9639139771461487, - "learning_rate": 1.9670085186021377e-05, - "loss": 1.8023, - "step": 1080 - }, - { - "epoch": 0.11057009535402719, - "grad_norm": 1.1969596147537231, - "learning_rate": 1.9661663400435948e-05, - "loss": 1.7963, - "step": 1090 - }, - { - "epoch": 0.11158449989855955, - "grad_norm": 1.591292142868042, - "learning_rate": 1.9653137322182152e-05, - "loss": 1.7989, - "step": 1100 - }, - { - "epoch": 0.11259890444309191, - "grad_norm": 3.7504684925079346, - "learning_rate": 1.9644507043294606e-05, - "loss": 1.8056, - "step": 1110 - }, - { - "epoch": 0.11361330898762427, - "grad_norm": 1.5959354639053345, - "learning_rate": 1.9635772656932715e-05, - "loss": 1.796, - "step": 1120 - }, - { - "epoch": 0.11462771353215663, - "grad_norm": 1.405653715133667, - "learning_rate": 1.962693425737967e-05, - "loss": 1.7903, - "step": 1130 - }, - { - "epoch": 0.11564211807668898, - "grad_norm": 4.938650608062744, - "learning_rate": 1.9617991940041433e-05, - "loss": 1.7896, - "step": 1140 - }, - { - "epoch": 0.11665652262122134, - "grad_norm": 0.8536686301231384, - "learning_rate": 1.96089458014457e-05, - "loss": 1.8012, - "step": 1150 - }, - { - "epoch": 0.1176709271657537, - "grad_norm": 0.9341592788696289, - "learning_rate": 1.9599795939240867e-05, - "loss": 1.7984, - "step": 1160 - }, - { - "epoch": 0.11868533171028606, - "grad_norm": 1.3800748586654663, - "learning_rate": 1.959054245219498e-05, - "loss": 1.8009, - "step": 1170 - }, - { - "epoch": 0.11969973625481842, - "grad_norm": 1.7865543365478516, - "learning_rate": 1.958118544019464e-05, - "loss": 1.7914, - "step": 1180 - }, - { - "epoch": 0.12071414079935078, - "grad_norm": 0.9369724988937378, - "learning_rate": 1.9571725004243973e-05, - "loss": 1.7956, - "step": 1190 - }, - { - "epoch": 0.12172854534388314, - "grad_norm": 0.8227797746658325, - "learning_rate": 1.9562161246463495e-05, - "loss": 1.7982, - "step": 1200 - }, - { - "epoch": 0.1227429498884155, - "grad_norm": 2.797832727432251, - "learning_rate": 1.955249427008904e-05, - "loss": 1.7984, - "step": 1210 - }, - { - "epoch": 0.12375735443294786, - "grad_norm": 0.8574436902999878, - "learning_rate": 1.9542724179470616e-05, - "loss": 1.7961, - "step": 1220 - }, - { - "epoch": 0.12477175897748022, - "grad_norm": 2.017897605895996, - "learning_rate": 1.953285108007132e-05, - "loss": 1.7962, - "step": 1230 - }, - { - "epoch": 0.12578616352201258, - "grad_norm": 1.51298189163208, - "learning_rate": 1.952287507846615e-05, - "loss": 1.7989, - "step": 1240 - }, - { - "epoch": 0.12680056806654494, - "grad_norm": 1.008192539215088, - "learning_rate": 1.9512796282340906e-05, - "loss": 1.7944, - "step": 1250 - }, - { - "epoch": 0.1278149726110773, - "grad_norm": 1.8913826942443848, - "learning_rate": 1.950261480049098e-05, - "loss": 1.7808, - "step": 1260 - }, - { - "epoch": 0.12882937715560966, - "grad_norm": 3.443873882293701, - "learning_rate": 1.9492330742820216e-05, - "loss": 1.7933, - "step": 1270 - }, - { - "epoch": 0.12984378170014202, - "grad_norm": 1.1258774995803833, - "learning_rate": 1.9481944220339705e-05, - "loss": 1.7862, - "step": 1280 - }, - { - "epoch": 0.13085818624467438, - "grad_norm": 2.374582052230835, - "learning_rate": 1.9471455345166595e-05, - "loss": 1.79, - "step": 1290 - }, - { - "epoch": 0.13187259078920674, - "grad_norm": 1.2120611667633057, - "learning_rate": 1.946086423052288e-05, - "loss": 1.785, - "step": 1300 - }, - { - "epoch": 0.1328869953337391, - "grad_norm": 2.8109161853790283, - "learning_rate": 1.9450170990734174e-05, - "loss": 1.7911, - "step": 1310 - }, - { - "epoch": 0.13390139987827146, - "grad_norm": 0.9759477376937866, - "learning_rate": 1.943937574122848e-05, - "loss": 1.7836, - "step": 1320 - }, - { - "epoch": 0.13491580442280382, - "grad_norm": 2.4841973781585693, - "learning_rate": 1.9428478598534943e-05, - "loss": 1.7896, - "step": 1330 - }, - { - "epoch": 0.13593020896733618, - "grad_norm": 0.9664813876152039, - "learning_rate": 1.941747968028259e-05, - "loss": 1.7797, - "step": 1340 - }, - { - "epoch": 0.13694461351186854, - "grad_norm": 1.3424862623214722, - "learning_rate": 1.940637910519907e-05, - "loss": 1.7907, - "step": 1350 - }, - { - "epoch": 0.1379590180564009, - "grad_norm": 1.039871096611023, - "learning_rate": 1.9395176993109357e-05, - "loss": 1.791, - "step": 1360 - }, - { - "epoch": 0.13897342260093326, - "grad_norm": 1.8651899099349976, - "learning_rate": 1.938387346493447e-05, - "loss": 1.7847, - "step": 1370 - }, - { - "epoch": 0.13998782714546562, - "grad_norm": 1.461659550666809, - "learning_rate": 1.9372468642690156e-05, - "loss": 1.7741, - "step": 1380 - }, - { - "epoch": 0.14100223168999798, - "grad_norm": 0.9491080641746521, - "learning_rate": 1.9360962649485585e-05, - "loss": 1.7871, - "step": 1390 - }, - { - "epoch": 0.14201663623453034, - "grad_norm": 0.8328019976615906, - "learning_rate": 1.9349355609522014e-05, - "loss": 1.7795, - "step": 1400 - }, - { - "epoch": 0.1430310407790627, - "grad_norm": 1.1860322952270508, - "learning_rate": 1.9337647648091445e-05, - "loss": 1.7762, - "step": 1410 - }, - { - "epoch": 0.14404544532359506, - "grad_norm": 1.455818772315979, - "learning_rate": 1.9325838891575284e-05, - "loss": 1.7796, - "step": 1420 - }, - { - "epoch": 0.14505984986812742, - "grad_norm": 2.3588056564331055, - "learning_rate": 1.9313929467442953e-05, - "loss": 1.7819, - "step": 1430 - }, - { - "epoch": 0.14607425441265978, - "grad_norm": 1.2702618837356567, - "learning_rate": 1.9301919504250542e-05, - "loss": 1.7851, - "step": 1440 - }, - { - "epoch": 0.14708865895719211, - "grad_norm": 0.9718762040138245, - "learning_rate": 1.92898091316394e-05, - "loss": 1.7905, - "step": 1450 - }, - { - "epoch": 0.14810306350172447, - "grad_norm": 0.7884719371795654, - "learning_rate": 1.9277598480334747e-05, - "loss": 1.7844, - "step": 1460 - }, - { - "epoch": 0.14911746804625683, - "grad_norm": 0.860934853553772, - "learning_rate": 1.9265287682144263e-05, - "loss": 1.7888, - "step": 1470 - }, - { - "epoch": 0.1501318725907892, - "grad_norm": 1.3632832765579224, - "learning_rate": 1.9252876869956655e-05, - "loss": 1.7835, - "step": 1480 - }, - { - "epoch": 0.15114627713532156, - "grad_norm": 0.949360191822052, - "learning_rate": 1.9240366177740238e-05, - "loss": 1.7835, - "step": 1490 - }, - { - "epoch": 0.15216068167985392, - "grad_norm": 1.1235259771347046, - "learning_rate": 1.922775574054147e-05, - "loss": 1.783, - "step": 1500 - }, - { - "epoch": 0.15317508622438628, - "grad_norm": 1.436371922492981, - "learning_rate": 1.9215045694483514e-05, - "loss": 1.7763, - "step": 1510 - }, - { - "epoch": 0.15418949076891864, - "grad_norm": 1.3503382205963135, - "learning_rate": 1.9202236176764754e-05, - "loss": 1.7753, - "step": 1520 - }, - { - "epoch": 0.155203895313451, - "grad_norm": 2.278960704803467, - "learning_rate": 1.918932732565732e-05, - "loss": 1.7807, - "step": 1530 - }, - { - "epoch": 0.15621829985798336, - "grad_norm": 0.999164879322052, - "learning_rate": 1.91763192805056e-05, - "loss": 1.772, - "step": 1540 - }, - { - "epoch": 0.15723270440251572, - "grad_norm": 2.4769155979156494, - "learning_rate": 1.9163212181724718e-05, - "loss": 1.7792, - "step": 1550 - }, - { - "epoch": 0.15824710894704808, - "grad_norm": 1.260796070098877, - "learning_rate": 1.915000617079904e-05, - "loss": 1.7866, - "step": 1560 - }, - { - "epoch": 0.15926151349158044, - "grad_norm": 0.8366189002990723, - "learning_rate": 1.9136701390280644e-05, - "loss": 1.7737, - "step": 1570 - }, - { - "epoch": 0.1602759180361128, - "grad_norm": 1.9065474271774292, - "learning_rate": 1.9123297983787757e-05, - "loss": 1.7799, - "step": 1580 - }, - { - "epoch": 0.16129032258064516, - "grad_norm": 0.82051020860672, - "learning_rate": 1.910979609600324e-05, - "loss": 1.7852, - "step": 1590 - }, - { - "epoch": 0.16230472712517752, - "grad_norm": 2.8275108337402344, - "learning_rate": 1.9096195872672995e-05, - "loss": 1.7793, - "step": 1600 - }, - { - "epoch": 0.16331913166970988, - "grad_norm": 1.494033694267273, - "learning_rate": 1.9082497460604415e-05, - "loss": 1.7843, - "step": 1610 - }, - { - "epoch": 0.16433353621424224, - "grad_norm": 3.623962879180908, - "learning_rate": 1.9068701007664786e-05, - "loss": 1.7755, - "step": 1620 - }, - { - "epoch": 0.1653479407587746, - "grad_norm": 0.8354327082633972, - "learning_rate": 1.9054806662779692e-05, - "loss": 1.7728, - "step": 1630 - }, - { - "epoch": 0.16636234530330696, - "grad_norm": 1.8714085817337036, - "learning_rate": 1.9040814575931413e-05, - "loss": 1.7744, - "step": 1640 - }, - { - "epoch": 0.16737674984783932, - "grad_norm": 0.7831736207008362, - "learning_rate": 1.9026724898157306e-05, - "loss": 1.7735, - "step": 1650 - }, - { - "epoch": 0.16839115439237168, - "grad_norm": 1.0116039514541626, - "learning_rate": 1.901253778154817e-05, - "loss": 1.7714, - "step": 1660 - }, - { - "epoch": 0.16940555893690404, - "grad_norm": 0.8798742890357971, - "learning_rate": 1.8998253379246597e-05, - "loss": 1.7669, - "step": 1670 - }, - { - "epoch": 0.1704199634814364, - "grad_norm": 1.6103144884109497, - "learning_rate": 1.898387184544534e-05, - "loss": 1.7739, - "step": 1680 - }, - { - "epoch": 0.17143436802596876, - "grad_norm": 0.7855607271194458, - "learning_rate": 1.8969393335385632e-05, - "loss": 1.7703, - "step": 1690 - }, - { - "epoch": 0.17244877257050112, - "grad_norm": 2.19303035736084, - "learning_rate": 1.8954818005355514e-05, - "loss": 1.7725, - "step": 1700 - }, - { - "epoch": 0.17346317711503348, - "grad_norm": 0.8839064240455627, - "learning_rate": 1.8940146012688148e-05, - "loss": 1.7666, - "step": 1710 - }, - { - "epoch": 0.17447758165956584, - "grad_norm": 1.2407820224761963, - "learning_rate": 1.892537751576012e-05, - "loss": 1.7753, - "step": 1720 - }, - { - "epoch": 0.1754919862040982, - "grad_norm": 0.8608596324920654, - "learning_rate": 1.8910512673989728e-05, - "loss": 1.7608, - "step": 1730 - }, - { - "epoch": 0.17650639074863056, - "grad_norm": 1.5777356624603271, - "learning_rate": 1.8895551647835272e-05, - "loss": 1.772, - "step": 1740 - }, - { - "epoch": 0.17752079529316292, - "grad_norm": 2.6893134117126465, - "learning_rate": 1.8880494598793296e-05, - "loss": 1.7689, - "step": 1750 - }, - { - "epoch": 0.17853519983769528, - "grad_norm": 0.9048823714256287, - "learning_rate": 1.8865341689396876e-05, - "loss": 1.771, - "step": 1760 - }, - { - "epoch": 0.17954960438222764, - "grad_norm": 1.3236117362976074, - "learning_rate": 1.8850093083213843e-05, - "loss": 1.7683, - "step": 1770 - }, - { - "epoch": 0.18056400892676, - "grad_norm": 1.3367910385131836, - "learning_rate": 1.8834748944845028e-05, - "loss": 1.7702, - "step": 1780 - }, - { - "epoch": 0.18157841347129236, - "grad_norm": 0.9908106327056885, - "learning_rate": 1.881930943992249e-05, - "loss": 1.7742, - "step": 1790 - }, - { - "epoch": 0.18259281801582472, - "grad_norm": 0.7877388000488281, - "learning_rate": 1.8803774735107708e-05, - "loss": 1.7698, - "step": 1800 - }, - { - "epoch": 0.18360722256035708, - "grad_norm": 0.8282954692840576, - "learning_rate": 1.87881449980898e-05, - "loss": 1.7725, - "step": 1810 - }, - { - "epoch": 0.18462162710488944, - "grad_norm": 1.0914045572280884, - "learning_rate": 1.8772420397583708e-05, - "loss": 1.7716, - "step": 1820 - }, - { - "epoch": 0.1856360316494218, - "grad_norm": 3.130774974822998, - "learning_rate": 1.875660110332838e-05, - "loss": 1.7698, - "step": 1830 - }, - { - "epoch": 0.18665043619395416, - "grad_norm": 0.8744616508483887, - "learning_rate": 1.8740687286084927e-05, - "loss": 1.7707, - "step": 1840 - }, - { - "epoch": 0.18766484073848652, - "grad_norm": 2.4027020931243896, - "learning_rate": 1.872467911763479e-05, - "loss": 1.7675, - "step": 1850 - }, - { - "epoch": 0.18867924528301888, - "grad_norm": 0.8547419309616089, - "learning_rate": 1.870857677077788e-05, - "loss": 1.7676, - "step": 1860 - }, - { - "epoch": 0.18969364982755121, - "grad_norm": 1.0207130908966064, - "learning_rate": 1.8692380419330728e-05, - "loss": 1.7652, - "step": 1870 - }, - { - "epoch": 0.19070805437208357, - "grad_norm": 1.7680976390838623, - "learning_rate": 1.8676090238124582e-05, - "loss": 1.7748, - "step": 1880 - }, - { - "epoch": 0.19172245891661593, - "grad_norm": 1.992583155632019, - "learning_rate": 1.865970640300353e-05, - "loss": 1.7719, - "step": 1890 - }, - { - "epoch": 0.1927368634611483, - "grad_norm": 2.4844202995300293, - "learning_rate": 1.8643229090822624e-05, - "loss": 1.7718, - "step": 1900 - }, - { - "epoch": 0.19375126800568065, - "grad_norm": 0.7880557775497437, - "learning_rate": 1.8626658479445935e-05, - "loss": 1.762, - "step": 1910 - }, - { - "epoch": 0.19476567255021301, - "grad_norm": 1.8340610265731812, - "learning_rate": 1.8609994747744663e-05, - "loss": 1.7726, - "step": 1920 - }, - { - "epoch": 0.19578007709474538, - "grad_norm": 1.2467340230941772, - "learning_rate": 1.8593238075595184e-05, - "loss": 1.7705, - "step": 1930 - }, - { - "epoch": 0.19679448163927774, - "grad_norm": 1.5034763813018799, - "learning_rate": 1.8576388643877127e-05, - "loss": 1.7643, - "step": 1940 - }, - { - "epoch": 0.1978088861838101, - "grad_norm": 1.3206113576889038, - "learning_rate": 1.8559446634471408e-05, - "loss": 1.7766, - "step": 1950 - }, - { - "epoch": 0.19882329072834246, - "grad_norm": 1.5309138298034668, - "learning_rate": 1.854241223025827e-05, - "loss": 1.7761, - "step": 1960 - }, - { - "epoch": 0.19983769527287482, - "grad_norm": 1.0647454261779785, - "learning_rate": 1.8525285615115312e-05, - "loss": 1.7587, - "step": 1970 - }, - { - "epoch": 0.20085209981740718, - "grad_norm": 0.7944309711456299, - "learning_rate": 1.8508066973915505e-05, - "loss": 1.7737, - "step": 1980 - }, - { - "epoch": 0.20186650436193954, - "grad_norm": 0.8133752942085266, - "learning_rate": 1.8490756492525185e-05, - "loss": 1.7626, - "step": 1990 - }, - { - "epoch": 0.2028809089064719, - "grad_norm": 1.517412543296814, - "learning_rate": 1.8473354357802074e-05, - "loss": 1.7704, - "step": 2000 - }, - { - "epoch": 0.20389531345100426, - "grad_norm": 1.0983048677444458, - "learning_rate": 1.845586075759322e-05, - "loss": 1.7668, - "step": 2010 - }, - { - "epoch": 0.20490971799553662, - "grad_norm": 1.006282091140747, - "learning_rate": 1.843827588073301e-05, - "loss": 1.7523, - "step": 2020 - }, - { - "epoch": 0.20592412254006898, - "grad_norm": 2.199679136276245, - "learning_rate": 1.842059991704111e-05, - "loss": 1.7656, - "step": 2030 - }, - { - "epoch": 0.20693852708460134, - "grad_norm": 1.0698515176773071, - "learning_rate": 1.8402833057320427e-05, - "loss": 1.7641, - "step": 2040 - }, - { - "epoch": 0.2079529316291337, - "grad_norm": 1.050663709640503, - "learning_rate": 1.8384975493355034e-05, - "loss": 1.7571, - "step": 2050 - }, - { - "epoch": 0.20896733617366606, - "grad_norm": 2.233557939529419, - "learning_rate": 1.8367027417908115e-05, - "loss": 1.7581, - "step": 2060 - }, - { - "epoch": 0.20998174071819842, - "grad_norm": 1.9631679058074951, - "learning_rate": 1.8348989024719884e-05, - "loss": 1.7524, - "step": 2070 - }, - { - "epoch": 0.21099614526273078, - "grad_norm": 1.1947954893112183, - "learning_rate": 1.8330860508505478e-05, - "loss": 1.7633, - "step": 2080 - }, - { - "epoch": 0.21201054980726314, - "grad_norm": 1.634669542312622, - "learning_rate": 1.8312642064952868e-05, - "loss": 1.7704, - "step": 2090 - }, - { - "epoch": 0.2130249543517955, - "grad_norm": 3.076327085494995, - "learning_rate": 1.829433389072075e-05, - "loss": 1.7605, - "step": 2100 - }, - { - "epoch": 0.21403935889632786, - "grad_norm": 1.1550085544586182, - "learning_rate": 1.8275936183436417e-05, - "loss": 1.7568, - "step": 2110 - }, - { - "epoch": 0.21505376344086022, - "grad_norm": 2.065035104751587, - "learning_rate": 1.825744914169361e-05, - "loss": 1.7549, - "step": 2120 - }, - { - "epoch": 0.21606816798539258, - "grad_norm": 1.4134364128112793, - "learning_rate": 1.823887296505041e-05, - "loss": 1.7646, - "step": 2130 - }, - { - "epoch": 0.21708257252992494, - "grad_norm": 0.8231322169303894, - "learning_rate": 1.822020785402705e-05, - "loss": 1.7617, - "step": 2140 - }, - { - "epoch": 0.2180969770744573, - "grad_norm": 1.5070151090621948, - "learning_rate": 1.8201454010103764e-05, - "loss": 1.757, - "step": 2150 - }, - { - "epoch": 0.21911138161898966, - "grad_norm": 1.1289032697677612, - "learning_rate": 1.818261163571862e-05, - "loss": 1.7493, - "step": 2160 - }, - { - "epoch": 0.22012578616352202, - "grad_norm": 0.9477230310440063, - "learning_rate": 1.8163680934265322e-05, - "loss": 1.7572, - "step": 2170 - }, - { - "epoch": 0.22114019070805438, - "grad_norm": 1.8031604290008545, - "learning_rate": 1.8144662110091015e-05, - "loss": 1.7594, - "step": 2180 - }, - { - "epoch": 0.22215459525258674, - "grad_norm": 1.350406527519226, - "learning_rate": 1.812555536849409e-05, - "loss": 1.7643, - "step": 2190 - }, - { - "epoch": 0.2231689997971191, - "grad_norm": 0.8511176109313965, - "learning_rate": 1.8106360915721956e-05, - "loss": 1.7577, - "step": 2200 - }, - { - "epoch": 0.22418340434165146, - "grad_norm": 1.474684238433838, - "learning_rate": 1.8087078958968824e-05, - "loss": 1.7538, - "step": 2210 - }, - { - "epoch": 0.22519780888618382, - "grad_norm": 1.1089171171188354, - "learning_rate": 1.806770970637346e-05, - "loss": 1.7528, - "step": 2220 - }, - { - "epoch": 0.22621221343071618, - "grad_norm": 3.064673662185669, - "learning_rate": 1.8048253367016946e-05, - "loss": 1.7563, - "step": 2230 - }, - { - "epoch": 0.22722661797524854, - "grad_norm": 0.8484387397766113, - "learning_rate": 1.802871015092042e-05, - "loss": 1.7593, - "step": 2240 - }, - { - "epoch": 0.2282410225197809, - "grad_norm": 0.743966817855835, - "learning_rate": 1.8009080269042813e-05, - "loss": 1.7584, - "step": 2250 - }, - { - "epoch": 0.22925542706431326, - "grad_norm": 1.109473466873169, - "learning_rate": 1.7989363933278567e-05, - "loss": 1.7554, - "step": 2260 - }, - { - "epoch": 0.23026983160884562, - "grad_norm": 1.130275845527649, - "learning_rate": 1.796956135645534e-05, - "loss": 1.7596, - "step": 2270 - }, - { - "epoch": 0.23128423615337795, - "grad_norm": 0.8135804533958435, - "learning_rate": 1.794967275233173e-05, - "loss": 1.751, - "step": 2280 - }, - { - "epoch": 0.2322986406979103, - "grad_norm": 2.8125569820404053, - "learning_rate": 1.7929698335594952e-05, - "loss": 1.7504, - "step": 2290 - }, - { - "epoch": 0.23331304524244267, - "grad_norm": 1.8101365566253662, - "learning_rate": 1.790963832185852e-05, - "loss": 1.7526, - "step": 2300 - }, - { - "epoch": 0.23432744978697503, - "grad_norm": 1.2420798540115356, - "learning_rate": 1.7889492927659927e-05, - "loss": 1.7477, - "step": 2310 - }, - { - "epoch": 0.2353418543315074, - "grad_norm": 0.8301209211349487, - "learning_rate": 1.7869262370458304e-05, - "loss": 1.7615, - "step": 2320 - }, - { - "epoch": 0.23635625887603975, - "grad_norm": 2.2448301315307617, - "learning_rate": 1.7848946868632068e-05, - "loss": 1.7517, - "step": 2330 - }, - { - "epoch": 0.23737066342057211, - "grad_norm": 3.0445401668548584, - "learning_rate": 1.7828546641476577e-05, - "loss": 1.7683, - "step": 2340 - }, - { - "epoch": 0.23838506796510447, - "grad_norm": 2.0939691066741943, - "learning_rate": 1.7808061909201747e-05, - "loss": 1.7514, - "step": 2350 - }, - { - "epoch": 0.23939947250963683, - "grad_norm": 0.8633739352226257, - "learning_rate": 1.7787492892929692e-05, - "loss": 1.7525, - "step": 2360 - }, - { - "epoch": 0.2404138770541692, - "grad_norm": 1.9185552597045898, - "learning_rate": 1.7766839814692322e-05, - "loss": 1.7531, - "step": 2370 - }, - { - "epoch": 0.24142828159870156, - "grad_norm": 1.5382554531097412, - "learning_rate": 1.7746102897428946e-05, - "loss": 1.7533, - "step": 2380 - }, - { - "epoch": 0.24244268614323392, - "grad_norm": 1.7119652032852173, - "learning_rate": 1.772528236498389e-05, - "loss": 1.7557, - "step": 2390 - }, - { - "epoch": 0.24345709068776628, - "grad_norm": 1.9632717370986938, - "learning_rate": 1.7704378442104052e-05, - "loss": 1.7492, - "step": 2400 - }, - { - "epoch": 0.24447149523229864, - "grad_norm": 1.2674551010131836, - "learning_rate": 1.768339135443648e-05, - "loss": 1.7519, - "step": 2410 - }, - { - "epoch": 0.245485899776831, - "grad_norm": 1.6057243347167969, - "learning_rate": 1.766232132852596e-05, - "loss": 1.7537, - "step": 2420 - }, - { - "epoch": 0.24650030432136336, - "grad_norm": 1.7823708057403564, - "learning_rate": 1.7641168591812537e-05, - "loss": 1.7585, - "step": 2430 - }, - { - "epoch": 0.24751470886589572, - "grad_norm": 1.0708705186843872, - "learning_rate": 1.7619933372629083e-05, - "loss": 1.7528, - "step": 2440 - }, - { - "epoch": 0.24852911341042808, - "grad_norm": 1.1098607778549194, - "learning_rate": 1.7598615900198833e-05, - "loss": 1.7514, - "step": 2450 - }, - { - "epoch": 0.24954351795496044, - "grad_norm": 1.7395727634429932, - "learning_rate": 1.757721640463289e-05, - "loss": 1.7521, - "step": 2460 - }, - { - "epoch": 0.2505579224994928, - "grad_norm": 1.0957585573196411, - "learning_rate": 1.755573511692776e-05, - "loss": 1.7535, - "step": 2470 - }, - { - "epoch": 0.25157232704402516, - "grad_norm": 1.601169228553772, - "learning_rate": 1.7534172268962853e-05, - "loss": 1.7549, - "step": 2480 - }, - { - "epoch": 0.2525867315885575, - "grad_norm": 2.0877418518066406, - "learning_rate": 1.7512528093497986e-05, - "loss": 1.7492, - "step": 2490 - }, - { - "epoch": 0.2536011361330899, - "grad_norm": 2.5318548679351807, - "learning_rate": 1.7490802824170858e-05, - "loss": 1.7509, - "step": 2500 - }, - { - "epoch": 0.2546155406776222, - "grad_norm": 1.399144172668457, - "learning_rate": 1.7468996695494532e-05, - "loss": 1.7434, - "step": 2510 - }, - { - "epoch": 0.2556299452221546, - "grad_norm": 0.864453136920929, - "learning_rate": 1.744710994285491e-05, - "loss": 1.7499, - "step": 2520 - }, - { - "epoch": 0.25664434976668693, - "grad_norm": 0.8436155319213867, - "learning_rate": 1.742514280250819e-05, - "loss": 1.7453, - "step": 2530 - }, - { - "epoch": 0.2576587543112193, - "grad_norm": 1.0545316934585571, - "learning_rate": 1.7403095511578304e-05, - "loss": 1.7519, - "step": 2540 - }, - { - "epoch": 0.25867315885575165, - "grad_norm": 1.5584205389022827, - "learning_rate": 1.7380968308054385e-05, - "loss": 1.747, - "step": 2550 - }, - { - "epoch": 0.25968756340028404, - "grad_norm": 0.9556212425231934, - "learning_rate": 1.7358761430788164e-05, - "loss": 1.7481, - "step": 2560 - }, - { - "epoch": 0.26070196794481637, - "grad_norm": 2.1686651706695557, - "learning_rate": 1.733647511949142e-05, - "loss": 1.7471, - "step": 2570 - }, - { - "epoch": 0.26171637248934876, - "grad_norm": 1.064915418624878, - "learning_rate": 1.731410961473337e-05, - "loss": 1.7508, - "step": 2580 - }, - { - "epoch": 0.2627307770338811, - "grad_norm": 1.4751176834106445, - "learning_rate": 1.7291665157938106e-05, - "loss": 1.7456, - "step": 2590 - }, - { - "epoch": 0.2637451815784135, - "grad_norm": 0.8935450315475464, - "learning_rate": 1.726914199138194e-05, - "loss": 1.7533, - "step": 2600 - }, - { - "epoch": 0.2647595861229458, - "grad_norm": 1.1825262308120728, - "learning_rate": 1.7246540358190827e-05, - "loss": 1.7469, - "step": 2610 - }, - { - "epoch": 0.2657739906674782, - "grad_norm": 1.071778416633606, - "learning_rate": 1.7223860502337735e-05, - "loss": 1.7497, - "step": 2620 - }, - { - "epoch": 0.26678839521201053, - "grad_norm": 1.149614691734314, - "learning_rate": 1.7201102668639995e-05, - "loss": 1.7545, - "step": 2630 - }, - { - "epoch": 0.2678027997565429, - "grad_norm": 1.1419072151184082, - "learning_rate": 1.7178267102756675e-05, - "loss": 1.7498, - "step": 2640 - }, - { - "epoch": 0.26881720430107525, - "grad_norm": 2.1849591732025146, - "learning_rate": 1.7155354051185912e-05, - "loss": 1.7524, - "step": 2650 - }, - { - "epoch": 0.26983160884560764, - "grad_norm": 0.978971540927887, - "learning_rate": 1.7132363761262277e-05, - "loss": 1.7435, - "step": 2660 - }, - { - "epoch": 0.27084601339014, - "grad_norm": 2.2752292156219482, - "learning_rate": 1.7109296481154082e-05, - "loss": 1.7468, - "step": 2670 - }, - { - "epoch": 0.27186041793467236, - "grad_norm": 1.9060412645339966, - "learning_rate": 1.70861524598607e-05, - "loss": 1.7533, - "step": 2680 - }, - { - "epoch": 0.2728748224792047, - "grad_norm": 0.9077199101448059, - "learning_rate": 1.706293194720989e-05, - "loss": 1.7428, - "step": 2690 - }, - { - "epoch": 0.2738892270237371, - "grad_norm": 0.9407029151916504, - "learning_rate": 1.7039635193855106e-05, - "loss": 1.7407, - "step": 2700 - }, - { - "epoch": 0.2749036315682694, - "grad_norm": 1.159638524055481, - "learning_rate": 1.701626245127277e-05, - "loss": 1.7511, - "step": 2710 - }, - { - "epoch": 0.2759180361128018, - "grad_norm": 1.4043631553649902, - "learning_rate": 1.699281397175957e-05, - "loss": 1.7548, - "step": 2720 - }, - { - "epoch": 0.27693244065733413, - "grad_norm": 1.0725611448287964, - "learning_rate": 1.6969290008429732e-05, - "loss": 1.74, - "step": 2730 - }, - { - "epoch": 0.2779468452018665, - "grad_norm": 1.1321431398391724, - "learning_rate": 1.6945690815212307e-05, - "loss": 1.7399, - "step": 2740 - }, - { - "epoch": 0.27896124974639885, - "grad_norm": 0.8933020830154419, - "learning_rate": 1.6922016646848395e-05, - "loss": 1.7391, - "step": 2750 - }, - { - "epoch": 0.27997565429093124, - "grad_norm": 1.4015562534332275, - "learning_rate": 1.6898267758888422e-05, - "loss": 1.7472, - "step": 2760 - }, - { - "epoch": 0.2809900588354636, - "grad_norm": 0.8753533363342285, - "learning_rate": 1.687444440768937e-05, - "loss": 1.7454, - "step": 2770 - }, - { - "epoch": 0.28200446337999596, - "grad_norm": 0.8602030277252197, - "learning_rate": 1.6850546850412026e-05, - "loss": 1.7503, - "step": 2780 - }, - { - "epoch": 0.2830188679245283, - "grad_norm": 0.8110195398330688, - "learning_rate": 1.682657534501817e-05, - "loss": 1.7461, - "step": 2790 - }, - { - "epoch": 0.2840332724690607, - "grad_norm": 0.9761762619018555, - "learning_rate": 1.6802530150267835e-05, - "loss": 1.7448, - "step": 2800 - }, - { - "epoch": 0.285047677013593, - "grad_norm": 2.206202268600464, - "learning_rate": 1.6778411525716483e-05, - "loss": 1.7554, - "step": 2810 - }, - { - "epoch": 0.2860620815581254, - "grad_norm": 1.5367364883422852, - "learning_rate": 1.6754219731712212e-05, - "loss": 1.7327, - "step": 2820 - }, - { - "epoch": 0.28707648610265774, - "grad_norm": 1.2064716815948486, - "learning_rate": 1.672995502939295e-05, - "loss": 1.7419, - "step": 2830 - }, - { - "epoch": 0.2880908906471901, - "grad_norm": 0.8288530111312866, - "learning_rate": 1.6705617680683633e-05, - "loss": 1.7463, - "step": 2840 - }, - { - "epoch": 0.28910529519172246, - "grad_norm": 0.8210314512252808, - "learning_rate": 1.6681207948293372e-05, - "loss": 1.7451, - "step": 2850 - }, - { - "epoch": 0.29011969973625484, - "grad_norm": 1.4870562553405762, - "learning_rate": 1.6656726095712633e-05, - "loss": 1.7467, - "step": 2860 - }, - { - "epoch": 0.2911341042807872, - "grad_norm": 1.039605736732483, - "learning_rate": 1.6632172387210368e-05, - "loss": 1.7446, - "step": 2870 - }, - { - "epoch": 0.29214850882531956, - "grad_norm": 1.2453172206878662, - "learning_rate": 1.6607547087831182e-05, - "loss": 1.7413, - "step": 2880 - }, - { - "epoch": 0.2931629133698519, - "grad_norm": 0.9563378691673279, - "learning_rate": 1.658285046339248e-05, - "loss": 1.7482, - "step": 2890 - }, - { - "epoch": 0.29417731791438423, - "grad_norm": 1.1646844148635864, - "learning_rate": 1.6558082780481562e-05, - "loss": 1.7464, - "step": 2900 - }, - { - "epoch": 0.2951917224589166, - "grad_norm": 0.7650978565216064, - "learning_rate": 1.6533244306452783e-05, - "loss": 1.7333, - "step": 2910 - }, - { - "epoch": 0.29620612700344895, - "grad_norm": 1.7044178247451782, - "learning_rate": 1.6508335309424647e-05, - "loss": 1.7481, - "step": 2920 - }, - { - "epoch": 0.29722053154798134, - "grad_norm": 1.3023779392242432, - "learning_rate": 1.6483356058276915e-05, - "loss": 1.7499, - "step": 2930 - }, - { - "epoch": 0.29823493609251367, - "grad_norm": 1.4667022228240967, - "learning_rate": 1.6458306822647707e-05, - "loss": 1.7422, - "step": 2940 - }, - { - "epoch": 0.29924934063704606, - "grad_norm": 1.153861403465271, - "learning_rate": 1.643318787293059e-05, - "loss": 1.7405, - "step": 2950 - }, - { - "epoch": 0.3002637451815784, - "grad_norm": 0.9464306235313416, - "learning_rate": 1.640799948027166e-05, - "loss": 1.7473, - "step": 2960 - }, - { - "epoch": 0.3012781497261108, - "grad_norm": 1.1194087266921997, - "learning_rate": 1.638274191656661e-05, - "loss": 1.7476, - "step": 2970 - }, - { - "epoch": 0.3022925542706431, - "grad_norm": 1.0221765041351318, - "learning_rate": 1.6357415454457796e-05, - "loss": 1.749, - "step": 2980 - }, - { - "epoch": 0.3033069588151755, - "grad_norm": 0.7287331223487854, - "learning_rate": 1.6332020367331308e-05, - "loss": 1.7398, - "step": 2990 - }, - { - "epoch": 0.30432136335970783, - "grad_norm": 0.7595639228820801, - "learning_rate": 1.6306556929314e-05, - "loss": 1.7398, - "step": 3000 - }, - { - "epoch": 0.3053357679042402, - "grad_norm": 1.1534080505371094, - "learning_rate": 1.6281025415270534e-05, - "loss": 1.7503, - "step": 3010 - }, - { - "epoch": 0.30635017244877255, - "grad_norm": 1.9311598539352417, - "learning_rate": 1.625542610080043e-05, - "loss": 1.7466, - "step": 3020 - }, - { - "epoch": 0.30736457699330494, - "grad_norm": 1.7035318613052368, - "learning_rate": 1.6229759262235064e-05, - "loss": 1.7407, - "step": 3030 - }, - { - "epoch": 0.30837898153783727, - "grad_norm": 0.8274477124214172, - "learning_rate": 1.6204025176634712e-05, - "loss": 1.7433, - "step": 3040 - }, - { - "epoch": 0.30939338608236966, - "grad_norm": 1.270766019821167, - "learning_rate": 1.6178224121785543e-05, - "loss": 1.7392, - "step": 3050 - }, - { - "epoch": 0.310407790626902, - "grad_norm": 0.9259629845619202, - "learning_rate": 1.6152356376196623e-05, - "loss": 1.7475, - "step": 3060 - }, - { - "epoch": 0.3114221951714344, - "grad_norm": 0.7231857776641846, - "learning_rate": 1.612642221909691e-05, - "loss": 1.7466, - "step": 3070 - }, - { - "epoch": 0.3124365997159667, - "grad_norm": 1.5596321821212769, - "learning_rate": 1.6100421930432238e-05, - "loss": 1.7373, - "step": 3080 - }, - { - "epoch": 0.3134510042604991, - "grad_norm": 2.015007734298706, - "learning_rate": 1.6074355790862306e-05, - "loss": 1.7349, - "step": 3090 - }, - { - "epoch": 0.31446540880503143, - "grad_norm": 1.1419507265090942, - "learning_rate": 1.6048224081757628e-05, - "loss": 1.7435, - "step": 3100 - }, - { - "epoch": 0.3154798133495638, - "grad_norm": 1.3488186597824097, - "learning_rate": 1.6022027085196516e-05, - "loss": 1.7396, - "step": 3110 - }, - { - "epoch": 0.31649421789409615, - "grad_norm": 1.312618613243103, - "learning_rate": 1.5995765083962023e-05, - "loss": 1.7342, - "step": 3120 - }, - { - "epoch": 0.31750862243862854, - "grad_norm": 1.3405729532241821, - "learning_rate": 1.596943836153889e-05, - "loss": 1.7392, - "step": 3130 - }, - { - "epoch": 0.3185230269831609, - "grad_norm": 0.7603846192359924, - "learning_rate": 1.59430472021105e-05, - "loss": 1.7345, - "step": 3140 - }, - { - "epoch": 0.31953743152769326, - "grad_norm": 0.8404675722122192, - "learning_rate": 1.591659189055579e-05, - "loss": 1.7471, - "step": 3150 - }, - { - "epoch": 0.3205518360722256, - "grad_norm": 0.8096135854721069, - "learning_rate": 1.589007271244619e-05, - "loss": 1.7387, - "step": 3160 - }, - { - "epoch": 0.321566240616758, - "grad_norm": 0.9156092405319214, - "learning_rate": 1.5863489954042538e-05, - "loss": 1.7318, - "step": 3170 - }, - { - "epoch": 0.3225806451612903, - "grad_norm": 1.0475143194198608, - "learning_rate": 1.5836843902291986e-05, - "loss": 1.7299, - "step": 3180 - }, - { - "epoch": 0.3235950497058227, - "grad_norm": 0.7734218835830688, - "learning_rate": 1.5810134844824908e-05, - "loss": 1.7309, - "step": 3190 - }, - { - "epoch": 0.32460945425035503, - "grad_norm": 0.9087108373641968, - "learning_rate": 1.5783363069951786e-05, - "loss": 1.738, - "step": 3200 - }, - { - "epoch": 0.3256238587948874, - "grad_norm": 0.8391383290290833, - "learning_rate": 1.5756528866660114e-05, - "loss": 1.7458, - "step": 3210 - }, - { - "epoch": 0.32663826333941975, - "grad_norm": 0.7180137038230896, - "learning_rate": 1.572963252461126e-05, - "loss": 1.7321, - "step": 3220 - }, - { - "epoch": 0.32765266788395214, - "grad_norm": 1.2489904165267944, - "learning_rate": 1.570267433413735e-05, - "loss": 1.7314, - "step": 3230 - }, - { - "epoch": 0.3286670724284845, - "grad_norm": 1.1317272186279297, - "learning_rate": 1.567565458623813e-05, - "loss": 1.7392, - "step": 3240 - }, - { - "epoch": 0.32968147697301686, - "grad_norm": 0.8226853609085083, - "learning_rate": 1.5648573572577837e-05, - "loss": 1.7403, - "step": 3250 - }, - { - "epoch": 0.3306958815175492, - "grad_norm": 0.7814518213272095, - "learning_rate": 1.5621431585482025e-05, - "loss": 1.7317, - "step": 3260 - }, - { - "epoch": 0.3317102860620816, - "grad_norm": 0.7723233103752136, - "learning_rate": 1.5594228917934434e-05, - "loss": 1.7333, - "step": 3270 - }, - { - "epoch": 0.3327246906066139, - "grad_norm": 1.7539443969726562, - "learning_rate": 1.5566965863573816e-05, - "loss": 1.7388, - "step": 3280 - }, - { - "epoch": 0.3337390951511463, - "grad_norm": 2.108532190322876, - "learning_rate": 1.553964271669077e-05, - "loss": 1.7442, - "step": 3290 - }, - { - "epoch": 0.33475349969567864, - "grad_norm": 2.435744285583496, - "learning_rate": 1.5512259772224558e-05, - "loss": 1.7399, - "step": 3300 - }, - { - "epoch": 0.335767904240211, - "grad_norm": 0.6986476182937622, - "learning_rate": 1.5484817325759928e-05, - "loss": 1.741, - "step": 3310 - }, - { - "epoch": 0.33678230878474336, - "grad_norm": 0.6989396214485168, - "learning_rate": 1.545731567352392e-05, - "loss": 1.7327, - "step": 3320 - }, - { - "epoch": 0.3377967133292757, - "grad_norm": 1.6350688934326172, - "learning_rate": 1.542975511238268e-05, - "loss": 1.7333, - "step": 3330 - }, - { - "epoch": 0.3388111178738081, - "grad_norm": 0.8368114829063416, - "learning_rate": 1.5402135939838226e-05, - "loss": 1.7303, - "step": 3340 - }, - { - "epoch": 0.3398255224183404, - "grad_norm": 2.1118948459625244, - "learning_rate": 1.5374458454025277e-05, - "loss": 1.7293, - "step": 3350 - }, - { - "epoch": 0.3408399269628728, - "grad_norm": 0.7982993721961975, - "learning_rate": 1.5346722953707996e-05, - "loss": 1.7277, - "step": 3360 - }, - { - "epoch": 0.34185433150740513, - "grad_norm": 1.406225323677063, - "learning_rate": 1.5318929738276793e-05, - "loss": 1.7265, - "step": 3370 - }, - { - "epoch": 0.3428687360519375, - "grad_norm": 1.7030199766159058, - "learning_rate": 1.5291079107745074e-05, - "loss": 1.7257, - "step": 3380 - }, - { - "epoch": 0.34388314059646985, - "grad_norm": 0.7769737839698792, - "learning_rate": 1.5263171362746028e-05, - "loss": 1.7295, - "step": 3390 - }, - { - "epoch": 0.34489754514100224, - "grad_norm": 1.067215085029602, - "learning_rate": 1.5235206804529345e-05, - "loss": 1.7302, - "step": 3400 - }, - { - "epoch": 0.34591194968553457, - "grad_norm": 1.08042573928833, - "learning_rate": 1.5207185734957994e-05, - "loss": 1.7287, - "step": 3410 - }, - { - "epoch": 0.34692635423006696, - "grad_norm": 1.7795910835266113, - "learning_rate": 1.5179108456504965e-05, - "loss": 1.7327, - "step": 3420 - }, - { - "epoch": 0.3479407587745993, - "grad_norm": 0.7217854261398315, - "learning_rate": 1.515097527224997e-05, - "loss": 1.7397, - "step": 3430 - }, - { - "epoch": 0.3489551633191317, - "grad_norm": 1.8240686655044556, - "learning_rate": 1.5122786485876215e-05, - "loss": 1.7361, - "step": 3440 - }, - { - "epoch": 0.349969567863664, - "grad_norm": 1.68661630153656, - "learning_rate": 1.5094542401667095e-05, - "loss": 1.7273, - "step": 3450 - }, - { - "epoch": 0.3509839724081964, - "grad_norm": 1.1973419189453125, - "learning_rate": 1.5066243324502919e-05, - "loss": 1.7342, - "step": 3460 - }, - { - "epoch": 0.35199837695272873, - "grad_norm": 1.0971792936325073, - "learning_rate": 1.5037889559857614e-05, - "loss": 1.7251, - "step": 3470 - }, - { - "epoch": 0.3530127814972611, - "grad_norm": 1.136086106300354, - "learning_rate": 1.5009481413795431e-05, - "loss": 1.7338, - "step": 3480 - }, - { - "epoch": 0.35402718604179345, - "grad_norm": 2.288309335708618, - "learning_rate": 1.4981019192967636e-05, - "loss": 1.7414, - "step": 3490 - }, - { - "epoch": 0.35504159058632584, - "grad_norm": 1.6639214754104614, - "learning_rate": 1.4952503204609217e-05, - "loss": 1.7326, - "step": 3500 - }, - { - "epoch": 0.35605599513085817, - "grad_norm": 0.7823703289031982, - "learning_rate": 1.4923933756535539e-05, - "loss": 1.7383, - "step": 3510 - }, - { - "epoch": 0.35707039967539056, - "grad_norm": 1.4496943950653076, - "learning_rate": 1.4895311157139048e-05, - "loss": 1.7358, - "step": 3520 - }, - { - "epoch": 0.3580848042199229, - "grad_norm": 1.1496936082839966, - "learning_rate": 1.4866635715385926e-05, - "loss": 1.7255, - "step": 3530 - }, - { - "epoch": 0.3590992087644553, - "grad_norm": 0.787397563457489, - "learning_rate": 1.4837907740812764e-05, - "loss": 1.7275, - "step": 3540 - }, - { - "epoch": 0.3601136133089876, - "grad_norm": 1.3278664350509644, - "learning_rate": 1.4809127543523216e-05, - "loss": 1.7283, - "step": 3550 - }, - { - "epoch": 0.36112801785352, - "grad_norm": 0.9110720753669739, - "learning_rate": 1.4780295434184653e-05, - "loss": 1.7313, - "step": 3560 - }, - { - "epoch": 0.36214242239805233, - "grad_norm": 1.2207852602005005, - "learning_rate": 1.4751411724024806e-05, - "loss": 1.7266, - "step": 3570 - }, - { - "epoch": 0.3631568269425847, - "grad_norm": 2.433804750442505, - "learning_rate": 1.472247672482842e-05, - "loss": 1.7271, - "step": 3580 - }, - { - "epoch": 0.36417123148711705, - "grad_norm": 1.7789822816848755, - "learning_rate": 1.4693490748933874e-05, - "loss": 1.7291, - "step": 3590 - }, - { - "epoch": 0.36518563603164944, - "grad_norm": 0.9559857845306396, - "learning_rate": 1.4664454109229809e-05, - "loss": 1.7289, - "step": 3600 - }, - { - "epoch": 0.3662000405761818, - "grad_norm": 0.8113611936569214, - "learning_rate": 1.463536711915176e-05, - "loss": 1.7274, - "step": 3610 - }, - { - "epoch": 0.36721444512071416, - "grad_norm": 1.5056771039962769, - "learning_rate": 1.4606230092678768e-05, - "loss": 1.7277, - "step": 3620 - }, - { - "epoch": 0.3682288496652465, - "grad_norm": 1.551259994506836, - "learning_rate": 1.4577043344329993e-05, - "loss": 1.7299, - "step": 3630 - }, - { - "epoch": 0.3692432542097789, - "grad_norm": 1.000220775604248, - "learning_rate": 1.4547807189161316e-05, - "loss": 1.7221, - "step": 3640 - }, - { - "epoch": 0.3702576587543112, - "grad_norm": 1.4091674089431763, - "learning_rate": 1.4518521942761932e-05, - "loss": 1.7294, - "step": 3650 - }, - { - "epoch": 0.3712720632988436, - "grad_norm": 2.3106977939605713, - "learning_rate": 1.4489187921250965e-05, - "loss": 1.7221, - "step": 3660 - }, - { - "epoch": 0.37228646784337593, - "grad_norm": 1.3922241926193237, - "learning_rate": 1.4459805441274028e-05, - "loss": 1.7342, - "step": 3670 - }, - { - "epoch": 0.3733008723879083, - "grad_norm": 1.1360936164855957, - "learning_rate": 1.4430374819999827e-05, - "loss": 1.7279, - "step": 3680 - }, - { - "epoch": 0.37431527693244065, - "grad_norm": 1.884657621383667, - "learning_rate": 1.4400896375116717e-05, - "loss": 1.7368, - "step": 3690 - }, - { - "epoch": 0.37532968147697304, - "grad_norm": 0.8114562034606934, - "learning_rate": 1.43713704248293e-05, - "loss": 1.7318, - "step": 3700 - }, - { - "epoch": 0.3763440860215054, - "grad_norm": 1.1877741813659668, - "learning_rate": 1.4341797287854959e-05, - "loss": 1.7318, - "step": 3710 - }, - { - "epoch": 0.37735849056603776, - "grad_norm": 0.8494029641151428, - "learning_rate": 1.4312177283420448e-05, - "loss": 1.7299, - "step": 3720 - }, - { - "epoch": 0.3783728951105701, - "grad_norm": 0.9850980043411255, - "learning_rate": 1.4282510731258414e-05, - "loss": 1.7222, - "step": 3730 - }, - { - "epoch": 0.37938729965510243, - "grad_norm": 1.2905610799789429, - "learning_rate": 1.4252797951603978e-05, - "loss": 1.7291, - "step": 3740 - }, - { - "epoch": 0.3804017041996348, - "grad_norm": 0.7275786399841309, - "learning_rate": 1.4223039265191251e-05, - "loss": 1.7215, - "step": 3750 - }, - { - "epoch": 0.38141610874416715, - "grad_norm": 0.7242658138275146, - "learning_rate": 1.4193234993249895e-05, - "loss": 1.7277, - "step": 3760 - }, - { - "epoch": 0.38243051328869954, - "grad_norm": 1.7107105255126953, - "learning_rate": 1.4163385457501636e-05, - "loss": 1.7381, - "step": 3770 - }, - { - "epoch": 0.38344491783323187, - "grad_norm": 1.153464436531067, - "learning_rate": 1.4133490980156801e-05, - "loss": 1.7362, - "step": 3780 - }, - { - "epoch": 0.38445932237776426, - "grad_norm": 0.7688796520233154, - "learning_rate": 1.4103551883910838e-05, - "loss": 1.7213, - "step": 3790 - }, - { - "epoch": 0.3854737269222966, - "grad_norm": 1.3833922147750854, - "learning_rate": 1.4073568491940836e-05, - "loss": 1.723, - "step": 3800 - }, - { - "epoch": 0.386488131466829, - "grad_norm": 1.1648547649383545, - "learning_rate": 1.4043541127902037e-05, - "loss": 1.7221, - "step": 3810 - }, - { - "epoch": 0.3875025360113613, - "grad_norm": 1.0113093852996826, - "learning_rate": 1.4013470115924323e-05, - "loss": 1.7216, - "step": 3820 - }, - { - "epoch": 0.3885169405558937, - "grad_norm": 1.7457501888275146, - "learning_rate": 1.3983355780608751e-05, - "loss": 1.7208, - "step": 3830 - }, - { - "epoch": 0.38953134510042603, - "grad_norm": 0.9993512630462646, - "learning_rate": 1.3953198447024023e-05, - "loss": 1.7266, - "step": 3840 - }, - { - "epoch": 0.3905457496449584, - "grad_norm": 1.0321276187896729, - "learning_rate": 1.3922998440702984e-05, - "loss": 1.7328, - "step": 3850 - }, - { - "epoch": 0.39156015418949075, - "grad_norm": 0.7221664190292358, - "learning_rate": 1.3892756087639109e-05, - "loss": 1.7216, - "step": 3860 - }, - { - "epoch": 0.39257455873402314, - "grad_norm": 0.7056678533554077, - "learning_rate": 1.386247171428299e-05, - "loss": 1.7266, - "step": 3870 - }, - { - "epoch": 0.39358896327855547, - "grad_norm": 1.0751144886016846, - "learning_rate": 1.38321456475388e-05, - "loss": 1.7301, - "step": 3880 - }, - { - "epoch": 0.39460336782308786, - "grad_norm": 0.903701901435852, - "learning_rate": 1.3801778214760774e-05, - "loss": 1.7212, - "step": 3890 - }, - { - "epoch": 0.3956177723676202, - "grad_norm": 0.6775175333023071, - "learning_rate": 1.377136974374967e-05, - "loss": 1.7228, - "step": 3900 - }, - { - "epoch": 0.3966321769121526, - "grad_norm": 0.7275093197822571, - "learning_rate": 1.3740920562749234e-05, - "loss": 1.729, - "step": 3910 - }, - { - "epoch": 0.3976465814566849, - "grad_norm": 0.8025760054588318, - "learning_rate": 1.3710431000442656e-05, - "loss": 1.7211, - "step": 3920 - }, - { - "epoch": 0.3986609860012173, - "grad_norm": 2.4457945823669434, - "learning_rate": 1.3679901385949018e-05, - "loss": 1.7205, - "step": 3930 - }, - { - "epoch": 0.39967539054574963, - "grad_norm": 0.857626736164093, - "learning_rate": 1.3649332048819746e-05, - "loss": 1.7136, - "step": 3940 - }, - { - "epoch": 0.400689795090282, - "grad_norm": 0.7328224778175354, - "learning_rate": 1.3618723319035056e-05, - "loss": 1.7251, - "step": 3950 - }, - { - "epoch": 0.40170419963481435, - "grad_norm": 1.138983130455017, - "learning_rate": 1.3588075527000382e-05, - "loss": 1.7315, - "step": 3960 - }, - { - "epoch": 0.40271860417934674, - "grad_norm": 0.7532480955123901, - "learning_rate": 1.3557389003542815e-05, - "loss": 1.7296, - "step": 3970 - }, - { - "epoch": 0.40373300872387907, - "grad_norm": 2.651571273803711, - "learning_rate": 1.3526664079907538e-05, - "loss": 1.7209, - "step": 3980 - }, - { - "epoch": 0.40474741326841146, - "grad_norm": 1.7532095909118652, - "learning_rate": 1.3495901087754231e-05, - "loss": 1.7256, - "step": 3990 - }, - { - "epoch": 0.4057618178129438, - "grad_norm": 2.5791752338409424, - "learning_rate": 1.346510035915352e-05, - "loss": 1.7224, - "step": 4000 - }, - { - "epoch": 0.4067762223574762, - "grad_norm": 1.6915431022644043, - "learning_rate": 1.3434262226583365e-05, - "loss": 1.7253, - "step": 4010 - }, - { - "epoch": 0.4077906269020085, - "grad_norm": 0.8234580159187317, - "learning_rate": 1.3403387022925488e-05, - "loss": 1.7276, - "step": 4020 - }, - { - "epoch": 0.4088050314465409, - "grad_norm": 0.9379932284355164, - "learning_rate": 1.3372475081461768e-05, - "loss": 1.7228, - "step": 4030 - }, - { - "epoch": 0.40981943599107323, - "grad_norm": 0.7605624794960022, - "learning_rate": 1.334152673587066e-05, - "loss": 1.7288, - "step": 4040 - }, - { - "epoch": 0.4108338405356056, - "grad_norm": 1.1446384191513062, - "learning_rate": 1.3310542320223574e-05, - "loss": 1.7269, - "step": 4050 - }, - { - "epoch": 0.41184824508013795, - "grad_norm": 1.1726760864257812, - "learning_rate": 1.327952216898128e-05, - "loss": 1.7276, - "step": 4060 - }, - { - "epoch": 0.41286264962467034, - "grad_norm": 1.8441879749298096, - "learning_rate": 1.3248466616990297e-05, - "loss": 1.7215, - "step": 4070 - }, - { - "epoch": 0.4138770541692027, - "grad_norm": 1.5489195585250854, - "learning_rate": 1.3217375999479274e-05, - "loss": 1.7222, - "step": 4080 - }, - { - "epoch": 0.41489145871373506, - "grad_norm": 1.1660573482513428, - "learning_rate": 1.318625065205538e-05, - "loss": 1.7195, - "step": 4090 - }, - { - "epoch": 0.4159058632582674, - "grad_norm": 0.7152527570724487, - "learning_rate": 1.3155090910700663e-05, - "loss": 1.7384, - "step": 4100 - }, - { - "epoch": 0.4169202678027998, - "grad_norm": 1.688676118850708, - "learning_rate": 1.3123897111768454e-05, - "loss": 1.7289, - "step": 4110 - }, - { - "epoch": 0.4179346723473321, - "grad_norm": 1.1847255229949951, - "learning_rate": 1.3092669591979703e-05, - "loss": 1.7246, - "step": 4120 - }, - { - "epoch": 0.4189490768918645, - "grad_norm": 1.0151628255844116, - "learning_rate": 1.3061408688419364e-05, - "loss": 1.7186, - "step": 4130 - }, - { - "epoch": 0.41996348143639683, - "grad_norm": 0.8787612318992615, - "learning_rate": 1.303011473853275e-05, - "loss": 1.7167, - "step": 4140 - }, - { - "epoch": 0.42097788598092917, - "grad_norm": 1.14687180519104, - "learning_rate": 1.29987880801219e-05, - "loss": 1.7261, - "step": 4150 - }, - { - "epoch": 0.42199229052546156, - "grad_norm": 1.5079935789108276, - "learning_rate": 1.2967429051341913e-05, - "loss": 1.718, - "step": 4160 - }, - { - "epoch": 0.4230066950699939, - "grad_norm": 1.1245684623718262, - "learning_rate": 1.2936037990697318e-05, - "loss": 1.7239, - "step": 4170 - }, - { - "epoch": 0.4240210996145263, - "grad_norm": 0.7608409523963928, - "learning_rate": 1.2904615237038407e-05, - "loss": 1.7129, - "step": 4180 - }, - { - "epoch": 0.4250355041590586, - "grad_norm": 1.1182703971862793, - "learning_rate": 1.2873161129557581e-05, - "loss": 1.7219, - "step": 4190 - }, - { - "epoch": 0.426049908703591, - "grad_norm": 0.9542585611343384, - "learning_rate": 1.2841676007785697e-05, - "loss": 1.7198, - "step": 4200 - }, - { - "epoch": 0.42706431324812333, - "grad_norm": 1.047252893447876, - "learning_rate": 1.2810160211588383e-05, - "loss": 1.7179, - "step": 4210 - }, - { - "epoch": 0.4280787177926557, - "grad_norm": 2.627610206604004, - "learning_rate": 1.277861408116239e-05, - "loss": 1.7264, - "step": 4220 - }, - { - "epoch": 0.42909312233718805, - "grad_norm": 0.8351724147796631, - "learning_rate": 1.2747037957031917e-05, - "loss": 1.7146, - "step": 4230 - }, - { - "epoch": 0.43010752688172044, - "grad_norm": 0.717055082321167, - "learning_rate": 1.271543218004491e-05, - "loss": 1.7241, - "step": 4240 - }, - { - "epoch": 0.43112193142625277, - "grad_norm": 1.0989712476730347, - "learning_rate": 1.2683797091369421e-05, - "loss": 1.7222, - "step": 4250 - }, - { - "epoch": 0.43213633597078516, - "grad_norm": 0.7421863079071045, - "learning_rate": 1.2652133032489904e-05, - "loss": 1.7153, - "step": 4260 - }, - { - "epoch": 0.4331507405153175, - "grad_norm": 0.776130735874176, - "learning_rate": 1.2620440345203519e-05, - "loss": 1.7249, - "step": 4270 - }, - { - "epoch": 0.4341651450598499, - "grad_norm": 1.5118204355239868, - "learning_rate": 1.2588719371616468e-05, - "loss": 1.7144, - "step": 4280 - }, - { - "epoch": 0.4351795496043822, - "grad_norm": 1.1911628246307373, - "learning_rate": 1.2556970454140282e-05, - "loss": 1.723, - "step": 4290 - }, - { - "epoch": 0.4361939541489146, - "grad_norm": 0.9643965363502502, - "learning_rate": 1.2525193935488138e-05, - "loss": 1.7264, - "step": 4300 - }, - { - "epoch": 0.43720835869344693, - "grad_norm": 0.7350860834121704, - "learning_rate": 1.2493390158671145e-05, - "loss": 1.7369, - "step": 4310 - }, - { - "epoch": 0.4382227632379793, - "grad_norm": 0.8367869257926941, - "learning_rate": 1.2461559466994657e-05, - "loss": 1.7164, - "step": 4320 - }, - { - "epoch": 0.43923716778251165, - "grad_norm": 0.7366974949836731, - "learning_rate": 1.2429702204054555e-05, - "loss": 1.7229, - "step": 4330 - }, - { - "epoch": 0.44025157232704404, - "grad_norm": 0.8591330051422119, - "learning_rate": 1.2397818713733549e-05, - "loss": 1.7235, - "step": 4340 - }, - { - "epoch": 0.44126597687157637, - "grad_norm": 1.3979252576828003, - "learning_rate": 1.2365909340197454e-05, - "loss": 1.7127, - "step": 4350 - }, - { - "epoch": 0.44228038141610876, - "grad_norm": 0.8519896864891052, - "learning_rate": 1.2333974427891482e-05, - "loss": 1.7221, - "step": 4360 - }, - { - "epoch": 0.4432947859606411, - "grad_norm": 0.7373445630073547, - "learning_rate": 1.230201432153653e-05, - "loss": 1.7123, - "step": 4370 - }, - { - "epoch": 0.4443091905051735, - "grad_norm": 0.7757080793380737, - "learning_rate": 1.2270029366125435e-05, - "loss": 1.719, - "step": 4380 - }, - { - "epoch": 0.4453235950497058, - "grad_norm": 1.0483899116516113, - "learning_rate": 1.223801990691929e-05, - "loss": 1.7185, - "step": 4390 - }, - { - "epoch": 0.4463379995942382, - "grad_norm": 1.0248713493347168, - "learning_rate": 1.2205986289443677e-05, - "loss": 1.7142, - "step": 4400 - }, - { - "epoch": 0.44735240413877053, - "grad_norm": 2.5186378955841064, - "learning_rate": 1.2173928859484955e-05, - "loss": 1.7214, - "step": 4410 - }, - { - "epoch": 0.4483668086833029, - "grad_norm": 0.7689295411109924, - "learning_rate": 1.2141847963086535e-05, - "loss": 1.7247, - "step": 4420 - }, - { - "epoch": 0.44938121322783525, - "grad_norm": 0.8134256601333618, - "learning_rate": 1.2109743946545129e-05, - "loss": 1.7233, - "step": 4430 - }, - { - "epoch": 0.45039561777236764, - "grad_norm": 0.7907826900482178, - "learning_rate": 1.207761715640702e-05, - "loss": 1.7213, - "step": 4440 - }, - { - "epoch": 0.4514100223169, - "grad_norm": 1.0850943326950073, - "learning_rate": 1.2045467939464323e-05, - "loss": 1.718, - "step": 4450 - }, - { - "epoch": 0.45242442686143236, - "grad_norm": 1.1381374597549438, - "learning_rate": 1.2013296642751238e-05, - "loss": 1.7171, - "step": 4460 - }, - { - "epoch": 0.4534388314059647, - "grad_norm": 0.9255115985870361, - "learning_rate": 1.1981103613540302e-05, - "loss": 1.7254, - "step": 4470 - }, - { - "epoch": 0.4544532359504971, - "grad_norm": 0.6684579253196716, - "learning_rate": 1.1948889199338647e-05, - "loss": 1.7188, - "step": 4480 - }, - { - "epoch": 0.4554676404950294, - "grad_norm": 0.845117449760437, - "learning_rate": 1.191665374788424e-05, - "loss": 1.7086, - "step": 4490 - }, - { - "epoch": 0.4564820450395618, - "grad_norm": 0.8689450621604919, - "learning_rate": 1.1884397607142137e-05, - "loss": 1.7108, - "step": 4500 - }, - { - "epoch": 0.45749644958409413, - "grad_norm": 1.2796167135238647, - "learning_rate": 1.185212112530073e-05, - "loss": 1.7132, - "step": 4510 - }, - { - "epoch": 0.4585108541286265, - "grad_norm": 0.8743492364883423, - "learning_rate": 1.1819824650767972e-05, - "loss": 1.7134, - "step": 4520 - }, - { - "epoch": 0.45952525867315885, - "grad_norm": 1.0619572401046753, - "learning_rate": 1.1787508532167633e-05, - "loss": 1.717, - "step": 4530 - }, - { - "epoch": 0.46053966321769124, - "grad_norm": 0.997868001461029, - "learning_rate": 1.175517311833553e-05, - "loss": 1.7167, - "step": 4540 - }, - { - "epoch": 0.4615540677622236, - "grad_norm": 0.7756907343864441, - "learning_rate": 1.1722818758315761e-05, - "loss": 1.7116, - "step": 4550 - }, - { - "epoch": 0.4625684723067559, - "grad_norm": 0.7107768654823303, - "learning_rate": 1.169044580135694e-05, - "loss": 1.7179, - "step": 4560 - }, - { - "epoch": 0.4635828768512883, - "grad_norm": 0.8022791743278503, - "learning_rate": 1.165805459690842e-05, - "loss": 1.709, - "step": 4570 - }, - { - "epoch": 0.4645972813958206, - "grad_norm": 0.7716123461723328, - "learning_rate": 1.1625645494616535e-05, - "loss": 1.7069, - "step": 4580 - }, - { - "epoch": 0.465611685940353, - "grad_norm": 0.7599775195121765, - "learning_rate": 1.159321884432081e-05, - "loss": 1.7165, - "step": 4590 - }, - { - "epoch": 0.46662609048488535, - "grad_norm": 0.7957566976547241, - "learning_rate": 1.1560774996050195e-05, - "loss": 1.7193, - "step": 4600 - }, - { - "epoch": 0.46764049502941774, - "grad_norm": 0.707252025604248, - "learning_rate": 1.1528314300019281e-05, - "loss": 1.7127, - "step": 4610 - }, - { - "epoch": 0.46865489957395007, - "grad_norm": 1.0349026918411255, - "learning_rate": 1.1495837106624525e-05, - "loss": 1.7215, - "step": 4620 - }, - { - "epoch": 0.46966930411848246, - "grad_norm": 0.7257461547851562, - "learning_rate": 1.146334376644046e-05, - "loss": 1.715, - "step": 4630 - }, - { - "epoch": 0.4706837086630148, - "grad_norm": 0.7048343420028687, - "learning_rate": 1.1430834630215918e-05, - "loss": 1.708, - "step": 4640 - }, - { - "epoch": 0.4716981132075472, - "grad_norm": 0.9412261843681335, - "learning_rate": 1.1398310048870247e-05, - "loss": 1.7151, - "step": 4650 - }, - { - "epoch": 0.4727125177520795, - "grad_norm": 1.8378090858459473, - "learning_rate": 1.1365770373489502e-05, - "loss": 1.7115, - "step": 4660 - }, - { - "epoch": 0.4737269222966119, - "grad_norm": 1.286843180656433, - "learning_rate": 1.1333215955322685e-05, - "loss": 1.7125, - "step": 4670 - }, - { - "epoch": 0.47474132684114423, - "grad_norm": 0.6957350373268127, - "learning_rate": 1.1300647145777935e-05, - "loss": 1.718, - "step": 4680 - }, - { - "epoch": 0.4757557313856766, - "grad_norm": 1.3016197681427002, - "learning_rate": 1.126806429641873e-05, - "loss": 1.7094, - "step": 4690 - }, - { - "epoch": 0.47677013593020895, - "grad_norm": 1.0874053239822388, - "learning_rate": 1.1235467758960114e-05, - "loss": 1.7144, - "step": 4700 - }, - { - "epoch": 0.47778454047474134, - "grad_norm": 0.7505393028259277, - "learning_rate": 1.1202857885264875e-05, - "loss": 1.7119, - "step": 4710 - }, - { - "epoch": 0.47879894501927367, - "grad_norm": 0.7249730825424194, - "learning_rate": 1.1170235027339766e-05, - "loss": 1.7119, - "step": 4720 - }, - { - "epoch": 0.47981334956380606, - "grad_norm": 1.193466067314148, - "learning_rate": 1.1137599537331698e-05, - "loss": 1.7108, - "step": 4730 - }, - { - "epoch": 0.4808277541083384, - "grad_norm": 1.503462553024292, - "learning_rate": 1.1104951767523932e-05, - "loss": 1.72, - "step": 4740 - }, - { - "epoch": 0.4818421586528708, - "grad_norm": 0.6898245215415955, - "learning_rate": 1.1072292070332294e-05, - "loss": 1.7137, - "step": 4750 - }, - { - "epoch": 0.4828565631974031, - "grad_norm": 0.8285757303237915, - "learning_rate": 1.1039620798301351e-05, - "loss": 1.7061, - "step": 4760 - }, - { - "epoch": 0.4838709677419355, - "grad_norm": 0.7768415212631226, - "learning_rate": 1.1006938304100617e-05, - "loss": 1.7092, - "step": 4770 - }, - { - "epoch": 0.48488537228646783, - "grad_norm": 0.7080183625221252, - "learning_rate": 1.0974244940520742e-05, - "loss": 1.713, - "step": 4780 - }, - { - "epoch": 0.4858997768310002, - "grad_norm": 0.8379605412483215, - "learning_rate": 1.0941541060469712e-05, - "loss": 1.7209, - "step": 4790 - }, - { - "epoch": 0.48691418137553255, - "grad_norm": 2.702296495437622, - "learning_rate": 1.0908827016969021e-05, - "loss": 1.7168, - "step": 4800 - }, - { - "epoch": 0.48792858592006494, - "grad_norm": 1.2359603643417358, - "learning_rate": 1.0876103163149886e-05, - "loss": 1.7161, - "step": 4810 - }, - { - "epoch": 0.48894299046459727, - "grad_norm": 0.8637421131134033, - "learning_rate": 1.0843369852249415e-05, - "loss": 1.6968, - "step": 4820 - }, - { - "epoch": 0.48995739500912966, - "grad_norm": 0.67940753698349, - "learning_rate": 1.0810627437606794e-05, - "loss": 1.7073, - "step": 4830 - }, - { - "epoch": 0.490971799553662, - "grad_norm": 0.7498750686645508, - "learning_rate": 1.0777876272659489e-05, - "loss": 1.6948, - "step": 4840 - }, - { - "epoch": 0.4919862040981944, - "grad_norm": 1.1352565288543701, - "learning_rate": 1.0745116710939417e-05, - "loss": 1.7075, - "step": 4850 - }, - { - "epoch": 0.4930006086427267, - "grad_norm": 0.764735996723175, - "learning_rate": 1.0712349106069131e-05, - "loss": 1.7074, - "step": 4860 - }, - { - "epoch": 0.4940150131872591, - "grad_norm": 0.8299955129623413, - "learning_rate": 1.0679573811758013e-05, - "loss": 1.7132, - "step": 4870 - }, - { - "epoch": 0.49502941773179143, - "grad_norm": 0.7384482622146606, - "learning_rate": 1.064679118179844e-05, - "loss": 1.7102, - "step": 4880 - }, - { - "epoch": 0.4960438222763238, - "grad_norm": 1.0672184228897095, - "learning_rate": 1.061400157006198e-05, - "loss": 1.703, - "step": 4890 - }, - { - "epoch": 0.49705822682085615, - "grad_norm": 1.1753833293914795, - "learning_rate": 1.0581205330495566e-05, - "loss": 1.7125, - "step": 4900 - }, - { - "epoch": 0.49807263136538854, - "grad_norm": 1.672736644744873, - "learning_rate": 1.0548402817117664e-05, - "loss": 1.7103, - "step": 4910 - }, - { - "epoch": 0.4990870359099209, - "grad_norm": 0.8310092687606812, - "learning_rate": 1.0515594384014479e-05, - "loss": 1.7079, - "step": 4920 - }, - { - "epoch": 0.5001014404544533, - "grad_norm": 1.6034438610076904, - "learning_rate": 1.0482780385336107e-05, - "loss": 1.7009, - "step": 4930 - }, - { - "epoch": 0.5011158449989856, - "grad_norm": 1.049529790878296, - "learning_rate": 1.0449961175292718e-05, - "loss": 1.7123, - "step": 4940 - }, - { - "epoch": 0.5021302495435179, - "grad_norm": 0.9474283456802368, - "learning_rate": 1.0417137108150744e-05, - "loss": 1.7028, - "step": 4950 - }, - { - "epoch": 0.5031446540880503, - "grad_norm": 0.680298388004303, - "learning_rate": 1.0384308538229046e-05, - "loss": 1.7168, - "step": 4960 - }, - { - "epoch": 0.5041590586325827, - "grad_norm": 0.7304694056510925, - "learning_rate": 1.0351475819895083e-05, - "loss": 1.7099, - "step": 4970 - }, - { - "epoch": 0.505173463177115, - "grad_norm": 0.8177661895751953, - "learning_rate": 1.0318639307561105e-05, - "loss": 1.7064, - "step": 4980 - }, - { - "epoch": 0.5061878677216474, - "grad_norm": 0.8717294931411743, - "learning_rate": 1.0285799355680301e-05, - "loss": 1.7177, - "step": 4990 - }, - { - "epoch": 0.5072022722661798, - "grad_norm": 0.7116250395774841, - "learning_rate": 1.0252956318743006e-05, - "loss": 1.7009, - "step": 5000 - }, - { - "epoch": 0.5082166768107121, - "grad_norm": 1.1391139030456543, - "learning_rate": 1.0220110551272847e-05, - "loss": 1.7134, - "step": 5010 - }, - { - "epoch": 0.5092310813552444, - "grad_norm": 1.6234387159347534, - "learning_rate": 1.0187262407822917e-05, - "loss": 1.7111, - "step": 5020 - }, - { - "epoch": 0.5102454858997768, - "grad_norm": 0.7143400311470032, - "learning_rate": 1.0154412242971975e-05, - "loss": 1.7108, - "step": 5030 - }, - { - "epoch": 0.5112598904443092, - "grad_norm": 0.7285455465316772, - "learning_rate": 1.0121560411320592e-05, - "loss": 1.7027, - "step": 5040 - }, - { - "epoch": 0.5122742949888416, - "grad_norm": 0.8997184634208679, - "learning_rate": 1.0088707267487326e-05, - "loss": 1.704, - "step": 5050 - }, - { - "epoch": 0.5132886995333739, - "grad_norm": 0.8084858059883118, - "learning_rate": 1.0055853166104904e-05, - "loss": 1.7036, - "step": 5060 - }, - { - "epoch": 0.5143031040779062, - "grad_norm": 1.1963047981262207, - "learning_rate": 1.002299846181639e-05, - "loss": 1.7053, - "step": 5070 - }, - { - "epoch": 0.5153175086224386, - "grad_norm": 1.5399653911590576, - "learning_rate": 9.990143509271353e-06, - "loss": 1.715, - "step": 5080 - }, - { - "epoch": 0.516331913166971, - "grad_norm": 0.9177563786506653, - "learning_rate": 9.957288663122046e-06, - "loss": 1.709, - "step": 5090 - }, - { - "epoch": 0.5173463177115033, - "grad_norm": 0.8075568675994873, - "learning_rate": 9.924434278019568e-06, - "loss": 1.7024, - "step": 5100 - }, - { - "epoch": 0.5183607222560357, - "grad_norm": 0.9145830273628235, - "learning_rate": 9.891580708610046e-06, - "loss": 1.7043, - "step": 5110 - }, - { - "epoch": 0.5193751268005681, - "grad_norm": 1.042720913887024, - "learning_rate": 9.858728309530802e-06, - "loss": 1.7032, - "step": 5120 - }, - { - "epoch": 0.5203895313451005, - "grad_norm": 0.6901386976242065, - "learning_rate": 9.825877435406523e-06, - "loss": 1.6999, - "step": 5130 - }, - { - "epoch": 0.5214039358896327, - "grad_norm": 0.7476629614830017, - "learning_rate": 9.793028440845435e-06, - "loss": 1.7094, - "step": 5140 - }, - { - "epoch": 0.5224183404341651, - "grad_norm": 0.9763286709785461, - "learning_rate": 9.760181680435471e-06, - "loss": 1.7066, - "step": 5150 - }, - { - "epoch": 0.5234327449786975, - "grad_norm": 0.6750779151916504, - "learning_rate": 9.727337508740457e-06, - "loss": 1.7112, - "step": 5160 - }, - { - "epoch": 0.5244471495232299, - "grad_norm": 1.1725136041641235, - "learning_rate": 9.694496280296271e-06, - "loss": 1.6977, - "step": 5170 - }, - { - "epoch": 0.5254615540677622, - "grad_norm": 1.5115586519241333, - "learning_rate": 9.661658349607012e-06, - "loss": 1.7079, - "step": 5180 - }, - { - "epoch": 0.5264759586122946, - "grad_norm": 0.9237322211265564, - "learning_rate": 9.628824071141195e-06, - "loss": 1.7004, - "step": 5190 - }, - { - "epoch": 0.527490363156827, - "grad_norm": 0.9567244648933411, - "learning_rate": 9.595993799327901e-06, - "loss": 1.716, - "step": 5200 - }, - { - "epoch": 0.5285047677013593, - "grad_norm": 0.9096534252166748, - "learning_rate": 9.563167888552969e-06, - "loss": 1.71, - "step": 5210 - }, - { - "epoch": 0.5295191722458916, - "grad_norm": 0.7258625626564026, - "learning_rate": 9.530346693155152e-06, - "loss": 1.7057, - "step": 5220 - }, - { - "epoch": 0.530533576790424, - "grad_norm": 1.22307288646698, - "learning_rate": 9.497530567422315e-06, - "loss": 1.7071, - "step": 5230 - }, - { - "epoch": 0.5315479813349564, - "grad_norm": 0.7654728889465332, - "learning_rate": 9.464719865587592e-06, - "loss": 1.7019, - "step": 5240 - }, - { - "epoch": 0.5325623858794888, - "grad_norm": 0.8716443181037903, - "learning_rate": 9.431914941825574e-06, - "loss": 1.6998, - "step": 5250 - }, - { - "epoch": 0.5335767904240211, - "grad_norm": 1.3258901834487915, - "learning_rate": 9.399116150248468e-06, - "loss": 1.7059, - "step": 5260 - }, - { - "epoch": 0.5345911949685535, - "grad_norm": 0.8529701828956604, - "learning_rate": 9.3663238449023e-06, - "loss": 1.7065, - "step": 5270 - }, - { - "epoch": 0.5356055995130858, - "grad_norm": 0.9370996952056885, - "learning_rate": 9.33353837976308e-06, - "loss": 1.7036, - "step": 5280 - }, - { - "epoch": 0.5366200040576182, - "grad_norm": 1.077623724937439, - "learning_rate": 9.300760108732972e-06, - "loss": 1.7119, - "step": 5290 - }, - { - "epoch": 0.5376344086021505, - "grad_norm": 1.7699486017227173, - "learning_rate": 9.267989385636494e-06, - "loss": 1.7035, - "step": 5300 - }, - { - "epoch": 0.5386488131466829, - "grad_norm": 0.7716344594955444, - "learning_rate": 9.235226564216687e-06, - "loss": 1.709, - "step": 5310 - }, - { - "epoch": 0.5396632176912153, - "grad_norm": 0.739220142364502, - "learning_rate": 9.202471998131281e-06, - "loss": 1.7006, - "step": 5320 - }, - { - "epoch": 0.5406776222357477, - "grad_norm": 0.9880684018135071, - "learning_rate": 9.169726040948916e-06, - "loss": 1.7104, - "step": 5330 - }, - { - "epoch": 0.54169202678028, - "grad_norm": 0.9975450038909912, - "learning_rate": 9.136989046145292e-06, - "loss": 1.7045, - "step": 5340 - }, - { - "epoch": 0.5427064313248123, - "grad_norm": 0.7139139771461487, - "learning_rate": 9.104261367099365e-06, - "loss": 1.7, - "step": 5350 - }, - { - "epoch": 0.5437208358693447, - "grad_norm": 1.7507296800613403, - "learning_rate": 9.071543357089541e-06, - "loss": 1.6995, - "step": 5360 - }, - { - "epoch": 0.5447352404138771, - "grad_norm": 1.2117117643356323, - "learning_rate": 9.038835369289837e-06, - "loss": 1.7028, - "step": 5370 - }, - { - "epoch": 0.5457496449584094, - "grad_norm": 0.9740832448005676, - "learning_rate": 9.0061377567661e-06, - "loss": 1.7019, - "step": 5380 - }, - { - "epoch": 0.5467640495029418, - "grad_norm": 0.6742587089538574, - "learning_rate": 8.973450872472175e-06, - "loss": 1.7057, - "step": 5390 - }, - { - "epoch": 0.5477784540474742, - "grad_norm": 0.8865513205528259, - "learning_rate": 8.940775069246103e-06, - "loss": 1.707, - "step": 5400 - }, - { - "epoch": 0.5487928585920064, - "grad_norm": 0.7690379023551941, - "learning_rate": 8.908110699806313e-06, - "loss": 1.7056, - "step": 5410 - }, - { - "epoch": 0.5498072631365388, - "grad_norm": 1.442641258239746, - "learning_rate": 8.875458116747807e-06, - "loss": 1.7042, - "step": 5420 - }, - { - "epoch": 0.5508216676810712, - "grad_norm": 0.7179170250892639, - "learning_rate": 8.842817672538359e-06, - "loss": 1.7052, - "step": 5430 - }, - { - "epoch": 0.5518360722256036, - "grad_norm": 0.9060067534446716, - "learning_rate": 8.810189719514714e-06, - "loss": 1.7115, - "step": 5440 - }, - { - "epoch": 0.5528504767701359, - "grad_norm": 1.2409850358963013, - "learning_rate": 8.777574609878787e-06, - "loss": 1.7067, - "step": 5450 - }, - { - "epoch": 0.5538648813146683, - "grad_norm": 1.020844578742981, - "learning_rate": 8.744972695693837e-06, - "loss": 1.6957, - "step": 5460 - }, - { - "epoch": 0.5548792858592007, - "grad_norm": 1.3590489625930786, - "learning_rate": 8.712384328880706e-06, - "loss": 1.7038, - "step": 5470 - }, - { - "epoch": 0.555893690403733, - "grad_norm": 0.6982019543647766, - "learning_rate": 8.679809861213982e-06, - "loss": 1.6978, - "step": 5480 - }, - { - "epoch": 0.5569080949482653, - "grad_norm": 1.1582210063934326, - "learning_rate": 8.647249644318232e-06, - "loss": 1.7097, - "step": 5490 - }, - { - "epoch": 0.5579224994927977, - "grad_norm": 0.7709252238273621, - "learning_rate": 8.614704029664181e-06, - "loss": 1.6955, - "step": 5500 - }, - { - "epoch": 0.5589369040373301, - "grad_norm": 0.7880954742431641, - "learning_rate": 8.582173368564944e-06, - "loss": 1.7077, - "step": 5510 - }, - { - "epoch": 0.5599513085818625, - "grad_norm": 0.8452306389808655, - "learning_rate": 8.549658012172206e-06, - "loss": 1.7029, - "step": 5520 - }, - { - "epoch": 0.5609657131263948, - "grad_norm": 1.1234850883483887, - "learning_rate": 8.517158311472462e-06, - "loss": 1.7052, - "step": 5530 - }, - { - "epoch": 0.5619801176709271, - "grad_norm": 2.251223564147949, - "learning_rate": 8.484674617283191e-06, - "loss": 1.6931, - "step": 5540 - }, - { - "epoch": 0.5629945222154595, - "grad_norm": 1.9055334329605103, - "learning_rate": 8.452207280249107e-06, - "loss": 1.6946, - "step": 5550 - }, - { - "epoch": 0.5640089267599919, - "grad_norm": 0.795020341873169, - "learning_rate": 8.419756650838351e-06, - "loss": 1.7005, - "step": 5560 - }, - { - "epoch": 0.5650233313045242, - "grad_norm": 0.9660155773162842, - "learning_rate": 8.387323079338715e-06, - "loss": 1.7011, - "step": 5570 - }, - { - "epoch": 0.5660377358490566, - "grad_norm": 0.6827763915061951, - "learning_rate": 8.354906915853858e-06, - "loss": 1.7024, - "step": 5580 - }, - { - "epoch": 0.567052140393589, - "grad_norm": 0.839949905872345, - "learning_rate": 8.322508510299531e-06, - "loss": 1.7009, - "step": 5590 - }, - { - "epoch": 0.5680665449381214, - "grad_norm": 0.6947374939918518, - "learning_rate": 8.290128212399792e-06, - "loss": 1.6921, - "step": 5600 - }, - { - "epoch": 0.5690809494826536, - "grad_norm": 0.7299503087997437, - "learning_rate": 8.25776637168324e-06, - "loss": 1.7024, - "step": 5610 - }, - { - "epoch": 0.570095354027186, - "grad_norm": 0.9291466474533081, - "learning_rate": 8.225423337479241e-06, - "loss": 1.7028, - "step": 5620 - }, - { - "epoch": 0.5711097585717184, - "grad_norm": 0.7141242027282715, - "learning_rate": 8.193099458914148e-06, - "loss": 1.6965, - "step": 5630 - }, - { - "epoch": 0.5721241631162508, - "grad_norm": 1.1994588375091553, - "learning_rate": 8.160795084907551e-06, - "loss": 1.7031, - "step": 5640 - }, - { - "epoch": 0.5731385676607831, - "grad_norm": 0.8997781276702881, - "learning_rate": 8.128510564168477e-06, - "loss": 1.7013, - "step": 5650 - }, - { - "epoch": 0.5741529722053155, - "grad_norm": 0.7939571738243103, - "learning_rate": 8.096246245191667e-06, - "loss": 1.6999, - "step": 5660 - }, - { - "epoch": 0.5751673767498479, - "grad_norm": 0.6805724501609802, - "learning_rate": 8.064002476253785e-06, - "loss": 1.7087, - "step": 5670 - }, - { - "epoch": 0.5761817812943802, - "grad_norm": 1.2482056617736816, - "learning_rate": 8.031779605409671e-06, - "loss": 1.7105, - "step": 5680 - }, - { - "epoch": 0.5771961858389125, - "grad_norm": 0.7727828621864319, - "learning_rate": 7.999577980488581e-06, - "loss": 1.6924, - "step": 5690 - }, - { - "epoch": 0.5782105903834449, - "grad_norm": 0.7554962635040283, - "learning_rate": 7.967397949090432e-06, - "loss": 1.6987, - "step": 5700 - }, - { - "epoch": 0.5792249949279773, - "grad_norm": 1.8765445947647095, - "learning_rate": 7.935239858582043e-06, - "loss": 1.695, - "step": 5710 - }, - { - "epoch": 0.5802393994725097, - "grad_norm": 1.4084327220916748, - "learning_rate": 7.903104056093405e-06, - "loss": 1.6988, - "step": 5720 - }, - { - "epoch": 0.581253804017042, - "grad_norm": 0.7170883417129517, - "learning_rate": 7.870990888513915e-06, - "loss": 1.6982, - "step": 5730 - }, - { - "epoch": 0.5822682085615744, - "grad_norm": 0.783585250377655, - "learning_rate": 7.838900702488634e-06, - "loss": 1.6971, - "step": 5740 - }, - { - "epoch": 0.5832826131061067, - "grad_norm": 0.8968095183372498, - "learning_rate": 7.806833844414556e-06, - "loss": 1.691, - "step": 5750 - }, - { - "epoch": 0.5842970176506391, - "grad_norm": 0.8402195572853088, - "learning_rate": 7.774790660436857e-06, - "loss": 1.697, - "step": 5760 - }, - { - "epoch": 0.5853114221951714, - "grad_norm": 1.1973276138305664, - "learning_rate": 7.742771496445167e-06, - "loss": 1.6966, - "step": 5770 - }, - { - "epoch": 0.5863258267397038, - "grad_norm": 0.8646887540817261, - "learning_rate": 7.710776698069828e-06, - "loss": 1.6933, - "step": 5780 - }, - { - "epoch": 0.5873402312842362, - "grad_norm": 0.6454582810401917, - "learning_rate": 7.678806610678173e-06, - "loss": 1.6966, - "step": 5790 - }, - { - "epoch": 0.5883546358287685, - "grad_norm": 0.7188283801078796, - "learning_rate": 7.646861579370786e-06, - "loss": 1.6938, - "step": 5800 - }, - { - "epoch": 0.5893690403733008, - "grad_norm": 0.6656209230422974, - "learning_rate": 7.614941948977798e-06, - "loss": 1.7035, - "step": 5810 - }, - { - "epoch": 0.5903834449178332, - "grad_norm": 1.2346372604370117, - "learning_rate": 7.583048064055127e-06, - "loss": 1.697, - "step": 5820 - }, - { - "epoch": 0.5913978494623656, - "grad_norm": 0.7347487807273865, - "learning_rate": 7.5511802688808e-06, - "loss": 1.6991, - "step": 5830 - }, - { - "epoch": 0.5924122540068979, - "grad_norm": 0.6772575378417969, - "learning_rate": 7.519338907451214e-06, - "loss": 1.7052, - "step": 5840 - }, - { - "epoch": 0.5934266585514303, - "grad_norm": 0.8593490123748779, - "learning_rate": 7.487524323477428e-06, - "loss": 1.6957, - "step": 5850 - }, - { - "epoch": 0.5944410630959627, - "grad_norm": 0.8644009232521057, - "learning_rate": 7.455736860381453e-06, - "loss": 1.6926, - "step": 5860 - }, - { - "epoch": 0.5954554676404951, - "grad_norm": 0.71120685338974, - "learning_rate": 7.423976861292534e-06, - "loss": 1.6929, - "step": 5870 - }, - { - "epoch": 0.5964698721850273, - "grad_norm": 1.2444875240325928, - "learning_rate": 7.39224466904347e-06, - "loss": 1.6985, - "step": 5880 - }, - { - "epoch": 0.5974842767295597, - "grad_norm": 0.7809310555458069, - "learning_rate": 7.360540626166893e-06, - "loss": 1.7052, - "step": 5890 - }, - { - "epoch": 0.5984986812740921, - "grad_norm": 1.257996916770935, - "learning_rate": 7.328865074891578e-06, - "loss": 1.6905, - "step": 5900 - }, - { - "epoch": 0.5995130858186245, - "grad_norm": 0.9928603768348694, - "learning_rate": 7.297218357138749e-06, - "loss": 1.7043, - "step": 5910 - }, - { - "epoch": 0.6005274903631568, - "grad_norm": 0.703633189201355, - "learning_rate": 7.265600814518388e-06, - "loss": 1.6972, - "step": 5920 - }, - { - "epoch": 0.6015418949076892, - "grad_norm": 0.6931165456771851, - "learning_rate": 7.2340127883255375e-06, - "loss": 1.7, - "step": 5930 - }, - { - "epoch": 0.6025562994522216, - "grad_norm": 1.3630616664886475, - "learning_rate": 7.202454619536634e-06, - "loss": 1.6989, - "step": 5940 - }, - { - "epoch": 0.6035707039967539, - "grad_norm": 1.4959807395935059, - "learning_rate": 7.17092664880582e-06, - "loss": 1.6935, - "step": 5950 - }, - { - "epoch": 0.6045851085412862, - "grad_norm": 0.7883189916610718, - "learning_rate": 7.139429216461262e-06, - "loss": 1.7022, - "step": 5960 - }, - { - "epoch": 0.6055995130858186, - "grad_norm": 0.8443313241004944, - "learning_rate": 7.107962662501482e-06, - "loss": 1.7021, - "step": 5970 - }, - { - "epoch": 0.606613917630351, - "grad_norm": 1.9770259857177734, - "learning_rate": 7.076527326591682e-06, - "loss": 1.698, - "step": 5980 - }, - { - "epoch": 0.6076283221748834, - "grad_norm": 0.6623902320861816, - "learning_rate": 7.045123548060084e-06, - "loss": 1.7003, - "step": 5990 - }, - { - "epoch": 0.6086427267194157, - "grad_norm": 1.0857994556427002, - "learning_rate": 7.013751665894266e-06, - "loss": 1.6921, - "step": 6000 - }, - { - "epoch": 0.609657131263948, - "grad_norm": 1.0366302728652954, - "learning_rate": 6.9824120187374985e-06, - "loss": 1.6965, - "step": 6010 - }, - { - "epoch": 0.6106715358084804, - "grad_norm": 0.6846364736557007, - "learning_rate": 6.951104944885094e-06, - "loss": 1.6921, - "step": 6020 - }, - { - "epoch": 0.6116859403530128, - "grad_norm": 0.7947251796722412, - "learning_rate": 6.919830782280752e-06, - "loss": 1.6938, - "step": 6030 - }, - { - "epoch": 0.6127003448975451, - "grad_norm": 0.7470448017120361, - "learning_rate": 6.888589868512907e-06, - "loss": 1.6935, - "step": 6040 - }, - { - "epoch": 0.6137147494420775, - "grad_norm": 0.9000504612922668, - "learning_rate": 6.857382540811101e-06, - "loss": 1.6978, - "step": 6050 - }, - { - "epoch": 0.6147291539866099, - "grad_norm": 0.9821270108222961, - "learning_rate": 6.826209136042321e-06, - "loss": 1.7053, - "step": 6060 - }, - { - "epoch": 0.6157435585311423, - "grad_norm": 0.6501373648643494, - "learning_rate": 6.795069990707379e-06, - "loss": 1.692, - "step": 6070 - }, - { - "epoch": 0.6167579630756745, - "grad_norm": 0.9722982048988342, - "learning_rate": 6.763965440937272e-06, - "loss": 1.6949, - "step": 6080 - }, - { - "epoch": 0.6177723676202069, - "grad_norm": 0.9131156206130981, - "learning_rate": 6.73289582248956e-06, - "loss": 1.7017, - "step": 6090 - }, - { - "epoch": 0.6187867721647393, - "grad_norm": 0.6617788076400757, - "learning_rate": 6.701861470744727e-06, - "loss": 1.6987, - "step": 6100 - }, - { - "epoch": 0.6198011767092717, - "grad_norm": 0.7107517123222351, - "learning_rate": 6.670862720702582e-06, - "loss": 1.7019, - "step": 6110 - }, - { - "epoch": 0.620815581253804, - "grad_norm": 0.8330481648445129, - "learning_rate": 6.639899906978626e-06, - "loss": 1.6979, - "step": 6120 - }, - { - "epoch": 0.6218299857983364, - "grad_norm": 1.310232162475586, - "learning_rate": 6.60897336380045e-06, - "loss": 1.703, - "step": 6130 - }, - { - "epoch": 0.6228443903428688, - "grad_norm": 0.7191615104675293, - "learning_rate": 6.578083425004125e-06, - "loss": 1.699, - "step": 6140 - }, - { - "epoch": 0.6238587948874011, - "grad_norm": 0.6662994027137756, - "learning_rate": 6.547230424030583e-06, - "loss": 1.7051, - "step": 6150 - }, - { - "epoch": 0.6248731994319334, - "grad_norm": 0.665047287940979, - "learning_rate": 6.516414693922046e-06, - "loss": 1.6959, - "step": 6160 - }, - { - "epoch": 0.6258876039764658, - "grad_norm": 0.940137505531311, - "learning_rate": 6.485636567318412e-06, - "loss": 1.6986, - "step": 6170 - }, - { - "epoch": 0.6269020085209982, - "grad_norm": 1.872345209121704, - "learning_rate": 6.454896376453667e-06, - "loss": 1.6936, - "step": 6180 - }, - { - "epoch": 0.6279164130655306, - "grad_norm": 1.0470194816589355, - "learning_rate": 6.4241944531523e-06, - "loss": 1.6934, - "step": 6190 - }, - { - "epoch": 0.6289308176100629, - "grad_norm": 0.7385777235031128, - "learning_rate": 6.393531128825729e-06, - "loss": 1.6917, - "step": 6200 - }, - { - "epoch": 0.6299452221545953, - "grad_norm": 0.8634769320487976, - "learning_rate": 6.3629067344687e-06, - "loss": 1.6981, - "step": 6210 - }, - { - "epoch": 0.6309596266991276, - "grad_norm": 0.7826641201972961, - "learning_rate": 6.332321600655743e-06, - "loss": 1.6982, - "step": 6220 - }, - { - "epoch": 0.6319740312436599, - "grad_norm": 0.6960589289665222, - "learning_rate": 6.301776057537588e-06, - "loss": 1.6999, - "step": 6230 - }, - { - "epoch": 0.6329884357881923, - "grad_norm": 0.6992986798286438, - "learning_rate": 6.271270434837604e-06, - "loss": 1.6925, - "step": 6240 - }, - { - "epoch": 0.6340028403327247, - "grad_norm": 0.7040351033210754, - "learning_rate": 6.240805061848238e-06, - "loss": 1.6941, - "step": 6250 - }, - { - "epoch": 0.6350172448772571, - "grad_norm": 1.0318106412887573, - "learning_rate": 6.210380267427467e-06, - "loss": 1.6992, - "step": 6260 - }, - { - "epoch": 0.6360316494217894, - "grad_norm": 0.8349300026893616, - "learning_rate": 6.179996379995231e-06, - "loss": 1.6969, - "step": 6270 - }, - { - "epoch": 0.6370460539663217, - "grad_norm": 1.4219266176223755, - "learning_rate": 6.149653727529913e-06, - "loss": 1.6975, - "step": 6280 - }, - { - "epoch": 0.6380604585108541, - "grad_norm": 0.728640615940094, - "learning_rate": 6.1193526375647815e-06, - "loss": 1.6879, - "step": 6290 - }, - { - "epoch": 0.6390748630553865, - "grad_norm": 1.3021235466003418, - "learning_rate": 6.089093437184459e-06, - "loss": 1.6931, - "step": 6300 - }, - { - "epoch": 0.6400892675999188, - "grad_norm": 0.6404723525047302, - "learning_rate": 6.058876453021394e-06, - "loss": 1.6914, - "step": 6310 - }, - { - "epoch": 0.6411036721444512, - "grad_norm": 0.8133106827735901, - "learning_rate": 6.028702011252323e-06, - "loss": 1.6944, - "step": 6320 - }, - { - "epoch": 0.6421180766889836, - "grad_norm": 0.8269804120063782, - "learning_rate": 5.998570437594775e-06, - "loss": 1.6926, - "step": 6330 - }, - { - "epoch": 0.643132481233516, - "grad_norm": 1.3558006286621094, - "learning_rate": 5.968482057303526e-06, - "loss": 1.701, - "step": 6340 - }, - { - "epoch": 0.6441468857780482, - "grad_norm": 1.1384729146957397, - "learning_rate": 5.938437195167108e-06, - "loss": 1.6864, - "step": 6350 - }, - { - "epoch": 0.6451612903225806, - "grad_norm": 1.2502055168151855, - "learning_rate": 5.908436175504299e-06, - "loss": 1.7046, - "step": 6360 - }, - { - "epoch": 0.646175694867113, - "grad_norm": 2.4415109157562256, - "learning_rate": 5.878479322160621e-06, - "loss": 1.692, - "step": 6370 - }, - { - "epoch": 0.6471900994116454, - "grad_norm": 0.7759987115859985, - "learning_rate": 5.848566958504831e-06, - "loss": 1.6892, - "step": 6380 - }, - { - "epoch": 0.6482045039561777, - "grad_norm": 0.7297446131706238, - "learning_rate": 5.818699407425454e-06, - "loss": 1.6859, - "step": 6390 - }, - { - "epoch": 0.6492189085007101, - "grad_norm": 0.7481786012649536, - "learning_rate": 5.788876991327288e-06, - "loss": 1.6972, - "step": 6400 - }, - { - "epoch": 0.6502333130452425, - "grad_norm": 0.9783136248588562, - "learning_rate": 5.759100032127915e-06, - "loss": 1.696, - "step": 6410 - }, - { - "epoch": 0.6512477175897748, - "grad_norm": 0.7211257219314575, - "learning_rate": 5.729368851254244e-06, - "loss": 1.6995, - "step": 6420 - }, - { - "epoch": 0.6522621221343071, - "grad_norm": 0.7238343954086304, - "learning_rate": 5.699683769639014e-06, - "loss": 1.6945, - "step": 6430 - }, - { - "epoch": 0.6532765266788395, - "grad_norm": 0.6956850290298462, - "learning_rate": 5.670045107717358e-06, - "loss": 1.7029, - "step": 6440 - }, - { - "epoch": 0.6542909312233719, - "grad_norm": 0.9732932448387146, - "learning_rate": 5.640453185423331e-06, - "loss": 1.6884, - "step": 6450 - }, - { - "epoch": 0.6553053357679043, - "grad_norm": 1.271252989768982, - "learning_rate": 5.610908322186459e-06, - "loss": 1.6904, - "step": 6460 - }, - { - "epoch": 0.6563197403124366, - "grad_norm": 1.570483684539795, - "learning_rate": 5.5814108369282824e-06, - "loss": 1.6854, - "step": 6470 - }, - { - "epoch": 0.657334144856969, - "grad_norm": 0.7242071628570557, - "learning_rate": 5.551961048058927e-06, - "loss": 1.6962, - "step": 6480 - }, - { - "epoch": 0.6583485494015013, - "grad_norm": 0.8546056747436523, - "learning_rate": 5.5225592734736645e-06, - "loss": 1.6967, - "step": 6490 - }, - { - "epoch": 0.6593629539460337, - "grad_norm": 1.0872573852539062, - "learning_rate": 5.493205830549466e-06, - "loss": 1.697, - "step": 6500 - }, - { - "epoch": 0.660377358490566, - "grad_norm": 0.8840607404708862, - "learning_rate": 5.4639010361415945e-06, - "loss": 1.6952, - "step": 6510 - }, - { - "epoch": 0.6613917630350984, - "grad_norm": 0.8018211722373962, - "learning_rate": 5.434645206580178e-06, - "loss": 1.6962, - "step": 6520 - }, - { - "epoch": 0.6624061675796308, - "grad_norm": 1.141829013824463, - "learning_rate": 5.4054386576667935e-06, - "loss": 1.6954, - "step": 6530 - }, - { - "epoch": 0.6634205721241632, - "grad_norm": 0.6727152466773987, - "learning_rate": 5.37628170467106e-06, - "loss": 1.6877, - "step": 6540 - }, - { - "epoch": 0.6644349766686954, - "grad_norm": 0.8944599628448486, - "learning_rate": 5.347174662327226e-06, - "loss": 1.6928, - "step": 6550 - }, - { - "epoch": 0.6654493812132278, - "grad_norm": 0.7954721450805664, - "learning_rate": 5.31811784483079e-06, - "loss": 1.6986, - "step": 6560 - }, - { - "epoch": 0.6664637857577602, - "grad_norm": 0.7883242964744568, - "learning_rate": 5.289111565835094e-06, - "loss": 1.6891, - "step": 6570 - }, - { - "epoch": 0.6674781903022926, - "grad_norm": 0.970805823802948, - "learning_rate": 5.260156138447947e-06, - "loss": 1.6913, - "step": 6580 - }, - { - "epoch": 0.6684925948468249, - "grad_norm": 0.7309479117393494, - "learning_rate": 5.231251875228243e-06, - "loss": 1.7019, - "step": 6590 - }, - { - "epoch": 0.6695069993913573, - "grad_norm": 1.407232642173767, - "learning_rate": 5.20239908818257e-06, - "loss": 1.6913, - "step": 6600 - }, - { - "epoch": 0.6705214039358897, - "grad_norm": 0.7800869345664978, - "learning_rate": 5.1735980887618745e-06, - "loss": 1.6965, - "step": 6610 - }, - { - "epoch": 0.671535808480422, - "grad_norm": 1.0743424892425537, - "learning_rate": 5.144849187858075e-06, - "loss": 1.692, - "step": 6620 - }, - { - "epoch": 0.6725502130249543, - "grad_norm": 0.6856610774993896, - "learning_rate": 5.116152695800716e-06, - "loss": 1.694, - "step": 6630 - }, - { - "epoch": 0.6735646175694867, - "grad_norm": 0.6834506392478943, - "learning_rate": 5.0875089223536125e-06, - "loss": 1.7013, - "step": 6640 - }, - { - "epoch": 0.6745790221140191, - "grad_norm": 0.8020355701446533, - "learning_rate": 5.058918176711516e-06, - "loss": 1.6963, - "step": 6650 - }, - { - "epoch": 0.6755934266585514, - "grad_norm": 0.8195199966430664, - "learning_rate": 5.030380767496756e-06, - "loss": 1.6971, - "step": 6660 - }, - { - "epoch": 0.6766078312030838, - "grad_norm": 0.8929177522659302, - "learning_rate": 5.001897002755937e-06, - "loss": 1.6912, - "step": 6670 - }, - { - "epoch": 0.6776222357476162, - "grad_norm": 0.7102656960487366, - "learning_rate": 4.973467189956596e-06, - "loss": 1.6984, - "step": 6680 - }, - { - "epoch": 0.6786366402921485, - "grad_norm": 0.6538779735565186, - "learning_rate": 4.945091635983879e-06, - "loss": 1.6913, - "step": 6690 - }, - { - "epoch": 0.6796510448366808, - "grad_norm": 1.158691167831421, - "learning_rate": 4.9167706471372515e-06, - "loss": 1.6918, - "step": 6700 - }, - { - "epoch": 0.6806654493812132, - "grad_norm": 0.9895077347755432, - "learning_rate": 4.888504529127154e-06, - "loss": 1.6925, - "step": 6710 - }, - { - "epoch": 0.6816798539257456, - "grad_norm": 0.6967589259147644, - "learning_rate": 4.860293587071745e-06, - "loss": 1.6948, - "step": 6720 - }, - { - "epoch": 0.682694258470278, - "grad_norm": 1.211841344833374, - "learning_rate": 4.832138125493577e-06, - "loss": 1.6991, - "step": 6730 - }, - { - "epoch": 0.6837086630148103, - "grad_norm": 1.994767427444458, - "learning_rate": 4.804038448316323e-06, - "loss": 1.6886, - "step": 6740 - }, - { - "epoch": 0.6847230675593426, - "grad_norm": 0.7147320508956909, - "learning_rate": 4.775994858861492e-06, - "loss": 1.6857, - "step": 6750 - }, - { - "epoch": 0.685737472103875, - "grad_norm": 0.7403655052185059, - "learning_rate": 4.748007659845158e-06, - "loss": 1.694, - "step": 6760 - }, - { - "epoch": 0.6867518766484074, - "grad_norm": 0.7203643918037415, - "learning_rate": 4.720077153374674e-06, - "loss": 1.6896, - "step": 6770 - }, - { - "epoch": 0.6877662811929397, - "grad_norm": 0.7995414137840271, - "learning_rate": 4.6922036409454465e-06, - "loss": 1.7006, - "step": 6780 - }, - { - "epoch": 0.6887806857374721, - "grad_norm": 0.7858167886734009, - "learning_rate": 4.664387423437647e-06, - "loss": 1.6903, - "step": 6790 - }, - { - "epoch": 0.6897950902820045, - "grad_norm": 0.6948023438453674, - "learning_rate": 4.636628801112987e-06, - "loss": 1.6883, - "step": 6800 - }, - { - "epoch": 0.6908094948265369, - "grad_norm": 0.8593195676803589, - "learning_rate": 4.608928073611462e-06, - "loss": 1.6901, - "step": 6810 - }, - { - "epoch": 0.6918238993710691, - "grad_norm": 0.7997449040412903, - "learning_rate": 4.581285539948126e-06, - "loss": 1.6841, - "step": 6820 - }, - { - "epoch": 0.6928383039156015, - "grad_norm": 1.3550745248794556, - "learning_rate": 4.553701498509854e-06, - "loss": 1.6912, - "step": 6830 - }, - { - "epoch": 0.6938527084601339, - "grad_norm": 0.7959534525871277, - "learning_rate": 4.526176247052136e-06, - "loss": 1.687, - "step": 6840 - }, - { - "epoch": 0.6948671130046663, - "grad_norm": 0.701482355594635, - "learning_rate": 4.498710082695848e-06, - "loss": 1.6912, - "step": 6850 - }, - { - "epoch": 0.6958815175491986, - "grad_norm": 0.688855767250061, - "learning_rate": 4.4713033019240595e-06, - "loss": 1.6859, - "step": 6860 - }, - { - "epoch": 0.696895922093731, - "grad_norm": 0.7985575795173645, - "learning_rate": 4.443956200578819e-06, - "loss": 1.6857, - "step": 6870 - }, - { - "epoch": 0.6979103266382634, - "grad_norm": 0.710429310798645, - "learning_rate": 4.41666907385796e-06, - "loss": 1.6864, - "step": 6880 - }, - { - "epoch": 0.6989247311827957, - "grad_norm": 0.6469950675964355, - "learning_rate": 4.389442216311933e-06, - "loss": 1.691, - "step": 6890 - }, - { - "epoch": 0.699939135727328, - "grad_norm": 1.0438098907470703, - "learning_rate": 4.362275921840608e-06, - "loss": 1.6892, - "step": 6900 - }, - { - "epoch": 0.7009535402718604, - "grad_norm": 0.9447315335273743, - "learning_rate": 4.3351704836901084e-06, - "loss": 1.6919, - "step": 6910 - }, - { - "epoch": 0.7019679448163928, - "grad_norm": 0.846228837966919, - "learning_rate": 4.308126194449647e-06, - "loss": 1.6871, - "step": 6920 - }, - { - "epoch": 0.7029823493609252, - "grad_norm": 0.8632501363754272, - "learning_rate": 4.2811433460483655e-06, - "loss": 1.6891, - "step": 6930 - }, - { - "epoch": 0.7039967539054575, - "grad_norm": 0.9589157700538635, - "learning_rate": 4.254222229752175e-06, - "loss": 1.6965, - "step": 6940 - }, - { - "epoch": 0.7050111584499898, - "grad_norm": 0.7905771136283875, - "learning_rate": 4.227363136160633e-06, - "loss": 1.6888, - "step": 6950 - }, - { - "epoch": 0.7060255629945222, - "grad_norm": 1.2884942293167114, - "learning_rate": 4.200566355203784e-06, - "loss": 1.6943, - "step": 6960 - }, - { - "epoch": 0.7070399675390546, - "grad_norm": 0.788606584072113, - "learning_rate": 4.1738321761390485e-06, - "loss": 1.6938, - "step": 6970 - }, - { - "epoch": 0.7080543720835869, - "grad_norm": 0.7270281910896301, - "learning_rate": 4.147160887548089e-06, - "loss": 1.686, - "step": 6980 - }, - { - "epoch": 0.7090687766281193, - "grad_norm": 0.6702724099159241, - "learning_rate": 4.120552777333687e-06, - "loss": 1.6823, - "step": 6990 - }, - { - "epoch": 0.7100831811726517, - "grad_norm": 0.6619483828544617, - "learning_rate": 4.094008132716662e-06, - "loss": 1.6887, - "step": 7000 - }, - { - "epoch": 0.7110975857171841, - "grad_norm": 0.6733633875846863, - "learning_rate": 4.067527240232748e-06, - "loss": 1.6802, - "step": 7010 - }, - { - "epoch": 0.7121119902617163, - "grad_norm": 0.6545982956886292, - "learning_rate": 4.0411103857295105e-06, - "loss": 1.6871, - "step": 7020 - }, - { - "epoch": 0.7131263948062487, - "grad_norm": 0.7236390709877014, - "learning_rate": 4.014757854363249e-06, - "loss": 1.6859, - "step": 7030 - }, - { - "epoch": 0.7141407993507811, - "grad_norm": 1.133027195930481, - "learning_rate": 3.9884699305959385e-06, - "loss": 1.6789, - "step": 7040 - }, - { - "epoch": 0.7151552038953134, - "grad_norm": 0.7678714990615845, - "learning_rate": 3.962246898192144e-06, - "loss": 1.6885, - "step": 7050 - }, - { - "epoch": 0.7161696084398458, - "grad_norm": 0.8332532644271851, - "learning_rate": 3.936089040215963e-06, - "loss": 1.6923, - "step": 7060 - }, - { - "epoch": 0.7171840129843782, - "grad_norm": 0.9309415817260742, - "learning_rate": 3.909996639027966e-06, - "loss": 1.6933, - "step": 7070 - }, - { - "epoch": 0.7181984175289106, - "grad_norm": 0.845024585723877, - "learning_rate": 3.883969976282152e-06, - "loss": 1.6872, - "step": 7080 - }, - { - "epoch": 0.7192128220734428, - "grad_norm": 0.6225118041038513, - "learning_rate": 3.858009332922911e-06, - "loss": 1.6911, - "step": 7090 - }, - { - "epoch": 0.7202272266179752, - "grad_norm": 1.0944541692733765, - "learning_rate": 3.832114989181988e-06, - "loss": 1.6827, - "step": 7100 - }, - { - "epoch": 0.7212416311625076, - "grad_norm": 0.6674110293388367, - "learning_rate": 3.8062872245754468e-06, - "loss": 1.6871, - "step": 7110 - }, - { - "epoch": 0.72225603570704, - "grad_norm": 0.8871638178825378, - "learning_rate": 3.780526317900679e-06, - "loss": 1.6884, - "step": 7120 - }, - { - "epoch": 0.7232704402515723, - "grad_norm": 0.7796902656555176, - "learning_rate": 3.754832547233369e-06, - "loss": 1.6918, - "step": 7130 - }, - { - "epoch": 0.7242848447961047, - "grad_norm": 0.9538637399673462, - "learning_rate": 3.729206189924509e-06, - "loss": 1.69, - "step": 7140 - }, - { - "epoch": 0.725299249340637, - "grad_norm": 0.7675561308860779, - "learning_rate": 3.7036475225973977e-06, - "loss": 1.6992, - "step": 7150 - }, - { - "epoch": 0.7263136538851694, - "grad_norm": 0.6488850116729736, - "learning_rate": 3.678156821144647e-06, - "loss": 1.6867, - "step": 7160 - }, - { - "epoch": 0.7273280584297017, - "grad_norm": 0.6786872744560242, - "learning_rate": 3.652734360725224e-06, - "loss": 1.6827, - "step": 7170 - }, - { - "epoch": 0.7283424629742341, - "grad_norm": 0.7817838191986084, - "learning_rate": 3.6273804157614623e-06, - "loss": 1.6838, - "step": 7180 - }, - { - "epoch": 0.7293568675187665, - "grad_norm": 0.8888971209526062, - "learning_rate": 3.6020952599361083e-06, - "loss": 1.6844, - "step": 7190 - }, - { - "epoch": 0.7303712720632989, - "grad_norm": 0.6721639037132263, - "learning_rate": 3.5768791661893653e-06, - "loss": 1.689, - "step": 7200 - }, - { - "epoch": 0.7313856766078312, - "grad_norm": 1.8566421270370483, - "learning_rate": 3.551732406715951e-06, - "loss": 1.6784, - "step": 7210 - }, - { - "epoch": 0.7324000811523635, - "grad_norm": 0.8284285664558411, - "learning_rate": 3.5266552529621434e-06, - "loss": 1.6894, - "step": 7220 - }, - { - "epoch": 0.7334144856968959, - "grad_norm": 0.8408186435699463, - "learning_rate": 3.501647975622876e-06, - "loss": 1.6841, - "step": 7230 - }, - { - "epoch": 0.7344288902414283, - "grad_norm": 0.815199077129364, - "learning_rate": 3.4767108446387955e-06, - "loss": 1.6874, - "step": 7240 - }, - { - "epoch": 0.7354432947859606, - "grad_norm": 0.9302525520324707, - "learning_rate": 3.4518441291933556e-06, - "loss": 1.6904, - "step": 7250 - }, - { - "epoch": 0.736457699330493, - "grad_norm": 1.384047031402588, - "learning_rate": 3.4270480977099172e-06, - "loss": 1.6841, - "step": 7260 - }, - { - "epoch": 0.7374721038750254, - "grad_norm": 0.6799222826957703, - "learning_rate": 3.402323017848832e-06, - "loss": 1.6881, - "step": 7270 - }, - { - "epoch": 0.7384865084195578, - "grad_norm": 0.6889559626579285, - "learning_rate": 3.3776691565045727e-06, - "loss": 1.6908, - "step": 7280 - }, - { - "epoch": 0.73950091296409, - "grad_norm": 1.0018811225891113, - "learning_rate": 3.3530867798028476e-06, - "loss": 1.6923, - "step": 7290 - }, - { - "epoch": 0.7405153175086224, - "grad_norm": 0.6559397578239441, - "learning_rate": 3.32857615309772e-06, - "loss": 1.6855, - "step": 7300 - }, - { - "epoch": 0.7415297220531548, - "grad_norm": 1.1891716718673706, - "learning_rate": 3.3041375409687525e-06, - "loss": 1.6896, - "step": 7310 - }, - { - "epoch": 0.7425441265976872, - "grad_norm": 1.5389657020568848, - "learning_rate": 3.279771207218149e-06, - "loss": 1.6939, - "step": 7320 - }, - { - "epoch": 0.7435585311422195, - "grad_norm": 0.85477215051651, - "learning_rate": 3.2554774148678945e-06, - "loss": 1.6889, - "step": 7330 - }, - { - "epoch": 0.7445729356867519, - "grad_norm": 1.2386493682861328, - "learning_rate": 3.23125642615694e-06, - "loss": 1.6783, - "step": 7340 - }, - { - "epoch": 0.7455873402312843, - "grad_norm": 0.6974436640739441, - "learning_rate": 3.207108502538354e-06, - "loss": 1.6848, - "step": 7350 - }, - { - "epoch": 0.7466017447758166, - "grad_norm": 1.2174402475357056, - "learning_rate": 3.1830339046765045e-06, - "loss": 1.6769, - "step": 7360 - }, - { - "epoch": 0.7476161493203489, - "grad_norm": 0.9583227038383484, - "learning_rate": 3.15903289244425e-06, - "loss": 1.6934, - "step": 7370 - }, - { - "epoch": 0.7486305538648813, - "grad_norm": 0.7439892292022705, - "learning_rate": 3.13510572492012e-06, - "loss": 1.6854, - "step": 7380 - }, - { - "epoch": 0.7496449584094137, - "grad_norm": 0.8371275663375854, - "learning_rate": 3.1112526603855385e-06, - "loss": 1.6886, - "step": 7390 - }, - { - "epoch": 0.7506593629539461, - "grad_norm": 0.7316817045211792, - "learning_rate": 3.0874739563220225e-06, - "loss": 1.6853, - "step": 7400 - }, - { - "epoch": 0.7516737674984784, - "grad_norm": 0.7425588965415955, - "learning_rate": 3.063769869408405e-06, - "loss": 1.6886, - "step": 7410 - }, - { - "epoch": 0.7526881720430108, - "grad_norm": 0.6400764584541321, - "learning_rate": 3.0401406555180666e-06, - "loss": 1.6839, - "step": 7420 - }, - { - "epoch": 0.7537025765875431, - "grad_norm": 1.0293561220169067, - "learning_rate": 3.0165865697161777e-06, - "loss": 1.6878, - "step": 7430 - }, - { - "epoch": 0.7547169811320755, - "grad_norm": 0.7544503211975098, - "learning_rate": 2.9931078662569256e-06, - "loss": 1.6901, - "step": 7440 - }, - { - "epoch": 0.7557313856766078, - "grad_norm": 0.7823988199234009, - "learning_rate": 2.969704798580796e-06, - "loss": 1.6853, - "step": 7450 - }, - { - "epoch": 0.7567457902211402, - "grad_norm": 0.691929817199707, - "learning_rate": 2.946377619311822e-06, - "loss": 1.6798, - "step": 7460 - }, - { - "epoch": 0.7577601947656726, - "grad_norm": 0.6566022038459778, - "learning_rate": 2.9231265802548623e-06, - "loss": 1.6858, - "step": 7470 - }, - { - "epoch": 0.7587745993102049, - "grad_norm": 0.8671581745147705, - "learning_rate": 2.8999519323928783e-06, - "loss": 1.6936, - "step": 7480 - }, - { - "epoch": 0.7597890038547372, - "grad_norm": 0.8594644069671631, - "learning_rate": 2.8768539258842333e-06, - "loss": 1.6898, - "step": 7490 - }, - { - "epoch": 0.7608034083992696, - "grad_norm": 1.0892634391784668, - "learning_rate": 2.8538328100599753e-06, - "loss": 1.6851, - "step": 7500 - }, - { - "epoch": 0.761817812943802, - "grad_norm": 0.6834043264389038, - "learning_rate": 2.830888833421168e-06, - "loss": 1.6829, - "step": 7510 - }, - { - "epoch": 0.7628322174883343, - "grad_norm": 0.8435320258140564, - "learning_rate": 2.8080222436361937e-06, - "loss": 1.6773, - "step": 7520 - }, - { - "epoch": 0.7638466220328667, - "grad_norm": 0.919886589050293, - "learning_rate": 2.785233287538085e-06, - "loss": 1.6865, - "step": 7530 - }, - { - "epoch": 0.7648610265773991, - "grad_norm": 0.7416160106658936, - "learning_rate": 2.7625222111218597e-06, - "loss": 1.6816, - "step": 7540 - }, - { - "epoch": 0.7658754311219315, - "grad_norm": 0.6899641156196594, - "learning_rate": 2.7398892595418558e-06, - "loss": 1.683, - "step": 7550 - }, - { - "epoch": 0.7668898356664637, - "grad_norm": 0.8722615242004395, - "learning_rate": 2.717334677109106e-06, - "loss": 1.6771, - "step": 7560 - }, - { - "epoch": 0.7679042402109961, - "grad_norm": 0.7188706994056702, - "learning_rate": 2.694858707288681e-06, - "loss": 1.6812, - "step": 7570 - }, - { - "epoch": 0.7689186447555285, - "grad_norm": 0.8512191772460938, - "learning_rate": 2.6724615926970753e-06, - "loss": 1.6861, - "step": 7580 - }, - { - "epoch": 0.7699330493000609, - "grad_norm": 0.6833221912384033, - "learning_rate": 2.650143575099573e-06, - "loss": 1.6839, - "step": 7590 - }, - { - "epoch": 0.7709474538445932, - "grad_norm": 0.6480352282524109, - "learning_rate": 2.6279048954076536e-06, - "loss": 1.686, - "step": 7600 - }, - { - "epoch": 0.7719618583891256, - "grad_norm": 0.699713945388794, - "learning_rate": 2.6057457936763832e-06, - "loss": 1.6864, - "step": 7610 - }, - { - "epoch": 0.772976262933658, - "grad_norm": 0.6632005572319031, - "learning_rate": 2.5836665091018276e-06, - "loss": 1.6898, - "step": 7620 - }, - { - "epoch": 0.7739906674781903, - "grad_norm": 0.6848092079162598, - "learning_rate": 2.5616672800184605e-06, - "loss": 1.6815, - "step": 7630 - }, - { - "epoch": 0.7750050720227226, - "grad_norm": 0.691442608833313, - "learning_rate": 2.539748343896604e-06, - "loss": 1.6797, - "step": 7640 - }, - { - "epoch": 0.776019476567255, - "grad_norm": 0.6606059670448303, - "learning_rate": 2.5179099373398576e-06, - "loss": 1.6896, - "step": 7650 - }, - { - "epoch": 0.7770338811117874, - "grad_norm": 0.681952714920044, - "learning_rate": 2.496152296082548e-06, - "loss": 1.6748, - "step": 7660 - }, - { - "epoch": 0.7780482856563198, - "grad_norm": 0.7417349815368652, - "learning_rate": 2.474475654987175e-06, - "loss": 1.6772, - "step": 7670 - }, - { - "epoch": 0.7790626902008521, - "grad_norm": 0.8655144572257996, - "learning_rate": 2.45288024804189e-06, - "loss": 1.6937, - "step": 7680 - }, - { - "epoch": 0.7800770947453844, - "grad_norm": 0.7213091850280762, - "learning_rate": 2.431366308357962e-06, - "loss": 1.6857, - "step": 7690 - }, - { - "epoch": 0.7810914992899168, - "grad_norm": 0.7388828992843628, - "learning_rate": 2.4099340681672635e-06, - "loss": 1.6884, - "step": 7700 - }, - { - "epoch": 0.7821059038344492, - "grad_norm": 0.6804307103157043, - "learning_rate": 2.3885837588197657e-06, - "loss": 1.6819, - "step": 7710 - }, - { - "epoch": 0.7831203083789815, - "grad_norm": 0.7278358340263367, - "learning_rate": 2.367315610781028e-06, - "loss": 1.6728, - "step": 7720 - }, - { - "epoch": 0.7841347129235139, - "grad_norm": 0.6935149431228638, - "learning_rate": 2.3461298536297326e-06, - "loss": 1.6788, - "step": 7730 - }, - { - "epoch": 0.7851491174680463, - "grad_norm": 0.7839821577072144, - "learning_rate": 2.325026716055191e-06, - "loss": 1.6812, - "step": 7740 - }, - { - "epoch": 0.7861635220125787, - "grad_norm": 0.680661141872406, - "learning_rate": 2.3040064258548767e-06, - "loss": 1.6879, - "step": 7750 - }, - { - "epoch": 0.7871779265571109, - "grad_norm": 0.6763148307800293, - "learning_rate": 2.2830692099319707e-06, - "loss": 1.681, - "step": 7760 - }, - { - "epoch": 0.7881923311016433, - "grad_norm": 0.7075271606445312, - "learning_rate": 2.262215294292912e-06, - "loss": 1.6851, - "step": 7770 - }, - { - "epoch": 0.7892067356461757, - "grad_norm": 0.7013016939163208, - "learning_rate": 2.241444904044949e-06, - "loss": 1.673, - "step": 7780 - }, - { - "epoch": 0.7902211401907081, - "grad_norm": 0.7250566482543945, - "learning_rate": 2.220758263393724e-06, - "loss": 1.683, - "step": 7790 - }, - { - "epoch": 0.7912355447352404, - "grad_norm": 0.9809650778770447, - "learning_rate": 2.2001555956408428e-06, - "loss": 1.6881, - "step": 7800 - }, - { - "epoch": 0.7922499492797728, - "grad_norm": 0.9492971301078796, - "learning_rate": 2.179637123181466e-06, - "loss": 1.6884, - "step": 7810 - }, - { - "epoch": 0.7932643538243052, - "grad_norm": 0.6466846466064453, - "learning_rate": 2.1592030675019147e-06, - "loss": 1.6808, - "step": 7820 - }, - { - "epoch": 0.7942787583688375, - "grad_norm": 0.6859692931175232, - "learning_rate": 2.138853649177266e-06, - "loss": 1.6699, - "step": 7830 - }, - { - "epoch": 0.7952931629133698, - "grad_norm": 0.7105811834335327, - "learning_rate": 2.1185890878689853e-06, - "loss": 1.6874, - "step": 7840 - }, - { - "epoch": 0.7963075674579022, - "grad_norm": 0.8044138550758362, - "learning_rate": 2.098409602322551e-06, - "loss": 1.6881, - "step": 7850 - }, - { - "epoch": 0.7973219720024346, - "grad_norm": 1.1749554872512817, - "learning_rate": 2.0783154103650905e-06, - "loss": 1.6856, - "step": 7860 - }, - { - "epoch": 0.7983363765469669, - "grad_norm": 0.9438343048095703, - "learning_rate": 2.0583067289030334e-06, - "loss": 1.6852, - "step": 7870 - }, - { - "epoch": 0.7993507810914993, - "grad_norm": 1.2980304956436157, - "learning_rate": 2.0383837739197687e-06, - "loss": 1.6771, - "step": 7880 - }, - { - "epoch": 0.8003651856360317, - "grad_norm": 0.6881336569786072, - "learning_rate": 2.0185467604733034e-06, - "loss": 1.6789, - "step": 7890 - }, - { - "epoch": 0.801379590180564, - "grad_norm": 0.8108803033828735, - "learning_rate": 1.9987959026939597e-06, - "loss": 1.6928, - "step": 7900 - }, - { - "epoch": 0.8023939947250963, - "grad_norm": 0.9757908582687378, - "learning_rate": 1.9791314137820504e-06, - "loss": 1.682, - "step": 7910 - }, - { - "epoch": 0.8034083992696287, - "grad_norm": 0.6156294345855713, - "learning_rate": 1.9595535060055815e-06, - "loss": 1.6749, - "step": 7920 - }, - { - "epoch": 0.8044228038141611, - "grad_norm": 1.0827484130859375, - "learning_rate": 1.940062390697962e-06, - "loss": 1.6822, - "step": 7930 - }, - { - "epoch": 0.8054372083586935, - "grad_norm": 1.3421286344528198, - "learning_rate": 1.920658278255714e-06, - "loss": 1.6796, - "step": 7940 - }, - { - "epoch": 0.8064516129032258, - "grad_norm": 0.6466681957244873, - "learning_rate": 1.9013413781362155e-06, - "loss": 1.6766, - "step": 7950 - }, - { - "epoch": 0.8074660174477581, - "grad_norm": 0.8725202679634094, - "learning_rate": 1.882111898855431e-06, - "loss": 1.6838, - "step": 7960 - }, - { - "epoch": 0.8084804219922905, - "grad_norm": 0.6975882649421692, - "learning_rate": 1.8629700479856616e-06, - "loss": 1.6808, - "step": 7970 - }, - { - "epoch": 0.8094948265368229, - "grad_norm": 1.20611572265625, - "learning_rate": 1.8439160321533055e-06, - "loss": 1.6859, - "step": 7980 - }, - { - "epoch": 0.8105092310813552, - "grad_norm": 0.6343019604682922, - "learning_rate": 1.8249500570366296e-06, - "loss": 1.687, - "step": 7990 - }, - { - "epoch": 0.8115236356258876, - "grad_norm": 0.6387778520584106, - "learning_rate": 1.806072327363536e-06, - "loss": 1.6845, - "step": 8000 - }, - { - "epoch": 0.81253804017042, - "grad_norm": 0.6350755095481873, - "learning_rate": 1.787283046909376e-06, - "loss": 1.6797, - "step": 8010 - }, - { - "epoch": 0.8135524447149524, - "grad_norm": 0.7560693621635437, - "learning_rate": 1.7685824184947298e-06, - "loss": 1.6795, - "step": 8020 - }, - { - "epoch": 0.8145668492594846, - "grad_norm": 0.6946399807929993, - "learning_rate": 1.7499706439832275e-06, - "loss": 1.6759, - "step": 8030 - }, - { - "epoch": 0.815581253804017, - "grad_norm": 0.7456703186035156, - "learning_rate": 1.731447924279366e-06, - "loss": 1.6826, - "step": 8040 - }, - { - "epoch": 0.8165956583485494, - "grad_norm": 0.6817774176597595, - "learning_rate": 1.7130144593263442e-06, - "loss": 1.6694, - "step": 8050 - }, - { - "epoch": 0.8176100628930818, - "grad_norm": 0.748322069644928, - "learning_rate": 1.6946704481038945e-06, - "loss": 1.6802, - "step": 8060 - }, - { - "epoch": 0.8186244674376141, - "grad_norm": 0.9055871367454529, - "learning_rate": 1.6764160886261505e-06, - "loss": 1.6786, - "step": 8070 - }, - { - "epoch": 0.8196388719821465, - "grad_norm": 0.7281890511512756, - "learning_rate": 1.658251577939497e-06, - "loss": 1.6733, - "step": 8080 - }, - { - "epoch": 0.8206532765266789, - "grad_norm": 0.7686569094657898, - "learning_rate": 1.6401771121204512e-06, - "loss": 1.6875, - "step": 8090 - }, - { - "epoch": 0.8216676810712112, - "grad_norm": 0.6771758198738098, - "learning_rate": 1.6221928862735414e-06, - "loss": 1.6786, - "step": 8100 - }, - { - "epoch": 0.8226820856157435, - "grad_norm": 0.7121849656105042, - "learning_rate": 1.6042990945291969e-06, - "loss": 1.6782, - "step": 8110 - }, - { - "epoch": 0.8236964901602759, - "grad_norm": 0.7387982606887817, - "learning_rate": 1.5864959300416626e-06, - "loss": 1.681, - "step": 8120 - }, - { - "epoch": 0.8247108947048083, - "grad_norm": 0.7719337940216064, - "learning_rate": 1.5687835849869093e-06, - "loss": 1.6724, - "step": 8130 - }, - { - "epoch": 0.8257252992493407, - "grad_norm": 0.7453306913375854, - "learning_rate": 1.5511622505605562e-06, - "loss": 1.6786, - "step": 8140 - }, - { - "epoch": 0.826739703793873, - "grad_norm": 0.6602176427841187, - "learning_rate": 1.533632116975814e-06, - "loss": 1.6815, - "step": 8150 - }, - { - "epoch": 0.8277541083384053, - "grad_norm": 0.6728115677833557, - "learning_rate": 1.516193373461421e-06, - "loss": 1.6758, - "step": 8160 - }, - { - "epoch": 0.8287685128829377, - "grad_norm": 0.9312713742256165, - "learning_rate": 1.49884620825961e-06, - "loss": 1.6755, - "step": 8170 - }, - { - "epoch": 0.8297829174274701, - "grad_norm": 1.413404941558838, - "learning_rate": 1.4815908086240749e-06, - "loss": 1.677, - "step": 8180 - }, - { - "epoch": 0.8307973219720024, - "grad_norm": 0.6724287271499634, - "learning_rate": 1.4644273608179493e-06, - "loss": 1.682, - "step": 8190 - }, - { - "epoch": 0.8318117265165348, - "grad_norm": 0.8280810713768005, - "learning_rate": 1.4473560501117856e-06, - "loss": 1.6837, - "step": 8200 - }, - { - "epoch": 0.8328261310610672, - "grad_norm": 0.7651968002319336, - "learning_rate": 1.4303770607815727e-06, - "loss": 1.6784, - "step": 8210 - }, - { - "epoch": 0.8338405356055996, - "grad_norm": 0.7739443182945251, - "learning_rate": 1.413490576106733e-06, - "loss": 1.6907, - "step": 8220 - }, - { - "epoch": 0.8348549401501318, - "grad_norm": 0.6667850017547607, - "learning_rate": 1.3966967783681551e-06, - "loss": 1.6758, - "step": 8230 - }, - { - "epoch": 0.8358693446946642, - "grad_norm": 0.6846851110458374, - "learning_rate": 1.3799958488462085e-06, - "loss": 1.6797, - "step": 8240 - }, - { - "epoch": 0.8368837492391966, - "grad_norm": 0.7309419512748718, - "learning_rate": 1.36338796781881e-06, - "loss": 1.6809, - "step": 8250 - }, - { - "epoch": 0.837898153783729, - "grad_norm": 0.7184876799583435, - "learning_rate": 1.3468733145594582e-06, - "loss": 1.6898, - "step": 8260 - }, - { - "epoch": 0.8389125583282613, - "grad_norm": 0.6892003417015076, - "learning_rate": 1.3304520673353138e-06, - "loss": 1.6782, - "step": 8270 - }, - { - "epoch": 0.8399269628727937, - "grad_norm": 0.8023001551628113, - "learning_rate": 1.3141244034052559e-06, - "loss": 1.6847, - "step": 8280 - }, - { - "epoch": 0.8409413674173261, - "grad_norm": 1.0735557079315186, - "learning_rate": 1.2978904990179919e-06, - "loss": 1.6884, - "step": 8290 - }, - { - "epoch": 0.8419557719618583, - "grad_norm": 0.8170183300971985, - "learning_rate": 1.2817505294101373e-06, - "loss": 1.6798, - "step": 8300 - }, - { - "epoch": 0.8429701765063907, - "grad_norm": 0.653398334980011, - "learning_rate": 1.2657046688043306e-06, - "loss": 1.6884, - "step": 8310 - }, - { - "epoch": 0.8439845810509231, - "grad_norm": 0.6646006107330322, - "learning_rate": 1.249753090407354e-06, - "loss": 1.6779, - "step": 8320 - }, - { - "epoch": 0.8449989855954555, - "grad_norm": 0.737384557723999, - "learning_rate": 1.2338959664082628e-06, - "loss": 1.6822, - "step": 8330 - }, - { - "epoch": 0.8460133901399878, - "grad_norm": 0.783181369304657, - "learning_rate": 1.2181334679765212e-06, - "loss": 1.6765, - "step": 8340 - }, - { - "epoch": 0.8470277946845202, - "grad_norm": 0.7288150787353516, - "learning_rate": 1.2024657652601612e-06, - "loss": 1.6776, - "step": 8350 - }, - { - "epoch": 0.8480421992290526, - "grad_norm": 0.7122567892074585, - "learning_rate": 1.1868930273839474e-06, - "loss": 1.6777, - "step": 8360 - }, - { - "epoch": 0.8490566037735849, - "grad_norm": 0.6501953601837158, - "learning_rate": 1.1714154224475438e-06, - "loss": 1.6803, - "step": 8370 - }, - { - "epoch": 0.8500710083181172, - "grad_norm": 0.6516619920730591, - "learning_rate": 1.1560331175237082e-06, - "loss": 1.6779, - "step": 8380 - }, - { - "epoch": 0.8510854128626496, - "grad_norm": 0.6746801733970642, - "learning_rate": 1.140746278656476e-06, - "loss": 1.6735, - "step": 8390 - }, - { - "epoch": 0.852099817407182, - "grad_norm": 0.7403082847595215, - "learning_rate": 1.1255550708593822e-06, - "loss": 1.6844, - "step": 8400 - }, - { - "epoch": 0.8531142219517144, - "grad_norm": 0.6999229788780212, - "learning_rate": 1.110459658113674e-06, - "loss": 1.6791, - "step": 8410 - }, - { - "epoch": 0.8541286264962467, - "grad_norm": 0.7300592064857483, - "learning_rate": 1.0954602033665362e-06, - "loss": 1.6778, - "step": 8420 - }, - { - "epoch": 0.855143031040779, - "grad_norm": 0.6327239871025085, - "learning_rate": 1.0805568685293421e-06, - "loss": 1.6881, - "step": 8430 - }, - { - "epoch": 0.8561574355853114, - "grad_norm": 0.7314109206199646, - "learning_rate": 1.065749814475896e-06, - "loss": 1.6775, - "step": 8440 - }, - { - "epoch": 0.8571718401298438, - "grad_norm": 1.018730640411377, - "learning_rate": 1.0510392010407e-06, - "loss": 1.6861, - "step": 8450 - }, - { - "epoch": 0.8581862446743761, - "grad_norm": 0.6695818305015564, - "learning_rate": 1.0364251870172315e-06, - "loss": 1.6934, - "step": 8460 - }, - { - "epoch": 0.8592006492189085, - "grad_norm": 0.6460920572280884, - "learning_rate": 1.0219079301562296e-06, - "loss": 1.6861, - "step": 8470 - }, - { - "epoch": 0.8602150537634409, - "grad_norm": 0.8206009268760681, - "learning_rate": 1.0074875871639854e-06, - "loss": 1.6722, - "step": 8480 - }, - { - "epoch": 0.8612294583079733, - "grad_norm": 0.6997749209403992, - "learning_rate": 9.931643137006596e-07, - "loss": 1.6792, - "step": 8490 - }, - { - "epoch": 0.8622438628525055, - "grad_norm": 0.7158259749412537, - "learning_rate": 9.789382643785894e-07, - "loss": 1.6816, - "step": 8500 - }, - { - "epoch": 0.8632582673970379, - "grad_norm": 0.7240493893623352, - "learning_rate": 9.64809592760636e-07, - "loss": 1.6912, - "step": 8510 - }, - { - "epoch": 0.8642726719415703, - "grad_norm": 0.6413635611534119, - "learning_rate": 9.50778451358515e-07, - "loss": 1.6751, - "step": 8520 - }, - { - "epoch": 0.8652870764861027, - "grad_norm": 0.6725150942802429, - "learning_rate": 9.368449916311529e-07, - "loss": 1.6815, - "step": 8530 - }, - { - "epoch": 0.866301481030635, - "grad_norm": 0.6585033535957336, - "learning_rate": 9.230093639830561e-07, - "loss": 1.6853, - "step": 8540 - }, - { - "epoch": 0.8673158855751674, - "grad_norm": 0.7245474457740784, - "learning_rate": 9.092717177626819e-07, - "loss": 1.6747, - "step": 8550 - }, - { - "epoch": 0.8683302901196998, - "grad_norm": 0.7404381036758423, - "learning_rate": 8.956322012608265e-07, - "loss": 1.6753, - "step": 8560 - }, - { - "epoch": 0.8693446946642321, - "grad_norm": 0.8215778470039368, - "learning_rate": 8.82090961709029e-07, - "loss": 1.6888, - "step": 8570 - }, - { - "epoch": 0.8703590992087644, - "grad_norm": 0.6326916813850403, - "learning_rate": 8.686481452779816e-07, - "loss": 1.6766, - "step": 8580 - }, - { - "epoch": 0.8713735037532968, - "grad_norm": 0.8346433639526367, - "learning_rate": 8.553038970759464e-07, - "loss": 1.6851, - "step": 8590 - }, - { - "epoch": 0.8723879082978292, - "grad_norm": 0.710530161857605, - "learning_rate": 8.420583611471955e-07, - "loss": 1.6868, - "step": 8600 - }, - { - "epoch": 0.8734023128423616, - "grad_norm": 0.6314665079116821, - "learning_rate": 8.289116804704467e-07, - "loss": 1.6792, - "step": 8610 - }, - { - "epoch": 0.8744167173868939, - "grad_norm": 0.6659345030784607, - "learning_rate": 8.158639969573334e-07, - "loss": 1.6783, - "step": 8620 - }, - { - "epoch": 0.8754311219314262, - "grad_norm": 0.6360709071159363, - "learning_rate": 8.02915451450863e-07, - "loss": 1.6714, - "step": 8630 - }, - { - "epoch": 0.8764455264759586, - "grad_norm": 0.6321987509727478, - "learning_rate": 7.900661837238977e-07, - "loss": 1.6862, - "step": 8640 - }, - { - "epoch": 0.877459931020491, - "grad_norm": 0.641467273235321, - "learning_rate": 7.773163324776511e-07, - "loss": 1.6767, - "step": 8650 - }, - { - "epoch": 0.8784743355650233, - "grad_norm": 0.6552922129631042, - "learning_rate": 7.646660353401847e-07, - "loss": 1.6835, - "step": 8660 - }, - { - "epoch": 0.8794887401095557, - "grad_norm": 0.7071682214736938, - "learning_rate": 7.521154288649235e-07, - "loss": 1.6794, - "step": 8670 - }, - { - "epoch": 0.8805031446540881, - "grad_norm": 0.7058200240135193, - "learning_rate": 7.396646485291858e-07, - "loss": 1.679, - "step": 8680 - }, - { - "epoch": 0.8815175491986205, - "grad_norm": 0.6930869817733765, - "learning_rate": 7.273138287327164e-07, - "loss": 1.6728, - "step": 8690 - }, - { - "epoch": 0.8825319537431527, - "grad_norm": 0.7007521986961365, - "learning_rate": 7.150631027962406e-07, - "loss": 1.6719, - "step": 8700 - }, - { - "epoch": 0.8835463582876851, - "grad_norm": 0.7968788743019104, - "learning_rate": 7.029126029600197e-07, - "loss": 1.6785, - "step": 8710 - }, - { - "epoch": 0.8845607628322175, - "grad_norm": 0.6696544885635376, - "learning_rate": 6.908624603824244e-07, - "loss": 1.6816, - "step": 8720 - }, - { - "epoch": 0.8855751673767498, - "grad_norm": 0.6761875748634338, - "learning_rate": 6.789128051385252e-07, - "loss": 1.6783, - "step": 8730 - }, - { - "epoch": 0.8865895719212822, - "grad_norm": 0.65973961353302, - "learning_rate": 6.670637662186785e-07, - "loss": 1.6797, - "step": 8740 - }, - { - "epoch": 0.8876039764658146, - "grad_norm": 0.7765873670578003, - "learning_rate": 6.553154715271459e-07, - "loss": 1.6826, - "step": 8750 - }, - { - "epoch": 0.888618381010347, - "grad_norm": 0.6666598320007324, - "learning_rate": 6.436680478807001e-07, - "loss": 1.6819, - "step": 8760 - }, - { - "epoch": 0.8896327855548792, - "grad_norm": 0.6343129873275757, - "learning_rate": 6.321216210072667e-07, - "loss": 1.6808, - "step": 8770 - }, - { - "epoch": 0.8906471900994116, - "grad_norm": 0.7419044375419617, - "learning_rate": 6.206763155445628e-07, - "loss": 1.682, - "step": 8780 - }, - { - "epoch": 0.891661594643944, - "grad_norm": 0.8284322619438171, - "learning_rate": 6.093322550387548e-07, - "loss": 1.6722, - "step": 8790 - }, - { - "epoch": 0.8926759991884764, - "grad_norm": 0.6644549369812012, - "learning_rate": 5.980895619431148e-07, - "loss": 1.6846, - "step": 8800 - }, - { - "epoch": 0.8936904037330087, - "grad_norm": 0.7379443645477295, - "learning_rate": 5.869483576167123e-07, - "loss": 1.6813, - "step": 8810 - }, - { - "epoch": 0.8947048082775411, - "grad_norm": 0.7265321612358093, - "learning_rate": 5.759087623230964e-07, - "loss": 1.685, - "step": 8820 - }, - { - "epoch": 0.8957192128220735, - "grad_norm": 0.7281410098075867, - "learning_rate": 5.649708952290001e-07, - "loss": 1.678, - "step": 8830 - }, - { - "epoch": 0.8967336173666058, - "grad_norm": 0.6362907290458679, - "learning_rate": 5.541348744030483e-07, - "loss": 1.6744, - "step": 8840 - }, - { - "epoch": 0.8977480219111381, - "grad_norm": 0.6471095681190491, - "learning_rate": 5.434008168144944e-07, - "loss": 1.6848, - "step": 8850 - }, - { - "epoch": 0.8987624264556705, - "grad_norm": 0.7785438299179077, - "learning_rate": 5.327688383319474e-07, - "loss": 1.6817, - "step": 8860 - }, - { - "epoch": 0.8997768310002029, - "grad_norm": 0.6940597891807556, - "learning_rate": 5.222390537221256e-07, - "loss": 1.6859, - "step": 8870 - }, - { - "epoch": 0.9007912355447353, - "grad_norm": 0.6518341302871704, - "learning_rate": 5.11811576648621e-07, - "loss": 1.6832, - "step": 8880 - }, - { - "epoch": 0.9018056400892676, - "grad_norm": 0.6684447526931763, - "learning_rate": 5.014865196706597e-07, - "loss": 1.6807, - "step": 8890 - }, - { - "epoch": 0.9028200446338, - "grad_norm": 0.7260445952415466, - "learning_rate": 4.912639942419039e-07, - "loss": 1.6843, - "step": 8900 - }, - { - "epoch": 0.9038344491783323, - "grad_norm": 0.6235900521278381, - "learning_rate": 4.811441107092385e-07, - "loss": 1.6771, - "step": 8910 - }, - { - "epoch": 0.9048488537228647, - "grad_norm": 0.6858961582183838, - "learning_rate": 4.711269783115813e-07, - "loss": 1.6852, - "step": 8920 - }, - { - "epoch": 0.905863258267397, - "grad_norm": 0.6687613725662231, - "learning_rate": 4.612127051787052e-07, - "loss": 1.6861, - "step": 8930 - }, - { - "epoch": 0.9068776628119294, - "grad_norm": 0.6287438273429871, - "learning_rate": 4.5140139833007023e-07, - "loss": 1.6925, - "step": 8940 - }, - { - "epoch": 0.9078920673564618, - "grad_norm": 0.657447099685669, - "learning_rate": 4.4169316367366766e-07, - "loss": 1.675, - "step": 8950 - }, - { - "epoch": 0.9089064719009942, - "grad_norm": 0.6303340792655945, - "learning_rate": 4.320881060048787e-07, - "loss": 1.677, - "step": 8960 - }, - { - "epoch": 0.9099208764455264, - "grad_norm": 0.659706711769104, - "learning_rate": 4.2258632900534224e-07, - "loss": 1.6721, - "step": 8970 - }, - { - "epoch": 0.9109352809900588, - "grad_norm": 0.6636031866073608, - "learning_rate": 4.131879352418355e-07, - "loss": 1.6841, - "step": 8980 - }, - { - "epoch": 0.9119496855345912, - "grad_norm": 0.6449264287948608, - "learning_rate": 4.0389302616516747e-07, - "loss": 1.6786, - "step": 8990 - }, - { - "epoch": 0.9129640900791236, - "grad_norm": 0.6599504947662354, - "learning_rate": 3.9470170210908375e-07, - "loss": 1.6766, - "step": 9000 - }, - { - "epoch": 0.9139784946236559, - "grad_norm": 0.6451672315597534, - "learning_rate": 3.856140622891802e-07, - "loss": 1.6763, - "step": 9010 - }, - { - "epoch": 0.9149928991681883, - "grad_norm": 0.6493940353393555, - "learning_rate": 3.7663020480183776e-07, - "loss": 1.6866, - "step": 9020 - }, - { - "epoch": 0.9160073037127207, - "grad_norm": 0.6335039734840393, - "learning_rate": 3.677502266231603e-07, - "loss": 1.6832, - "step": 9030 - }, - { - "epoch": 0.917021708257253, - "grad_norm": 0.6968539953231812, - "learning_rate": 3.5897422360792636e-07, - "loss": 1.6752, - "step": 9040 - }, - { - "epoch": 0.9180361128017853, - "grad_norm": 0.6821022033691406, - "learning_rate": 3.5030229048856114e-07, - "loss": 1.678, - "step": 9050 - }, - { - "epoch": 0.9190505173463177, - "grad_norm": 0.6833332777023315, - "learning_rate": 3.4173452087410186e-07, - "loss": 1.688, - "step": 9060 - }, - { - "epoch": 0.9200649218908501, - "grad_norm": 0.6825125813484192, - "learning_rate": 3.332710072492007e-07, - "loss": 1.6735, - "step": 9070 - }, - { - "epoch": 0.9210793264353825, - "grad_norm": 0.7820912599563599, - "learning_rate": 3.2491184097311557e-07, - "loss": 1.6826, - "step": 9080 - }, - { - "epoch": 0.9220937309799148, - "grad_norm": 0.6965621709823608, - "learning_rate": 3.1665711227873206e-07, - "loss": 1.6838, - "step": 9090 - }, - { - "epoch": 0.9231081355244471, - "grad_norm": 0.6593127250671387, - "learning_rate": 3.085069102715854e-07, - "loss": 1.6736, - "step": 9100 - }, - { - "epoch": 0.9241225400689795, - "grad_norm": 0.6550984382629395, - "learning_rate": 3.0046132292889774e-07, - "loss": 1.6778, - "step": 9110 - }, - { - "epoch": 0.9251369446135118, - "grad_norm": 0.6520861983299255, - "learning_rate": 2.925204370986312e-07, - "loss": 1.6696, - "step": 9120 - }, - { - "epoch": 0.9261513491580442, - "grad_norm": 0.6255242228507996, - "learning_rate": 2.846843384985476e-07, - "loss": 1.675, - "step": 9130 - }, - { - "epoch": 0.9271657537025766, - "grad_norm": 0.6270112991333008, - "learning_rate": 2.769531117152857e-07, - "loss": 1.6782, - "step": 9140 - }, - { - "epoch": 0.928180158247109, - "grad_norm": 0.6492500305175781, - "learning_rate": 2.693268402034488e-07, - "loss": 1.6726, - "step": 9150 - }, - { - "epoch": 0.9291945627916413, - "grad_norm": 0.634792149066925, - "learning_rate": 2.618056062847008e-07, - "loss": 1.6838, - "step": 9160 - }, - { - "epoch": 0.9302089673361736, - "grad_norm": 0.7293791174888611, - "learning_rate": 2.5438949114687694e-07, - "loss": 1.6799, - "step": 9170 - }, - { - "epoch": 0.931223371880706, - "grad_norm": 0.6276476383209229, - "learning_rate": 2.470785748431115e-07, - "loss": 1.6752, - "step": 9180 - }, - { - "epoch": 0.9322377764252384, - "grad_norm": 0.6805276274681091, - "learning_rate": 2.398729362909735e-07, - "loss": 1.6789, - "step": 9190 - }, - { - "epoch": 0.9332521809697707, - "grad_norm": 0.648118793964386, - "learning_rate": 2.3277265327160903e-07, - "loss": 1.6858, - "step": 9200 - }, - { - "epoch": 0.9342665855143031, - "grad_norm": 0.616062581539154, - "learning_rate": 2.2577780242890924e-07, - "loss": 1.6704, - "step": 9210 - }, - { - "epoch": 0.9352809900588355, - "grad_norm": 0.6214431524276733, - "learning_rate": 2.1888845926867797e-07, - "loss": 1.6797, - "step": 9220 - }, - { - "epoch": 0.9362953946033679, - "grad_norm": 0.6437916159629822, - "learning_rate": 2.1210469815781565e-07, - "loss": 1.6841, - "step": 9230 - }, - { - "epoch": 0.9373097991479001, - "grad_norm": 0.6119692921638489, - "learning_rate": 2.0542659232352212e-07, - "loss": 1.6763, - "step": 9240 - }, - { - "epoch": 0.9383242036924325, - "grad_norm": 0.6257642507553101, - "learning_rate": 1.9885421385250292e-07, - "loss": 1.6808, - "step": 9250 - }, - { - "epoch": 0.9393386082369649, - "grad_norm": 0.8402661681175232, - "learning_rate": 1.9238763369018977e-07, - "loss": 1.6778, - "step": 9260 - }, - { - "epoch": 0.9403530127814973, - "grad_norm": 0.6527227759361267, - "learning_rate": 1.860269216399768e-07, - "loss": 1.6792, - "step": 9270 - }, - { - "epoch": 0.9413674173260296, - "grad_norm": 0.6283434629440308, - "learning_rate": 1.797721463624691e-07, - "loss": 1.6713, - "step": 9280 - }, - { - "epoch": 0.942381821870562, - "grad_norm": 0.6670483946800232, - "learning_rate": 1.7362337537473294e-07, - "loss": 1.6764, - "step": 9290 - }, - { - "epoch": 0.9433962264150944, - "grad_norm": 0.5928934812545776, - "learning_rate": 1.6758067504957897e-07, - "loss": 1.6762, - "step": 9300 - }, - { - "epoch": 0.9444106309596267, - "grad_norm": 0.6767281293869019, - "learning_rate": 1.6164411061483699e-07, - "loss": 1.6783, - "step": 9310 - }, - { - "epoch": 0.945425035504159, - "grad_norm": 0.666013777256012, - "learning_rate": 1.5581374615265654e-07, - "loss": 1.6827, - "step": 9320 - }, - { - "epoch": 0.9464394400486914, - "grad_norm": 0.7199657559394836, - "learning_rate": 1.500896445988087e-07, - "loss": 1.6763, - "step": 9330 - }, - { - "epoch": 0.9474538445932238, - "grad_norm": 0.6315463185310364, - "learning_rate": 1.444718677420176e-07, - "loss": 1.6846, - "step": 9340 - }, - { - "epoch": 0.9484682491377562, - "grad_norm": 0.6154307126998901, - "learning_rate": 1.3896047622328323e-07, - "loss": 1.6736, - "step": 9350 - }, - { - "epoch": 0.9494826536822885, - "grad_norm": 0.6219435334205627, - "learning_rate": 1.3355552953523088e-07, - "loss": 1.6714, - "step": 9360 - }, - { - "epoch": 0.9504970582268208, - "grad_norm": 0.6223509907722473, - "learning_rate": 1.2825708602147046e-07, - "loss": 1.6799, - "step": 9370 - }, - { - "epoch": 0.9515114627713532, - "grad_norm": 0.6444467306137085, - "learning_rate": 1.230652028759638e-07, - "loss": 1.6769, - "step": 9380 - }, - { - "epoch": 0.9525258673158856, - "grad_norm": 0.691252589225769, - "learning_rate": 1.1797993614241055e-07, - "loss": 1.6741, - "step": 9390 - }, - { - "epoch": 0.9535402718604179, - "grad_norm": 0.6360878348350525, - "learning_rate": 1.1300134071363656e-07, - "loss": 1.6804, - "step": 9400 - }, - { - "epoch": 0.9545546764049503, - "grad_norm": 0.627426028251648, - "learning_rate": 1.0812947033101207e-07, - "loss": 1.6766, - "step": 9410 - }, - { - "epoch": 0.9555690809494827, - "grad_norm": 0.6212092638015747, - "learning_rate": 1.0336437758386108e-07, - "loss": 1.687, - "step": 9420 - }, - { - "epoch": 0.9565834854940151, - "grad_norm": 0.6545799970626831, - "learning_rate": 9.870611390889961e-08, - "loss": 1.6797, - "step": 9430 - }, - { - "epoch": 0.9575978900385473, - "grad_norm": 0.6562811136245728, - "learning_rate": 9.415472958968163e-08, - "loss": 1.6664, - "step": 9440 - }, - { - "epoch": 0.9586122945830797, - "grad_norm": 0.6347233057022095, - "learning_rate": 8.971027375604735e-08, - "loss": 1.678, - "step": 9450 - }, - { - "epoch": 0.9596266991276121, - "grad_norm": 0.6453290581703186, - "learning_rate": 8.537279438360579e-08, - "loss": 1.6701, - "step": 9460 - }, - { - "epoch": 0.9606411036721445, - "grad_norm": 0.6392500996589661, - "learning_rate": 8.114233829320639e-08, - "loss": 1.6802, - "step": 9470 - }, - { - "epoch": 0.9616555082166768, - "grad_norm": 0.6962987780570984, - "learning_rate": 7.701895115043823e-08, - "loss": 1.6817, - "step": 9480 - }, - { - "epoch": 0.9626699127612092, - "grad_norm": 0.6284400224685669, - "learning_rate": 7.300267746513712e-08, - "loss": 1.6751, - "step": 9490 - }, - { - "epoch": 0.9636843173057416, - "grad_norm": 0.6418602466583252, - "learning_rate": 6.909356059090489e-08, - "loss": 1.6827, - "step": 9500 - }, - { - "epoch": 0.9646987218502739, - "grad_norm": 0.6559508442878723, - "learning_rate": 6.529164272463862e-08, - "loss": 1.6754, - "step": 9510 - }, - { - "epoch": 0.9657131263948062, - "grad_norm": 0.7080426812171936, - "learning_rate": 6.15969649060788e-08, - "loss": 1.6674, - "step": 9520 - }, - { - "epoch": 0.9667275309393386, - "grad_norm": 0.6240237951278687, - "learning_rate": 5.800956701736638e-08, - "loss": 1.6863, - "step": 9530 - }, - { - "epoch": 0.967741935483871, - "grad_norm": 0.6661326289176941, - "learning_rate": 5.452948778260858e-08, - "loss": 1.6819, - "step": 9540 - }, - { - "epoch": 0.9687563400284033, - "grad_norm": 0.6556071639060974, - "learning_rate": 5.115676476746489e-08, - "loss": 1.6826, - "step": 9550 - }, - { - "epoch": 0.9697707445729357, - "grad_norm": 0.6856659054756165, - "learning_rate": 4.789143437873734e-08, - "loss": 1.6817, - "step": 9560 - }, - { - "epoch": 0.970785149117468, - "grad_norm": 0.6254087686538696, - "learning_rate": 4.473353186398299e-08, - "loss": 1.6875, - "step": 9570 - }, - { - "epoch": 0.9717995536620004, - "grad_norm": 0.6681696772575378, - "learning_rate": 4.168309131112991e-08, - "loss": 1.6746, - "step": 9580 - }, - { - "epoch": 0.9728139582065327, - "grad_norm": 0.7110115885734558, - "learning_rate": 3.8740145648106245e-08, - "loss": 1.6827, - "step": 9590 - }, - { - "epoch": 0.9738283627510651, - "grad_norm": 0.6355034112930298, - "learning_rate": 3.5904726642493894e-08, - "loss": 1.6778, - "step": 9600 - }, - { - "epoch": 0.9748427672955975, - "grad_norm": 0.6343887448310852, - "learning_rate": 3.3176864901176553e-08, - "loss": 1.6808, - "step": 9610 - }, - { - "epoch": 0.9758571718401299, - "grad_norm": 0.6066298484802246, - "learning_rate": 3.05565898700122e-08, - "loss": 1.6868, - "step": 9620 - }, - { - "epoch": 0.9768715763846622, - "grad_norm": 0.7328799962997437, - "learning_rate": 2.804392983351778e-08, - "loss": 1.6755, - "step": 9630 - }, - { - "epoch": 0.9778859809291945, - "grad_norm": 0.6362618207931519, - "learning_rate": 2.563891191456058e-08, - "loss": 1.6691, - "step": 9640 - }, - { - "epoch": 0.9789003854737269, - "grad_norm": 0.642038881778717, - "learning_rate": 2.334156207406846e-08, - "loss": 1.6814, - "step": 9650 - }, - { - "epoch": 0.9799147900182593, - "grad_norm": 0.6480450630187988, - "learning_rate": 2.1151905110746718e-08, - "loss": 1.6834, - "step": 9660 - }, - { - "epoch": 0.9809291945627916, - "grad_norm": 0.6482648849487305, - "learning_rate": 1.9069964660811678e-08, - "loss": 1.6734, - "step": 9670 - }, - { - "epoch": 0.981943599107324, - "grad_norm": 0.6449231505393982, - "learning_rate": 1.7095763197735315e-08, - "loss": 1.6773, - "step": 9680 - }, - { - "epoch": 0.9829580036518564, - "grad_norm": 0.6425095796585083, - "learning_rate": 1.5229322032002113e-08, - "loss": 1.6743, - "step": 9690 - }, - { - "epoch": 0.9839724081963888, - "grad_norm": 0.6829230785369873, - "learning_rate": 1.3470661310883703e-08, - "loss": 1.6859, - "step": 9700 - }, - { - "epoch": 0.984986812740921, - "grad_norm": 0.6447646617889404, - "learning_rate": 1.1819800018212368e-08, - "loss": 1.6787, - "step": 9710 - }, - { - "epoch": 0.9860012172854534, - "grad_norm": 0.6090907454490662, - "learning_rate": 1.0276755974183428e-08, - "loss": 1.6781, - "step": 9720 - }, - { - "epoch": 0.9870156218299858, - "grad_norm": 0.6135707497596741, - "learning_rate": 8.841545835162058e-09, - "loss": 1.6762, - "step": 9730 - }, - { - "epoch": 0.9880300263745182, - "grad_norm": 0.642645001411438, - "learning_rate": 7.514185093500104e-09, - "loss": 1.6894, - "step": 9740 - }, - { - "epoch": 0.9890444309190505, - "grad_norm": 0.653232991695404, - "learning_rate": 6.294688077371769e-09, - "loss": 1.6807, - "step": 9750 - }, - { - "epoch": 0.9900588354635829, - "grad_norm": 0.6810218691825867, - "learning_rate": 5.183067950617071e-09, - "loss": 1.6793, - "step": 9760 - }, - { - "epoch": 0.9910732400081153, - "grad_norm": 0.6899727582931519, - "learning_rate": 4.1793367126019555e-09, - "loss": 1.6776, - "step": 9770 - }, - { - "epoch": 0.9920876445526476, - "grad_norm": 0.6138082146644592, - "learning_rate": 3.2835051980861788e-09, - "loss": 1.6819, - "step": 9780 - }, - { - "epoch": 0.9931020490971799, - "grad_norm": 0.6300218105316162, - "learning_rate": 2.495583077110064e-09, - "loss": 1.6712, - "step": 9790 - }, - { - "epoch": 0.9941164536417123, - "grad_norm": 0.6148221492767334, - "learning_rate": 1.815578854884592e-09, - "loss": 1.6707, - "step": 9800 - }, - { - "epoch": 0.9951308581862447, - "grad_norm": 0.643248438835144, - "learning_rate": 1.2434998717048008e-09, - "loss": 1.6809, - "step": 9810 - }, - { - "epoch": 0.9961452627307771, - "grad_norm": 0.6501971483230591, - "learning_rate": 7.793523028676309e-10, - "loss": 1.6837, - "step": 9820 - }, - { - "epoch": 0.9971596672753094, - "grad_norm": 0.642480194568634, - "learning_rate": 4.2314115860642157e-10, - "loss": 1.6757, - "step": 9830 - }, - { - "epoch": 0.9981740718198417, - "grad_norm": 0.6754608154296875, - "learning_rate": 1.748702840376204e-10, - "loss": 1.676, - "step": 9840 - }, - { - "epoch": 0.9991884763643741, - "grad_norm": 0.6747230291366577, - "learning_rate": 3.4542359116374046e-11, - "loss": 1.6784, - "step": 9850 - } - ], - "logging_steps": 10, - "max_steps": 9858, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 1000, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.184055280405827e+21, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}