diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,108652 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 15517, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 6.444544692917445e-05, + "grad_norm": 0.15129432154940006, + "learning_rate": 0.0, + "loss": 0.0012, + "step": 1 + }, + { + "epoch": 0.0001288908938583489, + "grad_norm": 0.26991460323889616, + "learning_rate": 9.43403983493194e-07, + "loss": 0.0003, + "step": 2 + }, + { + "epoch": 0.00019333634078752337, + "grad_norm": 1.9954004446079063, + "learning_rate": 1.4952599368676733e-06, + "loss": 0.0091, + "step": 3 + }, + { + "epoch": 0.0002577817877166978, + "grad_norm": 0.22931631126106178, + "learning_rate": 1.886807966986388e-06, + "loss": 0.0003, + "step": 4 + }, + { + "epoch": 0.00032222723464587225, + "grad_norm": 0.25371039245875826, + "learning_rate": 2.1905162141015006e-06, + "loss": 0.0005, + "step": 5 + }, + { + "epoch": 0.00038667268157504674, + "grad_norm": 1.1948746521302072, + "learning_rate": 2.4386639203608674e-06, + "loss": 0.006, + "step": 6 + }, + { + "epoch": 0.0004511181285042212, + "grad_norm": 0.30320543126617766, + "learning_rate": 2.648469816548369e-06, + "loss": 0.0027, + "step": 7 + }, + { + "epoch": 0.0005155635754333956, + "grad_norm": 0.10152956997270147, + "learning_rate": 2.8302119504795815e-06, + "loss": 0.0009, + "step": 8 + }, + { + "epoch": 0.0005800090223625701, + "grad_norm": 1.4987747433411953, + "learning_rate": 2.9905198737353466e-06, + "loss": 0.0129, + "step": 9 + }, + { + "epoch": 0.0006444544692917445, + "grad_norm": 0.02707402421982954, + "learning_rate": 3.1339201975946947e-06, + "loss": 0.0001, + "step": 10 + }, + { + "epoch": 0.000708899916220919, + "grad_norm": 0.018766550431254077, + "learning_rate": 3.2636415696447343e-06, + "loss": 0.0001, + "step": 11 + }, + { + "epoch": 0.0007733453631500935, + "grad_norm": 0.20907100771370063, + "learning_rate": 3.382067903854061e-06, + "loss": 0.0004, + "step": 12 + }, + { + "epoch": 0.0008377908100792679, + "grad_norm": 0.5817576027518055, + "learning_rate": 3.4910095707707385e-06, + "loss": 0.0026, + "step": 13 + }, + { + "epoch": 0.0009022362570084424, + "grad_norm": 0.01711225147567763, + "learning_rate": 3.5918738000415628e-06, + "loss": 0.0, + "step": 14 + }, + { + "epoch": 0.0009666817039376168, + "grad_norm": 0.07462783118349445, + "learning_rate": 3.6857761509691734e-06, + "loss": 0.0003, + "step": 15 + }, + { + "epoch": 0.0010311271508667912, + "grad_norm": 0.10816803494037058, + "learning_rate": 3.773615933972776e-06, + "loss": 0.0002, + "step": 16 + }, + { + "epoch": 0.0010955725977959657, + "grad_norm": 1.9217337579697105, + "learning_rate": 3.8561287268159795e-06, + "loss": 0.0167, + "step": 17 + }, + { + "epoch": 0.0011600180447251401, + "grad_norm": 0.1417044693166633, + "learning_rate": 3.93392385722854e-06, + "loss": 0.0002, + "step": 18 + }, + { + "epoch": 0.0012244634916543146, + "grad_norm": 0.023784749719864124, + "learning_rate": 4.0075117377730165e-06, + "loss": 0.0, + "step": 19 + }, + { + "epoch": 0.001288908938583489, + "grad_norm": 0.07828858355950565, + "learning_rate": 4.077324181087888e-06, + "loss": 0.0002, + "step": 20 + }, + { + "epoch": 0.0013533543855126634, + "grad_norm": 0.04722417233368723, + "learning_rate": 4.143729753416042e-06, + "loss": 0.0002, + "step": 21 + }, + { + "epoch": 0.001417799832441838, + "grad_norm": 0.08640805563755993, + "learning_rate": 4.207045553137928e-06, + "loss": 0.0002, + "step": 22 + }, + { + "epoch": 0.0014822452793710125, + "grad_norm": 1.7851848307852938, + "learning_rate": 4.267546368922451e-06, + "loss": 0.012, + "step": 23 + }, + { + "epoch": 0.001546690726300187, + "grad_norm": 0.18091976525222841, + "learning_rate": 4.325471887347255e-06, + "loss": 0.0004, + "step": 24 + }, + { + "epoch": 0.0016111361732293614, + "grad_norm": 0.03394466558933047, + "learning_rate": 4.381032428203001e-06, + "loss": 0.0004, + "step": 25 + }, + { + "epoch": 0.0016755816201585358, + "grad_norm": 0.041340723123166515, + "learning_rate": 4.434413554263933e-06, + "loss": 0.0002, + "step": 26 + }, + { + "epoch": 0.0017400270670877103, + "grad_norm": 0.20461789214303566, + "learning_rate": 4.485779810603019e-06, + "loss": 0.0022, + "step": 27 + }, + { + "epoch": 0.0018044725140168847, + "grad_norm": 0.12106531716915613, + "learning_rate": 4.5352777835347565e-06, + "loss": 0.0002, + "step": 28 + }, + { + "epoch": 0.0018689179609460592, + "grad_norm": 0.03492363492746817, + "learning_rate": 4.583038622537583e-06, + "loss": 0.0001, + "step": 29 + }, + { + "epoch": 0.0019333634078752336, + "grad_norm": 0.03259851462272291, + "learning_rate": 4.629180134462368e-06, + "loss": 0.0001, + "step": 30 + }, + { + "epoch": 0.001997808854804408, + "grad_norm": 0.03013889637859078, + "learning_rate": 4.673808534226263e-06, + "loss": 0.0002, + "step": 31 + }, + { + "epoch": 0.0020622543017335825, + "grad_norm": 0.027037052171979114, + "learning_rate": 4.717019917465971e-06, + "loss": 0.0003, + "step": 32 + }, + { + "epoch": 0.002126699748662757, + "grad_norm": 0.005106793730490385, + "learning_rate": 4.758901506512407e-06, + "loss": 0.0, + "step": 33 + }, + { + "epoch": 0.0021911451955919314, + "grad_norm": 0.3133135457884165, + "learning_rate": 4.799532710309174e-06, + "loss": 0.0008, + "step": 34 + }, + { + "epoch": 0.002255590642521106, + "grad_norm": 0.06576723596208979, + "learning_rate": 4.838986030649869e-06, + "loss": 0.0004, + "step": 35 + }, + { + "epoch": 0.0023200360894502802, + "grad_norm": 0.44200181870496286, + "learning_rate": 4.877327840721735e-06, + "loss": 0.0026, + "step": 36 + }, + { + "epoch": 0.0023844815363794547, + "grad_norm": 0.005113691502045184, + "learning_rate": 4.914619056956378e-06, + "loss": 0.0001, + "step": 37 + }, + { + "epoch": 0.002448926983308629, + "grad_norm": 0.011710030818769188, + "learning_rate": 4.9509157212662115e-06, + "loss": 0.0001, + "step": 38 + }, + { + "epoch": 0.0025133724302378035, + "grad_norm": 0.10730184004136714, + "learning_rate": 4.986269507638412e-06, + "loss": 0.0013, + "step": 39 + }, + { + "epoch": 0.002577817877166978, + "grad_norm": 0.06514135570120874, + "learning_rate": 5.0207281645810825e-06, + "loss": 0.0002, + "step": 40 + }, + { + "epoch": 0.0026422633240961524, + "grad_norm": 0.00949610941862856, + "learning_rate": 5.054335902928648e-06, + "loss": 0.0001, + "step": 41 + }, + { + "epoch": 0.002706708771025327, + "grad_norm": 0.011550266470039141, + "learning_rate": 5.087133736909237e-06, + "loss": 0.0001, + "step": 42 + }, + { + "epoch": 0.0027711542179545013, + "grad_norm": 0.007752723479024223, + "learning_rate": 5.119159785074679e-06, + "loss": 0.0, + "step": 43 + }, + { + "epoch": 0.002835599664883676, + "grad_norm": 0.5378045317232993, + "learning_rate": 5.1504495366311225e-06, + "loss": 0.0032, + "step": 44 + }, + { + "epoch": 0.0029000451118128506, + "grad_norm": 0.2128986742090359, + "learning_rate": 5.181036087836847e-06, + "loss": 0.0006, + "step": 45 + }, + { + "epoch": 0.002964490558742025, + "grad_norm": 0.000640446091847838, + "learning_rate": 5.210950352415644e-06, + "loss": 0.0, + "step": 46 + }, + { + "epoch": 0.0030289360056711995, + "grad_norm": 0.030881680917973543, + "learning_rate": 5.2402212493395684e-06, + "loss": 0.0001, + "step": 47 + }, + { + "epoch": 0.003093381452600374, + "grad_norm": 0.05658321101545766, + "learning_rate": 5.268875870840449e-06, + "loss": 0.0001, + "step": 48 + }, + { + "epoch": 0.0031578268995295484, + "grad_norm": 0.5703326443811637, + "learning_rate": 5.296939633096738e-06, + "loss": 0.0052, + "step": 49 + }, + { + "epoch": 0.003222272346458723, + "grad_norm": 2.8203730272893908, + "learning_rate": 5.324436411696195e-06, + "loss": 0.0161, + "step": 50 + }, + { + "epoch": 0.0032867177933878972, + "grad_norm": 0.001724963280434341, + "learning_rate": 5.351388663683652e-06, + "loss": 0.0, + "step": 51 + }, + { + "epoch": 0.0033511632403170717, + "grad_norm": 0.01416510359891742, + "learning_rate": 5.377817537757127e-06, + "loss": 0.0001, + "step": 52 + }, + { + "epoch": 0.003415608687246246, + "grad_norm": 0.0049279435077748575, + "learning_rate": 5.403742973967069e-06, + "loss": 0.0, + "step": 53 + }, + { + "epoch": 0.0034800541341754206, + "grad_norm": 0.7568150671265557, + "learning_rate": 5.4291837940962135e-06, + "loss": 0.0023, + "step": 54 + }, + { + "epoch": 0.003544499581104595, + "grad_norm": 0.18716012429739934, + "learning_rate": 5.454157783746235e-06, + "loss": 0.0006, + "step": 55 + }, + { + "epoch": 0.0036089450280337694, + "grad_norm": 0.03473747532236853, + "learning_rate": 5.4786817670279514e-06, + "loss": 0.0001, + "step": 56 + }, + { + "epoch": 0.003673390474962944, + "grad_norm": 0.08810403927552686, + "learning_rate": 5.50277167464069e-06, + "loss": 0.0003, + "step": 57 + }, + { + "epoch": 0.0037378359218921183, + "grad_norm": 0.017372241080586315, + "learning_rate": 5.526442606030776e-06, + "loss": 0.0001, + "step": 58 + }, + { + "epoch": 0.0038022813688212928, + "grad_norm": 0.21547829813614516, + "learning_rate": 5.549708886236511e-06, + "loss": 0.0017, + "step": 59 + }, + { + "epoch": 0.003866726815750467, + "grad_norm": 0.5537295329959373, + "learning_rate": 5.572584117955562e-06, + "loss": 0.002, + "step": 60 + }, + { + "epoch": 0.003931172262679642, + "grad_norm": 0.14335554933770492, + "learning_rate": 5.5950812293086465e-06, + "loss": 0.0004, + "step": 61 + }, + { + "epoch": 0.003995617709608816, + "grad_norm": 0.011859054511193099, + "learning_rate": 5.617212517719457e-06, + "loss": 0.0001, + "step": 62 + }, + { + "epoch": 0.004060063156537991, + "grad_norm": 0.004499399040304683, + "learning_rate": 5.638989690283715e-06, + "loss": 0.0, + "step": 63 + }, + { + "epoch": 0.004124508603467165, + "grad_norm": 0.9716421455530285, + "learning_rate": 5.660423900959163e-06, + "loss": 0.0033, + "step": 64 + }, + { + "epoch": 0.00418895405039634, + "grad_norm": 0.2602800996635666, + "learning_rate": 5.681525784872238e-06, + "loss": 0.004, + "step": 65 + }, + { + "epoch": 0.004253399497325514, + "grad_norm": 0.505029623796257, + "learning_rate": 5.7023054900056e-06, + "loss": 0.0048, + "step": 66 + }, + { + "epoch": 0.004317844944254689, + "grad_norm": 0.06494347848669335, + "learning_rate": 5.722772706502867e-06, + "loss": 0.0002, + "step": 67 + }, + { + "epoch": 0.004382290391183863, + "grad_norm": 0.05793100298650254, + "learning_rate": 5.742936693802368e-06, + "loss": 0.0004, + "step": 68 + }, + { + "epoch": 0.004446735838113038, + "grad_norm": 0.3872734676672814, + "learning_rate": 5.762806305790124e-06, + "loss": 0.0031, + "step": 69 + }, + { + "epoch": 0.004511181285042212, + "grad_norm": 0.032758724262618666, + "learning_rate": 5.782390014143064e-06, + "loss": 0.0001, + "step": 70 + }, + { + "epoch": 0.0045756267319713865, + "grad_norm": 0.510062996956511, + "learning_rate": 5.801695930016513e-06, + "loss": 0.0053, + "step": 71 + }, + { + "epoch": 0.0046400721789005605, + "grad_norm": 0.0020728367399568642, + "learning_rate": 5.820731824214928e-06, + "loss": 0.0, + "step": 72 + }, + { + "epoch": 0.004704517625829735, + "grad_norm": 0.022118583202706633, + "learning_rate": 5.8395051459714105e-06, + "loss": 0.0001, + "step": 73 + }, + { + "epoch": 0.004768963072758909, + "grad_norm": 0.023627041277547944, + "learning_rate": 5.858023040449572e-06, + "loss": 0.0, + "step": 74 + }, + { + "epoch": 0.004833408519688084, + "grad_norm": 0.046421888470687886, + "learning_rate": 5.876292365070674e-06, + "loss": 0.0004, + "step": 75 + }, + { + "epoch": 0.004897853966617258, + "grad_norm": 0.02261795227128185, + "learning_rate": 5.894319704759405e-06, + "loss": 0.0, + "step": 76 + }, + { + "epoch": 0.004962299413546433, + "grad_norm": 0.035546317724952904, + "learning_rate": 5.912111386193103e-06, + "loss": 0.0001, + "step": 77 + }, + { + "epoch": 0.005026744860475607, + "grad_norm": 0.01905267113714707, + "learning_rate": 5.929673491131606e-06, + "loss": 0.0, + "step": 78 + }, + { + "epoch": 0.005091190307404782, + "grad_norm": 0.015782799957426378, + "learning_rate": 5.9470118688979865e-06, + "loss": 0.0001, + "step": 79 + }, + { + "epoch": 0.005155635754333956, + "grad_norm": 0.10050158065115125, + "learning_rate": 5.964132148074277e-06, + "loss": 0.0003, + "step": 80 + }, + { + "epoch": 0.005220081201263131, + "grad_norm": 0.02889224712849086, + "learning_rate": 5.981039747470693e-06, + "loss": 0.0002, + "step": 81 + }, + { + "epoch": 0.005284526648192305, + "grad_norm": 0.04525759150992565, + "learning_rate": 5.997739886421841e-06, + "loss": 0.0003, + "step": 82 + }, + { + "epoch": 0.00534897209512148, + "grad_norm": 0.10955070109304078, + "learning_rate": 6.014237594458876e-06, + "loss": 0.0002, + "step": 83 + }, + { + "epoch": 0.005413417542050654, + "grad_norm": 0.005650621324755179, + "learning_rate": 6.030537720402429e-06, + "loss": 0.0, + "step": 84 + }, + { + "epoch": 0.005477862988979829, + "grad_norm": 0.22230525543403104, + "learning_rate": 6.04664494091748e-06, + "loss": 0.001, + "step": 85 + }, + { + "epoch": 0.005542308435909003, + "grad_norm": 0.1757163404784244, + "learning_rate": 6.062563768567872e-06, + "loss": 0.0021, + "step": 86 + }, + { + "epoch": 0.0056067538828381775, + "grad_norm": 0.10250988499729953, + "learning_rate": 6.0782985594052556e-06, + "loss": 0.0003, + "step": 87 + }, + { + "epoch": 0.005671199329767352, + "grad_norm": 0.07404739274091283, + "learning_rate": 6.093853520124317e-06, + "loss": 0.0006, + "step": 88 + }, + { + "epoch": 0.005735644776696526, + "grad_norm": 0.01653300445057603, + "learning_rate": 6.109232714813748e-06, + "loss": 0.0, + "step": 89 + }, + { + "epoch": 0.005800090223625701, + "grad_norm": 0.009526052586484979, + "learning_rate": 6.124440071330041e-06, + "loss": 0.0001, + "step": 90 + }, + { + "epoch": 0.005864535670554875, + "grad_norm": 0.010664951094544989, + "learning_rate": 6.139479387319107e-06, + "loss": 0.0, + "step": 91 + }, + { + "epoch": 0.00592898111748405, + "grad_norm": 0.020226888276105217, + "learning_rate": 6.154354335908838e-06, + "loss": 0.0001, + "step": 92 + }, + { + "epoch": 0.005993426564413224, + "grad_norm": 0.4540252129143341, + "learning_rate": 6.169068471093936e-06, + "loss": 0.0036, + "step": 93 + }, + { + "epoch": 0.006057872011342399, + "grad_norm": 0.0843852312896068, + "learning_rate": 6.183625232832763e-06, + "loss": 0.0008, + "step": 94 + }, + { + "epoch": 0.006122317458271573, + "grad_norm": 0.7314261216216805, + "learning_rate": 6.1980279518745175e-06, + "loss": 0.0058, + "step": 95 + }, + { + "epoch": 0.006186762905200748, + "grad_norm": 0.0012243406202469007, + "learning_rate": 6.212279854333643e-06, + "loss": 0.0, + "step": 96 + }, + { + "epoch": 0.006251208352129922, + "grad_norm": 0.028267818956160366, + "learning_rate": 6.226384066027224e-06, + "loss": 0.0001, + "step": 97 + }, + { + "epoch": 0.006315653799059097, + "grad_norm": 0.018740492262862375, + "learning_rate": 6.240343616589932e-06, + "loss": 0.0001, + "step": 98 + }, + { + "epoch": 0.006380099245988271, + "grad_norm": 0.051675413777484226, + "learning_rate": 6.25416144338008e-06, + "loss": 0.0001, + "step": 99 + }, + { + "epoch": 0.006444544692917446, + "grad_norm": 0.3241670981018575, + "learning_rate": 6.267840395189389e-06, + "loss": 0.0019, + "step": 100 + }, + { + "epoch": 0.00650899013984662, + "grad_norm": 4.658525973154353, + "learning_rate": 6.281383235768169e-06, + "loss": 0.0324, + "step": 101 + }, + { + "epoch": 0.0065734355867757945, + "grad_norm": 0.13965773384789484, + "learning_rate": 6.2947926471768465e-06, + "loss": 0.0004, + "step": 102 + }, + { + "epoch": 0.0066378810337049685, + "grad_norm": 0.027297791319953163, + "learning_rate": 6.30807123297399e-06, + "loss": 0.0001, + "step": 103 + }, + { + "epoch": 0.006702326480634143, + "grad_norm": 0.014509134729985261, + "learning_rate": 6.3212215212503204e-06, + "loss": 0.0, + "step": 104 + }, + { + "epoch": 0.006766771927563317, + "grad_norm": 0.011064436533537543, + "learning_rate": 6.334245967517543e-06, + "loss": 0.0, + "step": 105 + }, + { + "epoch": 0.006831217374492492, + "grad_norm": 0.33541193592494806, + "learning_rate": 6.347146957460263e-06, + "loss": 0.0002, + "step": 106 + }, + { + "epoch": 0.006895662821421666, + "grad_norm": 0.13366590525564065, + "learning_rate": 6.3599268095586996e-06, + "loss": 0.0002, + "step": 107 + }, + { + "epoch": 0.006960108268350841, + "grad_norm": 0.3804038820837792, + "learning_rate": 6.372587777589408e-06, + "loss": 0.0033, + "step": 108 + }, + { + "epoch": 0.007024553715280015, + "grad_norm": 0.004748762629093711, + "learning_rate": 6.385132053010746e-06, + "loss": 0.0, + "step": 109 + }, + { + "epoch": 0.00708899916220919, + "grad_norm": 0.18445047393599892, + "learning_rate": 6.397561767239429e-06, + "loss": 0.002, + "step": 110 + }, + { + "epoch": 0.007153444609138364, + "grad_norm": 0.10146856090062799, + "learning_rate": 6.40987899382405e-06, + "loss": 0.0003, + "step": 111 + }, + { + "epoch": 0.007217890056067539, + "grad_norm": 0.011157311312939988, + "learning_rate": 6.422085750521144e-06, + "loss": 0.0, + "step": 112 + }, + { + "epoch": 0.007282335502996713, + "grad_norm": 0.607664348129492, + "learning_rate": 6.434184001278967e-06, + "loss": 0.0028, + "step": 113 + }, + { + "epoch": 0.007346780949925888, + "grad_norm": 1.1590725239402295, + "learning_rate": 6.446175658133884e-06, + "loss": 0.0069, + "step": 114 + }, + { + "epoch": 0.007411226396855062, + "grad_norm": 0.0064697996621230795, + "learning_rate": 6.458062583023952e-06, + "loss": 0.0, + "step": 115 + }, + { + "epoch": 0.007475671843784237, + "grad_norm": 0.09498369574237263, + "learning_rate": 6.46984658952397e-06, + "loss": 0.0002, + "step": 116 + }, + { + "epoch": 0.0075401172907134115, + "grad_norm": 0.0215383638363165, + "learning_rate": 6.481529444506085e-06, + "loss": 0.0001, + "step": 117 + }, + { + "epoch": 0.0076045627376425855, + "grad_norm": 0.1171485120767354, + "learning_rate": 6.493112869729705e-06, + "loss": 0.0002, + "step": 118 + }, + { + "epoch": 0.00766900818457176, + "grad_norm": 0.06883933984294645, + "learning_rate": 6.5045985433643485e-06, + "loss": 0.0003, + "step": 119 + }, + { + "epoch": 0.007733453631500934, + "grad_norm": 0.015494437689087105, + "learning_rate": 6.515988101448755e-06, + "loss": 0.0, + "step": 120 + }, + { + "epoch": 0.007797899078430109, + "grad_norm": 1.084890231104155, + "learning_rate": 6.5272831392894685e-06, + "loss": 0.0045, + "step": 121 + }, + { + "epoch": 0.007862344525359284, + "grad_norm": 0.0041539375579682835, + "learning_rate": 6.538485212801841e-06, + "loss": 0.0, + "step": 122 + }, + { + "epoch": 0.007926789972288458, + "grad_norm": 0.021719405589740653, + "learning_rate": 6.54959583979632e-06, + "loss": 0.0001, + "step": 123 + }, + { + "epoch": 0.007991235419217632, + "grad_norm": 0.207106756440556, + "learning_rate": 6.560616501212651e-06, + "loss": 0.0015, + "step": 124 + }, + { + "epoch": 0.008055680866146806, + "grad_norm": 0.12389089981738528, + "learning_rate": 6.571548642304503e-06, + "loss": 0.0015, + "step": 125 + }, + { + "epoch": 0.008120126313075982, + "grad_norm": 0.08421375587621023, + "learning_rate": 6.582393673776909e-06, + "loss": 0.0017, + "step": 126 + }, + { + "epoch": 0.008184571760005156, + "grad_norm": 0.7895623314177207, + "learning_rate": 6.593152972878746e-06, + "loss": 0.0034, + "step": 127 + }, + { + "epoch": 0.00824901720693433, + "grad_norm": 0.009141865289784029, + "learning_rate": 6.603827884452357e-06, + "loss": 0.0001, + "step": 128 + }, + { + "epoch": 0.008313462653863504, + "grad_norm": 0.06799812572737098, + "learning_rate": 6.614419721942351e-06, + "loss": 0.0002, + "step": 129 + }, + { + "epoch": 0.00837790810079268, + "grad_norm": 0.010198638620555195, + "learning_rate": 6.624929768365432e-06, + "loss": 0.0001, + "step": 130 + }, + { + "epoch": 0.008442353547721854, + "grad_norm": 0.14960549750420293, + "learning_rate": 6.635359277243088e-06, + "loss": 0.0003, + "step": 131 + }, + { + "epoch": 0.008506798994651028, + "grad_norm": 0.03669934654162836, + "learning_rate": 6.6457094734987945e-06, + "loss": 0.0001, + "step": 132 + }, + { + "epoch": 0.008571244441580202, + "grad_norm": 0.0583445395178466, + "learning_rate": 6.6559815543213856e-06, + "loss": 0.0002, + "step": 133 + }, + { + "epoch": 0.008635689888509377, + "grad_norm": 0.21443872420801796, + "learning_rate": 6.666176689996061e-06, + "loss": 0.002, + "step": 134 + }, + { + "epoch": 0.008700135335438551, + "grad_norm": 0.5406641455353228, + "learning_rate": 6.67629602470452e-06, + "loss": 0.0055, + "step": 135 + }, + { + "epoch": 0.008764580782367725, + "grad_norm": 0.0021834450013692394, + "learning_rate": 6.686340677295562e-06, + "loss": 0.0, + "step": 136 + }, + { + "epoch": 0.0088290262292969, + "grad_norm": 0.008434527707541637, + "learning_rate": 6.696311742027455e-06, + "loss": 0.0, + "step": 137 + }, + { + "epoch": 0.008893471676226075, + "grad_norm": 0.04693062531604418, + "learning_rate": 6.706210289283318e-06, + "loss": 0.0002, + "step": 138 + }, + { + "epoch": 0.00895791712315525, + "grad_norm": 0.05186746336439399, + "learning_rate": 6.7160373662606684e-06, + "loss": 0.0002, + "step": 139 + }, + { + "epoch": 0.009022362570084423, + "grad_norm": 0.015129704926972309, + "learning_rate": 6.7257939976362575e-06, + "loss": 0.0, + "step": 140 + }, + { + "epoch": 0.009086808017013597, + "grad_norm": 0.021042850048326282, + "learning_rate": 6.735481186207243e-06, + "loss": 0.0001, + "step": 141 + }, + { + "epoch": 0.009151253463942773, + "grad_norm": 0.02537888704924219, + "learning_rate": 6.745099913509707e-06, + "loss": 0.0001, + "step": 142 + }, + { + "epoch": 0.009215698910871947, + "grad_norm": 0.02671439549274601, + "learning_rate": 6.754651140415472e-06, + "loss": 0.0, + "step": 143 + }, + { + "epoch": 0.009280144357801121, + "grad_norm": 0.008137524957251669, + "learning_rate": 6.764135807708122e-06, + "loss": 0.0, + "step": 144 + }, + { + "epoch": 0.009344589804730297, + "grad_norm": 0.003042221966208203, + "learning_rate": 6.773554836639082e-06, + "loss": 0.0, + "step": 145 + }, + { + "epoch": 0.00940903525165947, + "grad_norm": 0.06033152692120294, + "learning_rate": 6.782909129464605e-06, + "loss": 0.0002, + "step": 146 + }, + { + "epoch": 0.009473480698588645, + "grad_norm": 0.027788121319448158, + "learning_rate": 6.792199569964411e-06, + "loss": 0.0, + "step": 147 + }, + { + "epoch": 0.009537926145517819, + "grad_norm": 0.09830052518820728, + "learning_rate": 6.801427023942765e-06, + "loss": 0.0009, + "step": 148 + }, + { + "epoch": 0.009602371592446994, + "grad_norm": 0.013099850923853006, + "learning_rate": 6.810592339712671e-06, + "loss": 0.0001, + "step": 149 + }, + { + "epoch": 0.009666817039376168, + "grad_norm": 0.3610734402843211, + "learning_rate": 6.819696348563868e-06, + "loss": 0.005, + "step": 150 + }, + { + "epoch": 0.009731262486305342, + "grad_norm": 0.22420221826039138, + "learning_rate": 6.828739865215293e-06, + "loss": 0.0025, + "step": 151 + }, + { + "epoch": 0.009795707933234516, + "grad_norm": 0.026359216933519877, + "learning_rate": 6.837723688252599e-06, + "loss": 0.0001, + "step": 152 + }, + { + "epoch": 0.009860153380163692, + "grad_norm": 0.37353960421039445, + "learning_rate": 6.846648600551325e-06, + "loss": 0.0013, + "step": 153 + }, + { + "epoch": 0.009924598827092866, + "grad_norm": 0.37925933825010816, + "learning_rate": 6.8555153696862974e-06, + "loss": 0.0014, + "step": 154 + }, + { + "epoch": 0.00998904427402204, + "grad_norm": 0.015689512713385756, + "learning_rate": 6.864324748327763e-06, + "loss": 0.0001, + "step": 155 + }, + { + "epoch": 0.010053489720951214, + "grad_norm": 0.42026716718154256, + "learning_rate": 6.8730774746248e-06, + "loss": 0.0027, + "step": 156 + }, + { + "epoch": 0.01011793516788039, + "grad_norm": 0.03761871169419944, + "learning_rate": 6.881774272576467e-06, + "loss": 0.0001, + "step": 157 + }, + { + "epoch": 0.010182380614809564, + "grad_norm": 0.030528250735410357, + "learning_rate": 6.89041585239118e-06, + "loss": 0.0002, + "step": 158 + }, + { + "epoch": 0.010246826061738738, + "grad_norm": 0.20077781600093578, + "learning_rate": 6.899002910834743e-06, + "loss": 0.0009, + "step": 159 + }, + { + "epoch": 0.010311271508667912, + "grad_norm": 1.613127029104335, + "learning_rate": 6.907536131567471e-06, + "loss": 0.0053, + "step": 160 + }, + { + "epoch": 0.010375716955597088, + "grad_norm": 0.39481368200606176, + "learning_rate": 6.91601618547082e-06, + "loss": 0.0103, + "step": 161 + }, + { + "epoch": 0.010440162402526262, + "grad_norm": 0.03645218519897308, + "learning_rate": 6.9244437309638855e-06, + "loss": 0.0001, + "step": 162 + }, + { + "epoch": 0.010504607849455436, + "grad_norm": 0.026445971997361595, + "learning_rate": 6.932819414310185e-06, + "loss": 0.0001, + "step": 163 + }, + { + "epoch": 0.01056905329638461, + "grad_norm": 0.012293269592344393, + "learning_rate": 6.941143869915035e-06, + "loss": 0.0, + "step": 164 + }, + { + "epoch": 0.010633498743313785, + "grad_norm": 0.006382313439032722, + "learning_rate": 6.949417720613908e-06, + "loss": 0.0, + "step": 165 + }, + { + "epoch": 0.01069794419024296, + "grad_norm": 0.18303938626882663, + "learning_rate": 6.95764157795207e-06, + "loss": 0.0007, + "step": 166 + }, + { + "epoch": 0.010762389637172133, + "grad_norm": 0.08027807718869757, + "learning_rate": 6.965816042455817e-06, + "loss": 0.0002, + "step": 167 + }, + { + "epoch": 0.010826835084101307, + "grad_norm": 0.009543528622426802, + "learning_rate": 6.9739417038956234e-06, + "loss": 0.0, + "step": 168 + }, + { + "epoch": 0.010891280531030483, + "grad_norm": 0.005663156424081436, + "learning_rate": 6.982019141541477e-06, + "loss": 0.0, + "step": 169 + }, + { + "epoch": 0.010955725977959657, + "grad_norm": 0.06478321556126382, + "learning_rate": 6.990048924410674e-06, + "loss": 0.0001, + "step": 170 + }, + { + "epoch": 0.011020171424888831, + "grad_norm": 0.11265850722264495, + "learning_rate": 6.998031611508364e-06, + "loss": 0.0002, + "step": 171 + }, + { + "epoch": 0.011084616871818005, + "grad_norm": 0.13747799410071634, + "learning_rate": 7.005967752061066e-06, + "loss": 0.0017, + "step": 172 + }, + { + "epoch": 0.011149062318747181, + "grad_norm": 0.8610413781168746, + "learning_rate": 7.013857885743432e-06, + "loss": 0.0035, + "step": 173 + }, + { + "epoch": 0.011213507765676355, + "grad_norm": 2.1479235225041444, + "learning_rate": 7.02170254289845e-06, + "loss": 0.0081, + "step": 174 + }, + { + "epoch": 0.011277953212605529, + "grad_norm": 0.19717496733882242, + "learning_rate": 7.02950224475137e-06, + "loss": 0.0009, + "step": 175 + }, + { + "epoch": 0.011342398659534705, + "grad_norm": 0.12631252498003578, + "learning_rate": 7.037257503617509e-06, + "loss": 0.0007, + "step": 176 + }, + { + "epoch": 0.011406844106463879, + "grad_norm": 0.002589981572960044, + "learning_rate": 7.044968823104184e-06, + "loss": 0.0, + "step": 177 + }, + { + "epoch": 0.011471289553393053, + "grad_norm": 0.048464294586520046, + "learning_rate": 7.052636698306942e-06, + "loss": 0.0001, + "step": 178 + }, + { + "epoch": 0.011535735000322227, + "grad_norm": 0.008706107825396808, + "learning_rate": 7.060261616000313e-06, + "loss": 0.0, + "step": 179 + }, + { + "epoch": 0.011600180447251402, + "grad_norm": 0.016572784451954176, + "learning_rate": 7.067844054823235e-06, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.011664625894180576, + "grad_norm": 0.011129301311151215, + "learning_rate": 7.075384485459343e-06, + "loss": 0.0001, + "step": 181 + }, + { + "epoch": 0.01172907134110975, + "grad_norm": 0.039764761465697955, + "learning_rate": 7.082883370812301e-06, + "loss": 0.0001, + "step": 182 + }, + { + "epoch": 0.011793516788038924, + "grad_norm": 0.024318506875899433, + "learning_rate": 7.09034116617632e-06, + "loss": 0.0001, + "step": 183 + }, + { + "epoch": 0.0118579622349681, + "grad_norm": 2.2273904967445186, + "learning_rate": 7.0977583194020325e-06, + "loss": 0.012, + "step": 184 + }, + { + "epoch": 0.011922407681897274, + "grad_norm": 0.006233002881990179, + "learning_rate": 7.105135271057878e-06, + "loss": 0.0, + "step": 185 + }, + { + "epoch": 0.011986853128826448, + "grad_norm": 0.09233918702362297, + "learning_rate": 7.1124724545871305e-06, + "loss": 0.0004, + "step": 186 + }, + { + "epoch": 0.012051298575755622, + "grad_norm": 0.006889842787298746, + "learning_rate": 7.119770296460714e-06, + "loss": 0.0, + "step": 187 + }, + { + "epoch": 0.012115744022684798, + "grad_norm": 0.03291308421119183, + "learning_rate": 7.127029216325958e-06, + "loss": 0.0002, + "step": 188 + }, + { + "epoch": 0.012180189469613972, + "grad_norm": 0.4183982353790482, + "learning_rate": 7.1342496271513885e-06, + "loss": 0.0006, + "step": 189 + }, + { + "epoch": 0.012244634916543146, + "grad_norm": 0.0010894861958931816, + "learning_rate": 7.141431935367712e-06, + "loss": 0.0, + "step": 190 + }, + { + "epoch": 0.01230908036347232, + "grad_norm": 0.1424472482504031, + "learning_rate": 7.14857654100509e-06, + "loss": 0.0005, + "step": 191 + }, + { + "epoch": 0.012373525810401496, + "grad_norm": 0.0006208977366605882, + "learning_rate": 7.155683837826838e-06, + "loss": 0.0, + "step": 192 + }, + { + "epoch": 0.01243797125733067, + "grad_norm": 0.006579198044893609, + "learning_rate": 7.162754213459641e-06, + "loss": 0.0001, + "step": 193 + }, + { + "epoch": 0.012502416704259844, + "grad_norm": 0.0037932328500453966, + "learning_rate": 7.1697880495204184e-06, + "loss": 0.0, + "step": 194 + }, + { + "epoch": 0.012566862151189018, + "grad_norm": 3.7558033860150153, + "learning_rate": 7.176785721739913e-06, + "loss": 0.0326, + "step": 195 + }, + { + "epoch": 0.012631307598118193, + "grad_norm": 0.048965416061562524, + "learning_rate": 7.1837476000831255e-06, + "loss": 0.0001, + "step": 196 + }, + { + "epoch": 0.012695753045047367, + "grad_norm": 0.004427047393266682, + "learning_rate": 7.190674048866693e-06, + "loss": 0.0, + "step": 197 + }, + { + "epoch": 0.012760198491976541, + "grad_norm": 0.8024053711522192, + "learning_rate": 7.197565426873274e-06, + "loss": 0.0031, + "step": 198 + }, + { + "epoch": 0.012824643938905715, + "grad_norm": 0.16278141967045448, + "learning_rate": 7.20442208746308e-06, + "loss": 0.0005, + "step": 199 + }, + { + "epoch": 0.012889089385834891, + "grad_norm": 0.009971334443131889, + "learning_rate": 7.211244378682583e-06, + "loss": 0.0001, + "step": 200 + }, + { + "epoch": 0.012953534832764065, + "grad_norm": 0.0020690471456571028, + "learning_rate": 7.218032643370541e-06, + "loss": 0.0, + "step": 201 + }, + { + "epoch": 0.01301798027969324, + "grad_norm": 0.03277078691989995, + "learning_rate": 7.224787219261363e-06, + "loss": 0.0001, + "step": 202 + }, + { + "epoch": 0.013082425726622415, + "grad_norm": 0.01937230899628366, + "learning_rate": 7.231508439085952e-06, + "loss": 0.0001, + "step": 203 + }, + { + "epoch": 0.013146871173551589, + "grad_norm": 0.05567845986542249, + "learning_rate": 7.238196630670041e-06, + "loss": 0.0005, + "step": 204 + }, + { + "epoch": 0.013211316620480763, + "grad_norm": 0.004371745422922564, + "learning_rate": 7.244852117030148e-06, + "loss": 0.0, + "step": 205 + }, + { + "epoch": 0.013275762067409937, + "grad_norm": 0.0070734951743021, + "learning_rate": 7.251475216467184e-06, + "loss": 0.0001, + "step": 206 + }, + { + "epoch": 0.013340207514339113, + "grad_norm": 0.02153720307664906, + "learning_rate": 7.258066242657797e-06, + "loss": 0.0001, + "step": 207 + }, + { + "epoch": 0.013404652961268287, + "grad_norm": 0.12040073376471393, + "learning_rate": 7.2646255047435146e-06, + "loss": 0.0004, + "step": 208 + }, + { + "epoch": 0.01346909840819746, + "grad_norm": 0.0004398452218530347, + "learning_rate": 7.271153307417751e-06, + "loss": 0.0, + "step": 209 + }, + { + "epoch": 0.013533543855126635, + "grad_norm": 0.04042410483723827, + "learning_rate": 7.277649951010737e-06, + "loss": 0.0003, + "step": 210 + }, + { + "epoch": 0.01359798930205581, + "grad_norm": 0.06771473747313296, + "learning_rate": 7.284115731572427e-06, + "loss": 0.0001, + "step": 211 + }, + { + "epoch": 0.013662434748984984, + "grad_norm": 0.01151591523695472, + "learning_rate": 7.290550940953457e-06, + "loss": 0.0, + "step": 212 + }, + { + "epoch": 0.013726880195914158, + "grad_norm": 0.4189345173901024, + "learning_rate": 7.296955866884187e-06, + "loss": 0.003, + "step": 213 + }, + { + "epoch": 0.013791325642843332, + "grad_norm": 0.04164191329700527, + "learning_rate": 7.303330793051894e-06, + "loss": 0.0001, + "step": 214 + }, + { + "epoch": 0.013855771089772508, + "grad_norm": 0.45272719251647475, + "learning_rate": 7.309675999176179e-06, + "loss": 0.0042, + "step": 215 + }, + { + "epoch": 0.013920216536701682, + "grad_norm": 0.10562175672393527, + "learning_rate": 7.315991761082603e-06, + "loss": 0.0001, + "step": 216 + }, + { + "epoch": 0.013984661983630856, + "grad_norm": 0.004886530786615905, + "learning_rate": 7.322278350774632e-06, + "loss": 0.0, + "step": 217 + }, + { + "epoch": 0.01404910743056003, + "grad_norm": 0.0305665453017975, + "learning_rate": 7.328536036503941e-06, + "loss": 0.0001, + "step": 218 + }, + { + "epoch": 0.014113552877489206, + "grad_norm": 0.03056080236649342, + "learning_rate": 7.334765082839083e-06, + "loss": 0.0002, + "step": 219 + }, + { + "epoch": 0.01417799832441838, + "grad_norm": 0.01914917310382882, + "learning_rate": 7.3409657507326235e-06, + "loss": 0.0001, + "step": 220 + }, + { + "epoch": 0.014242443771347554, + "grad_norm": 0.03466315135259134, + "learning_rate": 7.347138297586717e-06, + "loss": 0.0, + "step": 221 + }, + { + "epoch": 0.014306889218276728, + "grad_norm": 0.005841761742250744, + "learning_rate": 7.353282977317245e-06, + "loss": 0.0001, + "step": 222 + }, + { + "epoch": 0.014371334665205904, + "grad_norm": 0.014125468048465728, + "learning_rate": 7.359400040416475e-06, + "loss": 0.0001, + "step": 223 + }, + { + "epoch": 0.014435780112135078, + "grad_norm": 0.1470687716683251, + "learning_rate": 7.365489734014338e-06, + "loss": 0.0013, + "step": 224 + }, + { + "epoch": 0.014500225559064252, + "grad_norm": 0.1316313746000964, + "learning_rate": 7.371552301938347e-06, + "loss": 0.0011, + "step": 225 + }, + { + "epoch": 0.014564671005993426, + "grad_norm": 1.216626395686913, + "learning_rate": 7.377587984772161e-06, + "loss": 0.003, + "step": 226 + }, + { + "epoch": 0.014629116452922602, + "grad_norm": 0.030215975039620327, + "learning_rate": 7.3835970199128815e-06, + "loss": 0.0001, + "step": 227 + }, + { + "epoch": 0.014693561899851776, + "grad_norm": 0.3925818447866758, + "learning_rate": 7.3895796416270785e-06, + "loss": 0.0014, + "step": 228 + }, + { + "epoch": 0.01475800734678095, + "grad_norm": 0.011962584082899389, + "learning_rate": 7.395536081105594e-06, + "loss": 0.0, + "step": 229 + }, + { + "epoch": 0.014822452793710124, + "grad_norm": 0.31894698647727304, + "learning_rate": 7.401466566517146e-06, + "loss": 0.0012, + "step": 230 + }, + { + "epoch": 0.0148868982406393, + "grad_norm": 0.003858933499673059, + "learning_rate": 7.407371323060776e-06, + "loss": 0.0, + "step": 231 + }, + { + "epoch": 0.014951343687568473, + "grad_norm": 0.002184925615570465, + "learning_rate": 7.413250573017164e-06, + "loss": 0.0, + "step": 232 + }, + { + "epoch": 0.015015789134497647, + "grad_norm": 0.0575590729604499, + "learning_rate": 7.4191045357988316e-06, + "loss": 0.0002, + "step": 233 + }, + { + "epoch": 0.015080234581426823, + "grad_norm": 0.0851483114943205, + "learning_rate": 7.424933427999279e-06, + "loss": 0.0001, + "step": 234 + }, + { + "epoch": 0.015144680028355997, + "grad_norm": 0.023247510036130875, + "learning_rate": 7.43073746344107e-06, + "loss": 0.0002, + "step": 235 + }, + { + "epoch": 0.015209125475285171, + "grad_norm": 0.005134519637574446, + "learning_rate": 7.436516853222899e-06, + "loss": 0.0, + "step": 236 + }, + { + "epoch": 0.015273570922214345, + "grad_norm": 0.012576069907377694, + "learning_rate": 7.442271805765659e-06, + "loss": 0.0, + "step": 237 + }, + { + "epoch": 0.01533801636914352, + "grad_norm": 0.0041381520945973395, + "learning_rate": 7.448002526857543e-06, + "loss": 0.0, + "step": 238 + }, + { + "epoch": 0.015402461816072695, + "grad_norm": 0.1408810734446625, + "learning_rate": 7.453709219698196e-06, + "loss": 0.0014, + "step": 239 + }, + { + "epoch": 0.015466907263001869, + "grad_norm": 0.02099080559759998, + "learning_rate": 7.4593920849419495e-06, + "loss": 0.0001, + "step": 240 + }, + { + "epoch": 0.015531352709931043, + "grad_norm": 0.34798627897458273, + "learning_rate": 7.465051320740162e-06, + "loss": 0.0014, + "step": 241 + }, + { + "epoch": 0.015595798156860219, + "grad_norm": 0.03753130495216673, + "learning_rate": 7.470687122782663e-06, + "loss": 0.0003, + "step": 242 + }, + { + "epoch": 0.01566024360378939, + "grad_norm": 0.26285752177303706, + "learning_rate": 7.476299684338365e-06, + "loss": 0.0014, + "step": 243 + }, + { + "epoch": 0.015724689050718568, + "grad_norm": 0.011810539302378758, + "learning_rate": 7.481889196295035e-06, + "loss": 0.0, + "step": 244 + }, + { + "epoch": 0.015789134497647742, + "grad_norm": 0.2277299056097155, + "learning_rate": 7.487455847198239e-06, + "loss": 0.0012, + "step": 245 + }, + { + "epoch": 0.015853579944576916, + "grad_norm": 0.004420655223345774, + "learning_rate": 7.492999823289514e-06, + "loss": 0.0, + "step": 246 + }, + { + "epoch": 0.01591802539150609, + "grad_norm": 0.09830975739963607, + "learning_rate": 7.498521308543755e-06, + "loss": 0.0005, + "step": 247 + }, + { + "epoch": 0.015982470838435264, + "grad_norm": 0.045239574227459146, + "learning_rate": 7.504020484705845e-06, + "loss": 0.0001, + "step": 248 + }, + { + "epoch": 0.01604691628536444, + "grad_norm": 0.004480344683799468, + "learning_rate": 7.5094975313265485e-06, + "loss": 0.0, + "step": 249 + }, + { + "epoch": 0.016111361732293612, + "grad_norm": 0.04672057822806287, + "learning_rate": 7.5149526257976954e-06, + "loss": 0.0001, + "step": 250 + }, + { + "epoch": 0.016175807179222786, + "grad_norm": 0.27883213565129883, + "learning_rate": 7.5203859433866514e-06, + "loss": 0.001, + "step": 251 + }, + { + "epoch": 0.016240252626151964, + "grad_norm": 0.06406857182641057, + "learning_rate": 7.525797657270103e-06, + "loss": 0.0001, + "step": 252 + }, + { + "epoch": 0.016304698073081138, + "grad_norm": 0.1080837493277941, + "learning_rate": 7.531187938567185e-06, + "loss": 0.0004, + "step": 253 + }, + { + "epoch": 0.016369143520010312, + "grad_norm": 0.03588783821123804, + "learning_rate": 7.53655695637194e-06, + "loss": 0.0003, + "step": 254 + }, + { + "epoch": 0.016433588966939486, + "grad_norm": 0.16026049803218403, + "learning_rate": 7.5419048777851525e-06, + "loss": 0.0018, + "step": 255 + }, + { + "epoch": 0.01649803441386866, + "grad_norm": 1.9165503701573037, + "learning_rate": 7.547231867945552e-06, + "loss": 0.0132, + "step": 256 + }, + { + "epoch": 0.016562479860797834, + "grad_norm": 0.015958733921597468, + "learning_rate": 7.55253809006041e-06, + "loss": 0.0, + "step": 257 + }, + { + "epoch": 0.016626925307727008, + "grad_norm": 0.0019534432535608308, + "learning_rate": 7.557823705435545e-06, + "loss": 0.0, + "step": 258 + }, + { + "epoch": 0.016691370754656182, + "grad_norm": 0.02074439107330492, + "learning_rate": 7.5630888735047455e-06, + "loss": 0.0, + "step": 259 + }, + { + "epoch": 0.01675581620158536, + "grad_norm": 1.1553521149673944, + "learning_rate": 7.568333751858627e-06, + "loss": 0.007, + "step": 260 + }, + { + "epoch": 0.016820261648514533, + "grad_norm": 0.3606061372170696, + "learning_rate": 7.573558496272929e-06, + "loss": 0.0028, + "step": 261 + }, + { + "epoch": 0.016884707095443707, + "grad_norm": 0.00275996815502147, + "learning_rate": 7.578763260736281e-06, + "loss": 0.0, + "step": 262 + }, + { + "epoch": 0.01694915254237288, + "grad_norm": 0.004268297014106991, + "learning_rate": 7.58394819747744e-06, + "loss": 0.0, + "step": 263 + }, + { + "epoch": 0.017013597989302055, + "grad_norm": 0.0367751156855776, + "learning_rate": 7.589113456991989e-06, + "loss": 0.0, + "step": 264 + }, + { + "epoch": 0.01707804343623123, + "grad_norm": 0.03272693147392111, + "learning_rate": 7.594259188068569e-06, + "loss": 0.0004, + "step": 265 + }, + { + "epoch": 0.017142488883160403, + "grad_norm": 0.2226594205262504, + "learning_rate": 7.59938553781458e-06, + "loss": 0.0008, + "step": 266 + }, + { + "epoch": 0.01720693433008958, + "grad_norm": 4.17899426567084, + "learning_rate": 7.604492651681422e-06, + "loss": 0.0028, + "step": 267 + }, + { + "epoch": 0.017271379777018755, + "grad_norm": 0.314194863453364, + "learning_rate": 7.609580673489255e-06, + "loss": 0.0017, + "step": 268 + }, + { + "epoch": 0.01733582522394793, + "grad_norm": 0.009341551827583042, + "learning_rate": 7.614649745451305e-06, + "loss": 0.0, + "step": 269 + }, + { + "epoch": 0.017400270670877103, + "grad_norm": 0.006294262625942665, + "learning_rate": 7.6197000081977145e-06, + "loss": 0.0, + "step": 270 + }, + { + "epoch": 0.017464716117806277, + "grad_norm": 0.042152326765031375, + "learning_rate": 7.624731600798942e-06, + "loss": 0.0003, + "step": 271 + }, + { + "epoch": 0.01752916156473545, + "grad_norm": 0.005840402112676206, + "learning_rate": 7.629744660788755e-06, + "loss": 0.0, + "step": 272 + }, + { + "epoch": 0.017593607011664625, + "grad_norm": 0.003961071319983515, + "learning_rate": 7.63473932418678e-06, + "loss": 0.0, + "step": 273 + }, + { + "epoch": 0.0176580524585938, + "grad_norm": 0.03416186617415247, + "learning_rate": 7.639715725520648e-06, + "loss": 0.0003, + "step": 274 + }, + { + "epoch": 0.017722497905522976, + "grad_norm": 0.011693800006046062, + "learning_rate": 7.644673997847735e-06, + "loss": 0.0, + "step": 275 + }, + { + "epoch": 0.01778694335245215, + "grad_norm": 0.013642164927569226, + "learning_rate": 7.64961427277651e-06, + "loss": 0.0001, + "step": 276 + }, + { + "epoch": 0.017851388799381324, + "grad_norm": 0.005290876124771607, + "learning_rate": 7.6545366804875e-06, + "loss": 0.0, + "step": 277 + }, + { + "epoch": 0.0179158342463105, + "grad_norm": 0.3685656717237363, + "learning_rate": 7.659441349753862e-06, + "loss": 0.0047, + "step": 278 + }, + { + "epoch": 0.017980279693239672, + "grad_norm": 0.162786871985292, + "learning_rate": 7.664328407961608e-06, + "loss": 0.0003, + "step": 279 + }, + { + "epoch": 0.018044725140168846, + "grad_norm": 0.0038340519806686416, + "learning_rate": 7.669197981129452e-06, + "loss": 0.0, + "step": 280 + }, + { + "epoch": 0.01810917058709802, + "grad_norm": 1.1800076890145348, + "learning_rate": 7.674050193928305e-06, + "loss": 0.0107, + "step": 281 + }, + { + "epoch": 0.018173616034027194, + "grad_norm": 0.4022661152723877, + "learning_rate": 7.678885169700438e-06, + "loss": 0.0011, + "step": 282 + }, + { + "epoch": 0.018238061480956372, + "grad_norm": 0.0010258398165785125, + "learning_rate": 7.683703030478275e-06, + "loss": 0.0, + "step": 283 + }, + { + "epoch": 0.018302506927885546, + "grad_norm": 0.03959304117747763, + "learning_rate": 7.688503897002901e-06, + "loss": 0.0002, + "step": 284 + }, + { + "epoch": 0.01836695237481472, + "grad_norm": 0.003900586596470166, + "learning_rate": 7.693287888742192e-06, + "loss": 0.0, + "step": 285 + }, + { + "epoch": 0.018431397821743894, + "grad_norm": 0.0016122389663264988, + "learning_rate": 7.698055123908667e-06, + "loss": 0.0, + "step": 286 + }, + { + "epoch": 0.018495843268673068, + "grad_norm": 0.04600308794351545, + "learning_rate": 7.702805719477016e-06, + "loss": 0.0001, + "step": 287 + }, + { + "epoch": 0.018560288715602242, + "grad_norm": 0.7386376867749295, + "learning_rate": 7.707539791201318e-06, + "loss": 0.0042, + "step": 288 + }, + { + "epoch": 0.018624734162531416, + "grad_norm": 0.07433124536949527, + "learning_rate": 7.712257453631959e-06, + "loss": 0.0001, + "step": 289 + }, + { + "epoch": 0.018689179609460593, + "grad_norm": 0.013048752330046877, + "learning_rate": 7.716958820132276e-06, + "loss": 0.0001, + "step": 290 + }, + { + "epoch": 0.018753625056389767, + "grad_norm": 0.007703258584517692, + "learning_rate": 7.721644002894899e-06, + "loss": 0.0001, + "step": 291 + }, + { + "epoch": 0.01881807050331894, + "grad_norm": 0.09900497813725398, + "learning_rate": 7.726313112957798e-06, + "loss": 0.0004, + "step": 292 + }, + { + "epoch": 0.018882515950248115, + "grad_norm": 0.7346640890359516, + "learning_rate": 7.730966260220105e-06, + "loss": 0.0083, + "step": 293 + }, + { + "epoch": 0.01894696139717729, + "grad_norm": 0.09893905196656488, + "learning_rate": 7.735603553457605e-06, + "loss": 0.0004, + "step": 294 + }, + { + "epoch": 0.019011406844106463, + "grad_norm": 0.24004125974315293, + "learning_rate": 7.740225100338013e-06, + "loss": 0.0021, + "step": 295 + }, + { + "epoch": 0.019075852291035637, + "grad_norm": 0.2778235435865419, + "learning_rate": 7.74483100743596e-06, + "loss": 0.0005, + "step": 296 + }, + { + "epoch": 0.01914029773796481, + "grad_norm": 0.011061506543972114, + "learning_rate": 7.749421380247754e-06, + "loss": 0.0, + "step": 297 + }, + { + "epoch": 0.01920474318489399, + "grad_norm": 0.0099751102256593, + "learning_rate": 7.753996323205866e-06, + "loss": 0.0, + "step": 298 + }, + { + "epoch": 0.019269188631823163, + "grad_norm": 0.04293211545664707, + "learning_rate": 7.758555939693189e-06, + "loss": 0.0001, + "step": 299 + }, + { + "epoch": 0.019333634078752337, + "grad_norm": 0.14890663148925204, + "learning_rate": 7.763100332057061e-06, + "loss": 0.0017, + "step": 300 + }, + { + "epoch": 0.01939807952568151, + "grad_norm": 0.051856462016533725, + "learning_rate": 7.767629601623046e-06, + "loss": 0.0003, + "step": 301 + }, + { + "epoch": 0.019462524972610685, + "grad_norm": 0.20118795719491775, + "learning_rate": 7.77214384870849e-06, + "loss": 0.0008, + "step": 302 + }, + { + "epoch": 0.01952697041953986, + "grad_norm": 0.01699588268895743, + "learning_rate": 7.776643172635842e-06, + "loss": 0.0001, + "step": 303 + }, + { + "epoch": 0.019591415866469033, + "grad_norm": 0.28016719067002793, + "learning_rate": 7.781127671745794e-06, + "loss": 0.0005, + "step": 304 + }, + { + "epoch": 0.019655861313398207, + "grad_norm": 0.13740708235192936, + "learning_rate": 7.785597443410148e-06, + "loss": 0.0002, + "step": 305 + }, + { + "epoch": 0.019720306760327384, + "grad_norm": 0.13476762217271904, + "learning_rate": 7.79005258404452e-06, + "loss": 0.0009, + "step": 306 + }, + { + "epoch": 0.01978475220725656, + "grad_norm": 0.014737783093005689, + "learning_rate": 7.794493189120813e-06, + "loss": 0.0001, + "step": 307 + }, + { + "epoch": 0.019849197654185732, + "grad_norm": 1.1847140609432278, + "learning_rate": 7.79891935317949e-06, + "loss": 0.0063, + "step": 308 + }, + { + "epoch": 0.019913643101114906, + "grad_norm": 0.004352597997975502, + "learning_rate": 7.803331169841663e-06, + "loss": 0.0, + "step": 309 + }, + { + "epoch": 0.01997808854804408, + "grad_norm": 0.025632327962959205, + "learning_rate": 7.807728731820957e-06, + "loss": 0.0002, + "step": 310 + }, + { + "epoch": 0.020042533994973254, + "grad_norm": 0.054951094255651874, + "learning_rate": 7.812112130935214e-06, + "loss": 0.0003, + "step": 311 + }, + { + "epoch": 0.02010697944190243, + "grad_norm": 0.10254521578536714, + "learning_rate": 7.816481458117993e-06, + "loss": 0.0002, + "step": 312 + }, + { + "epoch": 0.020171424888831602, + "grad_norm": 0.14173059175345795, + "learning_rate": 7.820836803429887e-06, + "loss": 0.0004, + "step": 313 + }, + { + "epoch": 0.02023587033576078, + "grad_norm": 0.010335749948364018, + "learning_rate": 7.825178256069662e-06, + "loss": 0.0, + "step": 314 + }, + { + "epoch": 0.020300315782689954, + "grad_norm": 0.009398006668248197, + "learning_rate": 7.829505904385217e-06, + "loss": 0.0, + "step": 315 + }, + { + "epoch": 0.020364761229619128, + "grad_norm": 0.0008854282801088228, + "learning_rate": 7.833819835884374e-06, + "loss": 0.0, + "step": 316 + }, + { + "epoch": 0.020429206676548302, + "grad_norm": 0.01692642260677151, + "learning_rate": 7.838120137245496e-06, + "loss": 0.0001, + "step": 317 + }, + { + "epoch": 0.020493652123477476, + "grad_norm": 0.012734900473579882, + "learning_rate": 7.842406894327936e-06, + "loss": 0.0001, + "step": 318 + }, + { + "epoch": 0.02055809757040665, + "grad_norm": 0.04965355559724808, + "learning_rate": 7.846680192182317e-06, + "loss": 0.0002, + "step": 319 + }, + { + "epoch": 0.020622543017335824, + "grad_norm": 0.027329077999220016, + "learning_rate": 7.850940115060665e-06, + "loss": 0.0002, + "step": 320 + }, + { + "epoch": 0.020686988464265, + "grad_norm": 0.012715309583353273, + "learning_rate": 7.855186746426372e-06, + "loss": 0.0, + "step": 321 + }, + { + "epoch": 0.020751433911194175, + "grad_norm": 0.022217729569863346, + "learning_rate": 7.859420168964014e-06, + "loss": 0.0001, + "step": 322 + }, + { + "epoch": 0.02081587935812335, + "grad_norm": 0.009745268168007587, + "learning_rate": 7.863640464588996e-06, + "loss": 0.0, + "step": 323 + }, + { + "epoch": 0.020880324805052523, + "grad_norm": 0.3033755741514754, + "learning_rate": 7.86784771445708e-06, + "loss": 0.001, + "step": 324 + }, + { + "epoch": 0.020944770251981697, + "grad_norm": 0.012576282814093155, + "learning_rate": 7.87204199897374e-06, + "loss": 0.0, + "step": 325 + }, + { + "epoch": 0.02100921569891087, + "grad_norm": 0.0025267722044404915, + "learning_rate": 7.87622339780338e-06, + "loss": 0.0, + "step": 326 + }, + { + "epoch": 0.021073661145840045, + "grad_norm": 0.06525056075694663, + "learning_rate": 7.88039198987842e-06, + "loss": 0.0001, + "step": 327 + }, + { + "epoch": 0.02113810659276922, + "grad_norm": 0.010063583865905463, + "learning_rate": 7.88454785340823e-06, + "loss": 0.0, + "step": 328 + }, + { + "epoch": 0.021202552039698397, + "grad_norm": 1.3302956737899603, + "learning_rate": 7.888691065887938e-06, + "loss": 0.0081, + "step": 329 + }, + { + "epoch": 0.02126699748662757, + "grad_norm": 0.17322090259007986, + "learning_rate": 7.892821704107102e-06, + "loss": 0.0023, + "step": 330 + }, + { + "epoch": 0.021331442933556745, + "grad_norm": 0.011463568326494302, + "learning_rate": 7.896939844158243e-06, + "loss": 0.0, + "step": 331 + }, + { + "epoch": 0.02139588838048592, + "grad_norm": 0.04771365515172803, + "learning_rate": 7.901045561445263e-06, + "loss": 0.0001, + "step": 332 + }, + { + "epoch": 0.021460333827415093, + "grad_norm": 0.007986473007665173, + "learning_rate": 7.905138930691726e-06, + "loss": 0.0, + "step": 333 + }, + { + "epoch": 0.021524779274344267, + "grad_norm": 0.13398699493738292, + "learning_rate": 7.909220025949012e-06, + "loss": 0.0001, + "step": 334 + }, + { + "epoch": 0.02158922472127344, + "grad_norm": 0.17768507053513713, + "learning_rate": 7.913288920604367e-06, + "loss": 0.0024, + "step": 335 + }, + { + "epoch": 0.021653670168202615, + "grad_norm": 1.1020287142645597, + "learning_rate": 7.917345687388818e-06, + "loss": 0.0027, + "step": 336 + }, + { + "epoch": 0.021718115615131792, + "grad_norm": 0.005485560200914321, + "learning_rate": 7.921390398384963e-06, + "loss": 0.0, + "step": 337 + }, + { + "epoch": 0.021782561062060966, + "grad_norm": 0.39644821828960675, + "learning_rate": 7.925423125034672e-06, + "loss": 0.0025, + "step": 338 + }, + { + "epoch": 0.02184700650899014, + "grad_norm": 0.1476993275933789, + "learning_rate": 7.92944393814664e-06, + "loss": 0.0006, + "step": 339 + }, + { + "epoch": 0.021911451955919314, + "grad_norm": 0.003948748132867952, + "learning_rate": 7.933452907903869e-06, + "loss": 0.0, + "step": 340 + }, + { + "epoch": 0.02197589740284849, + "grad_norm": 0.1684808299627317, + "learning_rate": 7.937450103870998e-06, + "loss": 0.0007, + "step": 341 + }, + { + "epoch": 0.022040342849777662, + "grad_norm": 0.03526063990809892, + "learning_rate": 7.941435595001557e-06, + "loss": 0.0001, + "step": 342 + }, + { + "epoch": 0.022104788296706836, + "grad_norm": 0.00300194528628094, + "learning_rate": 7.945409449645105e-06, + "loss": 0.0, + "step": 343 + }, + { + "epoch": 0.02216923374363601, + "grad_norm": 0.0015934897607704204, + "learning_rate": 7.94937173555426e-06, + "loss": 0.0, + "step": 344 + }, + { + "epoch": 0.022233679190565188, + "grad_norm": 0.0058229976179194655, + "learning_rate": 7.953322519891626e-06, + "loss": 0.0, + "step": 345 + }, + { + "epoch": 0.022298124637494362, + "grad_norm": 0.04014894770212523, + "learning_rate": 7.957261869236626e-06, + "loss": 0.0, + "step": 346 + }, + { + "epoch": 0.022362570084423536, + "grad_norm": 0.3194126538626638, + "learning_rate": 7.961189849592243e-06, + "loss": 0.0056, + "step": 347 + }, + { + "epoch": 0.02242701553135271, + "grad_norm": 0.0064705525401818125, + "learning_rate": 7.965106526391645e-06, + "loss": 0.0001, + "step": 348 + }, + { + "epoch": 0.022491460978281884, + "grad_norm": 3.5872106349028567, + "learning_rate": 7.969011964504728e-06, + "loss": 0.0476, + "step": 349 + }, + { + "epoch": 0.022555906425211058, + "grad_norm": 0.21691632631488653, + "learning_rate": 7.972906228244563e-06, + "loss": 0.0006, + "step": 350 + }, + { + "epoch": 0.022620351872140232, + "grad_norm": 0.013143918484912772, + "learning_rate": 7.976789381373757e-06, + "loss": 0.0, + "step": 351 + }, + { + "epoch": 0.02268479731906941, + "grad_norm": 0.02501359449813824, + "learning_rate": 7.980661487110704e-06, + "loss": 0.0001, + "step": 352 + }, + { + "epoch": 0.022749242765998583, + "grad_norm": 0.0017974908439855096, + "learning_rate": 7.98452260813577e-06, + "loss": 0.0, + "step": 353 + }, + { + "epoch": 0.022813688212927757, + "grad_norm": 0.023813737078225865, + "learning_rate": 7.988372806597378e-06, + "loss": 0.0, + "step": 354 + }, + { + "epoch": 0.02287813365985693, + "grad_norm": 0.36353022749573466, + "learning_rate": 7.992212144118013e-06, + "loss": 0.0034, + "step": 355 + }, + { + "epoch": 0.022942579106786105, + "grad_norm": 0.03440102538916525, + "learning_rate": 7.996040681800137e-06, + "loss": 0.0001, + "step": 356 + }, + { + "epoch": 0.02300702455371528, + "grad_norm": 0.39249523673234826, + "learning_rate": 7.99985848023202e-06, + "loss": 0.0033, + "step": 357 + }, + { + "epoch": 0.023071470000644453, + "grad_norm": 0.015484064838049798, + "learning_rate": 8.003665599493508e-06, + "loss": 0.0002, + "step": 358 + }, + { + "epoch": 0.023135915447573627, + "grad_norm": 0.05831849892882333, + "learning_rate": 8.007462099161673e-06, + "loss": 0.0002, + "step": 359 + }, + { + "epoch": 0.023200360894502805, + "grad_norm": 0.030189192169840014, + "learning_rate": 8.01124803831643e-06, + "loss": 0.0, + "step": 360 + }, + { + "epoch": 0.02326480634143198, + "grad_norm": 0.2871898356727702, + "learning_rate": 8.015023475546033e-06, + "loss": 0.0009, + "step": 361 + }, + { + "epoch": 0.023329251788361153, + "grad_norm": 0.0028194829163485577, + "learning_rate": 8.018788468952538e-06, + "loss": 0.0, + "step": 362 + }, + { + "epoch": 0.023393697235290327, + "grad_norm": 0.13778555057274436, + "learning_rate": 8.022543076157141e-06, + "loss": 0.0039, + "step": 363 + }, + { + "epoch": 0.0234581426822195, + "grad_norm": 0.021112274440135134, + "learning_rate": 8.026287354305495e-06, + "loss": 0.0001, + "step": 364 + }, + { + "epoch": 0.023522588129148675, + "grad_norm": 0.004636861815034096, + "learning_rate": 8.030021360072912e-06, + "loss": 0.0, + "step": 365 + }, + { + "epoch": 0.02358703357607785, + "grad_norm": 0.0028864734672826285, + "learning_rate": 8.033745149669513e-06, + "loss": 0.0, + "step": 366 + }, + { + "epoch": 0.023651479023007023, + "grad_norm": 0.22539105605376755, + "learning_rate": 8.037458778845317e-06, + "loss": 0.0007, + "step": 367 + }, + { + "epoch": 0.0237159244699362, + "grad_norm": 0.7308905296678736, + "learning_rate": 8.041162302895227e-06, + "loss": 0.0024, + "step": 368 + }, + { + "epoch": 0.023780369916865374, + "grad_norm": 0.0055914223650806285, + "learning_rate": 8.044855776663993e-06, + "loss": 0.0, + "step": 369 + }, + { + "epoch": 0.02384481536379455, + "grad_norm": 0.04858513212525571, + "learning_rate": 8.048539254551072e-06, + "loss": 0.0, + "step": 370 + }, + { + "epoch": 0.023909260810723722, + "grad_norm": 0.0032946095109740066, + "learning_rate": 8.052212790515438e-06, + "loss": 0.0, + "step": 371 + }, + { + "epoch": 0.023973706257652896, + "grad_norm": 0.00796063799442098, + "learning_rate": 8.055876438080324e-06, + "loss": 0.0, + "step": 372 + }, + { + "epoch": 0.02403815170458207, + "grad_norm": 0.07456883882132498, + "learning_rate": 8.059530250337904e-06, + "loss": 0.0015, + "step": 373 + }, + { + "epoch": 0.024102597151511244, + "grad_norm": 0.024193773638429922, + "learning_rate": 8.063174279953908e-06, + "loss": 0.0, + "step": 374 + }, + { + "epoch": 0.024167042598440422, + "grad_norm": 0.08240639532182745, + "learning_rate": 8.066808579172175e-06, + "loss": 0.0014, + "step": 375 + }, + { + "epoch": 0.024231488045369596, + "grad_norm": 0.06964492787404344, + "learning_rate": 8.070433199819152e-06, + "loss": 0.0001, + "step": 376 + }, + { + "epoch": 0.02429593349229877, + "grad_norm": 0.13324233929377452, + "learning_rate": 8.074048193308322e-06, + "loss": 0.0014, + "step": 377 + }, + { + "epoch": 0.024360378939227944, + "grad_norm": 0.31884756278114057, + "learning_rate": 8.077653610644582e-06, + "loss": 0.0025, + "step": 378 + }, + { + "epoch": 0.024424824386157118, + "grad_norm": 0.005066331055183986, + "learning_rate": 8.081249502428569e-06, + "loss": 0.0001, + "step": 379 + }, + { + "epoch": 0.024489269833086292, + "grad_norm": 0.11909933234504282, + "learning_rate": 8.084835918860906e-06, + "loss": 0.0006, + "step": 380 + }, + { + "epoch": 0.024553715280015466, + "grad_norm": 0.21011827473200473, + "learning_rate": 8.08841290974642e-06, + "loss": 0.0004, + "step": 381 + }, + { + "epoch": 0.02461816072694464, + "grad_norm": 0.027595556932529592, + "learning_rate": 8.091980524498285e-06, + "loss": 0.0, + "step": 382 + }, + { + "epoch": 0.024682606173873817, + "grad_norm": 0.006354662520049507, + "learning_rate": 8.095538812142118e-06, + "loss": 0.0, + "step": 383 + }, + { + "epoch": 0.02474705162080299, + "grad_norm": 0.013398883993908624, + "learning_rate": 8.099087821320032e-06, + "loss": 0.0001, + "step": 384 + }, + { + "epoch": 0.024811497067732165, + "grad_norm": 0.018757425423641675, + "learning_rate": 8.102627600294604e-06, + "loss": 0.0, + "step": 385 + }, + { + "epoch": 0.02487594251466134, + "grad_norm": 0.003749142883297565, + "learning_rate": 8.106158196952835e-06, + "loss": 0.0, + "step": 386 + }, + { + "epoch": 0.024940387961590513, + "grad_norm": 0.37095629505467437, + "learning_rate": 8.109679658810025e-06, + "loss": 0.0019, + "step": 387 + }, + { + "epoch": 0.025004833408519687, + "grad_norm": 0.049536866135149134, + "learning_rate": 8.113192033013613e-06, + "loss": 0.0001, + "step": 388 + }, + { + "epoch": 0.02506927885544886, + "grad_norm": 0.05720039117501582, + "learning_rate": 8.116695366346962e-06, + "loss": 0.0002, + "step": 389 + }, + { + "epoch": 0.025133724302378035, + "grad_norm": 0.013197769274937824, + "learning_rate": 8.120189705233107e-06, + "loss": 0.0, + "step": 390 + }, + { + "epoch": 0.025198169749307213, + "grad_norm": 1.5365355994845276, + "learning_rate": 8.12367509573843e-06, + "loss": 0.0082, + "step": 391 + }, + { + "epoch": 0.025262615196236387, + "grad_norm": 0.005337843706833008, + "learning_rate": 8.127151583576319e-06, + "loss": 0.0, + "step": 392 + }, + { + "epoch": 0.02532706064316556, + "grad_norm": 0.009108647589636271, + "learning_rate": 8.13061921411076e-06, + "loss": 0.0001, + "step": 393 + }, + { + "epoch": 0.025391506090094735, + "grad_norm": 0.012351875469581566, + "learning_rate": 8.134078032359886e-06, + "loss": 0.0001, + "step": 394 + }, + { + "epoch": 0.02545595153702391, + "grad_norm": 0.0014107577675921742, + "learning_rate": 8.137528082999486e-06, + "loss": 0.0, + "step": 395 + }, + { + "epoch": 0.025520396983953083, + "grad_norm": 0.05295340430722678, + "learning_rate": 8.140969410366469e-06, + "loss": 0.0001, + "step": 396 + }, + { + "epoch": 0.025584842430882257, + "grad_norm": 0.5690354196975049, + "learning_rate": 8.144402058462278e-06, + "loss": 0.0023, + "step": 397 + }, + { + "epoch": 0.02564928787781143, + "grad_norm": 0.03940801697463157, + "learning_rate": 8.147826070956273e-06, + "loss": 0.0001, + "step": 398 + }, + { + "epoch": 0.02571373332474061, + "grad_norm": 0.011530897889634606, + "learning_rate": 8.15124149118906e-06, + "loss": 0.0001, + "step": 399 + }, + { + "epoch": 0.025778178771669782, + "grad_norm": 0.02830996714923142, + "learning_rate": 8.154648362175777e-06, + "loss": 0.0002, + "step": 400 + }, + { + "epoch": 0.025842624218598956, + "grad_norm": 0.00046499532858108194, + "learning_rate": 8.15804672660936e-06, + "loss": 0.0, + "step": 401 + }, + { + "epoch": 0.02590706966552813, + "grad_norm": 0.26561429899763406, + "learning_rate": 8.161436626863734e-06, + "loss": 0.0004, + "step": 402 + }, + { + "epoch": 0.025971515112457304, + "grad_norm": 0.014311267292635437, + "learning_rate": 8.164818104996999e-06, + "loss": 0.0001, + "step": 403 + }, + { + "epoch": 0.02603596055938648, + "grad_norm": 0.033020361512505675, + "learning_rate": 8.168191202754557e-06, + "loss": 0.0001, + "step": 404 + }, + { + "epoch": 0.026100406006315652, + "grad_norm": 0.01233642367780627, + "learning_rate": 8.171555961572193e-06, + "loss": 0.0001, + "step": 405 + }, + { + "epoch": 0.02616485145324483, + "grad_norm": 0.030507303415610476, + "learning_rate": 8.174912422579145e-06, + "loss": 0.0, + "step": 406 + }, + { + "epoch": 0.026229296900174004, + "grad_norm": 0.000771348551102021, + "learning_rate": 8.178260626601112e-06, + "loss": 0.0, + "step": 407 + }, + { + "epoch": 0.026293742347103178, + "grad_norm": 0.04695325270252983, + "learning_rate": 8.181600614163234e-06, + "loss": 0.0001, + "step": 408 + }, + { + "epoch": 0.026358187794032352, + "grad_norm": 0.004360910000568842, + "learning_rate": 8.184932425493038e-06, + "loss": 0.0, + "step": 409 + }, + { + "epoch": 0.026422633240961526, + "grad_norm": 0.0022237242463113796, + "learning_rate": 8.188256100523343e-06, + "loss": 0.0, + "step": 410 + }, + { + "epoch": 0.0264870786878907, + "grad_norm": 0.11268887113016282, + "learning_rate": 8.191571678895127e-06, + "loss": 0.0018, + "step": 411 + }, + { + "epoch": 0.026551524134819874, + "grad_norm": 0.0014461989918299746, + "learning_rate": 8.194879199960378e-06, + "loss": 0.0, + "step": 412 + }, + { + "epoch": 0.026615969581749048, + "grad_norm": 0.03791107941859059, + "learning_rate": 8.19817870278488e-06, + "loss": 0.0001, + "step": 413 + }, + { + "epoch": 0.026680415028678225, + "grad_norm": 0.23998875971638906, + "learning_rate": 8.20147022615099e-06, + "loss": 0.002, + "step": 414 + }, + { + "epoch": 0.0267448604756074, + "grad_norm": 0.012383002073283897, + "learning_rate": 8.204753808560375e-06, + "loss": 0.0, + "step": 415 + }, + { + "epoch": 0.026809305922536573, + "grad_norm": 0.29471501301861663, + "learning_rate": 8.208029488236709e-06, + "loss": 0.0017, + "step": 416 + }, + { + "epoch": 0.026873751369465747, + "grad_norm": 0.20318785025191824, + "learning_rate": 8.211297303128342e-06, + "loss": 0.0015, + "step": 417 + }, + { + "epoch": 0.02693819681639492, + "grad_norm": 0.017450833706922735, + "learning_rate": 8.214557290910945e-06, + "loss": 0.0, + "step": 418 + }, + { + "epoch": 0.027002642263324095, + "grad_norm": 0.012397289560402282, + "learning_rate": 8.21780948899011e-06, + "loss": 0.0, + "step": 419 + }, + { + "epoch": 0.02706708771025327, + "grad_norm": 0.009342866096887397, + "learning_rate": 8.22105393450393e-06, + "loss": 0.0, + "step": 420 + }, + { + "epoch": 0.027131533157182443, + "grad_norm": 0.7392758325675876, + "learning_rate": 8.224290664325538e-06, + "loss": 0.0034, + "step": 421 + }, + { + "epoch": 0.02719597860411162, + "grad_norm": 0.19301211838897894, + "learning_rate": 8.227519715065621e-06, + "loss": 0.0007, + "step": 422 + }, + { + "epoch": 0.027260424051040795, + "grad_norm": 0.04688608763614122, + "learning_rate": 8.230741123074915e-06, + "loss": 0.0001, + "step": 423 + }, + { + "epoch": 0.02732486949796997, + "grad_norm": 0.04107700591487386, + "learning_rate": 8.233954924446651e-06, + "loss": 0.0005, + "step": 424 + }, + { + "epoch": 0.027389314944899143, + "grad_norm": 0.0016944504864118827, + "learning_rate": 8.23716115501898e-06, + "loss": 0.0, + "step": 425 + }, + { + "epoch": 0.027453760391828317, + "grad_norm": 0.14313535363424715, + "learning_rate": 8.24035985037738e-06, + "loss": 0.002, + "step": 426 + }, + { + "epoch": 0.02751820583875749, + "grad_norm": 0.08606880084376932, + "learning_rate": 8.243551045857016e-06, + "loss": 0.0003, + "step": 427 + }, + { + "epoch": 0.027582651285686665, + "grad_norm": 0.17907311266594605, + "learning_rate": 8.246734776545088e-06, + "loss": 0.0006, + "step": 428 + }, + { + "epoch": 0.02764709673261584, + "grad_norm": 0.11154235338706936, + "learning_rate": 8.249911077283146e-06, + "loss": 0.0002, + "step": 429 + }, + { + "epoch": 0.027711542179545016, + "grad_norm": 1.4785572282992487, + "learning_rate": 8.253079982669373e-06, + "loss": 0.0137, + "step": 430 + }, + { + "epoch": 0.02777598762647419, + "grad_norm": 0.012928009033203125, + "learning_rate": 8.25624152706085e-06, + "loss": 0.0, + "step": 431 + }, + { + "epoch": 0.027840433073403364, + "grad_norm": 0.003619215014048827, + "learning_rate": 8.259395744575797e-06, + "loss": 0.0, + "step": 432 + }, + { + "epoch": 0.02790487852033254, + "grad_norm": 0.0014624501580332932, + "learning_rate": 8.26254266909576e-06, + "loss": 0.0, + "step": 433 + }, + { + "epoch": 0.027969323967261712, + "grad_norm": 0.004199093101875544, + "learning_rate": 8.265682334267826e-06, + "loss": 0.0, + "step": 434 + }, + { + "epoch": 0.028033769414190886, + "grad_norm": 0.021314337555709988, + "learning_rate": 8.268814773506757e-06, + "loss": 0.0001, + "step": 435 + }, + { + "epoch": 0.02809821486112006, + "grad_norm": 0.03760360231074014, + "learning_rate": 8.271940019997134e-06, + "loss": 0.0001, + "step": 436 + }, + { + "epoch": 0.028162660308049238, + "grad_norm": 0.005209692266127515, + "learning_rate": 8.275058106695467e-06, + "loss": 0.0, + "step": 437 + }, + { + "epoch": 0.028227105754978412, + "grad_norm": 0.09817250645422441, + "learning_rate": 8.278169066332278e-06, + "loss": 0.0001, + "step": 438 + }, + { + "epoch": 0.028291551201907586, + "grad_norm": 0.28345436264734336, + "learning_rate": 8.28127293141416e-06, + "loss": 0.0005, + "step": 439 + }, + { + "epoch": 0.02835599664883676, + "grad_norm": 0.02705612753805196, + "learning_rate": 8.284369734225816e-06, + "loss": 0.0, + "step": 440 + }, + { + "epoch": 0.028420442095765934, + "grad_norm": 0.07754971769711609, + "learning_rate": 8.287459506832084e-06, + "loss": 0.0001, + "step": 441 + }, + { + "epoch": 0.028484887542695108, + "grad_norm": 0.03146103849257445, + "learning_rate": 8.290542281079913e-06, + "loss": 0.0, + "step": 442 + }, + { + "epoch": 0.028549332989624282, + "grad_norm": 0.1758081044770802, + "learning_rate": 8.293618088600338e-06, + "loss": 0.0003, + "step": 443 + }, + { + "epoch": 0.028613778436553456, + "grad_norm": 0.0029838889004879295, + "learning_rate": 8.29668696081044e-06, + "loss": 0.0, + "step": 444 + }, + { + "epoch": 0.028678223883482633, + "grad_norm": 0.45164235672802683, + "learning_rate": 8.299748928915249e-06, + "loss": 0.001, + "step": 445 + }, + { + "epoch": 0.028742669330411807, + "grad_norm": 0.0033308645237363874, + "learning_rate": 8.302804023909669e-06, + "loss": 0.0, + "step": 446 + }, + { + "epoch": 0.02880711477734098, + "grad_norm": 0.025905336972041268, + "learning_rate": 8.305852276580345e-06, + "loss": 0.0001, + "step": 447 + }, + { + "epoch": 0.028871560224270156, + "grad_norm": 0.0041218454884513304, + "learning_rate": 8.308893717507532e-06, + "loss": 0.0, + "step": 448 + }, + { + "epoch": 0.02893600567119933, + "grad_norm": 0.12197986303560157, + "learning_rate": 8.311928377066941e-06, + "loss": 0.0004, + "step": 449 + }, + { + "epoch": 0.029000451118128504, + "grad_norm": 0.08898476274019052, + "learning_rate": 8.314956285431542e-06, + "loss": 0.0018, + "step": 450 + }, + { + "epoch": 0.029064896565057678, + "grad_norm": 0.09684278236993904, + "learning_rate": 8.317977472573382e-06, + "loss": 0.0006, + "step": 451 + }, + { + "epoch": 0.02912934201198685, + "grad_norm": 0.05075467771608681, + "learning_rate": 8.320991968265357e-06, + "loss": 0.0005, + "step": 452 + }, + { + "epoch": 0.02919378745891603, + "grad_norm": 0.02043870866338334, + "learning_rate": 8.323999802082968e-06, + "loss": 0.0, + "step": 453 + }, + { + "epoch": 0.029258232905845203, + "grad_norm": 0.02380190989209756, + "learning_rate": 8.327001003406075e-06, + "loss": 0.0, + "step": 454 + }, + { + "epoch": 0.029322678352774377, + "grad_norm": 0.19246463681493037, + "learning_rate": 8.329995601420607e-06, + "loss": 0.0007, + "step": 455 + }, + { + "epoch": 0.02938712379970355, + "grad_norm": 0.0018403446570685027, + "learning_rate": 8.332983625120273e-06, + "loss": 0.0015, + "step": 456 + }, + { + "epoch": 0.029451569246632725, + "grad_norm": 0.1614785790568277, + "learning_rate": 8.335965103308235e-06, + "loss": 0.0003, + "step": 457 + }, + { + "epoch": 0.0295160146935619, + "grad_norm": 0.04677324625517432, + "learning_rate": 8.338940064598788e-06, + "loss": 0.0001, + "step": 458 + }, + { + "epoch": 0.029580460140491073, + "grad_norm": 0.023722940740272376, + "learning_rate": 8.341908537418999e-06, + "loss": 0.0002, + "step": 459 + }, + { + "epoch": 0.029644905587420247, + "grad_norm": 0.0071161812529181, + "learning_rate": 8.34487055001034e-06, + "loss": 0.0, + "step": 460 + }, + { + "epoch": 0.029709351034349425, + "grad_norm": 0.01802171434180495, + "learning_rate": 8.347826130430298e-06, + "loss": 0.0001, + "step": 461 + }, + { + "epoch": 0.0297737964812786, + "grad_norm": 0.35340842839961994, + "learning_rate": 8.35077530655397e-06, + "loss": 0.0026, + "step": 462 + }, + { + "epoch": 0.029838241928207773, + "grad_norm": 0.061611551431616104, + "learning_rate": 8.353718106075646e-06, + "loss": 0.0002, + "step": 463 + }, + { + "epoch": 0.029902687375136947, + "grad_norm": 0.04584732913277349, + "learning_rate": 8.356654556510358e-06, + "loss": 0.0002, + "step": 464 + }, + { + "epoch": 0.02996713282206612, + "grad_norm": 0.1855844961474537, + "learning_rate": 8.359584685195436e-06, + "loss": 0.0005, + "step": 465 + }, + { + "epoch": 0.030031578268995295, + "grad_norm": 0.02861397477113445, + "learning_rate": 8.362508519292026e-06, + "loss": 0.0003, + "step": 466 + }, + { + "epoch": 0.03009602371592447, + "grad_norm": 0.04034164553115, + "learning_rate": 8.365426085786605e-06, + "loss": 0.0001, + "step": 467 + }, + { + "epoch": 0.030160469162853646, + "grad_norm": 0.007763013674992482, + "learning_rate": 8.368337411492474e-06, + "loss": 0.0, + "step": 468 + }, + { + "epoch": 0.03022491460978282, + "grad_norm": 0.0920300413920213, + "learning_rate": 8.371242523051236e-06, + "loss": 0.0001, + "step": 469 + }, + { + "epoch": 0.030289360056711994, + "grad_norm": 0.13798576860239228, + "learning_rate": 8.374141446934264e-06, + "loss": 0.0006, + "step": 470 + }, + { + "epoch": 0.030353805503641168, + "grad_norm": 0.013517930261967707, + "learning_rate": 8.37703420944414e-06, + "loss": 0.0, + "step": 471 + }, + { + "epoch": 0.030418250950570342, + "grad_norm": 0.008036030340445328, + "learning_rate": 8.379920836716092e-06, + "loss": 0.0, + "step": 472 + }, + { + "epoch": 0.030482696397499516, + "grad_norm": 0.005974472065890517, + "learning_rate": 8.382801354719412e-06, + "loss": 0.0, + "step": 473 + }, + { + "epoch": 0.03054714184442869, + "grad_norm": 0.0008534880974867222, + "learning_rate": 8.385675789258854e-06, + "loss": 0.0, + "step": 474 + }, + { + "epoch": 0.030611587291357864, + "grad_norm": 0.03526061844791487, + "learning_rate": 8.388544165976018e-06, + "loss": 0.0001, + "step": 475 + }, + { + "epoch": 0.03067603273828704, + "grad_norm": 0.09276391030632433, + "learning_rate": 8.391406510350738e-06, + "loss": 0.0003, + "step": 476 + }, + { + "epoch": 0.030740478185216216, + "grad_norm": 0.006227281517178593, + "learning_rate": 8.394262847702416e-06, + "loss": 0.0, + "step": 477 + }, + { + "epoch": 0.03080492363214539, + "grad_norm": 0.0019055967033663934, + "learning_rate": 8.39711320319139e-06, + "loss": 0.0, + "step": 478 + }, + { + "epoch": 0.030869369079074564, + "grad_norm": 0.008035130758411991, + "learning_rate": 8.399957601820245e-06, + "loss": 0.0, + "step": 479 + }, + { + "epoch": 0.030933814526003738, + "grad_norm": 0.030565513157518043, + "learning_rate": 8.402796068435144e-06, + "loss": 0.0002, + "step": 480 + }, + { + "epoch": 0.03099825997293291, + "grad_norm": 0.0010238980188911407, + "learning_rate": 8.405628627727116e-06, + "loss": 0.0, + "step": 481 + }, + { + "epoch": 0.031062705419862086, + "grad_norm": 0.000852778331990603, + "learning_rate": 8.408455304233356e-06, + "loss": 0.0, + "step": 482 + }, + { + "epoch": 0.03112715086679126, + "grad_norm": 0.009300486191113138, + "learning_rate": 8.411276122338493e-06, + "loss": 0.0001, + "step": 483 + }, + { + "epoch": 0.031191596313720437, + "grad_norm": 0.006314212471481981, + "learning_rate": 8.414091106275857e-06, + "loss": 0.0001, + "step": 484 + }, + { + "epoch": 0.03125604176064961, + "grad_norm": 0.13175345772435, + "learning_rate": 8.416900280128724e-06, + "loss": 0.0012, + "step": 485 + }, + { + "epoch": 0.03132048720757878, + "grad_norm": 0.0034394697556541735, + "learning_rate": 8.41970366783156e-06, + "loss": 0.0, + "step": 486 + }, + { + "epoch": 0.03138493265450796, + "grad_norm": 0.3510331194805854, + "learning_rate": 8.422501293171231e-06, + "loss": 0.0008, + "step": 487 + }, + { + "epoch": 0.031449378101437137, + "grad_norm": 0.0006937932722181743, + "learning_rate": 8.42529317978823e-06, + "loss": 0.0, + "step": 488 + }, + { + "epoch": 0.03151382354836631, + "grad_norm": 0.0395256834106489, + "learning_rate": 8.42807935117786e-06, + "loss": 0.0002, + "step": 489 + }, + { + "epoch": 0.031578268995295485, + "grad_norm": 0.0195828954531926, + "learning_rate": 8.430859830691432e-06, + "loss": 0.0001, + "step": 490 + }, + { + "epoch": 0.031642714442224655, + "grad_norm": 0.2022134980256737, + "learning_rate": 8.43363464153744e-06, + "loss": 0.0012, + "step": 491 + }, + { + "epoch": 0.03170715988915383, + "grad_norm": 0.16900205486186845, + "learning_rate": 8.436403806782708e-06, + "loss": 0.0004, + "step": 492 + }, + { + "epoch": 0.031771605336083, + "grad_norm": 0.0006769181918546838, + "learning_rate": 8.43916734935356e-06, + "loss": 0.0, + "step": 493 + }, + { + "epoch": 0.03183605078301218, + "grad_norm": 0.013904361173820282, + "learning_rate": 8.44192529203695e-06, + "loss": 0.0001, + "step": 494 + }, + { + "epoch": 0.03190049622994136, + "grad_norm": 0.004376416113878596, + "learning_rate": 8.444677657481581e-06, + "loss": 0.0, + "step": 495 + }, + { + "epoch": 0.03196494167687053, + "grad_norm": 0.02817264340354307, + "learning_rate": 8.44742446819904e-06, + "loss": 0.0003, + "step": 496 + }, + { + "epoch": 0.032029387123799706, + "grad_norm": 2.2377603134379584, + "learning_rate": 8.450165746564882e-06, + "loss": 0.0117, + "step": 497 + }, + { + "epoch": 0.03209383257072888, + "grad_norm": 0.002553285998667623, + "learning_rate": 8.452901514819742e-06, + "loss": 0.0, + "step": 498 + }, + { + "epoch": 0.032158278017658054, + "grad_norm": 0.00034067570616675476, + "learning_rate": 8.455631795070407e-06, + "loss": 0.0, + "step": 499 + }, + { + "epoch": 0.032222723464587225, + "grad_norm": 0.35536890409234767, + "learning_rate": 8.45835660929089e-06, + "loss": 0.0018, + "step": 500 + }, + { + "epoch": 0.0322871689115164, + "grad_norm": 0.003508252971943265, + "learning_rate": 8.461075979323489e-06, + "loss": 0.0, + "step": 501 + }, + { + "epoch": 0.03235161435844557, + "grad_norm": 0.7101809909652864, + "learning_rate": 8.463789926879845e-06, + "loss": 0.0045, + "step": 502 + }, + { + "epoch": 0.03241605980537475, + "grad_norm": 0.0013628442685876426, + "learning_rate": 8.466498473541972e-06, + "loss": 0.0, + "step": 503 + }, + { + "epoch": 0.03248050525230393, + "grad_norm": 0.34192186522539375, + "learning_rate": 8.469201640763297e-06, + "loss": 0.0016, + "step": 504 + }, + { + "epoch": 0.0325449506992331, + "grad_norm": 0.0034762216357610365, + "learning_rate": 8.471899449869671e-06, + "loss": 0.0, + "step": 505 + }, + { + "epoch": 0.032609396146162276, + "grad_norm": 0.12617818828412725, + "learning_rate": 8.474591922060378e-06, + "loss": 0.0004, + "step": 506 + }, + { + "epoch": 0.032673841593091446, + "grad_norm": 0.004196338433318039, + "learning_rate": 8.47727907840915e-06, + "loss": 0.0, + "step": 507 + }, + { + "epoch": 0.032738287040020624, + "grad_norm": 0.00876693332198306, + "learning_rate": 8.479960939865135e-06, + "loss": 0.0001, + "step": 508 + }, + { + "epoch": 0.032802732486949794, + "grad_norm": 0.0004212675822051207, + "learning_rate": 8.482637527253888e-06, + "loss": 0.0, + "step": 509 + }, + { + "epoch": 0.03286717793387897, + "grad_norm": 0.004896906167491933, + "learning_rate": 8.485308861278346e-06, + "loss": 0.0, + "step": 510 + }, + { + "epoch": 0.03293162338080815, + "grad_norm": 0.0006167679203622598, + "learning_rate": 8.487974962519779e-06, + "loss": 0.0, + "step": 511 + }, + { + "epoch": 0.03299606882773732, + "grad_norm": 0.25578066656271004, + "learning_rate": 8.490635851438747e-06, + "loss": 0.0011, + "step": 512 + }, + { + "epoch": 0.0330605142746665, + "grad_norm": 0.016222739160781905, + "learning_rate": 8.493291548376036e-06, + "loss": 0.0001, + "step": 513 + }, + { + "epoch": 0.03312495972159567, + "grad_norm": 0.00399666735491616, + "learning_rate": 8.495942073553605e-06, + "loss": 0.0, + "step": 514 + }, + { + "epoch": 0.033189405168524845, + "grad_norm": 0.9266883816050113, + "learning_rate": 8.498587447075492e-06, + "loss": 0.0033, + "step": 515 + }, + { + "epoch": 0.033253850615454016, + "grad_norm": 0.0028993043429181614, + "learning_rate": 8.50122768892874e-06, + "loss": 0.0, + "step": 516 + }, + { + "epoch": 0.03331829606238319, + "grad_norm": 0.27063704065226896, + "learning_rate": 8.503862818984304e-06, + "loss": 0.0021, + "step": 517 + }, + { + "epoch": 0.033382741509312364, + "grad_norm": 0.010379239929541071, + "learning_rate": 8.50649285699794e-06, + "loss": 0.0, + "step": 518 + }, + { + "epoch": 0.03344718695624154, + "grad_norm": 0.35729035839734724, + "learning_rate": 8.509117822611103e-06, + "loss": 0.0023, + "step": 519 + }, + { + "epoch": 0.03351163240317072, + "grad_norm": 0.30007881033376443, + "learning_rate": 8.511737735351822e-06, + "loss": 0.0011, + "step": 520 + }, + { + "epoch": 0.03357607785009989, + "grad_norm": 0.12294530839587339, + "learning_rate": 8.514352614635567e-06, + "loss": 0.0001, + "step": 521 + }, + { + "epoch": 0.03364052329702907, + "grad_norm": 0.11914525257566484, + "learning_rate": 8.516962479766123e-06, + "loss": 0.0002, + "step": 522 + }, + { + "epoch": 0.03370496874395824, + "grad_norm": 0.03495502834888825, + "learning_rate": 8.51956734993644e-06, + "loss": 0.0001, + "step": 523 + }, + { + "epoch": 0.033769414190887415, + "grad_norm": 0.34509190983321814, + "learning_rate": 8.522167244229476e-06, + "loss": 0.0104, + "step": 524 + }, + { + "epoch": 0.033833859637816585, + "grad_norm": 0.2735239028658671, + "learning_rate": 8.524762181619042e-06, + "loss": 0.0005, + "step": 525 + }, + { + "epoch": 0.03389830508474576, + "grad_norm": 0.4540046998770102, + "learning_rate": 8.527352180970633e-06, + "loss": 0.0051, + "step": 526 + }, + { + "epoch": 0.03396275053167494, + "grad_norm": 0.016232761018629926, + "learning_rate": 8.529937261042241e-06, + "loss": 0.0, + "step": 527 + }, + { + "epoch": 0.03402719597860411, + "grad_norm": 0.08212446411675045, + "learning_rate": 8.532517440485183e-06, + "loss": 0.0001, + "step": 528 + }, + { + "epoch": 0.03409164142553329, + "grad_norm": 0.077195666137957, + "learning_rate": 8.535092737844902e-06, + "loss": 0.0001, + "step": 529 + }, + { + "epoch": 0.03415608687246246, + "grad_norm": 0.3339083512543114, + "learning_rate": 8.537663171561763e-06, + "loss": 0.0034, + "step": 530 + }, + { + "epoch": 0.034220532319391636, + "grad_norm": 0.06010152089013261, + "learning_rate": 8.540228759971857e-06, + "loss": 0.0001, + "step": 531 + }, + { + "epoch": 0.03428497776632081, + "grad_norm": 0.10862170243882367, + "learning_rate": 8.542789521307773e-06, + "loss": 0.0001, + "step": 532 + }, + { + "epoch": 0.034349423213249984, + "grad_norm": 0.0018621187038332806, + "learning_rate": 8.545345473699385e-06, + "loss": 0.0, + "step": 533 + }, + { + "epoch": 0.03441386866017916, + "grad_norm": 0.5433106478048796, + "learning_rate": 8.547896635174616e-06, + "loss": 0.0025, + "step": 534 + }, + { + "epoch": 0.03447831410710833, + "grad_norm": 0.02162303799301777, + "learning_rate": 8.550443023660201e-06, + "loss": 0.0, + "step": 535 + }, + { + "epoch": 0.03454275955403751, + "grad_norm": 0.16319136153064215, + "learning_rate": 8.55298465698245e-06, + "loss": 0.0017, + "step": 536 + }, + { + "epoch": 0.03460720500096668, + "grad_norm": 0.03575839023267142, + "learning_rate": 8.555521552867987e-06, + "loss": 0.0, + "step": 537 + }, + { + "epoch": 0.03467165044789586, + "grad_norm": 0.21404037256804487, + "learning_rate": 8.558053728944501e-06, + "loss": 0.0002, + "step": 538 + }, + { + "epoch": 0.03473609589482503, + "grad_norm": 0.007927826619897955, + "learning_rate": 8.560581202741473e-06, + "loss": 0.0, + "step": 539 + }, + { + "epoch": 0.034800541341754206, + "grad_norm": 0.0006026644939430921, + "learning_rate": 8.563103991690909e-06, + "loss": 0.0, + "step": 540 + }, + { + "epoch": 0.034864986788683376, + "grad_norm": 0.001941445168647103, + "learning_rate": 8.565622113128059e-06, + "loss": 0.0, + "step": 541 + }, + { + "epoch": 0.034929432235612554, + "grad_norm": 0.048530455674446166, + "learning_rate": 8.568135584292137e-06, + "loss": 0.0007, + "step": 542 + }, + { + "epoch": 0.03499387768254173, + "grad_norm": 0.0007215121741700993, + "learning_rate": 8.570644422327015e-06, + "loss": 0.0, + "step": 543 + }, + { + "epoch": 0.0350583231294709, + "grad_norm": 0.0026185945721582315, + "learning_rate": 8.57314864428195e-06, + "loss": 0.0, + "step": 544 + }, + { + "epoch": 0.03512276857640008, + "grad_norm": 0.3393699807556369, + "learning_rate": 8.575648267112246e-06, + "loss": 0.0049, + "step": 545 + }, + { + "epoch": 0.03518721402332925, + "grad_norm": 0.19151880886172956, + "learning_rate": 8.578143307679974e-06, + "loss": 0.0008, + "step": 546 + }, + { + "epoch": 0.03525165947025843, + "grad_norm": 0.04460153439407813, + "learning_rate": 8.580633782754635e-06, + "loss": 0.0005, + "step": 547 + }, + { + "epoch": 0.0353161049171876, + "grad_norm": 0.271443576628783, + "learning_rate": 8.583119709013842e-06, + "loss": 0.0013, + "step": 548 + }, + { + "epoch": 0.035380550364116775, + "grad_norm": 0.0041845092527643, + "learning_rate": 8.585601103043993e-06, + "loss": 0.0, + "step": 549 + }, + { + "epoch": 0.03544499581104595, + "grad_norm": 0.007688605143187043, + "learning_rate": 8.58807798134093e-06, + "loss": 0.0, + "step": 550 + }, + { + "epoch": 0.03550944125797512, + "grad_norm": 0.0011524979515473482, + "learning_rate": 8.5905503603106e-06, + "loss": 0.0, + "step": 551 + }, + { + "epoch": 0.0355738867049043, + "grad_norm": 0.004469706196756287, + "learning_rate": 8.593018256269704e-06, + "loss": 0.0, + "step": 552 + }, + { + "epoch": 0.03563833215183347, + "grad_norm": 0.0017258531883411576, + "learning_rate": 8.595481685446355e-06, + "loss": 0.0, + "step": 553 + }, + { + "epoch": 0.03570277759876265, + "grad_norm": 0.011111525603690426, + "learning_rate": 8.597940663980695e-06, + "loss": 0.0, + "step": 554 + }, + { + "epoch": 0.03576722304569182, + "grad_norm": 0.0024214964647003984, + "learning_rate": 8.600395207925551e-06, + "loss": 0.0, + "step": 555 + }, + { + "epoch": 0.035831668492621, + "grad_norm": 0.002982207597299224, + "learning_rate": 8.602845333247056e-06, + "loss": 0.0, + "step": 556 + }, + { + "epoch": 0.035896113939550174, + "grad_norm": 0.0003296500930890877, + "learning_rate": 8.605291055825273e-06, + "loss": 0.0, + "step": 557 + }, + { + "epoch": 0.035960559386479345, + "grad_norm": 0.02805379203644968, + "learning_rate": 8.607732391454803e-06, + "loss": 0.0, + "step": 558 + }, + { + "epoch": 0.03602500483340852, + "grad_norm": 0.007168128548352624, + "learning_rate": 8.610169355845417e-06, + "loss": 0.0, + "step": 559 + }, + { + "epoch": 0.03608945028033769, + "grad_norm": 0.008010737554365606, + "learning_rate": 8.612601964622646e-06, + "loss": 0.0, + "step": 560 + }, + { + "epoch": 0.03615389572726687, + "grad_norm": 0.006762966873986022, + "learning_rate": 8.615030233328387e-06, + "loss": 0.0001, + "step": 561 + }, + { + "epoch": 0.03621834117419604, + "grad_norm": 0.00104232572711473, + "learning_rate": 8.617454177421499e-06, + "loss": 0.0, + "step": 562 + }, + { + "epoch": 0.03628278662112522, + "grad_norm": 2.3966805549541053, + "learning_rate": 8.619873812278397e-06, + "loss": 0.0212, + "step": 563 + }, + { + "epoch": 0.03634723206805439, + "grad_norm": 0.002023202811533298, + "learning_rate": 8.622289153193632e-06, + "loss": 0.0, + "step": 564 + }, + { + "epoch": 0.036411677514983566, + "grad_norm": 0.08835435758202559, + "learning_rate": 8.624700215380469e-06, + "loss": 0.0, + "step": 565 + }, + { + "epoch": 0.036476122961912744, + "grad_norm": 0.008966135734965502, + "learning_rate": 8.62710701397147e-06, + "loss": 0.0, + "step": 566 + }, + { + "epoch": 0.036540568408841914, + "grad_norm": 0.3744501413902579, + "learning_rate": 8.62950956401906e-06, + "loss": 0.0006, + "step": 567 + }, + { + "epoch": 0.03660501385577109, + "grad_norm": 0.074906811338477, + "learning_rate": 8.631907880496095e-06, + "loss": 0.0002, + "step": 568 + }, + { + "epoch": 0.03666945930270026, + "grad_norm": 0.0002814712078328361, + "learning_rate": 8.634301978296409e-06, + "loss": 0.0, + "step": 569 + }, + { + "epoch": 0.03673390474962944, + "grad_norm": 0.0007091569765689754, + "learning_rate": 8.636691872235386e-06, + "loss": 0.0, + "step": 570 + }, + { + "epoch": 0.03679835019655861, + "grad_norm": 0.15370993098858876, + "learning_rate": 8.639077577050498e-06, + "loss": 0.0012, + "step": 571 + }, + { + "epoch": 0.03686279564348779, + "grad_norm": 0.09234985727520918, + "learning_rate": 8.64145910740186e-06, + "loss": 0.002, + "step": 572 + }, + { + "epoch": 0.036927241090416965, + "grad_norm": 0.19519613614308395, + "learning_rate": 8.643836477872763e-06, + "loss": 0.0006, + "step": 573 + }, + { + "epoch": 0.036991686537346136, + "grad_norm": 0.13928936096695185, + "learning_rate": 8.64620970297021e-06, + "loss": 0.0021, + "step": 574 + }, + { + "epoch": 0.03705613198427531, + "grad_norm": 0.013526277478610292, + "learning_rate": 8.648578797125451e-06, + "loss": 0.0, + "step": 575 + }, + { + "epoch": 0.037120577431204484, + "grad_norm": 0.036371678731640995, + "learning_rate": 8.65094377469451e-06, + "loss": 0.0001, + "step": 576 + }, + { + "epoch": 0.03718502287813366, + "grad_norm": 0.011606647574347847, + "learning_rate": 8.6533046499587e-06, + "loss": 0.0, + "step": 577 + }, + { + "epoch": 0.03724946832506283, + "grad_norm": 0.00018151496333934717, + "learning_rate": 8.655661437125153e-06, + "loss": 0.0, + "step": 578 + }, + { + "epoch": 0.03731391377199201, + "grad_norm": 0.017874406288697416, + "learning_rate": 8.658014150327315e-06, + "loss": 0.0002, + "step": 579 + }, + { + "epoch": 0.03737835921892119, + "grad_norm": 0.0006531733163911647, + "learning_rate": 8.66036280362547e-06, + "loss": 0.0, + "step": 580 + }, + { + "epoch": 0.03744280466585036, + "grad_norm": 0.0015501879208044541, + "learning_rate": 8.662707411007244e-06, + "loss": 0.0, + "step": 581 + }, + { + "epoch": 0.037507250112779535, + "grad_norm": 0.0014950575720748577, + "learning_rate": 8.665047986388091e-06, + "loss": 0.0, + "step": 582 + }, + { + "epoch": 0.037571695559708705, + "grad_norm": 0.00018370767475683753, + "learning_rate": 8.667384543611802e-06, + "loss": 0.0, + "step": 583 + }, + { + "epoch": 0.03763614100663788, + "grad_norm": 0.5265942085748113, + "learning_rate": 8.669717096450994e-06, + "loss": 0.0023, + "step": 584 + }, + { + "epoch": 0.03770058645356705, + "grad_norm": 0.0009574933177824663, + "learning_rate": 8.672045658607586e-06, + "loss": 0.0, + "step": 585 + }, + { + "epoch": 0.03776503190049623, + "grad_norm": 0.6674461997778207, + "learning_rate": 8.674370243713298e-06, + "loss": 0.0003, + "step": 586 + }, + { + "epoch": 0.0378294773474254, + "grad_norm": 0.7010925375594455, + "learning_rate": 8.676690865330125e-06, + "loss": 0.0029, + "step": 587 + }, + { + "epoch": 0.03789392279435458, + "grad_norm": 0.00532852021202004, + "learning_rate": 8.6790075369508e-06, + "loss": 0.0, + "step": 588 + }, + { + "epoch": 0.037958368241283756, + "grad_norm": 0.005449903198530545, + "learning_rate": 8.68132027199928e-06, + "loss": 0.0, + "step": 589 + }, + { + "epoch": 0.03802281368821293, + "grad_norm": 0.10746727764211438, + "learning_rate": 8.683629083831205e-06, + "loss": 0.0002, + "step": 590 + }, + { + "epoch": 0.038087259135142104, + "grad_norm": 0.016064785820550875, + "learning_rate": 8.685933985734367e-06, + "loss": 0.0, + "step": 591 + }, + { + "epoch": 0.038151704582071275, + "grad_norm": 0.0029501290312424723, + "learning_rate": 8.688234990929155e-06, + "loss": 0.0, + "step": 592 + }, + { + "epoch": 0.03821615002900045, + "grad_norm": 0.0013237907721366717, + "learning_rate": 8.690532112569025e-06, + "loss": 0.0, + "step": 593 + }, + { + "epoch": 0.03828059547592962, + "grad_norm": 0.2628805365475962, + "learning_rate": 8.692825363740948e-06, + "loss": 0.0008, + "step": 594 + }, + { + "epoch": 0.0383450409228588, + "grad_norm": 0.002112624002434478, + "learning_rate": 8.69511475746585e-06, + "loss": 0.0, + "step": 595 + }, + { + "epoch": 0.03840948636978798, + "grad_norm": 0.14168079395771904, + "learning_rate": 8.69740030669906e-06, + "loss": 0.0013, + "step": 596 + }, + { + "epoch": 0.03847393181671715, + "grad_norm": 0.21190947426606038, + "learning_rate": 8.699682024330754e-06, + "loss": 0.003, + "step": 597 + }, + { + "epoch": 0.038538377263646326, + "grad_norm": 0.003101969641557718, + "learning_rate": 8.701959923186383e-06, + "loss": 0.0, + "step": 598 + }, + { + "epoch": 0.038602822710575496, + "grad_norm": 0.009290021857474596, + "learning_rate": 8.704234016027115e-06, + "loss": 0.0, + "step": 599 + }, + { + "epoch": 0.038667268157504674, + "grad_norm": 0.004114591117807323, + "learning_rate": 8.706504315550256e-06, + "loss": 0.0, + "step": 600 + }, + { + "epoch": 0.038731713604433844, + "grad_norm": 0.16130475782513984, + "learning_rate": 8.708770834389678e-06, + "loss": 0.002, + "step": 601 + }, + { + "epoch": 0.03879615905136302, + "grad_norm": 0.003502346821683403, + "learning_rate": 8.71103358511624e-06, + "loss": 0.0, + "step": 602 + }, + { + "epoch": 0.03886060449829219, + "grad_norm": 0.002125771925744141, + "learning_rate": 8.713292580238214e-06, + "loss": 0.0, + "step": 603 + }, + { + "epoch": 0.03892504994522137, + "grad_norm": 0.00033336581023886793, + "learning_rate": 8.715547832201683e-06, + "loss": 0.0, + "step": 604 + }, + { + "epoch": 0.03898949539215055, + "grad_norm": 0.032290644222526255, + "learning_rate": 8.71779935339097e-06, + "loss": 0.0001, + "step": 605 + }, + { + "epoch": 0.03905394083907972, + "grad_norm": 14.158548400095667, + "learning_rate": 8.720047156129036e-06, + "loss": 0.0439, + "step": 606 + }, + { + "epoch": 0.039118386286008895, + "grad_norm": 0.12495020157562878, + "learning_rate": 8.722291252677892e-06, + "loss": 0.0006, + "step": 607 + }, + { + "epoch": 0.039182831732938066, + "grad_norm": 0.0017411839254716896, + "learning_rate": 8.724531655238988e-06, + "loss": 0.0, + "step": 608 + }, + { + "epoch": 0.03924727717986724, + "grad_norm": 0.0005025559677200699, + "learning_rate": 8.726768375953625e-06, + "loss": 0.0, + "step": 609 + }, + { + "epoch": 0.039311722626796414, + "grad_norm": 0.00043016709712481065, + "learning_rate": 8.729001426903342e-06, + "loss": 0.0, + "step": 610 + }, + { + "epoch": 0.03937616807372559, + "grad_norm": 0.011355258759439064, + "learning_rate": 8.731230820110307e-06, + "loss": 0.0, + "step": 611 + }, + { + "epoch": 0.03944061352065477, + "grad_norm": 0.018387622737482987, + "learning_rate": 8.733456567537714e-06, + "loss": 0.0002, + "step": 612 + }, + { + "epoch": 0.03950505896758394, + "grad_norm": 0.012411244035460034, + "learning_rate": 8.735678681090153e-06, + "loss": 0.0, + "step": 613 + }, + { + "epoch": 0.03956950441451312, + "grad_norm": 0.01917335761000915, + "learning_rate": 8.737897172614007e-06, + "loss": 0.0001, + "step": 614 + }, + { + "epoch": 0.03963394986144229, + "grad_norm": 0.0012099117817259695, + "learning_rate": 8.74011205389782e-06, + "loss": 0.0, + "step": 615 + }, + { + "epoch": 0.039698395308371465, + "grad_norm": 0.011013451182717668, + "learning_rate": 8.742323336672685e-06, + "loss": 0.0, + "step": 616 + }, + { + "epoch": 0.039762840755300635, + "grad_norm": 0.0009947618007444038, + "learning_rate": 8.744531032612602e-06, + "loss": 0.0, + "step": 617 + }, + { + "epoch": 0.03982728620222981, + "grad_norm": 0.007576749125775888, + "learning_rate": 8.746735153334857e-06, + "loss": 0.0001, + "step": 618 + }, + { + "epoch": 0.03989173164915899, + "grad_norm": 0.117186272137885, + "learning_rate": 8.74893571040039e-06, + "loss": 0.0017, + "step": 619 + }, + { + "epoch": 0.03995617709608816, + "grad_norm": 0.11402360435857845, + "learning_rate": 8.751132715314151e-06, + "loss": 0.0002, + "step": 620 + }, + { + "epoch": 0.04002062254301734, + "grad_norm": 2.0487442217612113, + "learning_rate": 8.75332617952547e-06, + "loss": 0.0063, + "step": 621 + }, + { + "epoch": 0.04008506798994651, + "grad_norm": 0.2021698686969095, + "learning_rate": 8.755516114428408e-06, + "loss": 0.0005, + "step": 622 + }, + { + "epoch": 0.040149513436875686, + "grad_norm": 0.0006979889077255704, + "learning_rate": 8.757702531362116e-06, + "loss": 0.0, + "step": 623 + }, + { + "epoch": 0.04021395888380486, + "grad_norm": 0.0057224222695230215, + "learning_rate": 8.759885441611187e-06, + "loss": 0.0, + "step": 624 + }, + { + "epoch": 0.040278404330734034, + "grad_norm": 0.0025807465767095474, + "learning_rate": 8.762064856406002e-06, + "loss": 0.0, + "step": 625 + }, + { + "epoch": 0.040342849777663205, + "grad_norm": 0.3693147077263644, + "learning_rate": 8.76424078692308e-06, + "loss": 0.0005, + "step": 626 + }, + { + "epoch": 0.04040729522459238, + "grad_norm": 0.0006090807770591003, + "learning_rate": 8.766413244285424e-06, + "loss": 0.0, + "step": 627 + }, + { + "epoch": 0.04047174067152156, + "grad_norm": 0.03648462138970883, + "learning_rate": 8.768582239562856e-06, + "loss": 0.0001, + "step": 628 + }, + { + "epoch": 0.04053618611845073, + "grad_norm": 0.0005767880342411257, + "learning_rate": 8.770747783772357e-06, + "loss": 0.0, + "step": 629 + }, + { + "epoch": 0.04060063156537991, + "grad_norm": 0.0006805918616400947, + "learning_rate": 8.77290988787841e-06, + "loss": 0.0, + "step": 630 + }, + { + "epoch": 0.04066507701230908, + "grad_norm": 0.004117036655468099, + "learning_rate": 8.775068562793323e-06, + "loss": 0.0, + "step": 631 + }, + { + "epoch": 0.040729522459238256, + "grad_norm": 0.0007426168119818877, + "learning_rate": 8.777223819377568e-06, + "loss": 0.0, + "step": 632 + }, + { + "epoch": 0.040793967906167426, + "grad_norm": 0.011304413014642186, + "learning_rate": 8.7793756684401e-06, + "loss": 0.0, + "step": 633 + }, + { + "epoch": 0.040858413353096604, + "grad_norm": 0.0004935244143254786, + "learning_rate": 8.78152412073869e-06, + "loss": 0.0, + "step": 634 + }, + { + "epoch": 0.04092285880002578, + "grad_norm": 0.007054770802510865, + "learning_rate": 8.783669186980247e-06, + "loss": 0.0, + "step": 635 + }, + { + "epoch": 0.04098730424695495, + "grad_norm": 0.01369389714554964, + "learning_rate": 8.78581087782113e-06, + "loss": 0.0, + "step": 636 + }, + { + "epoch": 0.04105174969388413, + "grad_norm": 0.011906267839883657, + "learning_rate": 8.787949203867476e-06, + "loss": 0.0, + "step": 637 + }, + { + "epoch": 0.0411161951408133, + "grad_norm": 0.006885284197584247, + "learning_rate": 8.790084175675511e-06, + "loss": 0.0, + "step": 638 + }, + { + "epoch": 0.04118064058774248, + "grad_norm": 0.4487910847199546, + "learning_rate": 8.792215803751859e-06, + "loss": 0.0024, + "step": 639 + }, + { + "epoch": 0.04124508603467165, + "grad_norm": 0.9246561480973362, + "learning_rate": 8.794344098553859e-06, + "loss": 0.0008, + "step": 640 + }, + { + "epoch": 0.041309531481600825, + "grad_norm": 0.04488668151610257, + "learning_rate": 8.796469070489866e-06, + "loss": 0.0005, + "step": 641 + }, + { + "epoch": 0.04137397692853, + "grad_norm": 0.027408085647675932, + "learning_rate": 8.798590729919567e-06, + "loss": 0.0, + "step": 642 + }, + { + "epoch": 0.04143842237545917, + "grad_norm": 0.0017001050288757774, + "learning_rate": 8.800709087154267e-06, + "loss": 0.0, + "step": 643 + }, + { + "epoch": 0.04150286782238835, + "grad_norm": 0.16256402501267625, + "learning_rate": 8.802824152457208e-06, + "loss": 0.0003, + "step": 644 + }, + { + "epoch": 0.04156731326931752, + "grad_norm": 0.011062200472460433, + "learning_rate": 8.804935936043852e-06, + "loss": 0.0, + "step": 645 + }, + { + "epoch": 0.0416317587162467, + "grad_norm": 0.021831654093761303, + "learning_rate": 8.80704444808219e-06, + "loss": 0.0001, + "step": 646 + }, + { + "epoch": 0.04169620416317587, + "grad_norm": 0.06078771214529044, + "learning_rate": 8.809149698693027e-06, + "loss": 0.0001, + "step": 647 + }, + { + "epoch": 0.04176064961010505, + "grad_norm": 0.005953470628247643, + "learning_rate": 8.811251697950276e-06, + "loss": 0.0, + "step": 648 + }, + { + "epoch": 0.04182509505703422, + "grad_norm": 0.2050906677043541, + "learning_rate": 8.813350455881246e-06, + "loss": 0.0007, + "step": 649 + }, + { + "epoch": 0.041889540503963395, + "grad_norm": 0.1971951160380812, + "learning_rate": 8.815445982466934e-06, + "loss": 0.0003, + "step": 650 + }, + { + "epoch": 0.04195398595089257, + "grad_norm": 0.00236395117113748, + "learning_rate": 8.817538287642305e-06, + "loss": 0.0, + "step": 651 + }, + { + "epoch": 0.04201843139782174, + "grad_norm": 0.1256566977010292, + "learning_rate": 8.819627381296574e-06, + "loss": 0.0007, + "step": 652 + }, + { + "epoch": 0.04208287684475092, + "grad_norm": 0.07903941680245861, + "learning_rate": 8.82171327327349e-06, + "loss": 0.0003, + "step": 653 + }, + { + "epoch": 0.04214732229168009, + "grad_norm": 0.39863904829441993, + "learning_rate": 8.823795973371614e-06, + "loss": 0.0012, + "step": 654 + }, + { + "epoch": 0.04221176773860927, + "grad_norm": 0.035371382107604585, + "learning_rate": 8.825875491344588e-06, + "loss": 0.0001, + "step": 655 + }, + { + "epoch": 0.04227621318553844, + "grad_norm": 0.02279703556094779, + "learning_rate": 8.827951836901422e-06, + "loss": 0.0, + "step": 656 + }, + { + "epoch": 0.042340658632467616, + "grad_norm": 0.26524165484813206, + "learning_rate": 8.830025019706755e-06, + "loss": 0.0005, + "step": 657 + }, + { + "epoch": 0.042405104079396794, + "grad_norm": 0.01918668875669408, + "learning_rate": 8.832095049381132e-06, + "loss": 0.0001, + "step": 658 + }, + { + "epoch": 0.042469549526325964, + "grad_norm": 0.02418418424942438, + "learning_rate": 8.834161935501262e-06, + "loss": 0.0002, + "step": 659 + }, + { + "epoch": 0.04253399497325514, + "grad_norm": 0.0029525133059810048, + "learning_rate": 8.836225687600296e-06, + "loss": 0.0, + "step": 660 + }, + { + "epoch": 0.04259844042018431, + "grad_norm": 0.06152243840450862, + "learning_rate": 8.838286315168083e-06, + "loss": 0.0001, + "step": 661 + }, + { + "epoch": 0.04266288586711349, + "grad_norm": 0.05055057189679331, + "learning_rate": 8.840343827651438e-06, + "loss": 0.0004, + "step": 662 + }, + { + "epoch": 0.04272733131404266, + "grad_norm": 0.00030230320615462757, + "learning_rate": 8.842398234454391e-06, + "loss": 0.0, + "step": 663 + }, + { + "epoch": 0.04279177676097184, + "grad_norm": 0.022563544469127325, + "learning_rate": 8.844449544938457e-06, + "loss": 0.0, + "step": 664 + }, + { + "epoch": 0.042856222207901015, + "grad_norm": 0.0010129811466353747, + "learning_rate": 8.846497768422887e-06, + "loss": 0.0, + "step": 665 + }, + { + "epoch": 0.042920667654830186, + "grad_norm": 0.10487420670137693, + "learning_rate": 8.84854291418492e-06, + "loss": 0.0001, + "step": 666 + }, + { + "epoch": 0.04298511310175936, + "grad_norm": 0.13032998283350847, + "learning_rate": 8.850584991460033e-06, + "loss": 0.0024, + "step": 667 + }, + { + "epoch": 0.043049558548688534, + "grad_norm": 0.12460349571325573, + "learning_rate": 8.852624009442204e-06, + "loss": 0.0001, + "step": 668 + }, + { + "epoch": 0.04311400399561771, + "grad_norm": 0.0003381800828124894, + "learning_rate": 8.854659977284147e-06, + "loss": 0.0, + "step": 669 + }, + { + "epoch": 0.04317844944254688, + "grad_norm": 0.007937901512867782, + "learning_rate": 8.856692904097561e-06, + "loss": 0.0001, + "step": 670 + }, + { + "epoch": 0.04324289488947606, + "grad_norm": 0.0008805466292661837, + "learning_rate": 8.85872279895338e-06, + "loss": 0.0, + "step": 671 + }, + { + "epoch": 0.04330734033640523, + "grad_norm": 0.17617806573889097, + "learning_rate": 8.860749670882013e-06, + "loss": 0.0005, + "step": 672 + }, + { + "epoch": 0.04337178578333441, + "grad_norm": 0.00395744069753078, + "learning_rate": 8.862773528873578e-06, + "loss": 0.0, + "step": 673 + }, + { + "epoch": 0.043436231230263585, + "grad_norm": 0.007247134466748965, + "learning_rate": 8.864794381878157e-06, + "loss": 0.0, + "step": 674 + }, + { + "epoch": 0.043500676677192755, + "grad_norm": 0.0030663660709385806, + "learning_rate": 8.86681223880602e-06, + "loss": 0.0, + "step": 675 + }, + { + "epoch": 0.04356512212412193, + "grad_norm": 0.017393769671059423, + "learning_rate": 8.868827108527866e-06, + "loss": 0.0, + "step": 676 + }, + { + "epoch": 0.0436295675710511, + "grad_norm": 0.0017702782273335328, + "learning_rate": 8.87083899987505e-06, + "loss": 0.0, + "step": 677 + }, + { + "epoch": 0.04369401301798028, + "grad_norm": 0.00022959416712921537, + "learning_rate": 8.872847921639834e-06, + "loss": 0.0, + "step": 678 + }, + { + "epoch": 0.04375845846490945, + "grad_norm": 0.003605868948068563, + "learning_rate": 8.874853882575593e-06, + "loss": 0.0, + "step": 679 + }, + { + "epoch": 0.04382290391183863, + "grad_norm": 0.01744342059980127, + "learning_rate": 8.876856891397061e-06, + "loss": 0.0001, + "step": 680 + }, + { + "epoch": 0.043887349358767806, + "grad_norm": 7.388464259112656e-05, + "learning_rate": 8.878856956780554e-06, + "loss": 0.0, + "step": 681 + }, + { + "epoch": 0.04395179480569698, + "grad_norm": 0.0013601676504224963, + "learning_rate": 8.880854087364192e-06, + "loss": 0.0, + "step": 682 + }, + { + "epoch": 0.044016240252626154, + "grad_norm": 0.2798160049980701, + "learning_rate": 8.882848291748122e-06, + "loss": 0.0011, + "step": 683 + }, + { + "epoch": 0.044080685699555325, + "grad_norm": 0.14433741459071334, + "learning_rate": 8.884839578494751e-06, + "loss": 0.0004, + "step": 684 + }, + { + "epoch": 0.0441451311464845, + "grad_norm": 0.24179156302192892, + "learning_rate": 8.886827956128954e-06, + "loss": 0.002, + "step": 685 + }, + { + "epoch": 0.04420957659341367, + "grad_norm": 0.0021927636562543086, + "learning_rate": 8.8888134331383e-06, + "loss": 0.0, + "step": 686 + }, + { + "epoch": 0.04427402204034285, + "grad_norm": 0.0022068358536655036, + "learning_rate": 8.890796017973267e-06, + "loss": 0.0, + "step": 687 + }, + { + "epoch": 0.04433846748727202, + "grad_norm": 0.005372660175485759, + "learning_rate": 8.892775719047455e-06, + "loss": 0.0, + "step": 688 + }, + { + "epoch": 0.0444029129342012, + "grad_norm": 0.03758966528205878, + "learning_rate": 8.894752544737809e-06, + "loss": 0.0001, + "step": 689 + }, + { + "epoch": 0.044467358381130376, + "grad_norm": 0.012306964234103844, + "learning_rate": 8.896726503384818e-06, + "loss": 0.0, + "step": 690 + }, + { + "epoch": 0.044531803828059546, + "grad_norm": 0.00020675266164698914, + "learning_rate": 8.898697603292746e-06, + "loss": 0.0, + "step": 691 + }, + { + "epoch": 0.044596249274988724, + "grad_norm": 0.0004554410369827967, + "learning_rate": 8.900665852729818e-06, + "loss": 0.0, + "step": 692 + }, + { + "epoch": 0.044660694721917894, + "grad_norm": 0.007791950816193934, + "learning_rate": 8.90263125992845e-06, + "loss": 0.0, + "step": 693 + }, + { + "epoch": 0.04472514016884707, + "grad_norm": 0.18602995977878675, + "learning_rate": 8.904593833085437e-06, + "loss": 0.0005, + "step": 694 + }, + { + "epoch": 0.04478958561577624, + "grad_norm": 0.013594795936494878, + "learning_rate": 8.90655358036217e-06, + "loss": 0.0001, + "step": 695 + }, + { + "epoch": 0.04485403106270542, + "grad_norm": 0.00023714633586855382, + "learning_rate": 8.908510509884837e-06, + "loss": 0.0, + "step": 696 + }, + { + "epoch": 0.0449184765096346, + "grad_norm": 6.832725945019009e-05, + "learning_rate": 8.910464629744626e-06, + "loss": 0.0, + "step": 697 + }, + { + "epoch": 0.04498292195656377, + "grad_norm": 0.016889844296641223, + "learning_rate": 8.912415947997922e-06, + "loss": 0.0003, + "step": 698 + }, + { + "epoch": 0.045047367403492945, + "grad_norm": 0.2184353592157856, + "learning_rate": 8.914364472666504e-06, + "loss": 0.0026, + "step": 699 + }, + { + "epoch": 0.045111812850422116, + "grad_norm": 0.25465223325641045, + "learning_rate": 8.916310211737758e-06, + "loss": 0.0008, + "step": 700 + }, + { + "epoch": 0.04517625829735129, + "grad_norm": 0.003655815925584125, + "learning_rate": 8.918253173164853e-06, + "loss": 0.0, + "step": 701 + }, + { + "epoch": 0.045240703744280464, + "grad_norm": 0.0006114981310893153, + "learning_rate": 8.92019336486695e-06, + "loss": 0.0, + "step": 702 + }, + { + "epoch": 0.04530514919120964, + "grad_norm": 0.00010137673386137189, + "learning_rate": 8.922130794729396e-06, + "loss": 0.0, + "step": 703 + }, + { + "epoch": 0.04536959463813882, + "grad_norm": 0.07228969160342366, + "learning_rate": 8.924065470603898e-06, + "loss": 0.0001, + "step": 704 + }, + { + "epoch": 0.04543404008506799, + "grad_norm": 0.1913084734088233, + "learning_rate": 8.925997400308744e-06, + "loss": 0.0004, + "step": 705 + }, + { + "epoch": 0.04549848553199717, + "grad_norm": 0.07917869010187947, + "learning_rate": 8.927926591628964e-06, + "loss": 0.0003, + "step": 706 + }, + { + "epoch": 0.04556293097892634, + "grad_norm": 0.06977438315324237, + "learning_rate": 8.929853052316538e-06, + "loss": 0.0004, + "step": 707 + }, + { + "epoch": 0.045627376425855515, + "grad_norm": 0.034029532033426, + "learning_rate": 8.931776790090572e-06, + "loss": 0.0001, + "step": 708 + }, + { + "epoch": 0.045691821872784685, + "grad_norm": 0.08348560267343069, + "learning_rate": 8.933697812637488e-06, + "loss": 0.0002, + "step": 709 + }, + { + "epoch": 0.04575626731971386, + "grad_norm": 0.051544153316252506, + "learning_rate": 8.935616127611207e-06, + "loss": 0.0017, + "step": 710 + }, + { + "epoch": 0.04582071276664303, + "grad_norm": 0.0763091812179522, + "learning_rate": 8.937531742633331e-06, + "loss": 0.0001, + "step": 711 + }, + { + "epoch": 0.04588515821357221, + "grad_norm": 0.0007173089122636117, + "learning_rate": 8.939444665293331e-06, + "loss": 0.0, + "step": 712 + }, + { + "epoch": 0.04594960366050139, + "grad_norm": 0.004598107264150894, + "learning_rate": 8.941354903148714e-06, + "loss": 0.0, + "step": 713 + }, + { + "epoch": 0.04601404910743056, + "grad_norm": 0.000392021647295444, + "learning_rate": 8.943262463725215e-06, + "loss": 0.0, + "step": 714 + }, + { + "epoch": 0.046078494554359736, + "grad_norm": 0.005239661838366242, + "learning_rate": 8.945167354516973e-06, + "loss": 0.0, + "step": 715 + }, + { + "epoch": 0.04614294000128891, + "grad_norm": 0.013039220056873272, + "learning_rate": 8.947069582986702e-06, + "loss": 0.0, + "step": 716 + }, + { + "epoch": 0.046207385448218084, + "grad_norm": 0.7156779940111023, + "learning_rate": 8.948969156565867e-06, + "loss": 0.0028, + "step": 717 + }, + { + "epoch": 0.046271830895147255, + "grad_norm": 0.0014232633069768458, + "learning_rate": 8.950866082654867e-06, + "loss": 0.0, + "step": 718 + }, + { + "epoch": 0.04633627634207643, + "grad_norm": 0.010360478298695039, + "learning_rate": 8.952760368623196e-06, + "loss": 0.0, + "step": 719 + }, + { + "epoch": 0.04640072178900561, + "grad_norm": 0.04740542080782063, + "learning_rate": 8.954652021809624e-06, + "loss": 0.0002, + "step": 720 + }, + { + "epoch": 0.04646516723593478, + "grad_norm": 0.0013033683657919647, + "learning_rate": 8.956541049522359e-06, + "loss": 0.0, + "step": 721 + }, + { + "epoch": 0.04652961268286396, + "grad_norm": 0.00024011095477306243, + "learning_rate": 8.958427459039227e-06, + "loss": 0.0, + "step": 722 + }, + { + "epoch": 0.04659405812979313, + "grad_norm": 0.03165036091424995, + "learning_rate": 8.960311257607835e-06, + "loss": 0.0016, + "step": 723 + }, + { + "epoch": 0.046658503576722306, + "grad_norm": 0.0037302307404365937, + "learning_rate": 8.96219245244573e-06, + "loss": 0.0, + "step": 724 + }, + { + "epoch": 0.046722949023651476, + "grad_norm": 0.11715760644466812, + "learning_rate": 8.964071050740584e-06, + "loss": 0.0004, + "step": 725 + }, + { + "epoch": 0.046787394470580654, + "grad_norm": 0.011645889650838596, + "learning_rate": 8.965947059650336e-06, + "loss": 0.0001, + "step": 726 + }, + { + "epoch": 0.04685183991750983, + "grad_norm": 0.06984104940667903, + "learning_rate": 8.967820486303374e-06, + "loss": 0.0003, + "step": 727 + }, + { + "epoch": 0.046916285364439, + "grad_norm": 0.32142027752446717, + "learning_rate": 8.96969133779869e-06, + "loss": 0.0005, + "step": 728 + }, + { + "epoch": 0.04698073081136818, + "grad_norm": 2.551444979388958, + "learning_rate": 8.971559621206039e-06, + "loss": 0.0197, + "step": 729 + }, + { + "epoch": 0.04704517625829735, + "grad_norm": 0.002081532217039823, + "learning_rate": 8.973425343566106e-06, + "loss": 0.0, + "step": 730 + }, + { + "epoch": 0.04710962170522653, + "grad_norm": 0.25030308471193197, + "learning_rate": 8.975288511890657e-06, + "loss": 0.0052, + "step": 731 + }, + { + "epoch": 0.0471740671521557, + "grad_norm": 0.005732779290674802, + "learning_rate": 8.977149133162707e-06, + "loss": 0.0, + "step": 732 + }, + { + "epoch": 0.047238512599084875, + "grad_norm": 0.10988615433689451, + "learning_rate": 8.979007214336669e-06, + "loss": 0.0003, + "step": 733 + }, + { + "epoch": 0.047302958046014046, + "grad_norm": 0.12829035230675495, + "learning_rate": 8.98086276233851e-06, + "loss": 0.0003, + "step": 734 + }, + { + "epoch": 0.04736740349294322, + "grad_norm": 0.007249099234747826, + "learning_rate": 8.982715784065911e-06, + "loss": 0.0, + "step": 735 + }, + { + "epoch": 0.0474318489398724, + "grad_norm": 0.019692113896025192, + "learning_rate": 8.984566286388422e-06, + "loss": 0.0001, + "step": 736 + }, + { + "epoch": 0.04749629438680157, + "grad_norm": 0.05206044806916652, + "learning_rate": 8.986414276147602e-06, + "loss": 0.0002, + "step": 737 + }, + { + "epoch": 0.04756073983373075, + "grad_norm": 0.028860954812302157, + "learning_rate": 8.988259760157187e-06, + "loss": 0.0001, + "step": 738 + }, + { + "epoch": 0.04762518528065992, + "grad_norm": 0.001301629719628138, + "learning_rate": 8.990102745203234e-06, + "loss": 0.0, + "step": 739 + }, + { + "epoch": 0.0476896307275891, + "grad_norm": 0.002345407602635431, + "learning_rate": 8.991943238044267e-06, + "loss": 0.0, + "step": 740 + }, + { + "epoch": 0.04775407617451827, + "grad_norm": 0.12231212697510714, + "learning_rate": 8.993781245411428e-06, + "loss": 0.0004, + "step": 741 + }, + { + "epoch": 0.047818521621447445, + "grad_norm": 0.049014023716665675, + "learning_rate": 8.995616774008632e-06, + "loss": 0.0001, + "step": 742 + }, + { + "epoch": 0.04788296706837662, + "grad_norm": 0.009068422831759652, + "learning_rate": 8.9974498305127e-06, + "loss": 0.0, + "step": 743 + }, + { + "epoch": 0.04794741251530579, + "grad_norm": 0.004244489514567139, + "learning_rate": 8.999280421573518e-06, + "loss": 0.0, + "step": 744 + }, + { + "epoch": 0.04801185796223497, + "grad_norm": 0.10947748900064816, + "learning_rate": 9.001108553814172e-06, + "loss": 0.0003, + "step": 745 + }, + { + "epoch": 0.04807630340916414, + "grad_norm": 0.008424072505390963, + "learning_rate": 9.002934233831098e-06, + "loss": 0.0, + "step": 746 + }, + { + "epoch": 0.04814074885609332, + "grad_norm": 0.006270599434854108, + "learning_rate": 9.004757468194222e-06, + "loss": 0.0, + "step": 747 + }, + { + "epoch": 0.04820519430302249, + "grad_norm": 0.01742854568377238, + "learning_rate": 9.006578263447102e-06, + "loss": 0.0001, + "step": 748 + }, + { + "epoch": 0.048269639749951666, + "grad_norm": 0.024492512070331957, + "learning_rate": 9.008396626107069e-06, + "loss": 0.0, + "step": 749 + }, + { + "epoch": 0.048334085196880844, + "grad_norm": 0.12119974988300376, + "learning_rate": 9.010212562665369e-06, + "loss": 0.0001, + "step": 750 + }, + { + "epoch": 0.048398530643810014, + "grad_norm": 0.06679123539568732, + "learning_rate": 9.012026079587298e-06, + "loss": 0.0001, + "step": 751 + }, + { + "epoch": 0.04846297609073919, + "grad_norm": 0.07117144798083709, + "learning_rate": 9.013837183312346e-06, + "loss": 0.0001, + "step": 752 + }, + { + "epoch": 0.04852742153766836, + "grad_norm": 0.00487018289232718, + "learning_rate": 9.015645880254325e-06, + "loss": 0.0, + "step": 753 + }, + { + "epoch": 0.04859186698459754, + "grad_norm": 0.612146846985091, + "learning_rate": 9.017452176801516e-06, + "loss": 0.0025, + "step": 754 + }, + { + "epoch": 0.04865631243152671, + "grad_norm": 0.0034136926349857006, + "learning_rate": 9.019256079316795e-06, + "loss": 0.0, + "step": 755 + }, + { + "epoch": 0.04872075787845589, + "grad_norm": 0.12108867773077127, + "learning_rate": 9.021057594137776e-06, + "loss": 0.0002, + "step": 756 + }, + { + "epoch": 0.04878520332538506, + "grad_norm": 0.004504167955212749, + "learning_rate": 9.022856727576939e-06, + "loss": 0.0, + "step": 757 + }, + { + "epoch": 0.048849648772314236, + "grad_norm": 0.07505442996143205, + "learning_rate": 9.024653485921763e-06, + "loss": 0.0015, + "step": 758 + }, + { + "epoch": 0.04891409421924341, + "grad_norm": 0.0005453544489624827, + "learning_rate": 9.026447875434859e-06, + "loss": 0.0, + "step": 759 + }, + { + "epoch": 0.048978539666172584, + "grad_norm": 0.34459969236495225, + "learning_rate": 9.0282399023541e-06, + "loss": 0.0002, + "step": 760 + }, + { + "epoch": 0.04904298511310176, + "grad_norm": 0.18131343537161349, + "learning_rate": 9.030029572892756e-06, + "loss": 0.0007, + "step": 761 + }, + { + "epoch": 0.04910743056003093, + "grad_norm": 0.02098021280926811, + "learning_rate": 9.031816893239614e-06, + "loss": 0.0, + "step": 762 + }, + { + "epoch": 0.04917187600696011, + "grad_norm": 0.08627074285048919, + "learning_rate": 9.033601869559115e-06, + "loss": 0.0035, + "step": 763 + }, + { + "epoch": 0.04923632145388928, + "grad_norm": 0.002511861290816773, + "learning_rate": 9.035384507991479e-06, + "loss": 0.0, + "step": 764 + }, + { + "epoch": 0.04930076690081846, + "grad_norm": 0.40511111030437946, + "learning_rate": 9.037164814652826e-06, + "loss": 0.0008, + "step": 765 + }, + { + "epoch": 0.049365212347747635, + "grad_norm": 0.14034011913080038, + "learning_rate": 9.038942795635312e-06, + "loss": 0.0001, + "step": 766 + }, + { + "epoch": 0.049429657794676805, + "grad_norm": 0.06861259699001675, + "learning_rate": 9.040718457007249e-06, + "loss": 0.0001, + "step": 767 + }, + { + "epoch": 0.04949410324160598, + "grad_norm": 0.001581944552875584, + "learning_rate": 9.042491804813226e-06, + "loss": 0.0, + "step": 768 + }, + { + "epoch": 0.04955854868853515, + "grad_norm": 0.005308040387458921, + "learning_rate": 9.044262845074235e-06, + "loss": 0.0, + "step": 769 + }, + { + "epoch": 0.04962299413546433, + "grad_norm": 0.0019728464218824234, + "learning_rate": 9.046031583787798e-06, + "loss": 0.0, + "step": 770 + }, + { + "epoch": 0.0496874395823935, + "grad_norm": 0.003223276699460636, + "learning_rate": 9.047798026928083e-06, + "loss": 0.0, + "step": 771 + }, + { + "epoch": 0.04975188502932268, + "grad_norm": 0.021868800717484028, + "learning_rate": 9.049562180446029e-06, + "loss": 0.0001, + "step": 772 + }, + { + "epoch": 0.04981633047625185, + "grad_norm": 0.1826315175518166, + "learning_rate": 9.051324050269462e-06, + "loss": 0.0003, + "step": 773 + }, + { + "epoch": 0.04988077592318103, + "grad_norm": 0.29998356456240793, + "learning_rate": 9.05308364230322e-06, + "loss": 0.0011, + "step": 774 + }, + { + "epoch": 0.049945221370110204, + "grad_norm": 0.5431729488475763, + "learning_rate": 9.054840962429265e-06, + "loss": 0.0004, + "step": 775 + }, + { + "epoch": 0.050009666817039375, + "grad_norm": 0.009489328773548649, + "learning_rate": 9.056596016506807e-06, + "loss": 0.0, + "step": 776 + }, + { + "epoch": 0.05007411226396855, + "grad_norm": 0.00046785632344008304, + "learning_rate": 9.05834881037242e-06, + "loss": 0.0, + "step": 777 + }, + { + "epoch": 0.05013855771089772, + "grad_norm": 0.01111245605224189, + "learning_rate": 9.060099349840158e-06, + "loss": 0.0001, + "step": 778 + }, + { + "epoch": 0.0502030031578269, + "grad_norm": 0.07606922165075893, + "learning_rate": 9.061847640701665e-06, + "loss": 0.0005, + "step": 779 + }, + { + "epoch": 0.05026744860475607, + "grad_norm": 0.000699417674606527, + "learning_rate": 9.063593688726301e-06, + "loss": 0.0, + "step": 780 + }, + { + "epoch": 0.05033189405168525, + "grad_norm": 0.010236254823261563, + "learning_rate": 9.065337499661248e-06, + "loss": 0.0, + "step": 781 + }, + { + "epoch": 0.050396339498614426, + "grad_norm": 0.0017980719947457786, + "learning_rate": 9.067079079231624e-06, + "loss": 0.0, + "step": 782 + }, + { + "epoch": 0.050460784945543596, + "grad_norm": 0.000939549117843024, + "learning_rate": 9.068818433140602e-06, + "loss": 0.0, + "step": 783 + }, + { + "epoch": 0.050525230392472774, + "grad_norm": 0.002781551161266302, + "learning_rate": 9.070555567069513e-06, + "loss": 0.0, + "step": 784 + }, + { + "epoch": 0.050589675839401944, + "grad_norm": 0.0035230904250238056, + "learning_rate": 9.072290486677968e-06, + "loss": 0.0, + "step": 785 + }, + { + "epoch": 0.05065412128633112, + "grad_norm": 0.01560564350924316, + "learning_rate": 9.074023197603955e-06, + "loss": 0.0001, + "step": 786 + }, + { + "epoch": 0.05071856673326029, + "grad_norm": 0.010400567996276689, + "learning_rate": 9.075753705463962e-06, + "loss": 0.0, + "step": 787 + }, + { + "epoch": 0.05078301218018947, + "grad_norm": 0.00015725166531810745, + "learning_rate": 9.07748201585308e-06, + "loss": 0.0, + "step": 788 + }, + { + "epoch": 0.05084745762711865, + "grad_norm": 0.0016245494880248916, + "learning_rate": 9.079208134345112e-06, + "loss": 0.0, + "step": 789 + }, + { + "epoch": 0.05091190307404782, + "grad_norm": 0.001113785553319258, + "learning_rate": 9.080932066492682e-06, + "loss": 0.0, + "step": 790 + }, + { + "epoch": 0.050976348520976995, + "grad_norm": 0.46446961485109706, + "learning_rate": 9.082653817827336e-06, + "loss": 0.0007, + "step": 791 + }, + { + "epoch": 0.051040793967906166, + "grad_norm": 0.0013411261450823374, + "learning_rate": 9.084373393859663e-06, + "loss": 0.0, + "step": 792 + }, + { + "epoch": 0.05110523941483534, + "grad_norm": 0.004991003306657957, + "learning_rate": 9.086090800079385e-06, + "loss": 0.0, + "step": 793 + }, + { + "epoch": 0.051169684861764514, + "grad_norm": 0.0013036785024449445, + "learning_rate": 9.087806041955472e-06, + "loss": 0.0, + "step": 794 + }, + { + "epoch": 0.05123413030869369, + "grad_norm": 0.00048091056047600136, + "learning_rate": 9.089519124936242e-06, + "loss": 0.0, + "step": 795 + }, + { + "epoch": 0.05129857575562286, + "grad_norm": 0.0016136776036838984, + "learning_rate": 9.091230054449467e-06, + "loss": 0.0, + "step": 796 + }, + { + "epoch": 0.05136302120255204, + "grad_norm": 0.4099738669208059, + "learning_rate": 9.092938835902477e-06, + "loss": 0.0029, + "step": 797 + }, + { + "epoch": 0.05142746664948122, + "grad_norm": 0.003942835183172, + "learning_rate": 9.094645474682253e-06, + "loss": 0.0, + "step": 798 + }, + { + "epoch": 0.05149191209641039, + "grad_norm": 0.0030768112635003782, + "learning_rate": 9.096349976155548e-06, + "loss": 0.0, + "step": 799 + }, + { + "epoch": 0.051556357543339565, + "grad_norm": 0.006397829046396678, + "learning_rate": 9.098052345668971e-06, + "loss": 0.0, + "step": 800 + }, + { + "epoch": 0.051620802990268735, + "grad_norm": 0.06002623182371448, + "learning_rate": 9.099752588549096e-06, + "loss": 0.0007, + "step": 801 + }, + { + "epoch": 0.05168524843719791, + "grad_norm": 3.3841404940161315, + "learning_rate": 9.101450710102554e-06, + "loss": 0.034, + "step": 802 + }, + { + "epoch": 0.05174969388412708, + "grad_norm": 0.0013937209491712162, + "learning_rate": 9.103146715616145e-06, + "loss": 0.0, + "step": 803 + }, + { + "epoch": 0.05181413933105626, + "grad_norm": 6.458562080363265e-05, + "learning_rate": 9.104840610356928e-06, + "loss": 0.0, + "step": 804 + }, + { + "epoch": 0.05187858477798544, + "grad_norm": 0.3579401105183063, + "learning_rate": 9.10653239957232e-06, + "loss": 0.0025, + "step": 805 + }, + { + "epoch": 0.05194303022491461, + "grad_norm": 0.008360636412067086, + "learning_rate": 9.108222088490193e-06, + "loss": 0.0, + "step": 806 + }, + { + "epoch": 0.052007475671843786, + "grad_norm": 0.005285542514094032, + "learning_rate": 9.10990968231898e-06, + "loss": 0.0, + "step": 807 + }, + { + "epoch": 0.05207192111877296, + "grad_norm": 0.0002770779211620817, + "learning_rate": 9.11159518624775e-06, + "loss": 0.0, + "step": 808 + }, + { + "epoch": 0.052136366565702134, + "grad_norm": 0.0011585447889919547, + "learning_rate": 9.113278605446331e-06, + "loss": 0.0, + "step": 809 + }, + { + "epoch": 0.052200812012631305, + "grad_norm": 0.049277994096405396, + "learning_rate": 9.114959945065387e-06, + "loss": 0.0004, + "step": 810 + }, + { + "epoch": 0.05226525745956048, + "grad_norm": 0.00015442997540374582, + "learning_rate": 9.116639210236513e-06, + "loss": 0.0, + "step": 811 + }, + { + "epoch": 0.05232970290648966, + "grad_norm": 0.0013635697356662283, + "learning_rate": 9.11831640607234e-06, + "loss": 0.0, + "step": 812 + }, + { + "epoch": 0.05239414835341883, + "grad_norm": 0.016716470890307505, + "learning_rate": 9.119991537666614e-06, + "loss": 0.0002, + "step": 813 + }, + { + "epoch": 0.05245859380034801, + "grad_norm": 0.0005278616955096009, + "learning_rate": 9.121664610094306e-06, + "loss": 0.0, + "step": 814 + }, + { + "epoch": 0.05252303924727718, + "grad_norm": 0.0025750142539637693, + "learning_rate": 9.123335628411687e-06, + "loss": 0.0, + "step": 815 + }, + { + "epoch": 0.052587484694206356, + "grad_norm": 0.00020521592683312736, + "learning_rate": 9.125004597656428e-06, + "loss": 0.0, + "step": 816 + }, + { + "epoch": 0.052651930141135526, + "grad_norm": 0.19445270116812716, + "learning_rate": 9.126671522847696e-06, + "loss": 0.0004, + "step": 817 + }, + { + "epoch": 0.052716375588064704, + "grad_norm": 0.3771810287388212, + "learning_rate": 9.128336408986232e-06, + "loss": 0.0025, + "step": 818 + }, + { + "epoch": 0.052780821034993874, + "grad_norm": 0.05887705540169662, + "learning_rate": 9.129999261054454e-06, + "loss": 0.0001, + "step": 819 + }, + { + "epoch": 0.05284526648192305, + "grad_norm": 0.1534011955489274, + "learning_rate": 9.131660084016536e-06, + "loss": 0.0003, + "step": 820 + }, + { + "epoch": 0.05290971192885223, + "grad_norm": 0.013809870076849355, + "learning_rate": 9.133318882818504e-06, + "loss": 0.0001, + "step": 821 + }, + { + "epoch": 0.0529741573757814, + "grad_norm": 0.0032549074867175556, + "learning_rate": 9.134975662388321e-06, + "loss": 0.0, + "step": 822 + }, + { + "epoch": 0.05303860282271058, + "grad_norm": 0.009309798570618937, + "learning_rate": 9.136630427635978e-06, + "loss": 0.0001, + "step": 823 + }, + { + "epoch": 0.05310304826963975, + "grad_norm": 0.0033493742577040365, + "learning_rate": 9.138283183453572e-06, + "loss": 0.0, + "step": 824 + }, + { + "epoch": 0.053167493716568925, + "grad_norm": 0.018859710507561127, + "learning_rate": 9.139933934715408e-06, + "loss": 0.0001, + "step": 825 + }, + { + "epoch": 0.053231939163498096, + "grad_norm": 0.01560803326672018, + "learning_rate": 9.141582686278072e-06, + "loss": 0.0, + "step": 826 + }, + { + "epoch": 0.05329638461042727, + "grad_norm": 0.00016964226944774627, + "learning_rate": 9.143229442980527e-06, + "loss": 0.0, + "step": 827 + }, + { + "epoch": 0.05336083005735645, + "grad_norm": 0.022061608980455484, + "learning_rate": 9.144874209644185e-06, + "loss": 0.0016, + "step": 828 + }, + { + "epoch": 0.05342527550428562, + "grad_norm": 0.006649718737306569, + "learning_rate": 9.146516991073004e-06, + "loss": 0.0, + "step": 829 + }, + { + "epoch": 0.0534897209512148, + "grad_norm": 0.00042874270227666826, + "learning_rate": 9.148157792053569e-06, + "loss": 0.0, + "step": 830 + }, + { + "epoch": 0.05355416639814397, + "grad_norm": 0.2741124949858014, + "learning_rate": 9.149796617355175e-06, + "loss": 0.0042, + "step": 831 + }, + { + "epoch": 0.05361861184507315, + "grad_norm": 0.03657944854462583, + "learning_rate": 9.151433471729903e-06, + "loss": 0.0001, + "step": 832 + }, + { + "epoch": 0.05368305729200232, + "grad_norm": 0.001000460494932859, + "learning_rate": 9.153068359912718e-06, + "loss": 0.0, + "step": 833 + }, + { + "epoch": 0.053747502738931495, + "grad_norm": 0.4291703666910219, + "learning_rate": 9.154701286621536e-06, + "loss": 0.0005, + "step": 834 + }, + { + "epoch": 0.05381194818586067, + "grad_norm": 0.011767546566647835, + "learning_rate": 9.156332256557316e-06, + "loss": 0.0, + "step": 835 + }, + { + "epoch": 0.05387639363278984, + "grad_norm": 0.02017886370790413, + "learning_rate": 9.157961274404139e-06, + "loss": 0.0001, + "step": 836 + }, + { + "epoch": 0.05394083907971902, + "grad_norm": 0.1635306058218916, + "learning_rate": 9.159588344829283e-06, + "loss": 0.0003, + "step": 837 + }, + { + "epoch": 0.05400528452664819, + "grad_norm": 0.03241790156834949, + "learning_rate": 9.161213472483306e-06, + "loss": 0.0001, + "step": 838 + }, + { + "epoch": 0.05406972997357737, + "grad_norm": 0.03645090452502005, + "learning_rate": 9.162836662000131e-06, + "loss": 0.0, + "step": 839 + }, + { + "epoch": 0.05413417542050654, + "grad_norm": 0.001018123489854683, + "learning_rate": 9.164457917997124e-06, + "loss": 0.0, + "step": 840 + }, + { + "epoch": 0.054198620867435716, + "grad_norm": 0.0006147523546883594, + "learning_rate": 9.166077245075165e-06, + "loss": 0.0, + "step": 841 + }, + { + "epoch": 0.05426306631436489, + "grad_norm": 0.03234380830020073, + "learning_rate": 9.167694647818732e-06, + "loss": 0.0002, + "step": 842 + }, + { + "epoch": 0.054327511761294064, + "grad_norm": 0.30908209151377397, + "learning_rate": 9.16931013079598e-06, + "loss": 0.0003, + "step": 843 + }, + { + "epoch": 0.05439195720822324, + "grad_norm": 0.03522377846818942, + "learning_rate": 9.170923698558816e-06, + "loss": 0.0005, + "step": 844 + }, + { + "epoch": 0.05445640265515241, + "grad_norm": 0.000277197422768193, + "learning_rate": 9.172535355642978e-06, + "loss": 0.0, + "step": 845 + }, + { + "epoch": 0.05452084810208159, + "grad_norm": 0.0016432842093718164, + "learning_rate": 9.174145106568109e-06, + "loss": 0.0, + "step": 846 + }, + { + "epoch": 0.05458529354901076, + "grad_norm": 0.013591225137140847, + "learning_rate": 9.175752955837838e-06, + "loss": 0.0, + "step": 847 + }, + { + "epoch": 0.05464973899593994, + "grad_norm": 0.8462395497498096, + "learning_rate": 9.177358907939845e-06, + "loss": 0.0031, + "step": 848 + }, + { + "epoch": 0.05471418444286911, + "grad_norm": 0.04412306889026068, + "learning_rate": 9.178962967345948e-06, + "loss": 0.0001, + "step": 849 + }, + { + "epoch": 0.054778629889798286, + "grad_norm": 0.005405304038502618, + "learning_rate": 9.180565138512175e-06, + "loss": 0.0, + "step": 850 + }, + { + "epoch": 0.05484307533672746, + "grad_norm": 0.3794935924368555, + "learning_rate": 9.182165425878829e-06, + "loss": 0.0019, + "step": 851 + }, + { + "epoch": 0.054907520783656634, + "grad_norm": 0.0020777573880359597, + "learning_rate": 9.183763833870574e-06, + "loss": 0.0, + "step": 852 + }, + { + "epoch": 0.05497196623058581, + "grad_norm": 0.004563794466339452, + "learning_rate": 9.185360366896503e-06, + "loss": 0.0, + "step": 853 + }, + { + "epoch": 0.05503641167751498, + "grad_norm": 0.007595697852477659, + "learning_rate": 9.186955029350209e-06, + "loss": 0.0001, + "step": 854 + }, + { + "epoch": 0.05510085712444416, + "grad_norm": 0.021470438571486155, + "learning_rate": 9.188547825609863e-06, + "loss": 0.0003, + "step": 855 + }, + { + "epoch": 0.05516530257137333, + "grad_norm": 0.03807929940940631, + "learning_rate": 9.190138760038282e-06, + "loss": 0.0001, + "step": 856 + }, + { + "epoch": 0.05522974801830251, + "grad_norm": 0.052610970411618635, + "learning_rate": 9.191727836983e-06, + "loss": 0.0002, + "step": 857 + }, + { + "epoch": 0.05529419346523168, + "grad_norm": 0.011456006523472022, + "learning_rate": 9.19331506077634e-06, + "loss": 0.0001, + "step": 858 + }, + { + "epoch": 0.055358638912160855, + "grad_norm": 0.010611982148422123, + "learning_rate": 9.19490043573549e-06, + "loss": 0.0, + "step": 859 + }, + { + "epoch": 0.05542308435909003, + "grad_norm": 0.00036207342749774215, + "learning_rate": 9.196483966162567e-06, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 0.055487529806019203, + "grad_norm": 0.006096769717651004, + "learning_rate": 9.198065656344689e-06, + "loss": 0.0, + "step": 861 + }, + { + "epoch": 0.05555197525294838, + "grad_norm": 0.0033176289929112016, + "learning_rate": 9.199645510554044e-06, + "loss": 0.0, + "step": 862 + }, + { + "epoch": 0.05561642069987755, + "grad_norm": 0.07427952499156017, + "learning_rate": 9.201223533047966e-06, + "loss": 0.0007, + "step": 863 + }, + { + "epoch": 0.05568086614680673, + "grad_norm": 0.007857354582267988, + "learning_rate": 9.20279972806899e-06, + "loss": 0.0, + "step": 864 + }, + { + "epoch": 0.0557453115937359, + "grad_norm": 0.43490041132666496, + "learning_rate": 9.204374099844932e-06, + "loss": 0.003, + "step": 865 + }, + { + "epoch": 0.05580975704066508, + "grad_norm": 0.004341780147231225, + "learning_rate": 9.205946652588954e-06, + "loss": 0.0, + "step": 866 + }, + { + "epoch": 0.055874202487594254, + "grad_norm": 0.02007426507979371, + "learning_rate": 9.20751739049963e-06, + "loss": 0.0001, + "step": 867 + }, + { + "epoch": 0.055938647934523425, + "grad_norm": 6.445465882152348e-05, + "learning_rate": 9.20908631776102e-06, + "loss": 0.0, + "step": 868 + }, + { + "epoch": 0.0560030933814526, + "grad_norm": 0.01036632348269938, + "learning_rate": 9.21065343854272e-06, + "loss": 0.0001, + "step": 869 + }, + { + "epoch": 0.05606753882838177, + "grad_norm": 0.03182250548398591, + "learning_rate": 9.21221875699995e-06, + "loss": 0.0001, + "step": 870 + }, + { + "epoch": 0.05613198427531095, + "grad_norm": 0.0027731863618260023, + "learning_rate": 9.213782277273607e-06, + "loss": 0.0, + "step": 871 + }, + { + "epoch": 0.05619642972224012, + "grad_norm": 0.02141632130848708, + "learning_rate": 9.215344003490328e-06, + "loss": 0.0, + "step": 872 + }, + { + "epoch": 0.0562608751691693, + "grad_norm": 0.00841137676276659, + "learning_rate": 9.21690393976257e-06, + "loss": 0.0, + "step": 873 + }, + { + "epoch": 0.056325320616098476, + "grad_norm": 0.0026180638976685206, + "learning_rate": 9.21846209018866e-06, + "loss": 0.0, + "step": 874 + }, + { + "epoch": 0.056389766063027646, + "grad_norm": 0.1417267476215166, + "learning_rate": 9.220018458852871e-06, + "loss": 0.0002, + "step": 875 + }, + { + "epoch": 0.056454211509956824, + "grad_norm": 0.0016003214834285813, + "learning_rate": 9.221573049825472e-06, + "loss": 0.0, + "step": 876 + }, + { + "epoch": 0.056518656956885995, + "grad_norm": 0.0008617063924082198, + "learning_rate": 9.223125867162807e-06, + "loss": 0.0, + "step": 877 + }, + { + "epoch": 0.05658310240381517, + "grad_norm": 0.00019133509686306216, + "learning_rate": 9.224676914907353e-06, + "loss": 0.0, + "step": 878 + }, + { + "epoch": 0.05664754785074434, + "grad_norm": 0.09435838250675625, + "learning_rate": 9.226226197087778e-06, + "loss": 0.0009, + "step": 879 + }, + { + "epoch": 0.05671199329767352, + "grad_norm": 0.002137334572342066, + "learning_rate": 9.22777371771901e-06, + "loss": 0.0, + "step": 880 + }, + { + "epoch": 0.05677643874460269, + "grad_norm": 0.00500561291614537, + "learning_rate": 9.229319480802301e-06, + "loss": 0.0, + "step": 881 + }, + { + "epoch": 0.05684088419153187, + "grad_norm": 0.0018617593146569935, + "learning_rate": 9.230863490325278e-06, + "loss": 0.0, + "step": 882 + }, + { + "epoch": 0.056905329638461045, + "grad_norm": 0.003249829210823486, + "learning_rate": 9.232405750262018e-06, + "loss": 0.0, + "step": 883 + }, + { + "epoch": 0.056969775085390216, + "grad_norm": 4.155492727945956, + "learning_rate": 9.233946264573107e-06, + "loss": 0.0309, + "step": 884 + }, + { + "epoch": 0.057034220532319393, + "grad_norm": 0.0004600202922205456, + "learning_rate": 9.235485037205686e-06, + "loss": 0.0, + "step": 885 + }, + { + "epoch": 0.057098665979248564, + "grad_norm": 0.0009887524614545788, + "learning_rate": 9.237022072093532e-06, + "loss": 0.0, + "step": 886 + }, + { + "epoch": 0.05716311142617774, + "grad_norm": 0.12449591667280237, + "learning_rate": 9.238557373157111e-06, + "loss": 0.0002, + "step": 887 + }, + { + "epoch": 0.05722755687310691, + "grad_norm": 0.015552755601544609, + "learning_rate": 9.240090944303633e-06, + "loss": 0.0001, + "step": 888 + }, + { + "epoch": 0.05729200232003609, + "grad_norm": 0.23141060741991862, + "learning_rate": 9.241622789427116e-06, + "loss": 0.0004, + "step": 889 + }, + { + "epoch": 0.05735644776696527, + "grad_norm": 0.8822941904283358, + "learning_rate": 9.243152912408443e-06, + "loss": 0.0065, + "step": 890 + }, + { + "epoch": 0.05742089321389444, + "grad_norm": 0.00029090730639586807, + "learning_rate": 9.244681317115427e-06, + "loss": 0.0, + "step": 891 + }, + { + "epoch": 0.057485338660823615, + "grad_norm": 0.001579554953343648, + "learning_rate": 9.246208007402863e-06, + "loss": 0.0, + "step": 892 + }, + { + "epoch": 0.057549784107752786, + "grad_norm": 0.023213539630016298, + "learning_rate": 9.247732987112587e-06, + "loss": 0.0001, + "step": 893 + }, + { + "epoch": 0.05761422955468196, + "grad_norm": 0.0024890347909475733, + "learning_rate": 9.249256260073538e-06, + "loss": 0.0, + "step": 894 + }, + { + "epoch": 0.057678675001611134, + "grad_norm": 0.000585152467749838, + "learning_rate": 9.250777830101814e-06, + "loss": 0.0, + "step": 895 + }, + { + "epoch": 0.05774312044854031, + "grad_norm": 0.028285490200250774, + "learning_rate": 9.252297701000728e-06, + "loss": 0.0, + "step": 896 + }, + { + "epoch": 0.05780756589546949, + "grad_norm": 0.10915553690810925, + "learning_rate": 9.253815876560862e-06, + "loss": 0.0008, + "step": 897 + }, + { + "epoch": 0.05787201134239866, + "grad_norm": 0.0002786805844268407, + "learning_rate": 9.255332360560135e-06, + "loss": 0.0, + "step": 898 + }, + { + "epoch": 0.057936456789327836, + "grad_norm": 0.005925699088651387, + "learning_rate": 9.256847156763845e-06, + "loss": 0.0001, + "step": 899 + }, + { + "epoch": 0.05800090223625701, + "grad_norm": 0.0033125726030387532, + "learning_rate": 9.258360268924736e-06, + "loss": 0.0, + "step": 900 + }, + { + "epoch": 0.058065347683186185, + "grad_norm": 0.010476595702934775, + "learning_rate": 9.259871700783048e-06, + "loss": 0.0, + "step": 901 + }, + { + "epoch": 0.058129793130115355, + "grad_norm": 0.02884480152810691, + "learning_rate": 9.261381456066576e-06, + "loss": 0.0001, + "step": 902 + }, + { + "epoch": 0.05819423857704453, + "grad_norm": 0.02782736705714902, + "learning_rate": 9.26288953849072e-06, + "loss": 0.0003, + "step": 903 + }, + { + "epoch": 0.0582586840239737, + "grad_norm": 0.10276730883429447, + "learning_rate": 9.264395951758548e-06, + "loss": 0.0014, + "step": 904 + }, + { + "epoch": 0.05832312947090288, + "grad_norm": 0.021272532663555836, + "learning_rate": 9.265900699560844e-06, + "loss": 0.0, + "step": 905 + }, + { + "epoch": 0.05838757491783206, + "grad_norm": 0.32919439257374566, + "learning_rate": 9.267403785576162e-06, + "loss": 0.0004, + "step": 906 + }, + { + "epoch": 0.05845202036476123, + "grad_norm": 0.03212588290219352, + "learning_rate": 9.268905213470882e-06, + "loss": 0.0001, + "step": 907 + }, + { + "epoch": 0.058516465811690406, + "grad_norm": 0.005368526072576281, + "learning_rate": 9.270404986899269e-06, + "loss": 0.0, + "step": 908 + }, + { + "epoch": 0.05858091125861958, + "grad_norm": 0.004630751419976039, + "learning_rate": 9.271903109503515e-06, + "loss": 0.0, + "step": 909 + }, + { + "epoch": 0.058645356705548754, + "grad_norm": 0.04599784677651563, + "learning_rate": 9.273399584913803e-06, + "loss": 0.0001, + "step": 910 + }, + { + "epoch": 0.058709802152477925, + "grad_norm": 0.14592824909655777, + "learning_rate": 9.274894416748349e-06, + "loss": 0.0019, + "step": 911 + }, + { + "epoch": 0.0587742475994071, + "grad_norm": 0.165071235996826, + "learning_rate": 9.276387608613467e-06, + "loss": 0.0003, + "step": 912 + }, + { + "epoch": 0.05883869304633628, + "grad_norm": 0.0045161075821298306, + "learning_rate": 9.27787916410361e-06, + "loss": 0.0, + "step": 913 + }, + { + "epoch": 0.05890313849326545, + "grad_norm": 0.0609009449695222, + "learning_rate": 9.279369086801427e-06, + "loss": 0.0001, + "step": 914 + }, + { + "epoch": 0.05896758394019463, + "grad_norm": 0.01850613402613245, + "learning_rate": 9.28085738027782e-06, + "loss": 0.0001, + "step": 915 + }, + { + "epoch": 0.0590320293871238, + "grad_norm": 0.15463124032407352, + "learning_rate": 9.28234404809198e-06, + "loss": 0.0007, + "step": 916 + }, + { + "epoch": 0.059096474834052976, + "grad_norm": 0.003664506187491668, + "learning_rate": 9.283829093791457e-06, + "loss": 0.0, + "step": 917 + }, + { + "epoch": 0.059160920280982146, + "grad_norm": 0.00880322416467816, + "learning_rate": 9.285312520912193e-06, + "loss": 0.0001, + "step": 918 + }, + { + "epoch": 0.059225365727911324, + "grad_norm": 3.7186074109349265, + "learning_rate": 9.286794332978585e-06, + "loss": 0.0237, + "step": 919 + }, + { + "epoch": 0.059289811174840494, + "grad_norm": 8.817468231810405e-05, + "learning_rate": 9.288274533503533e-06, + "loss": 0.0, + "step": 920 + }, + { + "epoch": 0.05935425662176967, + "grad_norm": 0.004592331110218876, + "learning_rate": 9.289753125988485e-06, + "loss": 0.0, + "step": 921 + }, + { + "epoch": 0.05941870206869885, + "grad_norm": 1.0849440167306779, + "learning_rate": 9.291230113923492e-06, + "loss": 0.0055, + "step": 922 + }, + { + "epoch": 0.05948314751562802, + "grad_norm": 0.0024849397582142455, + "learning_rate": 9.29270550078725e-06, + "loss": 0.0, + "step": 923 + }, + { + "epoch": 0.0595475929625572, + "grad_norm": 0.9088584914373267, + "learning_rate": 9.294179290047164e-06, + "loss": 0.0059, + "step": 924 + }, + { + "epoch": 0.05961203840948637, + "grad_norm": 0.00017863757655568402, + "learning_rate": 9.29565148515938e-06, + "loss": 0.0, + "step": 925 + }, + { + "epoch": 0.059676483856415545, + "grad_norm": 0.16478213790474924, + "learning_rate": 9.29712208956884e-06, + "loss": 0.0028, + "step": 926 + }, + { + "epoch": 0.059740929303344716, + "grad_norm": 0.009325136506692883, + "learning_rate": 9.298591106709336e-06, + "loss": 0.0, + "step": 927 + }, + { + "epoch": 0.05980537475027389, + "grad_norm": 0.0018313403949757983, + "learning_rate": 9.300058540003553e-06, + "loss": 0.0, + "step": 928 + }, + { + "epoch": 0.05986982019720307, + "grad_norm": 0.036417811764517145, + "learning_rate": 9.301524392863113e-06, + "loss": 0.0001, + "step": 929 + }, + { + "epoch": 0.05993426564413224, + "grad_norm": 1.021395560902065, + "learning_rate": 9.30298866868863e-06, + "loss": 0.0049, + "step": 930 + }, + { + "epoch": 0.05999871109106142, + "grad_norm": 0.0024827359936627643, + "learning_rate": 9.304451370869755e-06, + "loss": 0.0, + "step": 931 + }, + { + "epoch": 0.06006315653799059, + "grad_norm": 0.7179352247112939, + "learning_rate": 9.30591250278522e-06, + "loss": 0.0021, + "step": 932 + }, + { + "epoch": 0.06012760198491977, + "grad_norm": 0.0631933717664277, + "learning_rate": 9.307372067802889e-06, + "loss": 0.0003, + "step": 933 + }, + { + "epoch": 0.06019204743184894, + "grad_norm": 0.060907736335031146, + "learning_rate": 9.308830069279799e-06, + "loss": 0.0004, + "step": 934 + }, + { + "epoch": 0.060256492878778115, + "grad_norm": 0.0008149091579210628, + "learning_rate": 9.310286510562214e-06, + "loss": 0.0, + "step": 935 + }, + { + "epoch": 0.06032093832570729, + "grad_norm": 0.006663428570939788, + "learning_rate": 9.311741394985666e-06, + "loss": 0.0, + "step": 936 + }, + { + "epoch": 0.06038538377263646, + "grad_norm": 0.12337773360999044, + "learning_rate": 9.313194725875004e-06, + "loss": 0.001, + "step": 937 + }, + { + "epoch": 0.06044982921956564, + "grad_norm": 0.19178069720437993, + "learning_rate": 9.31464650654443e-06, + "loss": 0.0005, + "step": 938 + }, + { + "epoch": 0.06051427466649481, + "grad_norm": 0.0015436894352676542, + "learning_rate": 9.316096740297561e-06, + "loss": 0.0, + "step": 939 + }, + { + "epoch": 0.06057872011342399, + "grad_norm": 0.15529906463269208, + "learning_rate": 9.317545430427458e-06, + "loss": 0.0009, + "step": 940 + }, + { + "epoch": 0.06064316556035316, + "grad_norm": 0.15753065047540682, + "learning_rate": 9.318992580216683e-06, + "loss": 0.0025, + "step": 941 + }, + { + "epoch": 0.060707611007282336, + "grad_norm": 0.0037847305096929324, + "learning_rate": 9.320438192937334e-06, + "loss": 0.0, + "step": 942 + }, + { + "epoch": 0.06077205645421151, + "grad_norm": 0.560417957966202, + "learning_rate": 9.321882271851098e-06, + "loss": 0.0015, + "step": 943 + }, + { + "epoch": 0.060836501901140684, + "grad_norm": 0.0015783399253200654, + "learning_rate": 9.323324820209286e-06, + "loss": 0.0, + "step": 944 + }, + { + "epoch": 0.06090094734806986, + "grad_norm": 0.08633490002918111, + "learning_rate": 9.32476584125289e-06, + "loss": 0.0003, + "step": 945 + }, + { + "epoch": 0.06096539279499903, + "grad_norm": 0.17250897245119487, + "learning_rate": 9.326205338212606e-06, + "loss": 0.0006, + "step": 946 + }, + { + "epoch": 0.06102983824192821, + "grad_norm": 0.11453406446909774, + "learning_rate": 9.327643314308903e-06, + "loss": 0.0003, + "step": 947 + }, + { + "epoch": 0.06109428368885738, + "grad_norm": 0.3036814978949101, + "learning_rate": 9.329079772752047e-06, + "loss": 0.0015, + "step": 948 + }, + { + "epoch": 0.06115872913578656, + "grad_norm": 0.011376020798526345, + "learning_rate": 9.33051471674215e-06, + "loss": 0.0, + "step": 949 + }, + { + "epoch": 0.06122317458271573, + "grad_norm": 0.012309696487391423, + "learning_rate": 9.331948149469212e-06, + "loss": 0.0001, + "step": 950 + }, + { + "epoch": 0.061287620029644906, + "grad_norm": 0.006174902085906057, + "learning_rate": 9.33338007411317e-06, + "loss": 0.0, + "step": 951 + }, + { + "epoch": 0.06135206547657408, + "grad_norm": 0.19661972321282462, + "learning_rate": 9.33481049384393e-06, + "loss": 0.0008, + "step": 952 + }, + { + "epoch": 0.061416510923503254, + "grad_norm": 0.005731411898032642, + "learning_rate": 9.336239411821416e-06, + "loss": 0.0, + "step": 953 + }, + { + "epoch": 0.06148095637043243, + "grad_norm": 0.6383001896675805, + "learning_rate": 9.33766683119561e-06, + "loss": 0.004, + "step": 954 + }, + { + "epoch": 0.0615454018173616, + "grad_norm": 0.0018123395465073323, + "learning_rate": 9.33909275510659e-06, + "loss": 0.0, + "step": 955 + }, + { + "epoch": 0.06160984726429078, + "grad_norm": 0.5576655806137351, + "learning_rate": 9.340517186684582e-06, + "loss": 0.002, + "step": 956 + }, + { + "epoch": 0.06167429271121995, + "grad_norm": 0.11143409554222802, + "learning_rate": 9.34194012904999e-06, + "loss": 0.0011, + "step": 957 + }, + { + "epoch": 0.06173873815814913, + "grad_norm": 0.163548453280282, + "learning_rate": 9.34336158531344e-06, + "loss": 0.0002, + "step": 958 + }, + { + "epoch": 0.061803183605078305, + "grad_norm": 0.0179151870815518, + "learning_rate": 9.344781558575823e-06, + "loss": 0.0, + "step": 959 + }, + { + "epoch": 0.061867629052007475, + "grad_norm": 0.00805664830135211, + "learning_rate": 9.346200051928338e-06, + "loss": 0.0001, + "step": 960 + }, + { + "epoch": 0.06193207449893665, + "grad_norm": 0.035669773452513494, + "learning_rate": 9.347617068452525e-06, + "loss": 0.0001, + "step": 961 + }, + { + "epoch": 0.06199651994586582, + "grad_norm": 0.016276996306397545, + "learning_rate": 9.34903261122031e-06, + "loss": 0.0001, + "step": 962 + }, + { + "epoch": 0.062060965392795, + "grad_norm": 0.006962232185746814, + "learning_rate": 9.350446683294047e-06, + "loss": 0.0, + "step": 963 + }, + { + "epoch": 0.06212541083972417, + "grad_norm": 0.017856100377398786, + "learning_rate": 9.35185928772655e-06, + "loss": 0.0, + "step": 964 + }, + { + "epoch": 0.06218985628665335, + "grad_norm": 0.25958305769630685, + "learning_rate": 9.353270427561143e-06, + "loss": 0.0011, + "step": 965 + }, + { + "epoch": 0.06225430173358252, + "grad_norm": 0.06486360650927218, + "learning_rate": 9.354680105831685e-06, + "loss": 0.0001, + "step": 966 + }, + { + "epoch": 0.0623187471805117, + "grad_norm": 3.134478879749426, + "learning_rate": 9.356088325562633e-06, + "loss": 0.0352, + "step": 967 + }, + { + "epoch": 0.062383192627440874, + "grad_norm": 0.19477207757671616, + "learning_rate": 9.35749508976905e-06, + "loss": 0.0002, + "step": 968 + }, + { + "epoch": 0.062447638074370045, + "grad_norm": 0.0303179640337168, + "learning_rate": 9.35890040145667e-06, + "loss": 0.0, + "step": 969 + }, + { + "epoch": 0.06251208352129922, + "grad_norm": 0.22131384688029207, + "learning_rate": 9.360304263621919e-06, + "loss": 0.0006, + "step": 970 + }, + { + "epoch": 0.06257652896822839, + "grad_norm": 0.021000072329624038, + "learning_rate": 9.361706679251967e-06, + "loss": 0.0, + "step": 971 + }, + { + "epoch": 0.06264097441515756, + "grad_norm": 0.017301594668236765, + "learning_rate": 9.363107651324754e-06, + "loss": 0.0, + "step": 972 + }, + { + "epoch": 0.06270541986208675, + "grad_norm": 0.005622783541268775, + "learning_rate": 9.364507182809037e-06, + "loss": 0.0, + "step": 973 + }, + { + "epoch": 0.06276986530901592, + "grad_norm": 0.23653247762425234, + "learning_rate": 9.365905276664425e-06, + "loss": 0.0012, + "step": 974 + }, + { + "epoch": 0.06283431075594509, + "grad_norm": 0.0019284558066157138, + "learning_rate": 9.367301935841413e-06, + "loss": 0.0, + "step": 975 + }, + { + "epoch": 0.06289875620287427, + "grad_norm": 0.013986864143152953, + "learning_rate": 9.368697163281422e-06, + "loss": 0.0, + "step": 976 + }, + { + "epoch": 0.06296320164980344, + "grad_norm": 0.0022328048482544325, + "learning_rate": 9.37009096191684e-06, + "loss": 0.0, + "step": 977 + }, + { + "epoch": 0.06302764709673261, + "grad_norm": 0.05739005222315148, + "learning_rate": 9.371483334671052e-06, + "loss": 0.0001, + "step": 978 + }, + { + "epoch": 0.06309209254366178, + "grad_norm": 0.0010009926174638238, + "learning_rate": 9.372874284458484e-06, + "loss": 0.0, + "step": 979 + }, + { + "epoch": 0.06315653799059097, + "grad_norm": 0.002213090174786511, + "learning_rate": 9.374263814184626e-06, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 0.06322098343752014, + "grad_norm": 0.037727668382915094, + "learning_rate": 9.375651926746093e-06, + "loss": 0.0, + "step": 981 + }, + { + "epoch": 0.06328542888444931, + "grad_norm": 0.5743290801677684, + "learning_rate": 9.377038625030634e-06, + "loss": 0.0035, + "step": 982 + }, + { + "epoch": 0.0633498743313785, + "grad_norm": 0.2047254163261985, + "learning_rate": 9.378423911917185e-06, + "loss": 0.0007, + "step": 983 + }, + { + "epoch": 0.06341431977830767, + "grad_norm": 0.001514966733781814, + "learning_rate": 9.379807790275902e-06, + "loss": 0.0, + "step": 984 + }, + { + "epoch": 0.06347876522523684, + "grad_norm": 0.025498008379182133, + "learning_rate": 9.381190262968194e-06, + "loss": 0.0001, + "step": 985 + }, + { + "epoch": 0.063543210672166, + "grad_norm": 0.009340591807369656, + "learning_rate": 9.382571332846755e-06, + "loss": 0.0, + "step": 986 + }, + { + "epoch": 0.06360765611909519, + "grad_norm": 0.001099268463229261, + "learning_rate": 9.383951002755613e-06, + "loss": 0.0, + "step": 987 + }, + { + "epoch": 0.06367210156602436, + "grad_norm": 0.21566121032299157, + "learning_rate": 9.385329275530144e-06, + "loss": 0.0006, + "step": 988 + }, + { + "epoch": 0.06373654701295353, + "grad_norm": 0.0011949991086739977, + "learning_rate": 9.38670615399713e-06, + "loss": 0.0, + "step": 989 + }, + { + "epoch": 0.06380099245988272, + "grad_norm": 0.001856079925364757, + "learning_rate": 9.388081640974775e-06, + "loss": 0.0, + "step": 990 + }, + { + "epoch": 0.06386543790681189, + "grad_norm": 0.618543390978522, + "learning_rate": 9.389455739272754e-06, + "loss": 0.0067, + "step": 991 + }, + { + "epoch": 0.06392988335374106, + "grad_norm": 0.00102082338563046, + "learning_rate": 9.390828451692232e-06, + "loss": 0.0, + "step": 992 + }, + { + "epoch": 0.06399432880067023, + "grad_norm": 0.0004849908016925305, + "learning_rate": 9.392199781025916e-06, + "loss": 0.0, + "step": 993 + }, + { + "epoch": 0.06405877424759941, + "grad_norm": 0.006964614517327541, + "learning_rate": 9.393569730058076e-06, + "loss": 0.0, + "step": 994 + }, + { + "epoch": 0.06412321969452858, + "grad_norm": 0.0680559337719936, + "learning_rate": 9.394938301564581e-06, + "loss": 0.0001, + "step": 995 + }, + { + "epoch": 0.06418766514145775, + "grad_norm": 0.02046442622393038, + "learning_rate": 9.396305498312936e-06, + "loss": 0.0001, + "step": 996 + }, + { + "epoch": 0.06425211058838692, + "grad_norm": 0.022657457270191042, + "learning_rate": 9.397671323062319e-06, + "loss": 0.0002, + "step": 997 + }, + { + "epoch": 0.06431655603531611, + "grad_norm": 0.0010187417482833032, + "learning_rate": 9.399035778563601e-06, + "loss": 0.0, + "step": 998 + }, + { + "epoch": 0.06438100148224528, + "grad_norm": 0.011742133910265604, + "learning_rate": 9.400398867559398e-06, + "loss": 0.0001, + "step": 999 + }, + { + "epoch": 0.06444544692917445, + "grad_norm": 0.013586182883457138, + "learning_rate": 9.401760592784085e-06, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 0.06450989237610363, + "grad_norm": 0.2551176516648852, + "learning_rate": 9.403120956963842e-06, + "loss": 0.0034, + "step": 1001 + }, + { + "epoch": 0.0645743378230328, + "grad_norm": 0.03086066982715626, + "learning_rate": 9.404479962816683e-06, + "loss": 0.0001, + "step": 1002 + }, + { + "epoch": 0.06463878326996197, + "grad_norm": 0.2412145436747094, + "learning_rate": 9.405837613052491e-06, + "loss": 0.0006, + "step": 1003 + }, + { + "epoch": 0.06470322871689115, + "grad_norm": 0.037208169947243144, + "learning_rate": 9.407193910373039e-06, + "loss": 0.0004, + "step": 1004 + }, + { + "epoch": 0.06476767416382033, + "grad_norm": 0.0024333425810042523, + "learning_rate": 9.40854885747204e-06, + "loss": 0.0, + "step": 1005 + }, + { + "epoch": 0.0648321196107495, + "grad_norm": 0.08621493714841584, + "learning_rate": 9.409902457035166e-06, + "loss": 0.0002, + "step": 1006 + }, + { + "epoch": 0.06489656505767867, + "grad_norm": 0.02889134757488118, + "learning_rate": 9.411254711740086e-06, + "loss": 0.0004, + "step": 1007 + }, + { + "epoch": 0.06496101050460786, + "grad_norm": 0.228104563999611, + "learning_rate": 9.412605624256493e-06, + "loss": 0.0019, + "step": 1008 + }, + { + "epoch": 0.06502545595153703, + "grad_norm": 0.015025817973177263, + "learning_rate": 9.413955197246137e-06, + "loss": 0.0001, + "step": 1009 + }, + { + "epoch": 0.0650899013984662, + "grad_norm": 0.8140691497003968, + "learning_rate": 9.415303433362863e-06, + "loss": 0.0038, + "step": 1010 + }, + { + "epoch": 0.06515434684539537, + "grad_norm": 0.004443188188472599, + "learning_rate": 9.416650335252638e-06, + "loss": 0.0, + "step": 1011 + }, + { + "epoch": 0.06521879229232455, + "grad_norm": 0.16307514977324722, + "learning_rate": 9.417995905553573e-06, + "loss": 0.0004, + "step": 1012 + }, + { + "epoch": 0.06528323773925372, + "grad_norm": 0.08674513548595156, + "learning_rate": 9.419340146895971e-06, + "loss": 0.0003, + "step": 1013 + }, + { + "epoch": 0.06534768318618289, + "grad_norm": 0.0863520286012186, + "learning_rate": 9.420683061902345e-06, + "loss": 0.0004, + "step": 1014 + }, + { + "epoch": 0.06541212863311208, + "grad_norm": 0.0015631516269431248, + "learning_rate": 9.422024653187451e-06, + "loss": 0.0, + "step": 1015 + }, + { + "epoch": 0.06547657408004125, + "grad_norm": 0.0009583769723558712, + "learning_rate": 9.423364923358329e-06, + "loss": 0.0, + "step": 1016 + }, + { + "epoch": 0.06554101952697042, + "grad_norm": 0.3525857656721515, + "learning_rate": 9.424703875014313e-06, + "loss": 0.0033, + "step": 1017 + }, + { + "epoch": 0.06560546497389959, + "grad_norm": 0.01980719319717165, + "learning_rate": 9.426041510747082e-06, + "loss": 0.0001, + "step": 1018 + }, + { + "epoch": 0.06566991042082877, + "grad_norm": 0.05776367316483845, + "learning_rate": 9.42737783314068e-06, + "loss": 0.0019, + "step": 1019 + }, + { + "epoch": 0.06573435586775794, + "grad_norm": 0.029714163614339636, + "learning_rate": 9.42871284477154e-06, + "loss": 0.0001, + "step": 1020 + }, + { + "epoch": 0.06579880131468711, + "grad_norm": 0.1090029194822556, + "learning_rate": 9.430046548208533e-06, + "loss": 0.0002, + "step": 1021 + }, + { + "epoch": 0.0658632467616163, + "grad_norm": 0.79815705332845, + "learning_rate": 9.431378946012973e-06, + "loss": 0.0021, + "step": 1022 + }, + { + "epoch": 0.06592769220854547, + "grad_norm": 0.12051999387583368, + "learning_rate": 9.432710040738669e-06, + "loss": 0.0002, + "step": 1023 + }, + { + "epoch": 0.06599213765547464, + "grad_norm": 0.009057564683849412, + "learning_rate": 9.434039834931941e-06, + "loss": 0.0001, + "step": 1024 + }, + { + "epoch": 0.06605658310240381, + "grad_norm": 0.0362359697607633, + "learning_rate": 9.43536833113165e-06, + "loss": 0.0001, + "step": 1025 + }, + { + "epoch": 0.066121028549333, + "grad_norm": 0.029041901339865995, + "learning_rate": 9.436695531869232e-06, + "loss": 0.0001, + "step": 1026 + }, + { + "epoch": 0.06618547399626216, + "grad_norm": 0.005182049662164661, + "learning_rate": 9.438021439668724e-06, + "loss": 0.0, + "step": 1027 + }, + { + "epoch": 0.06624991944319134, + "grad_norm": 0.005158991102843603, + "learning_rate": 9.439346057046797e-06, + "loss": 0.0001, + "step": 1028 + }, + { + "epoch": 0.06631436489012052, + "grad_norm": 0.4060666330698772, + "learning_rate": 9.44066938651278e-06, + "loss": 0.0057, + "step": 1029 + }, + { + "epoch": 0.06637881033704969, + "grad_norm": 0.0013174037296815595, + "learning_rate": 9.441991430568686e-06, + "loss": 0.0, + "step": 1030 + }, + { + "epoch": 0.06644325578397886, + "grad_norm": 0.0033868625919964246, + "learning_rate": 9.443312191709244e-06, + "loss": 0.0, + "step": 1031 + }, + { + "epoch": 0.06650770123090803, + "grad_norm": 0.006117924134115885, + "learning_rate": 9.444631672421934e-06, + "loss": 0.0001, + "step": 1032 + }, + { + "epoch": 0.06657214667783722, + "grad_norm": 0.13950942529862392, + "learning_rate": 9.445949875187002e-06, + "loss": 0.0019, + "step": 1033 + }, + { + "epoch": 0.06663659212476639, + "grad_norm": 0.01851590001806397, + "learning_rate": 9.447266802477499e-06, + "loss": 0.0004, + "step": 1034 + }, + { + "epoch": 0.06670103757169556, + "grad_norm": 0.0016811966371559576, + "learning_rate": 9.448582456759298e-06, + "loss": 0.0, + "step": 1035 + }, + { + "epoch": 0.06676548301862473, + "grad_norm": 0.1653637147615305, + "learning_rate": 9.449896840491135e-06, + "loss": 0.0022, + "step": 1036 + }, + { + "epoch": 0.06682992846555391, + "grad_norm": 0.022010529514772826, + "learning_rate": 9.451209956124626e-06, + "loss": 0.0001, + "step": 1037 + }, + { + "epoch": 0.06689437391248308, + "grad_norm": 0.0071250187018150595, + "learning_rate": 9.452521806104297e-06, + "loss": 0.0, + "step": 1038 + }, + { + "epoch": 0.06695881935941225, + "grad_norm": 0.02253860670889963, + "learning_rate": 9.453832392867618e-06, + "loss": 0.0002, + "step": 1039 + }, + { + "epoch": 0.06702326480634144, + "grad_norm": 0.024827336967876906, + "learning_rate": 9.455141718845016e-06, + "loss": 0.0001, + "step": 1040 + }, + { + "epoch": 0.06708771025327061, + "grad_norm": 0.028985878209440076, + "learning_rate": 9.456449786459916e-06, + "loss": 0.0001, + "step": 1041 + }, + { + "epoch": 0.06715215570019978, + "grad_norm": 0.06560310114761658, + "learning_rate": 9.45775659812876e-06, + "loss": 0.0002, + "step": 1042 + }, + { + "epoch": 0.06721660114712895, + "grad_norm": 0.007292826751741267, + "learning_rate": 9.459062156261041e-06, + "loss": 0.0, + "step": 1043 + }, + { + "epoch": 0.06728104659405813, + "grad_norm": 0.001488974553572595, + "learning_rate": 9.460366463259316e-06, + "loss": 0.0, + "step": 1044 + }, + { + "epoch": 0.0673454920409873, + "grad_norm": 0.058802188234950835, + "learning_rate": 9.461669521519253e-06, + "loss": 0.0001, + "step": 1045 + }, + { + "epoch": 0.06740993748791647, + "grad_norm": 0.015528356733507939, + "learning_rate": 9.462971333429634e-06, + "loss": 0.0, + "step": 1046 + }, + { + "epoch": 0.06747438293484566, + "grad_norm": 0.010762498561983332, + "learning_rate": 9.4642719013724e-06, + "loss": 0.0001, + "step": 1047 + }, + { + "epoch": 0.06753882838177483, + "grad_norm": 0.027761143204603284, + "learning_rate": 9.46557122772267e-06, + "loss": 0.0001, + "step": 1048 + }, + { + "epoch": 0.067603273828704, + "grad_norm": 0.13193905647539747, + "learning_rate": 9.466869314848766e-06, + "loss": 0.0001, + "step": 1049 + }, + { + "epoch": 0.06766771927563317, + "grad_norm": 0.06437447835886814, + "learning_rate": 9.468166165112236e-06, + "loss": 0.0, + "step": 1050 + }, + { + "epoch": 0.06773216472256235, + "grad_norm": 0.39598322422926224, + "learning_rate": 9.469461780867895e-06, + "loss": 0.0015, + "step": 1051 + }, + { + "epoch": 0.06779661016949153, + "grad_norm": 0.020346673344546363, + "learning_rate": 9.470756164463827e-06, + "loss": 0.0001, + "step": 1052 + }, + { + "epoch": 0.0678610556164207, + "grad_norm": 0.0005355693822581008, + "learning_rate": 9.472049318241431e-06, + "loss": 0.0, + "step": 1053 + }, + { + "epoch": 0.06792550106334988, + "grad_norm": 0.025392949474745102, + "learning_rate": 9.473341244535435e-06, + "loss": 0.0, + "step": 1054 + }, + { + "epoch": 0.06798994651027905, + "grad_norm": 0.040790192372236425, + "learning_rate": 9.474631945673927e-06, + "loss": 0.0003, + "step": 1055 + }, + { + "epoch": 0.06805439195720822, + "grad_norm": 0.033763923209698024, + "learning_rate": 9.475921423978379e-06, + "loss": 0.0001, + "step": 1056 + }, + { + "epoch": 0.06811883740413739, + "grad_norm": 0.020406697261917767, + "learning_rate": 9.477209681763664e-06, + "loss": 0.0, + "step": 1057 + }, + { + "epoch": 0.06818328285106658, + "grad_norm": 0.0010645021910689245, + "learning_rate": 9.478496721338096e-06, + "loss": 0.0, + "step": 1058 + }, + { + "epoch": 0.06824772829799575, + "grad_norm": 0.05344905297559554, + "learning_rate": 9.479782545003442e-06, + "loss": 0.0006, + "step": 1059 + }, + { + "epoch": 0.06831217374492492, + "grad_norm": 0.22296419932981829, + "learning_rate": 9.481067155054957e-06, + "loss": 0.0005, + "step": 1060 + }, + { + "epoch": 0.0683766191918541, + "grad_norm": 0.0006634595339300624, + "learning_rate": 9.482350553781397e-06, + "loss": 0.0, + "step": 1061 + }, + { + "epoch": 0.06844106463878327, + "grad_norm": 0.15285856729547703, + "learning_rate": 9.48363274346505e-06, + "loss": 0.0003, + "step": 1062 + }, + { + "epoch": 0.06850551008571244, + "grad_norm": 0.030757339862208403, + "learning_rate": 9.484913726381766e-06, + "loss": 0.0002, + "step": 1063 + }, + { + "epoch": 0.06856995553264161, + "grad_norm": 0.20174093222005063, + "learning_rate": 9.486193504800969e-06, + "loss": 0.0006, + "step": 1064 + }, + { + "epoch": 0.0686344009795708, + "grad_norm": 0.030571583646065718, + "learning_rate": 9.487472080985686e-06, + "loss": 0.0001, + "step": 1065 + }, + { + "epoch": 0.06869884642649997, + "grad_norm": 0.0009779026849065075, + "learning_rate": 9.48874945719258e-06, + "loss": 0.0, + "step": 1066 + }, + { + "epoch": 0.06876329187342914, + "grad_norm": 0.00596599383421079, + "learning_rate": 9.49002563567196e-06, + "loss": 0.0001, + "step": 1067 + }, + { + "epoch": 0.06882773732035832, + "grad_norm": 0.12309505639517282, + "learning_rate": 9.49130061866781e-06, + "loss": 0.002, + "step": 1068 + }, + { + "epoch": 0.0688921827672875, + "grad_norm": 0.00540315072705124, + "learning_rate": 9.492574408417818e-06, + "loss": 0.0, + "step": 1069 + }, + { + "epoch": 0.06895662821421666, + "grad_norm": 0.0008161722316358159, + "learning_rate": 9.493847007153396e-06, + "loss": 0.0, + "step": 1070 + }, + { + "epoch": 0.06902107366114583, + "grad_norm": 0.0047131316080134674, + "learning_rate": 9.495118417099695e-06, + "loss": 0.0, + "step": 1071 + }, + { + "epoch": 0.06908551910807502, + "grad_norm": 0.051150246774193775, + "learning_rate": 9.496388640475643e-06, + "loss": 0.0002, + "step": 1072 + }, + { + "epoch": 0.06914996455500419, + "grad_norm": 0.0019308822291618391, + "learning_rate": 9.49765767949396e-06, + "loss": 0.0, + "step": 1073 + }, + { + "epoch": 0.06921441000193336, + "grad_norm": 0.006845832426320013, + "learning_rate": 9.498925536361181e-06, + "loss": 0.0001, + "step": 1074 + }, + { + "epoch": 0.06927885544886254, + "grad_norm": 0.07802347799089825, + "learning_rate": 9.500192213277679e-06, + "loss": 0.0001, + "step": 1075 + }, + { + "epoch": 0.06934330089579172, + "grad_norm": 0.06181839032778164, + "learning_rate": 9.501457712437695e-06, + "loss": 0.0, + "step": 1076 + }, + { + "epoch": 0.06940774634272089, + "grad_norm": 0.005407418144409315, + "learning_rate": 9.502722036029348e-06, + "loss": 0.0, + "step": 1077 + }, + { + "epoch": 0.06947219178965006, + "grad_norm": 0.013561958584136294, + "learning_rate": 9.503985186234666e-06, + "loss": 0.0016, + "step": 1078 + }, + { + "epoch": 0.06953663723657924, + "grad_norm": 0.041943084762526915, + "learning_rate": 9.505247165229614e-06, + "loss": 0.0001, + "step": 1079 + }, + { + "epoch": 0.06960108268350841, + "grad_norm": 0.4291865810230572, + "learning_rate": 9.506507975184103e-06, + "loss": 0.0015, + "step": 1080 + }, + { + "epoch": 0.06966552813043758, + "grad_norm": 0.18459478347266867, + "learning_rate": 9.507767618262019e-06, + "loss": 0.0009, + "step": 1081 + }, + { + "epoch": 0.06972997357736675, + "grad_norm": 0.55468446435232, + "learning_rate": 9.509026096621253e-06, + "loss": 0.0038, + "step": 1082 + }, + { + "epoch": 0.06979441902429594, + "grad_norm": 0.007788673964636509, + "learning_rate": 9.510283412413708e-06, + "loss": 0.0001, + "step": 1083 + }, + { + "epoch": 0.06985886447122511, + "grad_norm": 0.005881688910034065, + "learning_rate": 9.511539567785331e-06, + "loss": 0.0, + "step": 1084 + }, + { + "epoch": 0.06992330991815428, + "grad_norm": 0.010063309122665664, + "learning_rate": 9.512794564876132e-06, + "loss": 0.0, + "step": 1085 + }, + { + "epoch": 0.06998775536508346, + "grad_norm": 0.16376256774401285, + "learning_rate": 9.51404840582021e-06, + "loss": 0.0003, + "step": 1086 + }, + { + "epoch": 0.07005220081201263, + "grad_norm": 0.012820581196048885, + "learning_rate": 9.51530109274577e-06, + "loss": 0.0001, + "step": 1087 + }, + { + "epoch": 0.0701166462589418, + "grad_norm": 0.3957807941035052, + "learning_rate": 9.516552627775143e-06, + "loss": 0.0029, + "step": 1088 + }, + { + "epoch": 0.07018109170587097, + "grad_norm": 0.021148173145825185, + "learning_rate": 9.517803013024814e-06, + "loss": 0.0001, + "step": 1089 + }, + { + "epoch": 0.07024553715280016, + "grad_norm": 0.005577600092877411, + "learning_rate": 9.51905225060544e-06, + "loss": 0.0001, + "step": 1090 + }, + { + "epoch": 0.07030998259972933, + "grad_norm": 0.00015751397743556044, + "learning_rate": 9.52030034262187e-06, + "loss": 0.0, + "step": 1091 + }, + { + "epoch": 0.0703744280466585, + "grad_norm": 0.002895350355714667, + "learning_rate": 9.521547291173168e-06, + "loss": 0.0, + "step": 1092 + }, + { + "epoch": 0.07043887349358768, + "grad_norm": 0.0023124064473402568, + "learning_rate": 9.522793098352637e-06, + "loss": 0.0, + "step": 1093 + }, + { + "epoch": 0.07050331894051685, + "grad_norm": 0.20981965745450876, + "learning_rate": 9.524037766247829e-06, + "loss": 0.0017, + "step": 1094 + }, + { + "epoch": 0.07056776438744602, + "grad_norm": 0.016446824720652117, + "learning_rate": 9.525281296940584e-06, + "loss": 0.0, + "step": 1095 + }, + { + "epoch": 0.0706322098343752, + "grad_norm": 0.13535662307196408, + "learning_rate": 9.526523692507037e-06, + "loss": 0.0005, + "step": 1096 + }, + { + "epoch": 0.07069665528130438, + "grad_norm": 0.030537059287996025, + "learning_rate": 9.527764955017639e-06, + "loss": 0.0, + "step": 1097 + }, + { + "epoch": 0.07076110072823355, + "grad_norm": 0.014719741351277675, + "learning_rate": 9.529005086537187e-06, + "loss": 0.0001, + "step": 1098 + }, + { + "epoch": 0.07082554617516272, + "grad_norm": 0.22880329055924983, + "learning_rate": 9.530244089124837e-06, + "loss": 0.0012, + "step": 1099 + }, + { + "epoch": 0.0708899916220919, + "grad_norm": 0.00448704140852509, + "learning_rate": 9.531481964834122e-06, + "loss": 0.0, + "step": 1100 + }, + { + "epoch": 0.07095443706902108, + "grad_norm": 0.009209617813428274, + "learning_rate": 9.532718715712988e-06, + "loss": 0.0, + "step": 1101 + }, + { + "epoch": 0.07101888251595025, + "grad_norm": 0.09858833858855938, + "learning_rate": 9.533954343803793e-06, + "loss": 0.001, + "step": 1102 + }, + { + "epoch": 0.07108332796287942, + "grad_norm": 0.0003334525690795657, + "learning_rate": 9.535188851143343e-06, + "loss": 0.0, + "step": 1103 + }, + { + "epoch": 0.0711477734098086, + "grad_norm": 0.001099738770532882, + "learning_rate": 9.536422239762899e-06, + "loss": 0.0, + "step": 1104 + }, + { + "epoch": 0.07121221885673777, + "grad_norm": 0.012566891242134792, + "learning_rate": 9.537654511688219e-06, + "loss": 0.0, + "step": 1105 + }, + { + "epoch": 0.07127666430366694, + "grad_norm": 0.029662996653041767, + "learning_rate": 9.538885668939549e-06, + "loss": 0.0001, + "step": 1106 + }, + { + "epoch": 0.07134110975059613, + "grad_norm": 0.0039183110992792244, + "learning_rate": 9.540115713531667e-06, + "loss": 0.0, + "step": 1107 + }, + { + "epoch": 0.0714055551975253, + "grad_norm": 0.06460220983321396, + "learning_rate": 9.541344647473889e-06, + "loss": 0.0006, + "step": 1108 + }, + { + "epoch": 0.07147000064445447, + "grad_norm": 0.01392801660340169, + "learning_rate": 9.542572472770095e-06, + "loss": 0.0001, + "step": 1109 + }, + { + "epoch": 0.07153444609138364, + "grad_norm": 0.0075267292844853, + "learning_rate": 9.543799191418745e-06, + "loss": 0.0001, + "step": 1110 + }, + { + "epoch": 0.07159889153831282, + "grad_norm": 0.4668644333477323, + "learning_rate": 9.545024805412904e-06, + "loss": 0.0024, + "step": 1111 + }, + { + "epoch": 0.071663336985242, + "grad_norm": 0.04064895583802354, + "learning_rate": 9.546249316740252e-06, + "loss": 0.0001, + "step": 1112 + }, + { + "epoch": 0.07172778243217116, + "grad_norm": 0.01834313036665756, + "learning_rate": 9.54747272738311e-06, + "loss": 0.0, + "step": 1113 + }, + { + "epoch": 0.07179222787910035, + "grad_norm": 0.15668289299222576, + "learning_rate": 9.548695039318467e-06, + "loss": 0.0001, + "step": 1114 + }, + { + "epoch": 0.07185667332602952, + "grad_norm": 0.04903259619234823, + "learning_rate": 9.549916254517975e-06, + "loss": 0.0001, + "step": 1115 + }, + { + "epoch": 0.07192111877295869, + "grad_norm": 0.023982815736462482, + "learning_rate": 9.551136374947997e-06, + "loss": 0.0, + "step": 1116 + }, + { + "epoch": 0.07198556421988786, + "grad_norm": 0.0032611786740441946, + "learning_rate": 9.552355402569607e-06, + "loss": 0.0, + "step": 1117 + }, + { + "epoch": 0.07205000966681704, + "grad_norm": 0.03591521896676965, + "learning_rate": 9.553573339338611e-06, + "loss": 0.0001, + "step": 1118 + }, + { + "epoch": 0.07211445511374621, + "grad_norm": 0.002452457086472786, + "learning_rate": 9.554790187205577e-06, + "loss": 0.0, + "step": 1119 + }, + { + "epoch": 0.07217890056067539, + "grad_norm": 0.005269897617250751, + "learning_rate": 9.55600594811584e-06, + "loss": 0.0, + "step": 1120 + }, + { + "epoch": 0.07224334600760456, + "grad_norm": 0.4053084576654436, + "learning_rate": 9.557220624009528e-06, + "loss": 0.0009, + "step": 1121 + }, + { + "epoch": 0.07230779145453374, + "grad_norm": 0.04937713962457996, + "learning_rate": 9.55843421682158e-06, + "loss": 0.0004, + "step": 1122 + }, + { + "epoch": 0.07237223690146291, + "grad_norm": 0.31367174055920827, + "learning_rate": 9.559646728481764e-06, + "loss": 0.0005, + "step": 1123 + }, + { + "epoch": 0.07243668234839208, + "grad_norm": 0.0021878403973614306, + "learning_rate": 9.560858160914693e-06, + "loss": 0.0, + "step": 1124 + }, + { + "epoch": 0.07250112779532127, + "grad_norm": 0.04644155774287131, + "learning_rate": 9.562068516039848e-06, + "loss": 0.0001, + "step": 1125 + }, + { + "epoch": 0.07256557324225044, + "grad_norm": 0.005334428075335696, + "learning_rate": 9.563277795771592e-06, + "loss": 0.0, + "step": 1126 + }, + { + "epoch": 0.0726300186891796, + "grad_norm": 0.0009123395890019158, + "learning_rate": 9.564486002019189e-06, + "loss": 0.0, + "step": 1127 + }, + { + "epoch": 0.07269446413610878, + "grad_norm": 0.0032421208617262635, + "learning_rate": 9.565693136686826e-06, + "loss": 0.0, + "step": 1128 + }, + { + "epoch": 0.07275890958303796, + "grad_norm": 0.20209974356677035, + "learning_rate": 9.566899201673622e-06, + "loss": 0.0004, + "step": 1129 + }, + { + "epoch": 0.07282335502996713, + "grad_norm": 0.0017558729881317762, + "learning_rate": 9.568104198873661e-06, + "loss": 0.0, + "step": 1130 + }, + { + "epoch": 0.0728878004768963, + "grad_norm": 0.0007344998898286816, + "learning_rate": 9.569308130175994e-06, + "loss": 0.0, + "step": 1131 + }, + { + "epoch": 0.07295224592382549, + "grad_norm": 0.07438051013009608, + "learning_rate": 9.570510997464664e-06, + "loss": 0.0004, + "step": 1132 + }, + { + "epoch": 0.07301669137075466, + "grad_norm": 0.05245505196256475, + "learning_rate": 9.571712802618725e-06, + "loss": 0.0002, + "step": 1133 + }, + { + "epoch": 0.07308113681768383, + "grad_norm": 0.0009284186006608998, + "learning_rate": 9.572913547512255e-06, + "loss": 0.0, + "step": 1134 + }, + { + "epoch": 0.073145582264613, + "grad_norm": 0.0034999118506301983, + "learning_rate": 9.574113234014383e-06, + "loss": 0.0, + "step": 1135 + }, + { + "epoch": 0.07321002771154218, + "grad_norm": 0.07515344754801417, + "learning_rate": 9.57531186398929e-06, + "loss": 0.0002, + "step": 1136 + }, + { + "epoch": 0.07327447315847135, + "grad_norm": 0.008738924023498379, + "learning_rate": 9.576509439296242e-06, + "loss": 0.0, + "step": 1137 + }, + { + "epoch": 0.07333891860540052, + "grad_norm": 0.0008081805016731883, + "learning_rate": 9.577705961789603e-06, + "loss": 0.0, + "step": 1138 + }, + { + "epoch": 0.07340336405232971, + "grad_norm": 0.03078131770414565, + "learning_rate": 9.578901433318846e-06, + "loss": 0.0001, + "step": 1139 + }, + { + "epoch": 0.07346780949925888, + "grad_norm": 0.03254781257906103, + "learning_rate": 9.580095855728579e-06, + "loss": 0.0003, + "step": 1140 + }, + { + "epoch": 0.07353225494618805, + "grad_norm": 0.10584629442542785, + "learning_rate": 9.581289230858554e-06, + "loss": 0.0018, + "step": 1141 + }, + { + "epoch": 0.07359670039311722, + "grad_norm": 0.013347870485689773, + "learning_rate": 9.582481560543692e-06, + "loss": 0.0, + "step": 1142 + }, + { + "epoch": 0.0736611458400464, + "grad_norm": 0.0043998961185891345, + "learning_rate": 9.583672846614092e-06, + "loss": 0.0, + "step": 1143 + }, + { + "epoch": 0.07372559128697558, + "grad_norm": 0.04458813463929904, + "learning_rate": 9.584863090895054e-06, + "loss": 0.0001, + "step": 1144 + }, + { + "epoch": 0.07379003673390475, + "grad_norm": 0.026909427188295067, + "learning_rate": 9.586052295207094e-06, + "loss": 0.0, + "step": 1145 + }, + { + "epoch": 0.07385448218083393, + "grad_norm": 0.008740718491162603, + "learning_rate": 9.587240461365958e-06, + "loss": 0.0, + "step": 1146 + }, + { + "epoch": 0.0739189276277631, + "grad_norm": 0.2052019199750452, + "learning_rate": 9.58842759118264e-06, + "loss": 0.002, + "step": 1147 + }, + { + "epoch": 0.07398337307469227, + "grad_norm": 0.0039053163539573186, + "learning_rate": 9.589613686463404e-06, + "loss": 0.0001, + "step": 1148 + }, + { + "epoch": 0.07404781852162144, + "grad_norm": 0.007556923226473166, + "learning_rate": 9.590798749009793e-06, + "loss": 0.0, + "step": 1149 + }, + { + "epoch": 0.07411226396855063, + "grad_norm": 0.02777061566941155, + "learning_rate": 9.591982780618645e-06, + "loss": 0.0001, + "step": 1150 + }, + { + "epoch": 0.0741767094154798, + "grad_norm": 0.3086832496171256, + "learning_rate": 9.593165783082122e-06, + "loss": 0.0012, + "step": 1151 + }, + { + "epoch": 0.07424115486240897, + "grad_norm": 0.0005099813533373637, + "learning_rate": 9.594347758187705e-06, + "loss": 0.0, + "step": 1152 + }, + { + "epoch": 0.07430560030933815, + "grad_norm": 0.0010335636522138616, + "learning_rate": 9.59552870771823e-06, + "loss": 0.0, + "step": 1153 + }, + { + "epoch": 0.07437004575626732, + "grad_norm": 0.0005503441681810711, + "learning_rate": 9.596708633451897e-06, + "loss": 0.0, + "step": 1154 + }, + { + "epoch": 0.07443449120319649, + "grad_norm": 0.29321193774320214, + "learning_rate": 9.597887537162277e-06, + "loss": 0.0007, + "step": 1155 + }, + { + "epoch": 0.07449893665012566, + "grad_norm": 0.0003677550358705728, + "learning_rate": 9.599065420618347e-06, + "loss": 0.0, + "step": 1156 + }, + { + "epoch": 0.07456338209705485, + "grad_norm": 0.09427091300374382, + "learning_rate": 9.600242285584487e-06, + "loss": 0.0001, + "step": 1157 + }, + { + "epoch": 0.07462782754398402, + "grad_norm": 0.1102941329864884, + "learning_rate": 9.601418133820508e-06, + "loss": 0.0002, + "step": 1158 + }, + { + "epoch": 0.07469227299091319, + "grad_norm": 0.0016408962246154898, + "learning_rate": 9.602592967081665e-06, + "loss": 0.0, + "step": 1159 + }, + { + "epoch": 0.07475671843784237, + "grad_norm": 0.015343294270169556, + "learning_rate": 9.603766787118666e-06, + "loss": 0.0001, + "step": 1160 + }, + { + "epoch": 0.07482116388477154, + "grad_norm": 0.007299553047901523, + "learning_rate": 9.604939595677697e-06, + "loss": 0.0001, + "step": 1161 + }, + { + "epoch": 0.07488560933170071, + "grad_norm": 0.06251656417098318, + "learning_rate": 9.606111394500438e-06, + "loss": 0.0002, + "step": 1162 + }, + { + "epoch": 0.07495005477862988, + "grad_norm": 0.005736968608914812, + "learning_rate": 9.607282185324068e-06, + "loss": 0.0, + "step": 1163 + }, + { + "epoch": 0.07501450022555907, + "grad_norm": 0.0003761041372193296, + "learning_rate": 9.608451969881285e-06, + "loss": 0.0, + "step": 1164 + }, + { + "epoch": 0.07507894567248824, + "grad_norm": 0.004082930690161441, + "learning_rate": 9.609620749900332e-06, + "loss": 0.0, + "step": 1165 + }, + { + "epoch": 0.07514339111941741, + "grad_norm": 0.001868907810488472, + "learning_rate": 9.610788527104996e-06, + "loss": 0.0, + "step": 1166 + }, + { + "epoch": 0.07520783656634658, + "grad_norm": 0.01672035118957357, + "learning_rate": 9.611955303214636e-06, + "loss": 0.0, + "step": 1167 + }, + { + "epoch": 0.07527228201327577, + "grad_norm": 0.0003677767328121933, + "learning_rate": 9.613121079944188e-06, + "loss": 0.0, + "step": 1168 + }, + { + "epoch": 0.07533672746020494, + "grad_norm": 0.20056341800735186, + "learning_rate": 9.614285859004185e-06, + "loss": 0.0018, + "step": 1169 + }, + { + "epoch": 0.0754011729071341, + "grad_norm": 0.672537078225248, + "learning_rate": 9.61544964210078e-06, + "loss": 0.0034, + "step": 1170 + }, + { + "epoch": 0.07546561835406329, + "grad_norm": 0.0004565966029069957, + "learning_rate": 9.616612430935744e-06, + "loss": 0.0, + "step": 1171 + }, + { + "epoch": 0.07553006380099246, + "grad_norm": 0.00035533163882761317, + "learning_rate": 9.617774227206492e-06, + "loss": 0.0, + "step": 1172 + }, + { + "epoch": 0.07559450924792163, + "grad_norm": 0.192594228075119, + "learning_rate": 9.618935032606104e-06, + "loss": 0.0015, + "step": 1173 + }, + { + "epoch": 0.0756589546948508, + "grad_norm": 0.10144572697475107, + "learning_rate": 9.620094848823319e-06, + "loss": 0.0002, + "step": 1174 + }, + { + "epoch": 0.07572340014177999, + "grad_norm": 0.007117030604680507, + "learning_rate": 9.621253677542571e-06, + "loss": 0.0, + "step": 1175 + }, + { + "epoch": 0.07578784558870916, + "grad_norm": 0.0061437197610423756, + "learning_rate": 9.622411520443993e-06, + "loss": 0.0, + "step": 1176 + }, + { + "epoch": 0.07585229103563833, + "grad_norm": 0.04613563697317982, + "learning_rate": 9.623568379203435e-06, + "loss": 0.0001, + "step": 1177 + }, + { + "epoch": 0.07591673648256751, + "grad_norm": 0.058720324813408255, + "learning_rate": 9.624724255492474e-06, + "loss": 0.0001, + "step": 1178 + }, + { + "epoch": 0.07598118192949668, + "grad_norm": 0.006014910631233619, + "learning_rate": 9.625879150978434e-06, + "loss": 0.0, + "step": 1179 + }, + { + "epoch": 0.07604562737642585, + "grad_norm": 0.0006228941474103343, + "learning_rate": 9.6270330673244e-06, + "loss": 0.0, + "step": 1180 + }, + { + "epoch": 0.07611007282335502, + "grad_norm": 0.006748228304873041, + "learning_rate": 9.628186006189226e-06, + "loss": 0.0, + "step": 1181 + }, + { + "epoch": 0.07617451827028421, + "grad_norm": 0.5808579312109688, + "learning_rate": 9.62933796922756e-06, + "loss": 0.0007, + "step": 1182 + }, + { + "epoch": 0.07623896371721338, + "grad_norm": 0.043795489180758776, + "learning_rate": 9.630488958089847e-06, + "loss": 0.0002, + "step": 1183 + }, + { + "epoch": 0.07630340916414255, + "grad_norm": 0.0515382355616, + "learning_rate": 9.631638974422349e-06, + "loss": 0.0001, + "step": 1184 + }, + { + "epoch": 0.07636785461107173, + "grad_norm": 0.031008705248851732, + "learning_rate": 9.632788019867159e-06, + "loss": 0.0001, + "step": 1185 + }, + { + "epoch": 0.0764323000580009, + "grad_norm": 0.003564128836485725, + "learning_rate": 9.63393609606222e-06, + "loss": 0.0, + "step": 1186 + }, + { + "epoch": 0.07649674550493007, + "grad_norm": 0.0021392358347696247, + "learning_rate": 9.635083204641324e-06, + "loss": 0.0, + "step": 1187 + }, + { + "epoch": 0.07656119095185925, + "grad_norm": 0.0004078006923083002, + "learning_rate": 9.636229347234144e-06, + "loss": 0.0, + "step": 1188 + }, + { + "epoch": 0.07662563639878843, + "grad_norm": 0.20509931364044576, + "learning_rate": 9.637374525466229e-06, + "loss": 0.0004, + "step": 1189 + }, + { + "epoch": 0.0766900818457176, + "grad_norm": 0.06311748250374623, + "learning_rate": 9.638518740959042e-06, + "loss": 0.0004, + "step": 1190 + }, + { + "epoch": 0.07675452729264677, + "grad_norm": 0.022872006103306926, + "learning_rate": 9.639661995329951e-06, + "loss": 0.0, + "step": 1191 + }, + { + "epoch": 0.07681897273957596, + "grad_norm": 0.0007420031287808976, + "learning_rate": 9.640804290192255e-06, + "loss": 0.0, + "step": 1192 + }, + { + "epoch": 0.07688341818650513, + "grad_norm": 0.005457022771712689, + "learning_rate": 9.641945627155186e-06, + "loss": 0.0, + "step": 1193 + }, + { + "epoch": 0.0769478636334343, + "grad_norm": 0.008290051615120557, + "learning_rate": 9.643086007823948e-06, + "loss": 0.0001, + "step": 1194 + }, + { + "epoch": 0.07701230908036347, + "grad_norm": 0.023992458922295828, + "learning_rate": 9.644225433799694e-06, + "loss": 0.0001, + "step": 1195 + }, + { + "epoch": 0.07707675452729265, + "grad_norm": 0.003102753117205217, + "learning_rate": 9.645363906679577e-06, + "loss": 0.0, + "step": 1196 + }, + { + "epoch": 0.07714119997422182, + "grad_norm": 0.058512229636302565, + "learning_rate": 9.646501428056732e-06, + "loss": 0.0001, + "step": 1197 + }, + { + "epoch": 0.07720564542115099, + "grad_norm": 0.00014176378035999393, + "learning_rate": 9.64763799952031e-06, + "loss": 0.0, + "step": 1198 + }, + { + "epoch": 0.07727009086808018, + "grad_norm": 0.0003108339992887321, + "learning_rate": 9.64877362265548e-06, + "loss": 0.0, + "step": 1199 + }, + { + "epoch": 0.07733453631500935, + "grad_norm": 0.0032003990062005197, + "learning_rate": 9.649908299043451e-06, + "loss": 0.0, + "step": 1200 + }, + { + "epoch": 0.07739898176193852, + "grad_norm": 0.5838421549541228, + "learning_rate": 9.651042030261476e-06, + "loss": 0.0034, + "step": 1201 + }, + { + "epoch": 0.07746342720886769, + "grad_norm": 0.0008412995426426008, + "learning_rate": 9.652174817882872e-06, + "loss": 0.0, + "step": 1202 + }, + { + "epoch": 0.07752787265579687, + "grad_norm": 9.185586311117465e-05, + "learning_rate": 9.653306663477033e-06, + "loss": 0.0, + "step": 1203 + }, + { + "epoch": 0.07759231810272604, + "grad_norm": 0.23148305400119992, + "learning_rate": 9.654437568609434e-06, + "loss": 0.0019, + "step": 1204 + }, + { + "epoch": 0.07765676354965521, + "grad_norm": 0.00339606142341171, + "learning_rate": 9.655567534841662e-06, + "loss": 0.0, + "step": 1205 + }, + { + "epoch": 0.07772120899658438, + "grad_norm": 0.07896940352611387, + "learning_rate": 9.656696563731408e-06, + "loss": 0.0002, + "step": 1206 + }, + { + "epoch": 0.07778565444351357, + "grad_norm": 0.08238037779996558, + "learning_rate": 9.657824656832492e-06, + "loss": 0.0002, + "step": 1207 + }, + { + "epoch": 0.07785009989044274, + "grad_norm": 0.002891572609876939, + "learning_rate": 9.658951815694877e-06, + "loss": 0.0, + "step": 1208 + }, + { + "epoch": 0.07791454533737191, + "grad_norm": 0.0007835520232595968, + "learning_rate": 9.660078041864675e-06, + "loss": 0.0, + "step": 1209 + }, + { + "epoch": 0.0779789907843011, + "grad_norm": 0.0002144706980715807, + "learning_rate": 9.661203336884163e-06, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 0.07804343623123026, + "grad_norm": 0.141667232412957, + "learning_rate": 9.6623277022918e-06, + "loss": 0.0008, + "step": 1211 + }, + { + "epoch": 0.07810788167815944, + "grad_norm": 0.013447958714361321, + "learning_rate": 9.66345113962223e-06, + "loss": 0.0001, + "step": 1212 + }, + { + "epoch": 0.0781723271250886, + "grad_norm": 0.015480042443949904, + "learning_rate": 9.664573650406304e-06, + "loss": 0.0, + "step": 1213 + }, + { + "epoch": 0.07823677257201779, + "grad_norm": 0.023478707705058638, + "learning_rate": 9.665695236171086e-06, + "loss": 0.0001, + "step": 1214 + }, + { + "epoch": 0.07830121801894696, + "grad_norm": 0.19705356870810878, + "learning_rate": 9.666815898439866e-06, + "loss": 0.0026, + "step": 1215 + }, + { + "epoch": 0.07836566346587613, + "grad_norm": 0.03781223988640594, + "learning_rate": 9.667935638732182e-06, + "loss": 0.0, + "step": 1216 + }, + { + "epoch": 0.07843010891280532, + "grad_norm": 0.08106810676990649, + "learning_rate": 9.669054458563815e-06, + "loss": 0.0016, + "step": 1217 + }, + { + "epoch": 0.07849455435973449, + "grad_norm": 0.0032037268903943607, + "learning_rate": 9.67017235944682e-06, + "loss": 0.0, + "step": 1218 + }, + { + "epoch": 0.07855899980666366, + "grad_norm": 0.0014909723492173954, + "learning_rate": 9.67128934288952e-06, + "loss": 0.0, + "step": 1219 + }, + { + "epoch": 0.07862344525359283, + "grad_norm": 0.05132170474820264, + "learning_rate": 9.672405410396536e-06, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 0.07868789070052201, + "grad_norm": 0.05448380494674192, + "learning_rate": 9.673520563468786e-06, + "loss": 0.0002, + "step": 1221 + }, + { + "epoch": 0.07875233614745118, + "grad_norm": 0.00041079875761307306, + "learning_rate": 9.674634803603501e-06, + "loss": 0.0, + "step": 1222 + }, + { + "epoch": 0.07881678159438035, + "grad_norm": 1.6507721048111714, + "learning_rate": 9.675748132294243e-06, + "loss": 0.0026, + "step": 1223 + }, + { + "epoch": 0.07888122704130954, + "grad_norm": 0.0002573480081688759, + "learning_rate": 9.676860551030908e-06, + "loss": 0.0, + "step": 1224 + }, + { + "epoch": 0.07894567248823871, + "grad_norm": 0.05452053945885145, + "learning_rate": 9.677972061299738e-06, + "loss": 0.0001, + "step": 1225 + }, + { + "epoch": 0.07901011793516788, + "grad_norm": 0.0009042614824524367, + "learning_rate": 9.679082664583347e-06, + "loss": 0.0, + "step": 1226 + }, + { + "epoch": 0.07907456338209705, + "grad_norm": 0.12650184384658905, + "learning_rate": 9.68019236236071e-06, + "loss": 0.0005, + "step": 1227 + }, + { + "epoch": 0.07913900882902623, + "grad_norm": 0.0003593917699330583, + "learning_rate": 9.6813011561072e-06, + "loss": 0.0, + "step": 1228 + }, + { + "epoch": 0.0792034542759554, + "grad_norm": 0.00784809323102259, + "learning_rate": 9.682409047294576e-06, + "loss": 0.0, + "step": 1229 + }, + { + "epoch": 0.07926789972288457, + "grad_norm": 0.0034080110459679502, + "learning_rate": 9.683516037391016e-06, + "loss": 0.0, + "step": 1230 + }, + { + "epoch": 0.07933234516981376, + "grad_norm": 0.0008000263937962497, + "learning_rate": 9.684622127861108e-06, + "loss": 0.0, + "step": 1231 + }, + { + "epoch": 0.07939679061674293, + "grad_norm": 0.4572995042527525, + "learning_rate": 9.685727320165879e-06, + "loss": 0.0051, + "step": 1232 + }, + { + "epoch": 0.0794612360636721, + "grad_norm": 0.14509120287519361, + "learning_rate": 9.6868316157628e-06, + "loss": 0.0004, + "step": 1233 + }, + { + "epoch": 0.07952568151060127, + "grad_norm": 0.0019696790708333524, + "learning_rate": 9.687935016105796e-06, + "loss": 0.0, + "step": 1234 + }, + { + "epoch": 0.07959012695753045, + "grad_norm": 0.2617435443577594, + "learning_rate": 9.689037522645257e-06, + "loss": 0.0007, + "step": 1235 + }, + { + "epoch": 0.07965457240445963, + "grad_norm": 0.1585587871704741, + "learning_rate": 9.690139136828051e-06, + "loss": 0.0002, + "step": 1236 + }, + { + "epoch": 0.0797190178513888, + "grad_norm": 0.0007139002751616745, + "learning_rate": 9.69123986009754e-06, + "loss": 0.0, + "step": 1237 + }, + { + "epoch": 0.07978346329831798, + "grad_norm": 0.0005685128388628978, + "learning_rate": 9.692339693893584e-06, + "loss": 0.0, + "step": 1238 + }, + { + "epoch": 0.07984790874524715, + "grad_norm": 0.00020046062008239465, + "learning_rate": 9.693438639652553e-06, + "loss": 0.0, + "step": 1239 + }, + { + "epoch": 0.07991235419217632, + "grad_norm": 0.018666319797578546, + "learning_rate": 9.694536698807345e-06, + "loss": 0.0002, + "step": 1240 + }, + { + "epoch": 0.07997679963910549, + "grad_norm": 0.002108454970512216, + "learning_rate": 9.69563387278739e-06, + "loss": 0.0, + "step": 1241 + }, + { + "epoch": 0.08004124508603468, + "grad_norm": 0.005928479550208142, + "learning_rate": 9.696730163018664e-06, + "loss": 0.0001, + "step": 1242 + }, + { + "epoch": 0.08010569053296385, + "grad_norm": 0.007256955744477934, + "learning_rate": 9.697825570923702e-06, + "loss": 0.0, + "step": 1243 + }, + { + "epoch": 0.08017013597989302, + "grad_norm": 0.012520196155166474, + "learning_rate": 9.698920097921602e-06, + "loss": 0.0, + "step": 1244 + }, + { + "epoch": 0.0802345814268222, + "grad_norm": 0.0049347868056927174, + "learning_rate": 9.700013745428048e-06, + "loss": 0.0, + "step": 1245 + }, + { + "epoch": 0.08029902687375137, + "grad_norm": 0.14433231710557265, + "learning_rate": 9.701106514855312e-06, + "loss": 0.0002, + "step": 1246 + }, + { + "epoch": 0.08036347232068054, + "grad_norm": 0.16086143413594908, + "learning_rate": 9.70219840761226e-06, + "loss": 0.0004, + "step": 1247 + }, + { + "epoch": 0.08042791776760971, + "grad_norm": 0.022894449012242864, + "learning_rate": 9.703289425104382e-06, + "loss": 0.0001, + "step": 1248 + }, + { + "epoch": 0.0804923632145389, + "grad_norm": 0.0004916579296406989, + "learning_rate": 9.70437956873378e-06, + "loss": 0.0, + "step": 1249 + }, + { + "epoch": 0.08055680866146807, + "grad_norm": 0.006341910971692203, + "learning_rate": 9.705468839899196e-06, + "loss": 0.0, + "step": 1250 + }, + { + "epoch": 0.08062125410839724, + "grad_norm": 0.000566196866322031, + "learning_rate": 9.706557239996017e-06, + "loss": 0.0, + "step": 1251 + }, + { + "epoch": 0.08068569955532641, + "grad_norm": 4.660119618530064, + "learning_rate": 9.707644770416276e-06, + "loss": 0.0008, + "step": 1252 + }, + { + "epoch": 0.0807501450022556, + "grad_norm": 0.0011596501754506218, + "learning_rate": 9.708731432548683e-06, + "loss": 0.0, + "step": 1253 + }, + { + "epoch": 0.08081459044918476, + "grad_norm": 0.0002413284823318843, + "learning_rate": 9.70981722777862e-06, + "loss": 0.0, + "step": 1254 + }, + { + "epoch": 0.08087903589611393, + "grad_norm": 0.00018585046524750463, + "learning_rate": 9.71090215748815e-06, + "loss": 0.0, + "step": 1255 + }, + { + "epoch": 0.08094348134304312, + "grad_norm": 0.004867257547187822, + "learning_rate": 9.71198622305605e-06, + "loss": 0.0, + "step": 1256 + }, + { + "epoch": 0.08100792678997229, + "grad_norm": 0.006004401052693547, + "learning_rate": 9.713069425857785e-06, + "loss": 0.0, + "step": 1257 + }, + { + "epoch": 0.08107237223690146, + "grad_norm": 0.0062596726703548775, + "learning_rate": 9.714151767265551e-06, + "loss": 0.0, + "step": 1258 + }, + { + "epoch": 0.08113681768383063, + "grad_norm": 0.0001596291398008103, + "learning_rate": 9.71523324864827e-06, + "loss": 0.0, + "step": 1259 + }, + { + "epoch": 0.08120126313075982, + "grad_norm": 0.003137489508600633, + "learning_rate": 9.716313871371605e-06, + "loss": 0.0, + "step": 1260 + }, + { + "epoch": 0.08126570857768899, + "grad_norm": 0.03389845337090403, + "learning_rate": 9.717393636797964e-06, + "loss": 0.0002, + "step": 1261 + }, + { + "epoch": 0.08133015402461816, + "grad_norm": 0.0015749193613697652, + "learning_rate": 9.718472546286517e-06, + "loss": 0.0, + "step": 1262 + }, + { + "epoch": 0.08139459947154734, + "grad_norm": 0.0016629746994158803, + "learning_rate": 9.71955060119321e-06, + "loss": 0.0, + "step": 1263 + }, + { + "epoch": 0.08145904491847651, + "grad_norm": 0.9630747342468597, + "learning_rate": 9.720627802870762e-06, + "loss": 0.0016, + "step": 1264 + }, + { + "epoch": 0.08152349036540568, + "grad_norm": 0.005799075443889431, + "learning_rate": 9.721704152668686e-06, + "loss": 0.0, + "step": 1265 + }, + { + "epoch": 0.08158793581233485, + "grad_norm": 0.9226179200559602, + "learning_rate": 9.722779651933294e-06, + "loss": 0.0038, + "step": 1266 + }, + { + "epoch": 0.08165238125926404, + "grad_norm": 0.009570234618307968, + "learning_rate": 9.723854302007711e-06, + "loss": 0.0, + "step": 1267 + }, + { + "epoch": 0.08171682670619321, + "grad_norm": 0.007609505717263756, + "learning_rate": 9.724928104231886e-06, + "loss": 0.0, + "step": 1268 + }, + { + "epoch": 0.08178127215312238, + "grad_norm": 0.0037044836505944105, + "learning_rate": 9.72600105994259e-06, + "loss": 0.0, + "step": 1269 + }, + { + "epoch": 0.08184571760005156, + "grad_norm": 0.3324494028852565, + "learning_rate": 9.727073170473441e-06, + "loss": 0.0003, + "step": 1270 + }, + { + "epoch": 0.08191016304698073, + "grad_norm": 0.22125266870947377, + "learning_rate": 9.72814443715491e-06, + "loss": 0.0015, + "step": 1271 + }, + { + "epoch": 0.0819746084939099, + "grad_norm": 0.8368448698447373, + "learning_rate": 9.729214861314324e-06, + "loss": 0.0008, + "step": 1272 + }, + { + "epoch": 0.08203905394083907, + "grad_norm": 0.03696941188445865, + "learning_rate": 9.730284444275884e-06, + "loss": 0.0, + "step": 1273 + }, + { + "epoch": 0.08210349938776826, + "grad_norm": 0.09513810317509, + "learning_rate": 9.73135318736067e-06, + "loss": 0.0001, + "step": 1274 + }, + { + "epoch": 0.08216794483469743, + "grad_norm": 0.1407959614094262, + "learning_rate": 9.732421091886653e-06, + "loss": 0.0001, + "step": 1275 + }, + { + "epoch": 0.0822323902816266, + "grad_norm": 0.11572725150082791, + "learning_rate": 9.733488159168705e-06, + "loss": 0.0002, + "step": 1276 + }, + { + "epoch": 0.08229683572855578, + "grad_norm": 0.20268866551076187, + "learning_rate": 9.734554390518605e-06, + "loss": 0.0006, + "step": 1277 + }, + { + "epoch": 0.08236128117548495, + "grad_norm": 0.034702215271124964, + "learning_rate": 9.735619787245053e-06, + "loss": 0.0, + "step": 1278 + }, + { + "epoch": 0.08242572662241412, + "grad_norm": 0.618313305814074, + "learning_rate": 9.73668435065368e-06, + "loss": 0.0058, + "step": 1279 + }, + { + "epoch": 0.0824901720693433, + "grad_norm": 0.12879200409898806, + "learning_rate": 9.737748082047053e-06, + "loss": 0.002, + "step": 1280 + }, + { + "epoch": 0.08255461751627248, + "grad_norm": 0.15517708649995626, + "learning_rate": 9.738810982724688e-06, + "loss": 0.0014, + "step": 1281 + }, + { + "epoch": 0.08261906296320165, + "grad_norm": 0.022942057466187164, + "learning_rate": 9.739873053983062e-06, + "loss": 0.0, + "step": 1282 + }, + { + "epoch": 0.08268350841013082, + "grad_norm": 0.32524165834202085, + "learning_rate": 9.740934297115614e-06, + "loss": 0.0001, + "step": 1283 + }, + { + "epoch": 0.08274795385706, + "grad_norm": 0.017645141056179724, + "learning_rate": 9.74199471341276e-06, + "loss": 0.0001, + "step": 1284 + }, + { + "epoch": 0.08281239930398918, + "grad_norm": 0.001701689789313395, + "learning_rate": 9.743054304161912e-06, + "loss": 0.0, + "step": 1285 + }, + { + "epoch": 0.08287684475091835, + "grad_norm": 0.11248027509383528, + "learning_rate": 9.744113070647462e-06, + "loss": 0.002, + "step": 1286 + }, + { + "epoch": 0.08294129019784752, + "grad_norm": 0.009134959493058783, + "learning_rate": 9.745171014150819e-06, + "loss": 0.0, + "step": 1287 + }, + { + "epoch": 0.0830057356447767, + "grad_norm": 0.001851569614579538, + "learning_rate": 9.746228135950402e-06, + "loss": 0.0, + "step": 1288 + }, + { + "epoch": 0.08307018109170587, + "grad_norm": 0.004553438642060542, + "learning_rate": 9.747284437321652e-06, + "loss": 0.0, + "step": 1289 + }, + { + "epoch": 0.08313462653863504, + "grad_norm": 0.010993416473078717, + "learning_rate": 9.748339919537046e-06, + "loss": 0.0001, + "step": 1290 + }, + { + "epoch": 0.08319907198556421, + "grad_norm": 0.046090326040599205, + "learning_rate": 9.749394583866101e-06, + "loss": 0.0002, + "step": 1291 + }, + { + "epoch": 0.0832635174324934, + "grad_norm": 0.016645441025789916, + "learning_rate": 9.750448431575384e-06, + "loss": 0.0002, + "step": 1292 + }, + { + "epoch": 0.08332796287942257, + "grad_norm": 0.07861484502338792, + "learning_rate": 9.751501463928525e-06, + "loss": 0.0002, + "step": 1293 + }, + { + "epoch": 0.08339240832635174, + "grad_norm": 0.14598362209163068, + "learning_rate": 9.75255368218622e-06, + "loss": 0.0012, + "step": 1294 + }, + { + "epoch": 0.08345685377328092, + "grad_norm": 0.005253576712949334, + "learning_rate": 9.753605087606247e-06, + "loss": 0.0, + "step": 1295 + }, + { + "epoch": 0.0835212992202101, + "grad_norm": 0.06470684124601767, + "learning_rate": 9.75465568144347e-06, + "loss": 0.0001, + "step": 1296 + }, + { + "epoch": 0.08358574466713926, + "grad_norm": 0.0001312365358307035, + "learning_rate": 9.755705464949845e-06, + "loss": 0.0, + "step": 1297 + }, + { + "epoch": 0.08365019011406843, + "grad_norm": 0.009228063738194224, + "learning_rate": 9.756754439374438e-06, + "loss": 0.0, + "step": 1298 + }, + { + "epoch": 0.08371463556099762, + "grad_norm": 0.022493899301872504, + "learning_rate": 9.757802605963434e-06, + "loss": 0.0001, + "step": 1299 + }, + { + "epoch": 0.08377908100792679, + "grad_norm": 0.025017351223788283, + "learning_rate": 9.758849965960128e-06, + "loss": 0.0, + "step": 1300 + }, + { + "epoch": 0.08384352645485596, + "grad_norm": 0.0033973402368583138, + "learning_rate": 9.759896520604957e-06, + "loss": 0.0, + "step": 1301 + }, + { + "epoch": 0.08390797190178514, + "grad_norm": 0.016653590335366984, + "learning_rate": 9.760942271135499e-06, + "loss": 0.0001, + "step": 1302 + }, + { + "epoch": 0.08397241734871431, + "grad_norm": 0.05323761764710082, + "learning_rate": 9.761987218786474e-06, + "loss": 0.0001, + "step": 1303 + }, + { + "epoch": 0.08403686279564349, + "grad_norm": 0.00047998523145980336, + "learning_rate": 9.763031364789768e-06, + "loss": 0.0, + "step": 1304 + }, + { + "epoch": 0.08410130824257266, + "grad_norm": 0.0012154446343361834, + "learning_rate": 9.764074710374431e-06, + "loss": 0.0, + "step": 1305 + }, + { + "epoch": 0.08416575368950184, + "grad_norm": 0.1703871353470703, + "learning_rate": 9.765117256766685e-06, + "loss": 0.0003, + "step": 1306 + }, + { + "epoch": 0.08423019913643101, + "grad_norm": 0.3756810798293176, + "learning_rate": 9.766159005189943e-06, + "loss": 0.0032, + "step": 1307 + }, + { + "epoch": 0.08429464458336018, + "grad_norm": 0.0012046472375180962, + "learning_rate": 9.767199956864808e-06, + "loss": 0.0, + "step": 1308 + }, + { + "epoch": 0.08435909003028937, + "grad_norm": 0.0008695978340420592, + "learning_rate": 9.768240113009083e-06, + "loss": 0.0, + "step": 1309 + }, + { + "epoch": 0.08442353547721854, + "grad_norm": 0.035405811355180825, + "learning_rate": 9.769279474837782e-06, + "loss": 0.0003, + "step": 1310 + }, + { + "epoch": 0.08448798092414771, + "grad_norm": 0.05460181932338454, + "learning_rate": 9.770318043563141e-06, + "loss": 0.0003, + "step": 1311 + }, + { + "epoch": 0.08455242637107688, + "grad_norm": 0.0020692583491097017, + "learning_rate": 9.771355820394616e-06, + "loss": 0.0, + "step": 1312 + }, + { + "epoch": 0.08461687181800606, + "grad_norm": 0.0029885875308376477, + "learning_rate": 9.772392806538907e-06, + "loss": 0.0, + "step": 1313 + }, + { + "epoch": 0.08468131726493523, + "grad_norm": 0.0010176173067989608, + "learning_rate": 9.773429003199951e-06, + "loss": 0.0, + "step": 1314 + }, + { + "epoch": 0.0847457627118644, + "grad_norm": 0.00025002265076070745, + "learning_rate": 9.77446441157894e-06, + "loss": 0.0, + "step": 1315 + }, + { + "epoch": 0.08481020815879359, + "grad_norm": 0.011615362301719032, + "learning_rate": 9.775499032874327e-06, + "loss": 0.0, + "step": 1316 + }, + { + "epoch": 0.08487465360572276, + "grad_norm": 0.8283102080329923, + "learning_rate": 9.776532868281832e-06, + "loss": 0.0065, + "step": 1317 + }, + { + "epoch": 0.08493909905265193, + "grad_norm": 0.009607753739860307, + "learning_rate": 9.777565918994456e-06, + "loss": 0.0, + "step": 1318 + }, + { + "epoch": 0.0850035444995811, + "grad_norm": 0.0008744673144066277, + "learning_rate": 9.778598186202483e-06, + "loss": 0.0, + "step": 1319 + }, + { + "epoch": 0.08506798994651028, + "grad_norm": 0.35578931250669904, + "learning_rate": 9.77962967109349e-06, + "loss": 0.0015, + "step": 1320 + }, + { + "epoch": 0.08513243539343945, + "grad_norm": 0.0002869815156587147, + "learning_rate": 9.780660374852359e-06, + "loss": 0.0, + "step": 1321 + }, + { + "epoch": 0.08519688084036862, + "grad_norm": 9.750659056478101e-05, + "learning_rate": 9.781690298661277e-06, + "loss": 0.0, + "step": 1322 + }, + { + "epoch": 0.08526132628729781, + "grad_norm": 0.0008800151837540015, + "learning_rate": 9.782719443699757e-06, + "loss": 0.0, + "step": 1323 + }, + { + "epoch": 0.08532577173422698, + "grad_norm": 0.007283107294294607, + "learning_rate": 9.783747811144632e-06, + "loss": 0.0, + "step": 1324 + }, + { + "epoch": 0.08539021718115615, + "grad_norm": 0.00046669696933537125, + "learning_rate": 9.784775402170071e-06, + "loss": 0.0, + "step": 1325 + }, + { + "epoch": 0.08545466262808532, + "grad_norm": 0.011137442948644324, + "learning_rate": 9.785802217947585e-06, + "loss": 0.0, + "step": 1326 + }, + { + "epoch": 0.0855191080750145, + "grad_norm": 0.00046858581858597265, + "learning_rate": 9.786828259646038e-06, + "loss": 0.0, + "step": 1327 + }, + { + "epoch": 0.08558355352194368, + "grad_norm": 0.0011227359990139579, + "learning_rate": 9.787853528431651e-06, + "loss": 0.0, + "step": 1328 + }, + { + "epoch": 0.08564799896887285, + "grad_norm": 0.005764189022396666, + "learning_rate": 9.788878025468011e-06, + "loss": 0.0, + "step": 1329 + }, + { + "epoch": 0.08571244441580203, + "grad_norm": 0.00036195693870573616, + "learning_rate": 9.78990175191608e-06, + "loss": 0.0, + "step": 1330 + }, + { + "epoch": 0.0857768898627312, + "grad_norm": 0.0004031809736862309, + "learning_rate": 9.790924708934204e-06, + "loss": 0.0, + "step": 1331 + }, + { + "epoch": 0.08584133530966037, + "grad_norm": 0.12820426942731789, + "learning_rate": 9.791946897678112e-06, + "loss": 0.0015, + "step": 1332 + }, + { + "epoch": 0.08590578075658954, + "grad_norm": 0.007201268710137027, + "learning_rate": 9.792968319300942e-06, + "loss": 0.0, + "step": 1333 + }, + { + "epoch": 0.08597022620351873, + "grad_norm": 0.29705053711328777, + "learning_rate": 9.793988974953227e-06, + "loss": 0.0012, + "step": 1334 + }, + { + "epoch": 0.0860346716504479, + "grad_norm": 0.048011958608229134, + "learning_rate": 9.795008865782922e-06, + "loss": 0.0003, + "step": 1335 + }, + { + "epoch": 0.08609911709737707, + "grad_norm": 0.000588550762065899, + "learning_rate": 9.796027992935399e-06, + "loss": 0.0, + "step": 1336 + }, + { + "epoch": 0.08616356254430624, + "grad_norm": 0.00403323898251864, + "learning_rate": 9.79704635755346e-06, + "loss": 0.0, + "step": 1337 + }, + { + "epoch": 0.08622800799123542, + "grad_norm": 0.0770604017607048, + "learning_rate": 9.798063960777342e-06, + "loss": 0.0003, + "step": 1338 + }, + { + "epoch": 0.08629245343816459, + "grad_norm": 0.002188687958222415, + "learning_rate": 9.799080803744728e-06, + "loss": 0.0, + "step": 1339 + }, + { + "epoch": 0.08635689888509376, + "grad_norm": 0.0005241857617764945, + "learning_rate": 9.800096887590755e-06, + "loss": 0.0, + "step": 1340 + }, + { + "epoch": 0.08642134433202295, + "grad_norm": 0.0007191739447359967, + "learning_rate": 9.801112213448018e-06, + "loss": 0.0, + "step": 1341 + }, + { + "epoch": 0.08648578977895212, + "grad_norm": 0.0020778973471756966, + "learning_rate": 9.802126782446577e-06, + "loss": 0.0, + "step": 1342 + }, + { + "epoch": 0.08655023522588129, + "grad_norm": 0.023544600776087193, + "learning_rate": 9.803140595713964e-06, + "loss": 0.0001, + "step": 1343 + }, + { + "epoch": 0.08661468067281046, + "grad_norm": 0.005317380634932252, + "learning_rate": 9.804153654375207e-06, + "loss": 0.0, + "step": 1344 + }, + { + "epoch": 0.08667912611973964, + "grad_norm": 0.001298663308470156, + "learning_rate": 9.805165959552807e-06, + "loss": 0.0, + "step": 1345 + }, + { + "epoch": 0.08674357156666881, + "grad_norm": 0.001955103407708176, + "learning_rate": 9.806177512366772e-06, + "loss": 0.0, + "step": 1346 + }, + { + "epoch": 0.08680801701359799, + "grad_norm": 0.012739390414210567, + "learning_rate": 9.807188313934614e-06, + "loss": 0.0, + "step": 1347 + }, + { + "epoch": 0.08687246246052717, + "grad_norm": 0.0017430904771763144, + "learning_rate": 9.808198365371351e-06, + "loss": 0.0, + "step": 1348 + }, + { + "epoch": 0.08693690790745634, + "grad_norm": 0.007015913286311483, + "learning_rate": 9.80920766778953e-06, + "loss": 0.0, + "step": 1349 + }, + { + "epoch": 0.08700135335438551, + "grad_norm": 0.0018873731549218875, + "learning_rate": 9.810216222299216e-06, + "loss": 0.0, + "step": 1350 + }, + { + "epoch": 0.08706579880131468, + "grad_norm": 0.0003947696754434573, + "learning_rate": 9.81122403000801e-06, + "loss": 0.0, + "step": 1351 + }, + { + "epoch": 0.08713024424824387, + "grad_norm": 0.038484140741465445, + "learning_rate": 9.81223109202106e-06, + "loss": 0.0001, + "step": 1352 + }, + { + "epoch": 0.08719468969517304, + "grad_norm": 0.11864357590167482, + "learning_rate": 9.813237409441055e-06, + "loss": 0.0003, + "step": 1353 + }, + { + "epoch": 0.0872591351421022, + "grad_norm": 0.5198617615258171, + "learning_rate": 9.814242983368245e-06, + "loss": 0.0043, + "step": 1354 + }, + { + "epoch": 0.08732358058903139, + "grad_norm": 0.06844873037905837, + "learning_rate": 9.815247814900443e-06, + "loss": 0.0001, + "step": 1355 + }, + { + "epoch": 0.08738802603596056, + "grad_norm": 0.00017070857672494202, + "learning_rate": 9.816251905133028e-06, + "loss": 0.0, + "step": 1356 + }, + { + "epoch": 0.08745247148288973, + "grad_norm": 0.014964520116337086, + "learning_rate": 9.817255255158962e-06, + "loss": 0.0001, + "step": 1357 + }, + { + "epoch": 0.0875169169298189, + "grad_norm": 0.003770806539701692, + "learning_rate": 9.818257866068787e-06, + "loss": 0.0, + "step": 1358 + }, + { + "epoch": 0.08758136237674809, + "grad_norm": 0.09473215165314121, + "learning_rate": 9.81925973895064e-06, + "loss": 0.0005, + "step": 1359 + }, + { + "epoch": 0.08764580782367726, + "grad_norm": 0.005408603276892646, + "learning_rate": 9.820260874890255e-06, + "loss": 0.0, + "step": 1360 + }, + { + "epoch": 0.08771025327060643, + "grad_norm": 0.0015547945886781377, + "learning_rate": 9.821261274970973e-06, + "loss": 0.0, + "step": 1361 + }, + { + "epoch": 0.08777469871753561, + "grad_norm": 0.0015138486387163961, + "learning_rate": 9.822260940273748e-06, + "loss": 0.0, + "step": 1362 + }, + { + "epoch": 0.08783914416446478, + "grad_norm": 0.00069763105509275, + "learning_rate": 9.823259871877153e-06, + "loss": 0.0015, + "step": 1363 + }, + { + "epoch": 0.08790358961139395, + "grad_norm": 0.0025643315514372773, + "learning_rate": 9.824258070857386e-06, + "loss": 0.0, + "step": 1364 + }, + { + "epoch": 0.08796803505832312, + "grad_norm": 0.009340093648465875, + "learning_rate": 9.825255538288282e-06, + "loss": 0.0, + "step": 1365 + }, + { + "epoch": 0.08803248050525231, + "grad_norm": 0.015881523472943818, + "learning_rate": 9.826252275241316e-06, + "loss": 0.0001, + "step": 1366 + }, + { + "epoch": 0.08809692595218148, + "grad_norm": 0.017519239632679023, + "learning_rate": 9.827248282785611e-06, + "loss": 0.0, + "step": 1367 + }, + { + "epoch": 0.08816137139911065, + "grad_norm": 0.0016914266291122215, + "learning_rate": 9.828243561987945e-06, + "loss": 0.0, + "step": 1368 + }, + { + "epoch": 0.08822581684603983, + "grad_norm": 0.05371591804994142, + "learning_rate": 9.829238113912756e-06, + "loss": 0.0002, + "step": 1369 + }, + { + "epoch": 0.088290262292969, + "grad_norm": 0.002753948558404598, + "learning_rate": 9.83023193962215e-06, + "loss": 0.0, + "step": 1370 + }, + { + "epoch": 0.08835470773989818, + "grad_norm": 0.002328304292487111, + "learning_rate": 9.831225040175908e-06, + "loss": 0.0, + "step": 1371 + }, + { + "epoch": 0.08841915318682735, + "grad_norm": 0.001300685910040194, + "learning_rate": 9.832217416631494e-06, + "loss": 0.0, + "step": 1372 + }, + { + "epoch": 0.08848359863375653, + "grad_norm": 0.02016763377567185, + "learning_rate": 9.833209070044064e-06, + "loss": 0.0, + "step": 1373 + }, + { + "epoch": 0.0885480440806857, + "grad_norm": 0.0006239190779079104, + "learning_rate": 9.834200001466461e-06, + "loss": 0.0, + "step": 1374 + }, + { + "epoch": 0.08861248952761487, + "grad_norm": 0.17247998444561874, + "learning_rate": 9.835190211949236e-06, + "loss": 0.0006, + "step": 1375 + }, + { + "epoch": 0.08867693497454404, + "grad_norm": 0.02695488949887911, + "learning_rate": 9.83617970254065e-06, + "loss": 0.0, + "step": 1376 + }, + { + "epoch": 0.08874138042147323, + "grad_norm": 0.004205663295906084, + "learning_rate": 9.837168474286672e-06, + "loss": 0.0, + "step": 1377 + }, + { + "epoch": 0.0888058258684024, + "grad_norm": 0.23413007177194578, + "learning_rate": 9.838156528231003e-06, + "loss": 0.002, + "step": 1378 + }, + { + "epoch": 0.08887027131533157, + "grad_norm": 0.010107797521254137, + "learning_rate": 9.839143865415061e-06, + "loss": 0.0, + "step": 1379 + }, + { + "epoch": 0.08893471676226075, + "grad_norm": 0.22197280527991795, + "learning_rate": 9.840130486878012e-06, + "loss": 0.0034, + "step": 1380 + }, + { + "epoch": 0.08899916220918992, + "grad_norm": 0.011300355431752012, + "learning_rate": 9.841116393656755e-06, + "loss": 0.0, + "step": 1381 + }, + { + "epoch": 0.08906360765611909, + "grad_norm": 0.0006419008143487855, + "learning_rate": 9.84210158678594e-06, + "loss": 0.0, + "step": 1382 + }, + { + "epoch": 0.08912805310304826, + "grad_norm": 0.06378269833658995, + "learning_rate": 9.843086067297971e-06, + "loss": 0.0001, + "step": 1383 + }, + { + "epoch": 0.08919249854997745, + "grad_norm": 0.01747026315388834, + "learning_rate": 9.844069836223013e-06, + "loss": 0.0, + "step": 1384 + }, + { + "epoch": 0.08925694399690662, + "grad_norm": 0.03605956953767996, + "learning_rate": 9.845052894589003e-06, + "loss": 0.0001, + "step": 1385 + }, + { + "epoch": 0.08932138944383579, + "grad_norm": 0.008695684207934068, + "learning_rate": 9.846035243421644e-06, + "loss": 0.0, + "step": 1386 + }, + { + "epoch": 0.08938583489076497, + "grad_norm": 1.3870611063691909, + "learning_rate": 9.847016883744427e-06, + "loss": 0.009, + "step": 1387 + }, + { + "epoch": 0.08945028033769414, + "grad_norm": 0.0004034604019097972, + "learning_rate": 9.847997816578631e-06, + "loss": 0.0, + "step": 1388 + }, + { + "epoch": 0.08951472578462331, + "grad_norm": 0.24464011404589933, + "learning_rate": 9.848978042943318e-06, + "loss": 0.0054, + "step": 1389 + }, + { + "epoch": 0.08957917123155248, + "grad_norm": 0.006027357184420001, + "learning_rate": 9.849957563855364e-06, + "loss": 0.0, + "step": 1390 + }, + { + "epoch": 0.08964361667848167, + "grad_norm": 0.0032428463574663727, + "learning_rate": 9.85093638032944e-06, + "loss": 0.0, + "step": 1391 + }, + { + "epoch": 0.08970806212541084, + "grad_norm": 0.18261966601643068, + "learning_rate": 9.851914493378031e-06, + "loss": 0.0002, + "step": 1392 + }, + { + "epoch": 0.08977250757234001, + "grad_norm": 0.019904185252961712, + "learning_rate": 9.85289190401145e-06, + "loss": 0.0, + "step": 1393 + }, + { + "epoch": 0.0898369530192692, + "grad_norm": 0.006466688261427656, + "learning_rate": 9.853868613237822e-06, + "loss": 0.0, + "step": 1394 + }, + { + "epoch": 0.08990139846619837, + "grad_norm": 0.2036738682902856, + "learning_rate": 9.854844622063109e-06, + "loss": 0.0005, + "step": 1395 + }, + { + "epoch": 0.08996584391312754, + "grad_norm": 0.12498444221953549, + "learning_rate": 9.855819931491116e-06, + "loss": 0.0005, + "step": 1396 + }, + { + "epoch": 0.0900302893600567, + "grad_norm": 0.25550481384225326, + "learning_rate": 9.85679454252348e-06, + "loss": 0.0002, + "step": 1397 + }, + { + "epoch": 0.09009473480698589, + "grad_norm": 0.005078154637463445, + "learning_rate": 9.857768456159699e-06, + "loss": 0.0, + "step": 1398 + }, + { + "epoch": 0.09015918025391506, + "grad_norm": 0.006486574987916103, + "learning_rate": 9.85874167339712e-06, + "loss": 0.0, + "step": 1399 + }, + { + "epoch": 0.09022362570084423, + "grad_norm": 0.1849051672410749, + "learning_rate": 9.859714195230952e-06, + "loss": 0.0003, + "step": 1400 + }, + { + "epoch": 0.09028807114777342, + "grad_norm": 1.157685394105611, + "learning_rate": 9.860686022654276e-06, + "loss": 0.0045, + "step": 1401 + }, + { + "epoch": 0.09035251659470259, + "grad_norm": 0.05039861841158964, + "learning_rate": 9.861657156658047e-06, + "loss": 0.0002, + "step": 1402 + }, + { + "epoch": 0.09041696204163176, + "grad_norm": 0.003640545123375758, + "learning_rate": 9.862627598231097e-06, + "loss": 0.0, + "step": 1403 + }, + { + "epoch": 0.09048140748856093, + "grad_norm": 0.18785870532732124, + "learning_rate": 9.863597348360145e-06, + "loss": 0.0005, + "step": 1404 + }, + { + "epoch": 0.09054585293549011, + "grad_norm": 0.02162690258325965, + "learning_rate": 9.864566408029807e-06, + "loss": 0.0, + "step": 1405 + }, + { + "epoch": 0.09061029838241928, + "grad_norm": 0.3182465991267757, + "learning_rate": 9.86553477822259e-06, + "loss": 0.0013, + "step": 1406 + }, + { + "epoch": 0.09067474382934845, + "grad_norm": 0.00506957712645276, + "learning_rate": 9.866502459918909e-06, + "loss": 0.0, + "step": 1407 + }, + { + "epoch": 0.09073918927627764, + "grad_norm": 0.0008336997610315851, + "learning_rate": 9.867469454097092e-06, + "loss": 0.0, + "step": 1408 + }, + { + "epoch": 0.09080363472320681, + "grad_norm": 0.044061586416188465, + "learning_rate": 9.86843576173338e-06, + "loss": 0.0004, + "step": 1409 + }, + { + "epoch": 0.09086808017013598, + "grad_norm": 0.007409890122830501, + "learning_rate": 9.869401383801938e-06, + "loss": 0.0, + "step": 1410 + }, + { + "epoch": 0.09093252561706515, + "grad_norm": 0.018156252708728265, + "learning_rate": 9.870366321274855e-06, + "loss": 0.0, + "step": 1411 + }, + { + "epoch": 0.09099697106399433, + "grad_norm": 0.0006506659029600226, + "learning_rate": 9.871330575122158e-06, + "loss": 0.0, + "step": 1412 + }, + { + "epoch": 0.0910614165109235, + "grad_norm": 0.0008328508705257589, + "learning_rate": 9.872294146311813e-06, + "loss": 0.0, + "step": 1413 + }, + { + "epoch": 0.09112586195785267, + "grad_norm": 0.0007773650111156604, + "learning_rate": 9.87325703580973e-06, + "loss": 0.0, + "step": 1414 + }, + { + "epoch": 0.09119030740478186, + "grad_norm": 0.0012217894950347168, + "learning_rate": 9.874219244579777e-06, + "loss": 0.0, + "step": 1415 + }, + { + "epoch": 0.09125475285171103, + "grad_norm": 0.0480236903713464, + "learning_rate": 9.875180773583766e-06, + "loss": 0.0001, + "step": 1416 + }, + { + "epoch": 0.0913191982986402, + "grad_norm": 0.001227121457690193, + "learning_rate": 9.876141623781484e-06, + "loss": 0.0, + "step": 1417 + }, + { + "epoch": 0.09138364374556937, + "grad_norm": 0.004099157581286233, + "learning_rate": 9.877101796130682e-06, + "loss": 0.0, + "step": 1418 + }, + { + "epoch": 0.09144808919249856, + "grad_norm": 0.001988160165573771, + "learning_rate": 9.878061291587085e-06, + "loss": 0.0, + "step": 1419 + }, + { + "epoch": 0.09151253463942773, + "grad_norm": 0.00018577178633780048, + "learning_rate": 9.879020111104401e-06, + "loss": 0.0, + "step": 1420 + }, + { + "epoch": 0.0915769800863569, + "grad_norm": 0.22095755874515524, + "learning_rate": 9.87997825563432e-06, + "loss": 0.0003, + "step": 1421 + }, + { + "epoch": 0.09164142553328607, + "grad_norm": 2.3769321613123924, + "learning_rate": 9.880935726126525e-06, + "loss": 0.015, + "step": 1422 + }, + { + "epoch": 0.09170587098021525, + "grad_norm": 0.018600959082779225, + "learning_rate": 9.8818925235287e-06, + "loss": 0.0001, + "step": 1423 + }, + { + "epoch": 0.09177031642714442, + "grad_norm": 0.08022021561964109, + "learning_rate": 9.882848648786525e-06, + "loss": 0.0001, + "step": 1424 + }, + { + "epoch": 0.09183476187407359, + "grad_norm": 0.20565971377159456, + "learning_rate": 9.883804102843692e-06, + "loss": 0.0002, + "step": 1425 + }, + { + "epoch": 0.09189920732100278, + "grad_norm": 0.9405621745271036, + "learning_rate": 9.884758886641908e-06, + "loss": 0.0044, + "step": 1426 + }, + { + "epoch": 0.09196365276793195, + "grad_norm": 0.0004957289890303268, + "learning_rate": 9.885713001120897e-06, + "loss": 0.0, + "step": 1427 + }, + { + "epoch": 0.09202809821486112, + "grad_norm": 0.0017034345264256923, + "learning_rate": 9.886666447218409e-06, + "loss": 0.0, + "step": 1428 + }, + { + "epoch": 0.09209254366179029, + "grad_norm": 0.0339874361429872, + "learning_rate": 9.887619225870228e-06, + "loss": 0.0001, + "step": 1429 + }, + { + "epoch": 0.09215698910871947, + "grad_norm": 0.3589929540229609, + "learning_rate": 9.888571338010167e-06, + "loss": 0.0032, + "step": 1430 + }, + { + "epoch": 0.09222143455564864, + "grad_norm": 0.005050188803306223, + "learning_rate": 9.889522784570087e-06, + "loss": 0.0001, + "step": 1431 + }, + { + "epoch": 0.09228588000257781, + "grad_norm": 0.15730377931145592, + "learning_rate": 9.890473566479896e-06, + "loss": 0.0005, + "step": 1432 + }, + { + "epoch": 0.092350325449507, + "grad_norm": 0.0005229967634722769, + "learning_rate": 9.891423684667549e-06, + "loss": 0.0, + "step": 1433 + }, + { + "epoch": 0.09241477089643617, + "grad_norm": 0.005672397433659443, + "learning_rate": 9.892373140059061e-06, + "loss": 0.0001, + "step": 1434 + }, + { + "epoch": 0.09247921634336534, + "grad_norm": 0.0002519493904518232, + "learning_rate": 9.893321933578516e-06, + "loss": 0.0, + "step": 1435 + }, + { + "epoch": 0.09254366179029451, + "grad_norm": 0.027063703592058647, + "learning_rate": 9.894270066148061e-06, + "loss": 0.0001, + "step": 1436 + }, + { + "epoch": 0.0926081072372237, + "grad_norm": 0.026686603489626357, + "learning_rate": 9.895217538687919e-06, + "loss": 0.0003, + "step": 1437 + }, + { + "epoch": 0.09267255268415286, + "grad_norm": 0.000908574260126288, + "learning_rate": 9.89616435211639e-06, + "loss": 0.0, + "step": 1438 + }, + { + "epoch": 0.09273699813108204, + "grad_norm": 0.02027503235190547, + "learning_rate": 9.897110507349863e-06, + "loss": 0.0, + "step": 1439 + }, + { + "epoch": 0.09280144357801122, + "grad_norm": 0.019177680948524713, + "learning_rate": 9.898056005302818e-06, + "loss": 0.0001, + "step": 1440 + }, + { + "epoch": 0.09286588902494039, + "grad_norm": 0.010158676693733085, + "learning_rate": 9.899000846887821e-06, + "loss": 0.0, + "step": 1441 + }, + { + "epoch": 0.09293033447186956, + "grad_norm": 0.004541012965222062, + "learning_rate": 9.899945033015553e-06, + "loss": 0.0, + "step": 1442 + }, + { + "epoch": 0.09299477991879873, + "grad_norm": 0.002712991317341899, + "learning_rate": 9.900888564594789e-06, + "loss": 0.0, + "step": 1443 + }, + { + "epoch": 0.09305922536572792, + "grad_norm": 0.020957469761172257, + "learning_rate": 9.901831442532423e-06, + "loss": 0.0001, + "step": 1444 + }, + { + "epoch": 0.09312367081265709, + "grad_norm": 0.00033094373180639466, + "learning_rate": 9.902773667733459e-06, + "loss": 0.0, + "step": 1445 + }, + { + "epoch": 0.09318811625958626, + "grad_norm": 0.3864754190635272, + "learning_rate": 9.90371524110103e-06, + "loss": 0.0007, + "step": 1446 + }, + { + "epoch": 0.09325256170651544, + "grad_norm": 0.01617892127793432, + "learning_rate": 9.904656163536388e-06, + "loss": 0.0001, + "step": 1447 + }, + { + "epoch": 0.09331700715344461, + "grad_norm": 0.012048904136151128, + "learning_rate": 9.905596435938926e-06, + "loss": 0.0, + "step": 1448 + }, + { + "epoch": 0.09338145260037378, + "grad_norm": 0.0011809616144378202, + "learning_rate": 9.906536059206166e-06, + "loss": 0.0, + "step": 1449 + }, + { + "epoch": 0.09344589804730295, + "grad_norm": 0.0017681243992767163, + "learning_rate": 9.907475034233778e-06, + "loss": 0.0, + "step": 1450 + }, + { + "epoch": 0.09351034349423214, + "grad_norm": 0.0017163327437415454, + "learning_rate": 9.908413361915576e-06, + "loss": 0.0, + "step": 1451 + }, + { + "epoch": 0.09357478894116131, + "grad_norm": 0.01384325150685238, + "learning_rate": 9.90935104314353e-06, + "loss": 0.0, + "step": 1452 + }, + { + "epoch": 0.09363923438809048, + "grad_norm": 0.0867655132908843, + "learning_rate": 9.910288078807764e-06, + "loss": 0.0001, + "step": 1453 + }, + { + "epoch": 0.09370367983501966, + "grad_norm": 0.7641827369741804, + "learning_rate": 9.911224469796568e-06, + "loss": 0.0075, + "step": 1454 + }, + { + "epoch": 0.09376812528194883, + "grad_norm": 0.060422641060364934, + "learning_rate": 9.912160216996399e-06, + "loss": 0.0002, + "step": 1455 + }, + { + "epoch": 0.093832570728878, + "grad_norm": 2.010386913125508, + "learning_rate": 9.913095321291884e-06, + "loss": 0.0106, + "step": 1456 + }, + { + "epoch": 0.09389701617580717, + "grad_norm": 0.006303394784412231, + "learning_rate": 9.914029783565832e-06, + "loss": 0.0, + "step": 1457 + }, + { + "epoch": 0.09396146162273636, + "grad_norm": 0.009684100579074902, + "learning_rate": 9.914963604699233e-06, + "loss": 0.0, + "step": 1458 + }, + { + "epoch": 0.09402590706966553, + "grad_norm": 0.008250804826870545, + "learning_rate": 9.915896785571264e-06, + "loss": 0.0, + "step": 1459 + }, + { + "epoch": 0.0940903525165947, + "grad_norm": 0.0009466256485816172, + "learning_rate": 9.9168293270593e-06, + "loss": 0.0, + "step": 1460 + }, + { + "epoch": 0.09415479796352387, + "grad_norm": 0.048614013439532935, + "learning_rate": 9.917761230038905e-06, + "loss": 0.0017, + "step": 1461 + }, + { + "epoch": 0.09421924341045305, + "grad_norm": 0.01888382417348389, + "learning_rate": 9.918692495383852e-06, + "loss": 0.0, + "step": 1462 + }, + { + "epoch": 0.09428368885738223, + "grad_norm": 0.05641691448848606, + "learning_rate": 9.91962312396612e-06, + "loss": 0.0002, + "step": 1463 + }, + { + "epoch": 0.0943481343043114, + "grad_norm": 0.42407625379866715, + "learning_rate": 9.920553116655903e-06, + "loss": 0.0027, + "step": 1464 + }, + { + "epoch": 0.09441257975124058, + "grad_norm": 0.010436752034464663, + "learning_rate": 9.921482474321605e-06, + "loss": 0.0, + "step": 1465 + }, + { + "epoch": 0.09447702519816975, + "grad_norm": 0.8174559493256072, + "learning_rate": 9.922411197829863e-06, + "loss": 0.0033, + "step": 1466 + }, + { + "epoch": 0.09454147064509892, + "grad_norm": 0.0583152390449536, + "learning_rate": 9.923339288045531e-06, + "loss": 0.0, + "step": 1467 + }, + { + "epoch": 0.09460591609202809, + "grad_norm": 0.306076587996211, + "learning_rate": 9.924266745831703e-06, + "loss": 0.0071, + "step": 1468 + }, + { + "epoch": 0.09467036153895728, + "grad_norm": 0.23739539088773876, + "learning_rate": 9.925193572049705e-06, + "loss": 0.0005, + "step": 1469 + }, + { + "epoch": 0.09473480698588645, + "grad_norm": 0.250875088850879, + "learning_rate": 9.926119767559105e-06, + "loss": 0.0006, + "step": 1470 + }, + { + "epoch": 0.09479925243281562, + "grad_norm": 0.042556311330388334, + "learning_rate": 9.927045333217721e-06, + "loss": 0.0, + "step": 1471 + }, + { + "epoch": 0.0948636978797448, + "grad_norm": 0.03793214656428974, + "learning_rate": 9.927970269881614e-06, + "loss": 0.0, + "step": 1472 + }, + { + "epoch": 0.09492814332667397, + "grad_norm": 0.28249577284916433, + "learning_rate": 9.928894578405113e-06, + "loss": 0.0012, + "step": 1473 + }, + { + "epoch": 0.09499258877360314, + "grad_norm": 0.17475368245792194, + "learning_rate": 9.929818259640796e-06, + "loss": 0.0003, + "step": 1474 + }, + { + "epoch": 0.09505703422053231, + "grad_norm": 0.1445167057348758, + "learning_rate": 9.930741314439511e-06, + "loss": 0.0002, + "step": 1475 + }, + { + "epoch": 0.0951214796674615, + "grad_norm": 0.0236630083799695, + "learning_rate": 9.931663743650381e-06, + "loss": 0.0, + "step": 1476 + }, + { + "epoch": 0.09518592511439067, + "grad_norm": 0.029098778789947974, + "learning_rate": 9.932585548120796e-06, + "loss": 0.0001, + "step": 1477 + }, + { + "epoch": 0.09525037056131984, + "grad_norm": 0.009179967320519636, + "learning_rate": 9.933506728696428e-06, + "loss": 0.0001, + "step": 1478 + }, + { + "epoch": 0.09531481600824902, + "grad_norm": 0.001304011320141148, + "learning_rate": 9.934427286221235e-06, + "loss": 0.0, + "step": 1479 + }, + { + "epoch": 0.0953792614551782, + "grad_norm": 0.037167467586508234, + "learning_rate": 9.93534722153746e-06, + "loss": 0.0001, + "step": 1480 + }, + { + "epoch": 0.09544370690210736, + "grad_norm": 0.004253996176845752, + "learning_rate": 9.936266535485644e-06, + "loss": 0.0, + "step": 1481 + }, + { + "epoch": 0.09550815234903653, + "grad_norm": 0.012284348723612233, + "learning_rate": 9.937185228904622e-06, + "loss": 0.0, + "step": 1482 + }, + { + "epoch": 0.09557259779596572, + "grad_norm": 0.23995914820594377, + "learning_rate": 9.938103302631535e-06, + "loss": 0.0005, + "step": 1483 + }, + { + "epoch": 0.09563704324289489, + "grad_norm": 0.4843605408510047, + "learning_rate": 9.939020757501826e-06, + "loss": 0.0029, + "step": 1484 + }, + { + "epoch": 0.09570148868982406, + "grad_norm": 0.0006049933104365288, + "learning_rate": 9.939937594349256e-06, + "loss": 0.0, + "step": 1485 + }, + { + "epoch": 0.09576593413675324, + "grad_norm": 0.007013362420659339, + "learning_rate": 9.940853814005894e-06, + "loss": 0.0001, + "step": 1486 + }, + { + "epoch": 0.09583037958368242, + "grad_norm": 0.057614378941506235, + "learning_rate": 9.941769417302139e-06, + "loss": 0.0001, + "step": 1487 + }, + { + "epoch": 0.09589482503061159, + "grad_norm": 0.016872804892730364, + "learning_rate": 9.942684405066712e-06, + "loss": 0.0002, + "step": 1488 + }, + { + "epoch": 0.09595927047754076, + "grad_norm": 0.02935959737771146, + "learning_rate": 9.94359877812666e-06, + "loss": 0.0002, + "step": 1489 + }, + { + "epoch": 0.09602371592446994, + "grad_norm": 0.002290192727872697, + "learning_rate": 9.944512537307367e-06, + "loss": 0.0, + "step": 1490 + }, + { + "epoch": 0.09608816137139911, + "grad_norm": 0.022548370604501426, + "learning_rate": 9.945425683432555e-06, + "loss": 0.0001, + "step": 1491 + }, + { + "epoch": 0.09615260681832828, + "grad_norm": 0.007626670351049349, + "learning_rate": 9.946338217324292e-06, + "loss": 0.0, + "step": 1492 + }, + { + "epoch": 0.09621705226525747, + "grad_norm": 0.006175822573090903, + "learning_rate": 9.94725013980299e-06, + "loss": 0.0001, + "step": 1493 + }, + { + "epoch": 0.09628149771218664, + "grad_norm": 0.038742508647196895, + "learning_rate": 9.948161451687416e-06, + "loss": 0.0004, + "step": 1494 + }, + { + "epoch": 0.09634594315911581, + "grad_norm": 0.0005905164246859292, + "learning_rate": 9.949072153794689e-06, + "loss": 0.0, + "step": 1495 + }, + { + "epoch": 0.09641038860604498, + "grad_norm": 0.131122069226978, + "learning_rate": 9.949982246940294e-06, + "loss": 0.0005, + "step": 1496 + }, + { + "epoch": 0.09647483405297416, + "grad_norm": 0.0005022347779479968, + "learning_rate": 9.950891731938082e-06, + "loss": 0.0, + "step": 1497 + }, + { + "epoch": 0.09653927949990333, + "grad_norm": 0.0001437653529467265, + "learning_rate": 9.951800609600263e-06, + "loss": 0.0, + "step": 1498 + }, + { + "epoch": 0.0966037249468325, + "grad_norm": 0.0010122622831039668, + "learning_rate": 9.952708880737434e-06, + "loss": 0.0, + "step": 1499 + }, + { + "epoch": 0.09666817039376169, + "grad_norm": 0.33948919282434475, + "learning_rate": 9.953616546158563e-06, + "loss": 0.0024, + "step": 1500 + }, + { + "epoch": 0.09673261584069086, + "grad_norm": 0.003581175820871328, + "learning_rate": 9.954523606671003e-06, + "loss": 0.0, + "step": 1501 + }, + { + "epoch": 0.09679706128762003, + "grad_norm": 0.00042302368364188466, + "learning_rate": 9.955430063080492e-06, + "loss": 0.0, + "step": 1502 + }, + { + "epoch": 0.0968615067345492, + "grad_norm": 0.0017871230098158303, + "learning_rate": 9.956335916191164e-06, + "loss": 0.0, + "step": 1503 + }, + { + "epoch": 0.09692595218147838, + "grad_norm": 0.0777799107536665, + "learning_rate": 9.95724116680554e-06, + "loss": 0.0005, + "step": 1504 + }, + { + "epoch": 0.09699039762840755, + "grad_norm": 0.0012527611164778536, + "learning_rate": 9.958145815724548e-06, + "loss": 0.0, + "step": 1505 + }, + { + "epoch": 0.09705484307533672, + "grad_norm": 0.24797657097956194, + "learning_rate": 9.95904986374752e-06, + "loss": 0.0009, + "step": 1506 + }, + { + "epoch": 0.0971192885222659, + "grad_norm": 0.0014533547584727054, + "learning_rate": 9.95995331167219e-06, + "loss": 0.0, + "step": 1507 + }, + { + "epoch": 0.09718373396919508, + "grad_norm": 0.0015967057251301884, + "learning_rate": 9.96085616029471e-06, + "loss": 0.0, + "step": 1508 + }, + { + "epoch": 0.09724817941612425, + "grad_norm": 0.004208869780398821, + "learning_rate": 9.961758410409647e-06, + "loss": 0.0, + "step": 1509 + }, + { + "epoch": 0.09731262486305342, + "grad_norm": 0.01696486413837203, + "learning_rate": 9.96266006280999e-06, + "loss": 0.0, + "step": 1510 + }, + { + "epoch": 0.0973770703099826, + "grad_norm": 0.0018957704558056011, + "learning_rate": 9.963561118287149e-06, + "loss": 0.0, + "step": 1511 + }, + { + "epoch": 0.09744151575691178, + "grad_norm": 0.0040607474789808124, + "learning_rate": 9.96446157763097e-06, + "loss": 0.0, + "step": 1512 + }, + { + "epoch": 0.09750596120384095, + "grad_norm": 0.0011904989553570007, + "learning_rate": 9.965361441629727e-06, + "loss": 0.0, + "step": 1513 + }, + { + "epoch": 0.09757040665077012, + "grad_norm": 0.01142907492530569, + "learning_rate": 9.966260711070133e-06, + "loss": 0.0001, + "step": 1514 + }, + { + "epoch": 0.0976348520976993, + "grad_norm": 0.0046347557260846255, + "learning_rate": 9.967159386737342e-06, + "loss": 0.0, + "step": 1515 + }, + { + "epoch": 0.09769929754462847, + "grad_norm": 0.0010111396870745295, + "learning_rate": 9.968057469414957e-06, + "loss": 0.0, + "step": 1516 + }, + { + "epoch": 0.09776374299155764, + "grad_norm": 0.0036916821478951735, + "learning_rate": 9.968954959885024e-06, + "loss": 0.0, + "step": 1517 + }, + { + "epoch": 0.09782818843848683, + "grad_norm": 0.0547688530482111, + "learning_rate": 9.969851858928051e-06, + "loss": 0.0003, + "step": 1518 + }, + { + "epoch": 0.097892633885416, + "grad_norm": 0.00326234239409605, + "learning_rate": 9.970748167323e-06, + "loss": 0.0, + "step": 1519 + }, + { + "epoch": 0.09795707933234517, + "grad_norm": 0.004320389485947813, + "learning_rate": 9.971643885847294e-06, + "loss": 0.0, + "step": 1520 + }, + { + "epoch": 0.09802152477927434, + "grad_norm": 0.002041762479109093, + "learning_rate": 9.972539015276823e-06, + "loss": 0.0, + "step": 1521 + }, + { + "epoch": 0.09808597022620352, + "grad_norm": 0.06881354887199352, + "learning_rate": 9.97343355638595e-06, + "loss": 0.0001, + "step": 1522 + }, + { + "epoch": 0.0981504156731327, + "grad_norm": 0.06540829750124166, + "learning_rate": 9.974327509947506e-06, + "loss": 0.0001, + "step": 1523 + }, + { + "epoch": 0.09821486112006186, + "grad_norm": 0.00757338516581833, + "learning_rate": 9.975220876732808e-06, + "loss": 0.0, + "step": 1524 + }, + { + "epoch": 0.09827930656699105, + "grad_norm": 0.008557299181676596, + "learning_rate": 9.97611365751165e-06, + "loss": 0.0001, + "step": 1525 + }, + { + "epoch": 0.09834375201392022, + "grad_norm": 0.0004520848970033896, + "learning_rate": 9.977005853052309e-06, + "loss": 0.0, + "step": 1526 + }, + { + "epoch": 0.09840819746084939, + "grad_norm": 0.0021838000738325022, + "learning_rate": 9.977897464121563e-06, + "loss": 0.0, + "step": 1527 + }, + { + "epoch": 0.09847264290777856, + "grad_norm": 0.003258614152280576, + "learning_rate": 9.978788491484673e-06, + "loss": 0.0, + "step": 1528 + }, + { + "epoch": 0.09853708835470774, + "grad_norm": 0.0015698688676021998, + "learning_rate": 9.979678935905403e-06, + "loss": 0.0, + "step": 1529 + }, + { + "epoch": 0.09860153380163691, + "grad_norm": 0.01185302928350965, + "learning_rate": 9.98056879814602e-06, + "loss": 0.0001, + "step": 1530 + }, + { + "epoch": 0.09866597924856609, + "grad_norm": 0.001710585233948929, + "learning_rate": 9.981458078967295e-06, + "loss": 0.0, + "step": 1531 + }, + { + "epoch": 0.09873042469549527, + "grad_norm": 0.0593000578214577, + "learning_rate": 9.982346779128506e-06, + "loss": 0.0001, + "step": 1532 + }, + { + "epoch": 0.09879487014242444, + "grad_norm": 0.0037362333247285434, + "learning_rate": 9.983234899387453e-06, + "loss": 0.0, + "step": 1533 + }, + { + "epoch": 0.09885931558935361, + "grad_norm": 0.0004543681892485397, + "learning_rate": 9.984122440500443e-06, + "loss": 0.0, + "step": 1534 + }, + { + "epoch": 0.09892376103628278, + "grad_norm": 9.713128271191101e-05, + "learning_rate": 9.985009403222313e-06, + "loss": 0.0, + "step": 1535 + }, + { + "epoch": 0.09898820648321197, + "grad_norm": 0.038945052790278024, + "learning_rate": 9.98589578830642e-06, + "loss": 0.0002, + "step": 1536 + }, + { + "epoch": 0.09905265193014114, + "grad_norm": 0.00021423168057826362, + "learning_rate": 9.98678159650465e-06, + "loss": 0.0, + "step": 1537 + }, + { + "epoch": 0.0991170973770703, + "grad_norm": 0.017880657638670156, + "learning_rate": 9.987666828567429e-06, + "loss": 0.0001, + "step": 1538 + }, + { + "epoch": 0.09918154282399949, + "grad_norm": 0.01319963089075672, + "learning_rate": 9.98855148524371e-06, + "loss": 0.0, + "step": 1539 + }, + { + "epoch": 0.09924598827092866, + "grad_norm": 0.0003586439757768937, + "learning_rate": 9.989435567280993e-06, + "loss": 0.0, + "step": 1540 + }, + { + "epoch": 0.09931043371785783, + "grad_norm": 0.36412274922213167, + "learning_rate": 9.990319075425318e-06, + "loss": 0.0026, + "step": 1541 + }, + { + "epoch": 0.099374879164787, + "grad_norm": 0.24167075712069122, + "learning_rate": 9.991202010421277e-06, + "loss": 0.0021, + "step": 1542 + }, + { + "epoch": 0.09943932461171619, + "grad_norm": 0.008199802468034723, + "learning_rate": 9.992084373012014e-06, + "loss": 0.0001, + "step": 1543 + }, + { + "epoch": 0.09950377005864536, + "grad_norm": 0.08797115768979515, + "learning_rate": 9.992966163939223e-06, + "loss": 0.0006, + "step": 1544 + }, + { + "epoch": 0.09956821550557453, + "grad_norm": 0.00023174358708018033, + "learning_rate": 9.993847383943163e-06, + "loss": 0.0, + "step": 1545 + }, + { + "epoch": 0.0996326609525037, + "grad_norm": 0.11313614101504088, + "learning_rate": 9.994728033762655e-06, + "loss": 0.0002, + "step": 1546 + }, + { + "epoch": 0.09969710639943288, + "grad_norm": 0.07457094638349225, + "learning_rate": 9.995608114135087e-06, + "loss": 0.0002, + "step": 1547 + }, + { + "epoch": 0.09976155184636205, + "grad_norm": 0.012721014483883464, + "learning_rate": 9.996487625796414e-06, + "loss": 0.0001, + "step": 1548 + }, + { + "epoch": 0.09982599729329122, + "grad_norm": 0.0012972554044241161, + "learning_rate": 9.997366569481168e-06, + "loss": 0.0, + "step": 1549 + }, + { + "epoch": 0.09989044274022041, + "grad_norm": 0.0043032076280890485, + "learning_rate": 9.998244945922459e-06, + "loss": 0.0, + "step": 1550 + }, + { + "epoch": 0.09995488818714958, + "grad_norm": 0.0019208322998490745, + "learning_rate": 9.999122755851977e-06, + "loss": 0.0, + "step": 1551 + }, + { + "epoch": 0.10001933363407875, + "grad_norm": 0.0025537528524414454, + "learning_rate": 1e-05, + "loss": 0.0, + "step": 1552 + }, + { + "epoch": 0.10008377908100792, + "grad_norm": 0.01819473357393409, + "learning_rate": 1e-05, + "loss": 0.0001, + "step": 1553 + }, + { + "epoch": 0.1001482245279371, + "grad_norm": 0.023582341253408183, + "learning_rate": 9.999283924095955e-06, + "loss": 0.0001, + "step": 1554 + }, + { + "epoch": 0.10021266997486628, + "grad_norm": 0.0006501829399390779, + "learning_rate": 9.99856784819191e-06, + "loss": 0.0, + "step": 1555 + }, + { + "epoch": 0.10027711542179545, + "grad_norm": 0.0045095002079804025, + "learning_rate": 9.997851772287864e-06, + "loss": 0.0, + "step": 1556 + }, + { + "epoch": 0.10034156086872463, + "grad_norm": 0.0048714429017706305, + "learning_rate": 9.997135696383818e-06, + "loss": 0.0, + "step": 1557 + }, + { + "epoch": 0.1004060063156538, + "grad_norm": 0.06844209026375658, + "learning_rate": 9.99641962047977e-06, + "loss": 0.0002, + "step": 1558 + }, + { + "epoch": 0.10047045176258297, + "grad_norm": 0.011684224684139077, + "learning_rate": 9.995703544575725e-06, + "loss": 0.0, + "step": 1559 + }, + { + "epoch": 0.10053489720951214, + "grad_norm": 0.02074485054202999, + "learning_rate": 9.99498746867168e-06, + "loss": 0.0, + "step": 1560 + }, + { + "epoch": 0.10059934265644133, + "grad_norm": 0.0688557315716873, + "learning_rate": 9.994271392767634e-06, + "loss": 0.0002, + "step": 1561 + }, + { + "epoch": 0.1006637881033705, + "grad_norm": 0.04624153556322279, + "learning_rate": 9.993555316863588e-06, + "loss": 0.0001, + "step": 1562 + }, + { + "epoch": 0.10072823355029967, + "grad_norm": 0.008817848692303166, + "learning_rate": 9.992839240959542e-06, + "loss": 0.0, + "step": 1563 + }, + { + "epoch": 0.10079267899722885, + "grad_norm": 0.41477144476881117, + "learning_rate": 9.992123165055496e-06, + "loss": 0.0025, + "step": 1564 + }, + { + "epoch": 0.10085712444415802, + "grad_norm": 0.0028627288818668436, + "learning_rate": 9.99140708915145e-06, + "loss": 0.0, + "step": 1565 + }, + { + "epoch": 0.10092156989108719, + "grad_norm": 0.8068578764666766, + "learning_rate": 9.990691013247405e-06, + "loss": 0.0037, + "step": 1566 + }, + { + "epoch": 0.10098601533801636, + "grad_norm": 0.00030946291876683197, + "learning_rate": 9.98997493734336e-06, + "loss": 0.0, + "step": 1567 + }, + { + "epoch": 0.10105046078494555, + "grad_norm": 0.510590744176873, + "learning_rate": 9.989258861439314e-06, + "loss": 0.0015, + "step": 1568 + }, + { + "epoch": 0.10111490623187472, + "grad_norm": 0.005179627251681472, + "learning_rate": 9.988542785535268e-06, + "loss": 0.0, + "step": 1569 + }, + { + "epoch": 0.10117935167880389, + "grad_norm": 0.5898086335295064, + "learning_rate": 9.987826709631222e-06, + "loss": 0.0034, + "step": 1570 + }, + { + "epoch": 0.10124379712573307, + "grad_norm": 0.06419531237883218, + "learning_rate": 9.987110633727177e-06, + "loss": 0.0002, + "step": 1571 + }, + { + "epoch": 0.10130824257266224, + "grad_norm": 0.015524767153565688, + "learning_rate": 9.98639455782313e-06, + "loss": 0.0001, + "step": 1572 + }, + { + "epoch": 0.10137268801959141, + "grad_norm": 0.02811805669194153, + "learning_rate": 9.985678481919085e-06, + "loss": 0.0002, + "step": 1573 + }, + { + "epoch": 0.10143713346652058, + "grad_norm": 0.06079880538116993, + "learning_rate": 9.984962406015038e-06, + "loss": 0.0016, + "step": 1574 + }, + { + "epoch": 0.10150157891344977, + "grad_norm": 0.021396310851056387, + "learning_rate": 9.984246330110992e-06, + "loss": 0.0001, + "step": 1575 + }, + { + "epoch": 0.10156602436037894, + "grad_norm": 0.00487446275950087, + "learning_rate": 9.983530254206946e-06, + "loss": 0.0, + "step": 1576 + }, + { + "epoch": 0.10163046980730811, + "grad_norm": 0.001861853412206755, + "learning_rate": 9.9828141783029e-06, + "loss": 0.0, + "step": 1577 + }, + { + "epoch": 0.1016949152542373, + "grad_norm": 0.13226993615728455, + "learning_rate": 9.982098102398855e-06, + "loss": 0.0002, + "step": 1578 + }, + { + "epoch": 0.10175936070116647, + "grad_norm": 0.6347721275583404, + "learning_rate": 9.98138202649481e-06, + "loss": 0.0029, + "step": 1579 + }, + { + "epoch": 0.10182380614809564, + "grad_norm": 0.12557575131126444, + "learning_rate": 9.980665950590763e-06, + "loss": 0.0003, + "step": 1580 + }, + { + "epoch": 0.1018882515950248, + "grad_norm": 0.15937583048922008, + "learning_rate": 9.979949874686718e-06, + "loss": 0.0014, + "step": 1581 + }, + { + "epoch": 0.10195269704195399, + "grad_norm": 0.1975559703975177, + "learning_rate": 9.979233798782672e-06, + "loss": 0.0008, + "step": 1582 + }, + { + "epoch": 0.10201714248888316, + "grad_norm": 0.18503193927540512, + "learning_rate": 9.978517722878626e-06, + "loss": 0.0001, + "step": 1583 + }, + { + "epoch": 0.10208158793581233, + "grad_norm": 0.018288625469601905, + "learning_rate": 9.977801646974579e-06, + "loss": 0.0, + "step": 1584 + }, + { + "epoch": 0.10214603338274152, + "grad_norm": 0.06556653898587625, + "learning_rate": 9.977085571070533e-06, + "loss": 0.0016, + "step": 1585 + }, + { + "epoch": 0.10221047882967069, + "grad_norm": 0.12746553448387646, + "learning_rate": 9.976369495166488e-06, + "loss": 0.0, + "step": 1586 + }, + { + "epoch": 0.10227492427659986, + "grad_norm": 1.126843214046914, + "learning_rate": 9.975653419262442e-06, + "loss": 0.0058, + "step": 1587 + }, + { + "epoch": 0.10233936972352903, + "grad_norm": 0.0009175100005660954, + "learning_rate": 9.974937343358396e-06, + "loss": 0.0, + "step": 1588 + }, + { + "epoch": 0.10240381517045821, + "grad_norm": 0.025905761639659113, + "learning_rate": 9.974221267454352e-06, + "loss": 0.0, + "step": 1589 + }, + { + "epoch": 0.10246826061738738, + "grad_norm": 0.001112717209841313, + "learning_rate": 9.973505191550305e-06, + "loss": 0.0, + "step": 1590 + }, + { + "epoch": 0.10253270606431655, + "grad_norm": 0.007526676600103704, + "learning_rate": 9.972789115646259e-06, + "loss": 0.0, + "step": 1591 + }, + { + "epoch": 0.10259715151124572, + "grad_norm": 0.07737540406489828, + "learning_rate": 9.972073039742213e-06, + "loss": 0.0001, + "step": 1592 + }, + { + "epoch": 0.10266159695817491, + "grad_norm": 0.15650133541903946, + "learning_rate": 9.971356963838168e-06, + "loss": 0.0002, + "step": 1593 + }, + { + "epoch": 0.10272604240510408, + "grad_norm": 0.25322032122670035, + "learning_rate": 9.970640887934122e-06, + "loss": 0.0025, + "step": 1594 + }, + { + "epoch": 0.10279048785203325, + "grad_norm": 0.3750155962243933, + "learning_rate": 9.969924812030076e-06, + "loss": 0.0039, + "step": 1595 + }, + { + "epoch": 0.10285493329896243, + "grad_norm": 0.01156925484668394, + "learning_rate": 9.96920873612603e-06, + "loss": 0.0015, + "step": 1596 + }, + { + "epoch": 0.1029193787458916, + "grad_norm": 0.0010545124652149038, + "learning_rate": 9.968492660221985e-06, + "loss": 0.0, + "step": 1597 + }, + { + "epoch": 0.10298382419282077, + "grad_norm": 0.029305482633034747, + "learning_rate": 9.96777658431794e-06, + "loss": 0.0, + "step": 1598 + }, + { + "epoch": 0.10304826963974995, + "grad_norm": 0.0017090864848175315, + "learning_rate": 9.967060508413893e-06, + "loss": 0.0, + "step": 1599 + }, + { + "epoch": 0.10311271508667913, + "grad_norm": 0.05317997095655382, + "learning_rate": 9.966344432509846e-06, + "loss": 0.0001, + "step": 1600 + }, + { + "epoch": 0.1031771605336083, + "grad_norm": 0.22688678027798553, + "learning_rate": 9.9656283566058e-06, + "loss": 0.0026, + "step": 1601 + }, + { + "epoch": 0.10324160598053747, + "grad_norm": 0.30919653532738356, + "learning_rate": 9.964912280701755e-06, + "loss": 0.0004, + "step": 1602 + }, + { + "epoch": 0.10330605142746666, + "grad_norm": 0.014351292654744227, + "learning_rate": 9.964196204797709e-06, + "loss": 0.0, + "step": 1603 + }, + { + "epoch": 0.10337049687439583, + "grad_norm": 0.010907906211386922, + "learning_rate": 9.963480128893663e-06, + "loss": 0.0, + "step": 1604 + }, + { + "epoch": 0.103434942321325, + "grad_norm": 0.002965178042445306, + "learning_rate": 9.962764052989618e-06, + "loss": 0.0, + "step": 1605 + }, + { + "epoch": 0.10349938776825417, + "grad_norm": 0.003496357209876648, + "learning_rate": 9.962047977085572e-06, + "loss": 0.0, + "step": 1606 + }, + { + "epoch": 0.10356383321518335, + "grad_norm": 0.13440704324049624, + "learning_rate": 9.961331901181526e-06, + "loss": 0.0008, + "step": 1607 + }, + { + "epoch": 0.10362827866211252, + "grad_norm": 0.013390774687813115, + "learning_rate": 9.96061582527748e-06, + "loss": 0.0016, + "step": 1608 + }, + { + "epoch": 0.10369272410904169, + "grad_norm": 0.0007995069776809722, + "learning_rate": 9.959899749373435e-06, + "loss": 0.0, + "step": 1609 + }, + { + "epoch": 0.10375716955597088, + "grad_norm": 0.0002824253997624104, + "learning_rate": 9.959183673469387e-06, + "loss": 0.0, + "step": 1610 + }, + { + "epoch": 0.10382161500290005, + "grad_norm": 0.019146714592035718, + "learning_rate": 9.958467597565342e-06, + "loss": 0.0, + "step": 1611 + }, + { + "epoch": 0.10388606044982922, + "grad_norm": 0.3344483081280959, + "learning_rate": 9.957751521661298e-06, + "loss": 0.0014, + "step": 1612 + }, + { + "epoch": 0.10395050589675839, + "grad_norm": 0.007686443658249208, + "learning_rate": 9.957035445757252e-06, + "loss": 0.0016, + "step": 1613 + }, + { + "epoch": 0.10401495134368757, + "grad_norm": 0.004736729924681449, + "learning_rate": 9.956319369853206e-06, + "loss": 0.0001, + "step": 1614 + }, + { + "epoch": 0.10407939679061674, + "grad_norm": 0.2593175709583272, + "learning_rate": 9.95560329394916e-06, + "loss": 0.0045, + "step": 1615 + }, + { + "epoch": 0.10414384223754591, + "grad_norm": 1.196787952016555, + "learning_rate": 9.954887218045113e-06, + "loss": 0.0024, + "step": 1616 + }, + { + "epoch": 0.1042082876844751, + "grad_norm": 0.00788326272426621, + "learning_rate": 9.954171142141067e-06, + "loss": 0.0001, + "step": 1617 + }, + { + "epoch": 0.10427273313140427, + "grad_norm": 0.004238651176470014, + "learning_rate": 9.953455066237022e-06, + "loss": 0.0, + "step": 1618 + }, + { + "epoch": 0.10433717857833344, + "grad_norm": 0.006960126069193266, + "learning_rate": 9.952738990332976e-06, + "loss": 0.0001, + "step": 1619 + }, + { + "epoch": 0.10440162402526261, + "grad_norm": 0.3046891750387463, + "learning_rate": 9.95202291442893e-06, + "loss": 0.0005, + "step": 1620 + }, + { + "epoch": 0.1044660694721918, + "grad_norm": 0.19090559741277238, + "learning_rate": 9.951306838524885e-06, + "loss": 0.0003, + "step": 1621 + }, + { + "epoch": 0.10453051491912096, + "grad_norm": 0.12862948849226613, + "learning_rate": 9.950590762620839e-06, + "loss": 0.0009, + "step": 1622 + }, + { + "epoch": 0.10459496036605014, + "grad_norm": 0.000399543694424654, + "learning_rate": 9.949874686716793e-06, + "loss": 0.0, + "step": 1623 + }, + { + "epoch": 0.10465940581297932, + "grad_norm": 0.02006041245888554, + "learning_rate": 9.949158610812748e-06, + "loss": 0.0001, + "step": 1624 + }, + { + "epoch": 0.10472385125990849, + "grad_norm": 0.031908140114484264, + "learning_rate": 9.948442534908702e-06, + "loss": 0.0001, + "step": 1625 + }, + { + "epoch": 0.10478829670683766, + "grad_norm": 0.0009540502284184393, + "learning_rate": 9.947726459004654e-06, + "loss": 0.0, + "step": 1626 + }, + { + "epoch": 0.10485274215376683, + "grad_norm": 0.0008386468327602505, + "learning_rate": 9.947010383100609e-06, + "loss": 0.0, + "step": 1627 + }, + { + "epoch": 0.10491718760069602, + "grad_norm": 0.00263780282117747, + "learning_rate": 9.946294307196563e-06, + "loss": 0.0, + "step": 1628 + }, + { + "epoch": 0.10498163304762519, + "grad_norm": 0.006001984598497469, + "learning_rate": 9.945578231292517e-06, + "loss": 0.0001, + "step": 1629 + }, + { + "epoch": 0.10504607849455436, + "grad_norm": 0.00013517572587163083, + "learning_rate": 9.944862155388472e-06, + "loss": 0.0, + "step": 1630 + }, + { + "epoch": 0.10511052394148353, + "grad_norm": 0.03848265845278273, + "learning_rate": 9.944146079484426e-06, + "loss": 0.0001, + "step": 1631 + }, + { + "epoch": 0.10517496938841271, + "grad_norm": 0.022167288196300997, + "learning_rate": 9.94343000358038e-06, + "loss": 0.0001, + "step": 1632 + }, + { + "epoch": 0.10523941483534188, + "grad_norm": 0.002821403357723632, + "learning_rate": 9.942713927676335e-06, + "loss": 0.0, + "step": 1633 + }, + { + "epoch": 0.10530386028227105, + "grad_norm": 1.4247583351674593, + "learning_rate": 9.941997851772289e-06, + "loss": 0.0085, + "step": 1634 + }, + { + "epoch": 0.10536830572920024, + "grad_norm": 0.21941276183174943, + "learning_rate": 9.941281775868243e-06, + "loss": 0.0018, + "step": 1635 + }, + { + "epoch": 0.10543275117612941, + "grad_norm": 0.12458575422455666, + "learning_rate": 9.940565699964197e-06, + "loss": 0.0025, + "step": 1636 + }, + { + "epoch": 0.10549719662305858, + "grad_norm": 0.0027435063651743657, + "learning_rate": 9.939849624060152e-06, + "loss": 0.0, + "step": 1637 + }, + { + "epoch": 0.10556164206998775, + "grad_norm": 0.6987531166484844, + "learning_rate": 9.939133548156106e-06, + "loss": 0.0022, + "step": 1638 + }, + { + "epoch": 0.10562608751691693, + "grad_norm": 0.02080380749067857, + "learning_rate": 9.93841747225206e-06, + "loss": 0.0001, + "step": 1639 + }, + { + "epoch": 0.1056905329638461, + "grad_norm": 0.04035038836258241, + "learning_rate": 9.937701396348015e-06, + "loss": 0.0001, + "step": 1640 + }, + { + "epoch": 0.10575497841077527, + "grad_norm": 0.003756669122280707, + "learning_rate": 9.936985320443969e-06, + "loss": 0.0, + "step": 1641 + }, + { + "epoch": 0.10581942385770446, + "grad_norm": 0.009743641159397206, + "learning_rate": 9.936269244539922e-06, + "loss": 0.0001, + "step": 1642 + }, + { + "epoch": 0.10588386930463363, + "grad_norm": 0.06777027930780727, + "learning_rate": 9.935553168635876e-06, + "loss": 0.0004, + "step": 1643 + }, + { + "epoch": 0.1059483147515628, + "grad_norm": 0.005226804001337679, + "learning_rate": 9.93483709273183e-06, + "loss": 0.0, + "step": 1644 + }, + { + "epoch": 0.10601276019849197, + "grad_norm": 0.015324877669177634, + "learning_rate": 9.934121016827784e-06, + "loss": 0.0001, + "step": 1645 + }, + { + "epoch": 0.10607720564542115, + "grad_norm": 0.0344477850831631, + "learning_rate": 9.933404940923739e-06, + "loss": 0.0003, + "step": 1646 + }, + { + "epoch": 0.10614165109235033, + "grad_norm": 0.07882127139899352, + "learning_rate": 9.932688865019693e-06, + "loss": 0.0001, + "step": 1647 + }, + { + "epoch": 0.1062060965392795, + "grad_norm": 0.053166576465501644, + "learning_rate": 9.931972789115647e-06, + "loss": 0.0004, + "step": 1648 + }, + { + "epoch": 0.10627054198620868, + "grad_norm": 0.0001717063849772211, + "learning_rate": 9.931256713211602e-06, + "loss": 0.0, + "step": 1649 + }, + { + "epoch": 0.10633498743313785, + "grad_norm": 0.009019181123145955, + "learning_rate": 9.930540637307556e-06, + "loss": 0.0002, + "step": 1650 + }, + { + "epoch": 0.10639943288006702, + "grad_norm": 0.04910897784280347, + "learning_rate": 9.929824561403509e-06, + "loss": 0.0002, + "step": 1651 + }, + { + "epoch": 0.10646387832699619, + "grad_norm": 0.011014606916717328, + "learning_rate": 9.929108485499463e-06, + "loss": 0.0001, + "step": 1652 + }, + { + "epoch": 0.10652832377392538, + "grad_norm": 0.14360992510726237, + "learning_rate": 9.928392409595417e-06, + "loss": 0.0011, + "step": 1653 + }, + { + "epoch": 0.10659276922085455, + "grad_norm": 0.49932092507531767, + "learning_rate": 9.927676333691371e-06, + "loss": 0.0015, + "step": 1654 + }, + { + "epoch": 0.10665721466778372, + "grad_norm": 0.006795758715444581, + "learning_rate": 9.926960257787326e-06, + "loss": 0.0001, + "step": 1655 + }, + { + "epoch": 0.1067216601147129, + "grad_norm": 0.001934329060949019, + "learning_rate": 9.92624418188328e-06, + "loss": 0.0, + "step": 1656 + }, + { + "epoch": 0.10678610556164207, + "grad_norm": 0.09470259963083237, + "learning_rate": 9.925528105979234e-06, + "loss": 0.0003, + "step": 1657 + }, + { + "epoch": 0.10685055100857124, + "grad_norm": 0.02276080609148665, + "learning_rate": 9.924812030075189e-06, + "loss": 0.0002, + "step": 1658 + }, + { + "epoch": 0.10691499645550041, + "grad_norm": 0.004617006557617145, + "learning_rate": 9.924095954171143e-06, + "loss": 0.0, + "step": 1659 + }, + { + "epoch": 0.1069794419024296, + "grad_norm": 0.008264148745958573, + "learning_rate": 9.923379878267097e-06, + "loss": 0.0, + "step": 1660 + }, + { + "epoch": 0.10704388734935877, + "grad_norm": 0.42388602709453416, + "learning_rate": 9.922663802363052e-06, + "loss": 0.003, + "step": 1661 + }, + { + "epoch": 0.10710833279628794, + "grad_norm": 0.022281854083548366, + "learning_rate": 9.921947726459006e-06, + "loss": 0.0003, + "step": 1662 + }, + { + "epoch": 0.10717277824321712, + "grad_norm": 0.024515886161469536, + "learning_rate": 9.92123165055496e-06, + "loss": 0.0001, + "step": 1663 + }, + { + "epoch": 0.1072372236901463, + "grad_norm": 0.004597568827487063, + "learning_rate": 9.920515574650914e-06, + "loss": 0.0001, + "step": 1664 + }, + { + "epoch": 0.10730166913707546, + "grad_norm": 0.020596066669992474, + "learning_rate": 9.919799498746869e-06, + "loss": 0.0001, + "step": 1665 + }, + { + "epoch": 0.10736611458400463, + "grad_norm": 0.17759965674602354, + "learning_rate": 9.919083422842823e-06, + "loss": 0.0013, + "step": 1666 + }, + { + "epoch": 0.10743056003093382, + "grad_norm": 0.018403341874534827, + "learning_rate": 9.918367346938776e-06, + "loss": 0.0002, + "step": 1667 + }, + { + "epoch": 0.10749500547786299, + "grad_norm": 2.1243720529282752, + "learning_rate": 9.91765127103473e-06, + "loss": 0.011, + "step": 1668 + }, + { + "epoch": 0.10755945092479216, + "grad_norm": 0.009084960426039212, + "learning_rate": 9.916935195130684e-06, + "loss": 0.0001, + "step": 1669 + }, + { + "epoch": 0.10762389637172134, + "grad_norm": 0.0387725830078125, + "learning_rate": 9.916219119226639e-06, + "loss": 0.0003, + "step": 1670 + }, + { + "epoch": 0.10768834181865052, + "grad_norm": 0.22734220117519863, + "learning_rate": 9.915503043322593e-06, + "loss": 0.0006, + "step": 1671 + }, + { + "epoch": 0.10775278726557969, + "grad_norm": 0.839883847721016, + "learning_rate": 9.914786967418547e-06, + "loss": 0.0105, + "step": 1672 + }, + { + "epoch": 0.10781723271250886, + "grad_norm": 0.0015275560043602424, + "learning_rate": 9.914070891514501e-06, + "loss": 0.0, + "step": 1673 + }, + { + "epoch": 0.10788167815943804, + "grad_norm": 0.2546175456952681, + "learning_rate": 9.913354815610456e-06, + "loss": 0.0021, + "step": 1674 + }, + { + "epoch": 0.10794612360636721, + "grad_norm": 0.27017822568966937, + "learning_rate": 9.91263873970641e-06, + "loss": 0.0007, + "step": 1675 + }, + { + "epoch": 0.10801056905329638, + "grad_norm": 0.020131666449685264, + "learning_rate": 9.911922663802364e-06, + "loss": 0.0001, + "step": 1676 + }, + { + "epoch": 0.10807501450022555, + "grad_norm": 0.007008979154739355, + "learning_rate": 9.911206587898317e-06, + "loss": 0.0, + "step": 1677 + }, + { + "epoch": 0.10813945994715474, + "grad_norm": 0.31403968120823095, + "learning_rate": 9.910490511994271e-06, + "loss": 0.0008, + "step": 1678 + }, + { + "epoch": 0.10820390539408391, + "grad_norm": 0.08434008796606712, + "learning_rate": 9.909774436090226e-06, + "loss": 0.0001, + "step": 1679 + }, + { + "epoch": 0.10826835084101308, + "grad_norm": 0.21085719946650242, + "learning_rate": 9.90905836018618e-06, + "loss": 0.0007, + "step": 1680 + }, + { + "epoch": 0.10833279628794226, + "grad_norm": 0.06126331219652856, + "learning_rate": 9.908342284282134e-06, + "loss": 0.0001, + "step": 1681 + }, + { + "epoch": 0.10839724173487143, + "grad_norm": 0.02127553894163218, + "learning_rate": 9.90762620837809e-06, + "loss": 0.0, + "step": 1682 + }, + { + "epoch": 0.1084616871818006, + "grad_norm": 0.0020103520188776516, + "learning_rate": 9.906910132474043e-06, + "loss": 0.0, + "step": 1683 + }, + { + "epoch": 0.10852613262872977, + "grad_norm": 0.1730377089352632, + "learning_rate": 9.906194056569997e-06, + "loss": 0.0032, + "step": 1684 + }, + { + "epoch": 0.10859057807565896, + "grad_norm": 0.40038176736105546, + "learning_rate": 9.905477980665951e-06, + "loss": 0.0048, + "step": 1685 + }, + { + "epoch": 0.10865502352258813, + "grad_norm": 0.0349529717470364, + "learning_rate": 9.904761904761906e-06, + "loss": 0.0001, + "step": 1686 + }, + { + "epoch": 0.1087194689695173, + "grad_norm": 0.12564037620330987, + "learning_rate": 9.90404582885786e-06, + "loss": 0.0002, + "step": 1687 + }, + { + "epoch": 0.10878391441644648, + "grad_norm": 0.02573585559729339, + "learning_rate": 9.903329752953814e-06, + "loss": 0.0001, + "step": 1688 + }, + { + "epoch": 0.10884835986337565, + "grad_norm": 0.0182613506637033, + "learning_rate": 9.902613677049769e-06, + "loss": 0.0001, + "step": 1689 + }, + { + "epoch": 0.10891280531030482, + "grad_norm": 0.0033500290164589796, + "learning_rate": 9.901897601145723e-06, + "loss": 0.0, + "step": 1690 + }, + { + "epoch": 0.108977250757234, + "grad_norm": 0.22283454081114984, + "learning_rate": 9.901181525241677e-06, + "loss": 0.0009, + "step": 1691 + }, + { + "epoch": 0.10904169620416318, + "grad_norm": 0.0013649688327389462, + "learning_rate": 9.900465449337631e-06, + "loss": 0.0, + "step": 1692 + }, + { + "epoch": 0.10910614165109235, + "grad_norm": 0.0007646449219787592, + "learning_rate": 9.899749373433584e-06, + "loss": 0.0, + "step": 1693 + }, + { + "epoch": 0.10917058709802152, + "grad_norm": 0.009881301935125679, + "learning_rate": 9.899033297529538e-06, + "loss": 0.0001, + "step": 1694 + }, + { + "epoch": 0.1092350325449507, + "grad_norm": 0.22115118406281953, + "learning_rate": 9.898317221625493e-06, + "loss": 0.001, + "step": 1695 + }, + { + "epoch": 0.10929947799187988, + "grad_norm": 0.015784157099274273, + "learning_rate": 9.897601145721447e-06, + "loss": 0.0001, + "step": 1696 + }, + { + "epoch": 0.10936392343880905, + "grad_norm": 0.04523709863820392, + "learning_rate": 9.896885069817401e-06, + "loss": 0.0001, + "step": 1697 + }, + { + "epoch": 0.10942836888573822, + "grad_norm": 0.028951350795599847, + "learning_rate": 9.896168993913355e-06, + "loss": 0.0, + "step": 1698 + }, + { + "epoch": 0.1094928143326674, + "grad_norm": 0.00021010130849105123, + "learning_rate": 9.89545291800931e-06, + "loss": 0.0, + "step": 1699 + }, + { + "epoch": 0.10955725977959657, + "grad_norm": 0.01118543396035685, + "learning_rate": 9.894736842105264e-06, + "loss": 0.0001, + "step": 1700 + }, + { + "epoch": 0.10962170522652574, + "grad_norm": 0.00016900792184204182, + "learning_rate": 9.894020766201218e-06, + "loss": 0.0, + "step": 1701 + }, + { + "epoch": 0.10968615067345493, + "grad_norm": 0.00286496145638672, + "learning_rate": 9.893304690297173e-06, + "loss": 0.0, + "step": 1702 + }, + { + "epoch": 0.1097505961203841, + "grad_norm": 0.00041776218172744057, + "learning_rate": 9.892588614393125e-06, + "loss": 0.0, + "step": 1703 + }, + { + "epoch": 0.10981504156731327, + "grad_norm": 0.008368331222943564, + "learning_rate": 9.89187253848908e-06, + "loss": 0.0001, + "step": 1704 + }, + { + "epoch": 0.10987948701424244, + "grad_norm": 0.041839912630408586, + "learning_rate": 9.891156462585036e-06, + "loss": 0.0002, + "step": 1705 + }, + { + "epoch": 0.10994393246117162, + "grad_norm": 0.024497527264033654, + "learning_rate": 9.89044038668099e-06, + "loss": 0.0001, + "step": 1706 + }, + { + "epoch": 0.1100083779081008, + "grad_norm": 0.023590454305983673, + "learning_rate": 9.889724310776944e-06, + "loss": 0.0001, + "step": 1707 + }, + { + "epoch": 0.11007282335502996, + "grad_norm": 0.012773182132556847, + "learning_rate": 9.889008234872898e-06, + "loss": 0.0, + "step": 1708 + }, + { + "epoch": 0.11013726880195915, + "grad_norm": 0.004021249149665355, + "learning_rate": 9.888292158968851e-06, + "loss": 0.0, + "step": 1709 + }, + { + "epoch": 0.11020171424888832, + "grad_norm": 0.02710948476336297, + "learning_rate": 9.887576083064805e-06, + "loss": 0.0003, + "step": 1710 + }, + { + "epoch": 0.11026615969581749, + "grad_norm": 0.006102303146094259, + "learning_rate": 9.88686000716076e-06, + "loss": 0.0, + "step": 1711 + }, + { + "epoch": 0.11033060514274666, + "grad_norm": 0.07333378468360141, + "learning_rate": 9.886143931256714e-06, + "loss": 0.0004, + "step": 1712 + }, + { + "epoch": 0.11039505058967584, + "grad_norm": 0.03245848052794509, + "learning_rate": 9.885427855352668e-06, + "loss": 0.0, + "step": 1713 + }, + { + "epoch": 0.11045949603660501, + "grad_norm": 0.06893218377444407, + "learning_rate": 9.884711779448623e-06, + "loss": 0.0001, + "step": 1714 + }, + { + "epoch": 0.11052394148353419, + "grad_norm": 0.0598030877506716, + "learning_rate": 9.883995703544577e-06, + "loss": 0.0002, + "step": 1715 + }, + { + "epoch": 0.11058838693046336, + "grad_norm": 0.0013980899631203523, + "learning_rate": 9.883279627640531e-06, + "loss": 0.0, + "step": 1716 + }, + { + "epoch": 0.11065283237739254, + "grad_norm": 0.3934271253107235, + "learning_rate": 9.882563551736485e-06, + "loss": 0.0009, + "step": 1717 + }, + { + "epoch": 0.11071727782432171, + "grad_norm": 0.13356755708162912, + "learning_rate": 9.88184747583244e-06, + "loss": 0.0004, + "step": 1718 + }, + { + "epoch": 0.11078172327125088, + "grad_norm": 0.009251442707350574, + "learning_rate": 9.881131399928392e-06, + "loss": 0.0001, + "step": 1719 + }, + { + "epoch": 0.11084616871818007, + "grad_norm": 0.09078265506655032, + "learning_rate": 9.880415324024347e-06, + "loss": 0.0002, + "step": 1720 + }, + { + "epoch": 0.11091061416510924, + "grad_norm": 0.45446654387990904, + "learning_rate": 9.879699248120301e-06, + "loss": 0.0018, + "step": 1721 + }, + { + "epoch": 0.11097505961203841, + "grad_norm": 0.03796955507606215, + "learning_rate": 9.878983172216255e-06, + "loss": 0.0002, + "step": 1722 + }, + { + "epoch": 0.11103950505896758, + "grad_norm": 0.33390644346596626, + "learning_rate": 9.87826709631221e-06, + "loss": 0.0004, + "step": 1723 + }, + { + "epoch": 0.11110395050589676, + "grad_norm": 0.0017985687644987878, + "learning_rate": 9.877551020408164e-06, + "loss": 0.0, + "step": 1724 + }, + { + "epoch": 0.11116839595282593, + "grad_norm": 0.00012855284184480202, + "learning_rate": 9.876834944504118e-06, + "loss": 0.0, + "step": 1725 + }, + { + "epoch": 0.1112328413997551, + "grad_norm": 0.002766077239669994, + "learning_rate": 9.876118868600072e-06, + "loss": 0.0, + "step": 1726 + }, + { + "epoch": 0.11129728684668429, + "grad_norm": 0.007371952801886638, + "learning_rate": 9.875402792696027e-06, + "loss": 0.0, + "step": 1727 + }, + { + "epoch": 0.11136173229361346, + "grad_norm": 0.004698404644119558, + "learning_rate": 9.87468671679198e-06, + "loss": 0.0, + "step": 1728 + }, + { + "epoch": 0.11142617774054263, + "grad_norm": 0.003165305540191389, + "learning_rate": 9.873970640887935e-06, + "loss": 0.0, + "step": 1729 + }, + { + "epoch": 0.1114906231874718, + "grad_norm": 0.002938261901882307, + "learning_rate": 9.87325456498389e-06, + "loss": 0.0, + "step": 1730 + }, + { + "epoch": 0.11155506863440098, + "grad_norm": 0.004821551934840438, + "learning_rate": 9.872538489079844e-06, + "loss": 0.0001, + "step": 1731 + }, + { + "epoch": 0.11161951408133015, + "grad_norm": 4.844431380060839, + "learning_rate": 9.871822413175798e-06, + "loss": 0.0238, + "step": 1732 + }, + { + "epoch": 0.11168395952825932, + "grad_norm": 0.023224661434122617, + "learning_rate": 9.871106337271753e-06, + "loss": 0.0001, + "step": 1733 + }, + { + "epoch": 0.11174840497518851, + "grad_norm": 0.31371399393588917, + "learning_rate": 9.870390261367707e-06, + "loss": 0.0055, + "step": 1734 + }, + { + "epoch": 0.11181285042211768, + "grad_norm": 0.06859223690514762, + "learning_rate": 9.86967418546366e-06, + "loss": 0.0004, + "step": 1735 + }, + { + "epoch": 0.11187729586904685, + "grad_norm": 0.00032449914922864464, + "learning_rate": 9.868958109559614e-06, + "loss": 0.0, + "step": 1736 + }, + { + "epoch": 0.11194174131597602, + "grad_norm": 0.000419428479045938, + "learning_rate": 9.868242033655568e-06, + "loss": 0.0, + "step": 1737 + }, + { + "epoch": 0.1120061867629052, + "grad_norm": 0.00641962832500581, + "learning_rate": 9.867525957751522e-06, + "loss": 0.0, + "step": 1738 + }, + { + "epoch": 0.11207063220983438, + "grad_norm": 0.007188855193993879, + "learning_rate": 9.866809881847477e-06, + "loss": 0.0, + "step": 1739 + }, + { + "epoch": 0.11213507765676355, + "grad_norm": 0.5678866540904066, + "learning_rate": 9.866093805943431e-06, + "loss": 0.0022, + "step": 1740 + }, + { + "epoch": 0.11219952310369273, + "grad_norm": 0.05270199532192997, + "learning_rate": 9.865377730039385e-06, + "loss": 0.0002, + "step": 1741 + }, + { + "epoch": 0.1122639685506219, + "grad_norm": 0.0034274067808784683, + "learning_rate": 9.86466165413534e-06, + "loss": 0.0, + "step": 1742 + }, + { + "epoch": 0.11232841399755107, + "grad_norm": 0.0012332182509001428, + "learning_rate": 9.863945578231294e-06, + "loss": 0.0, + "step": 1743 + }, + { + "epoch": 0.11239285944448024, + "grad_norm": 0.019587984236910893, + "learning_rate": 9.863229502327246e-06, + "loss": 0.0001, + "step": 1744 + }, + { + "epoch": 0.11245730489140943, + "grad_norm": 0.0001058282062590549, + "learning_rate": 9.8625134264232e-06, + "loss": 0.0, + "step": 1745 + }, + { + "epoch": 0.1125217503383386, + "grad_norm": 0.0019646040441125955, + "learning_rate": 9.861797350519155e-06, + "loss": 0.0, + "step": 1746 + }, + { + "epoch": 0.11258619578526777, + "grad_norm": 0.0024159480603714364, + "learning_rate": 9.86108127461511e-06, + "loss": 0.0, + "step": 1747 + }, + { + "epoch": 0.11265064123219695, + "grad_norm": 0.03507614114998864, + "learning_rate": 9.860365198711064e-06, + "loss": 0.0002, + "step": 1748 + }, + { + "epoch": 0.11271508667912612, + "grad_norm": 0.02787627501469119, + "learning_rate": 9.859649122807018e-06, + "loss": 0.0001, + "step": 1749 + }, + { + "epoch": 0.11277953212605529, + "grad_norm": 0.008317143690736094, + "learning_rate": 9.858933046902972e-06, + "loss": 0.0, + "step": 1750 + }, + { + "epoch": 0.11284397757298446, + "grad_norm": 0.8252554064657068, + "learning_rate": 9.858216970998927e-06, + "loss": 0.0044, + "step": 1751 + }, + { + "epoch": 0.11290842301991365, + "grad_norm": 1.160233260658864, + "learning_rate": 9.857500895094881e-06, + "loss": 0.0055, + "step": 1752 + }, + { + "epoch": 0.11297286846684282, + "grad_norm": 0.007301981019052537, + "learning_rate": 9.856784819190835e-06, + "loss": 0.0, + "step": 1753 + }, + { + "epoch": 0.11303731391377199, + "grad_norm": 0.0021348737129215627, + "learning_rate": 9.85606874328679e-06, + "loss": 0.0, + "step": 1754 + }, + { + "epoch": 0.11310175936070116, + "grad_norm": 0.1850555238494288, + "learning_rate": 9.855352667382744e-06, + "loss": 0.0002, + "step": 1755 + }, + { + "epoch": 0.11316620480763034, + "grad_norm": 0.003926293491443531, + "learning_rate": 9.854636591478698e-06, + "loss": 0.0, + "step": 1756 + }, + { + "epoch": 0.11323065025455951, + "grad_norm": 0.03075867687050704, + "learning_rate": 9.853920515574652e-06, + "loss": 0.0, + "step": 1757 + }, + { + "epoch": 0.11329509570148869, + "grad_norm": 0.0006125017611817344, + "learning_rate": 9.853204439670607e-06, + "loss": 0.0, + "step": 1758 + }, + { + "epoch": 0.11335954114841787, + "grad_norm": 0.0019315106515052796, + "learning_rate": 9.852488363766561e-06, + "loss": 0.0, + "step": 1759 + }, + { + "epoch": 0.11342398659534704, + "grad_norm": 0.14089677959341357, + "learning_rate": 9.851772287862514e-06, + "loss": 0.0002, + "step": 1760 + }, + { + "epoch": 0.11348843204227621, + "grad_norm": 0.0015930161051359712, + "learning_rate": 9.851056211958468e-06, + "loss": 0.0, + "step": 1761 + }, + { + "epoch": 0.11355287748920538, + "grad_norm": 0.2209100850050719, + "learning_rate": 9.850340136054422e-06, + "loss": 0.0021, + "step": 1762 + }, + { + "epoch": 0.11361732293613457, + "grad_norm": 0.11348926830889935, + "learning_rate": 9.849624060150376e-06, + "loss": 0.0004, + "step": 1763 + }, + { + "epoch": 0.11368176838306374, + "grad_norm": 0.05025630142028179, + "learning_rate": 9.84890798424633e-06, + "loss": 0.0002, + "step": 1764 + }, + { + "epoch": 0.1137462138299929, + "grad_norm": 0.004523011762179798, + "learning_rate": 9.848191908342285e-06, + "loss": 0.0, + "step": 1765 + }, + { + "epoch": 0.11381065927692209, + "grad_norm": 0.03315617945049665, + "learning_rate": 9.84747583243824e-06, + "loss": 0.0004, + "step": 1766 + }, + { + "epoch": 0.11387510472385126, + "grad_norm": 0.004434986096414539, + "learning_rate": 9.846759756534194e-06, + "loss": 0.0, + "step": 1767 + }, + { + "epoch": 0.11393955017078043, + "grad_norm": 0.23389844924721334, + "learning_rate": 9.846043680630148e-06, + "loss": 0.0008, + "step": 1768 + }, + { + "epoch": 0.1140039956177096, + "grad_norm": 0.12071385320497696, + "learning_rate": 9.845327604726102e-06, + "loss": 0.0004, + "step": 1769 + }, + { + "epoch": 0.11406844106463879, + "grad_norm": 0.027456958188735676, + "learning_rate": 9.844611528822055e-06, + "loss": 0.0, + "step": 1770 + }, + { + "epoch": 0.11413288651156796, + "grad_norm": 0.2016661137400553, + "learning_rate": 9.843895452918009e-06, + "loss": 0.0026, + "step": 1771 + }, + { + "epoch": 0.11419733195849713, + "grad_norm": 0.0006652434216453054, + "learning_rate": 9.843179377013963e-06, + "loss": 0.0, + "step": 1772 + }, + { + "epoch": 0.11426177740542631, + "grad_norm": 0.06822878762253942, + "learning_rate": 9.842463301109918e-06, + "loss": 0.001, + "step": 1773 + }, + { + "epoch": 0.11432622285235548, + "grad_norm": 0.19988441517336292, + "learning_rate": 9.841747225205872e-06, + "loss": 0.0003, + "step": 1774 + }, + { + "epoch": 0.11439066829928465, + "grad_norm": 0.05110567110947775, + "learning_rate": 9.841031149301828e-06, + "loss": 0.0001, + "step": 1775 + }, + { + "epoch": 0.11445511374621382, + "grad_norm": 0.007954576835518657, + "learning_rate": 9.84031507339778e-06, + "loss": 0.0, + "step": 1776 + }, + { + "epoch": 0.11451955919314301, + "grad_norm": 0.0010392938517551102, + "learning_rate": 9.839598997493735e-06, + "loss": 0.0, + "step": 1777 + }, + { + "epoch": 0.11458400464007218, + "grad_norm": 0.0056564445665283, + "learning_rate": 9.83888292158969e-06, + "loss": 0.0, + "step": 1778 + }, + { + "epoch": 0.11464845008700135, + "grad_norm": 0.0014838747156959813, + "learning_rate": 9.838166845685644e-06, + "loss": 0.0, + "step": 1779 + }, + { + "epoch": 0.11471289553393053, + "grad_norm": 0.0030324228042027165, + "learning_rate": 9.837450769781598e-06, + "loss": 0.0, + "step": 1780 + }, + { + "epoch": 0.1147773409808597, + "grad_norm": 0.007153180921713235, + "learning_rate": 9.836734693877552e-06, + "loss": 0.0, + "step": 1781 + }, + { + "epoch": 0.11484178642778888, + "grad_norm": 0.0014775236203942192, + "learning_rate": 9.836018617973506e-06, + "loss": 0.0, + "step": 1782 + }, + { + "epoch": 0.11490623187471805, + "grad_norm": 0.042548194151211846, + "learning_rate": 9.83530254206946e-06, + "loss": 0.0001, + "step": 1783 + }, + { + "epoch": 0.11497067732164723, + "grad_norm": 0.012568469437944967, + "learning_rate": 9.834586466165415e-06, + "loss": 0.0, + "step": 1784 + }, + { + "epoch": 0.1150351227685764, + "grad_norm": 0.03218187318560303, + "learning_rate": 9.83387039026137e-06, + "loss": 0.0, + "step": 1785 + }, + { + "epoch": 0.11509956821550557, + "grad_norm": 0.007692717030373828, + "learning_rate": 9.833154314357322e-06, + "loss": 0.0001, + "step": 1786 + }, + { + "epoch": 0.11516401366243476, + "grad_norm": 0.012189991502221508, + "learning_rate": 9.832438238453276e-06, + "loss": 0.0, + "step": 1787 + }, + { + "epoch": 0.11522845910936393, + "grad_norm": 0.04904071543282355, + "learning_rate": 9.83172216254923e-06, + "loss": 0.0022, + "step": 1788 + }, + { + "epoch": 0.1152929045562931, + "grad_norm": 0.7873672933629802, + "learning_rate": 9.831006086645185e-06, + "loss": 0.0021, + "step": 1789 + }, + { + "epoch": 0.11535735000322227, + "grad_norm": 0.03984673321766176, + "learning_rate": 9.830290010741139e-06, + "loss": 0.0002, + "step": 1790 + }, + { + "epoch": 0.11542179545015145, + "grad_norm": 0.04043361873909708, + "learning_rate": 9.829573934837093e-06, + "loss": 0.0001, + "step": 1791 + }, + { + "epoch": 0.11548624089708062, + "grad_norm": 0.00460612867531268, + "learning_rate": 9.828857858933048e-06, + "loss": 0.0, + "step": 1792 + }, + { + "epoch": 0.11555068634400979, + "grad_norm": 0.006081790106934461, + "learning_rate": 9.828141783029002e-06, + "loss": 0.0, + "step": 1793 + }, + { + "epoch": 0.11561513179093898, + "grad_norm": 0.06486523973034726, + "learning_rate": 9.827425707124956e-06, + "loss": 0.0001, + "step": 1794 + }, + { + "epoch": 0.11567957723786815, + "grad_norm": 3.2129624564538615, + "learning_rate": 9.82670963122091e-06, + "loss": 0.03, + "step": 1795 + }, + { + "epoch": 0.11574402268479732, + "grad_norm": 0.0007317990986965962, + "learning_rate": 9.825993555316863e-06, + "loss": 0.0, + "step": 1796 + }, + { + "epoch": 0.11580846813172649, + "grad_norm": 0.007586954091167557, + "learning_rate": 9.825277479412818e-06, + "loss": 0.0, + "step": 1797 + }, + { + "epoch": 0.11587291357865567, + "grad_norm": 0.004224269569500741, + "learning_rate": 9.824561403508772e-06, + "loss": 0.0, + "step": 1798 + }, + { + "epoch": 0.11593735902558484, + "grad_norm": 0.0027508155825239165, + "learning_rate": 9.823845327604728e-06, + "loss": 0.0, + "step": 1799 + }, + { + "epoch": 0.11600180447251401, + "grad_norm": 0.0005093112787406999, + "learning_rate": 9.823129251700682e-06, + "loss": 0.0, + "step": 1800 + }, + { + "epoch": 0.11606624991944318, + "grad_norm": 0.2830816863572351, + "learning_rate": 9.822413175796636e-06, + "loss": 0.0006, + "step": 1801 + }, + { + "epoch": 0.11613069536637237, + "grad_norm": 0.008831027053601843, + "learning_rate": 9.821697099892589e-06, + "loss": 0.0001, + "step": 1802 + }, + { + "epoch": 0.11619514081330154, + "grad_norm": 0.041514996841974086, + "learning_rate": 9.820981023988543e-06, + "loss": 0.0003, + "step": 1803 + }, + { + "epoch": 0.11625958626023071, + "grad_norm": 0.0021810375241418326, + "learning_rate": 9.820264948084498e-06, + "loss": 0.0, + "step": 1804 + }, + { + "epoch": 0.1163240317071599, + "grad_norm": 0.004574236003985102, + "learning_rate": 9.819548872180452e-06, + "loss": 0.0, + "step": 1805 + }, + { + "epoch": 0.11638847715408907, + "grad_norm": 0.001797843287860481, + "learning_rate": 9.818832796276406e-06, + "loss": 0.0, + "step": 1806 + }, + { + "epoch": 0.11645292260101824, + "grad_norm": 0.03992228954057036, + "learning_rate": 9.81811672037236e-06, + "loss": 0.0003, + "step": 1807 + }, + { + "epoch": 0.1165173680479474, + "grad_norm": 2.652800530496413, + "learning_rate": 9.817400644468315e-06, + "loss": 0.0095, + "step": 1808 + }, + { + "epoch": 0.11658181349487659, + "grad_norm": 0.005735454108690199, + "learning_rate": 9.816684568564269e-06, + "loss": 0.0, + "step": 1809 + }, + { + "epoch": 0.11664625894180576, + "grad_norm": 0.46595162514166677, + "learning_rate": 9.815968492660223e-06, + "loss": 0.0022, + "step": 1810 + }, + { + "epoch": 0.11671070438873493, + "grad_norm": 0.008367182582977108, + "learning_rate": 9.815252416756178e-06, + "loss": 0.0001, + "step": 1811 + }, + { + "epoch": 0.11677514983566412, + "grad_norm": 0.010296350222608969, + "learning_rate": 9.81453634085213e-06, + "loss": 0.0001, + "step": 1812 + }, + { + "epoch": 0.11683959528259329, + "grad_norm": 0.002394567203813334, + "learning_rate": 9.813820264948085e-06, + "loss": 0.0, + "step": 1813 + }, + { + "epoch": 0.11690404072952246, + "grad_norm": 0.013784040528215559, + "learning_rate": 9.813104189044039e-06, + "loss": 0.0, + "step": 1814 + }, + { + "epoch": 0.11696848617645163, + "grad_norm": 0.003288869045197393, + "learning_rate": 9.812388113139993e-06, + "loss": 0.0, + "step": 1815 + }, + { + "epoch": 0.11703293162338081, + "grad_norm": 0.003724212986621817, + "learning_rate": 9.811672037235947e-06, + "loss": 0.0, + "step": 1816 + }, + { + "epoch": 0.11709737707030998, + "grad_norm": 0.0013027106074673877, + "learning_rate": 9.810955961331902e-06, + "loss": 0.0, + "step": 1817 + }, + { + "epoch": 0.11716182251723915, + "grad_norm": 0.5599757351897601, + "learning_rate": 9.810239885427856e-06, + "loss": 0.0036, + "step": 1818 + }, + { + "epoch": 0.11722626796416834, + "grad_norm": 0.012470146755273252, + "learning_rate": 9.80952380952381e-06, + "loss": 0.0, + "step": 1819 + }, + { + "epoch": 0.11729071341109751, + "grad_norm": 0.31943589725439364, + "learning_rate": 9.808807733619765e-06, + "loss": 0.0017, + "step": 1820 + }, + { + "epoch": 0.11735515885802668, + "grad_norm": 0.1360801427960747, + "learning_rate": 9.808091657715717e-06, + "loss": 0.0003, + "step": 1821 + }, + { + "epoch": 0.11741960430495585, + "grad_norm": 1.11044941842528, + "learning_rate": 9.807375581811673e-06, + "loss": 0.0037, + "step": 1822 + }, + { + "epoch": 0.11748404975188503, + "grad_norm": 0.046221214479398, + "learning_rate": 9.806659505907628e-06, + "loss": 0.0001, + "step": 1823 + }, + { + "epoch": 0.1175484951988142, + "grad_norm": 0.014703095802911305, + "learning_rate": 9.805943430003582e-06, + "loss": 0.0001, + "step": 1824 + }, + { + "epoch": 0.11761294064574337, + "grad_norm": 0.09168547607566063, + "learning_rate": 9.805227354099536e-06, + "loss": 0.0002, + "step": 1825 + }, + { + "epoch": 0.11767738609267256, + "grad_norm": 0.04476822615850487, + "learning_rate": 9.80451127819549e-06, + "loss": 0.0002, + "step": 1826 + }, + { + "epoch": 0.11774183153960173, + "grad_norm": 0.23735031819500035, + "learning_rate": 9.803795202291445e-06, + "loss": 0.0001, + "step": 1827 + }, + { + "epoch": 0.1178062769865309, + "grad_norm": 0.019883241428665027, + "learning_rate": 9.803079126387397e-06, + "loss": 0.0, + "step": 1828 + }, + { + "epoch": 0.11787072243346007, + "grad_norm": 0.021027037639753155, + "learning_rate": 9.802363050483352e-06, + "loss": 0.0002, + "step": 1829 + }, + { + "epoch": 0.11793516788038926, + "grad_norm": 0.10222835241112932, + "learning_rate": 9.801646974579306e-06, + "loss": 0.0009, + "step": 1830 + }, + { + "epoch": 0.11799961332731843, + "grad_norm": 0.019721556166337365, + "learning_rate": 9.80093089867526e-06, + "loss": 0.0001, + "step": 1831 + }, + { + "epoch": 0.1180640587742476, + "grad_norm": 0.011796718616704312, + "learning_rate": 9.800214822771215e-06, + "loss": 0.0001, + "step": 1832 + }, + { + "epoch": 0.11812850422117678, + "grad_norm": 1.1332119467653918, + "learning_rate": 9.799498746867169e-06, + "loss": 0.0074, + "step": 1833 + }, + { + "epoch": 0.11819294966810595, + "grad_norm": 0.04183516698015525, + "learning_rate": 9.798782670963123e-06, + "loss": 0.0001, + "step": 1834 + }, + { + "epoch": 0.11825739511503512, + "grad_norm": 0.020117152431605615, + "learning_rate": 9.798066595059077e-06, + "loss": 0.0, + "step": 1835 + }, + { + "epoch": 0.11832184056196429, + "grad_norm": 0.005855164840714329, + "learning_rate": 9.797350519155032e-06, + "loss": 0.0, + "step": 1836 + }, + { + "epoch": 0.11838628600889348, + "grad_norm": 1.0156371482709448, + "learning_rate": 9.796634443250986e-06, + "loss": 0.0036, + "step": 1837 + }, + { + "epoch": 0.11845073145582265, + "grad_norm": 0.14426022971488647, + "learning_rate": 9.795918367346939e-06, + "loss": 0.0004, + "step": 1838 + }, + { + "epoch": 0.11851517690275182, + "grad_norm": 0.059642671585571926, + "learning_rate": 9.795202291442893e-06, + "loss": 0.0002, + "step": 1839 + }, + { + "epoch": 0.11857962234968099, + "grad_norm": 0.01622077639010314, + "learning_rate": 9.794486215538847e-06, + "loss": 0.0, + "step": 1840 + }, + { + "epoch": 0.11864406779661017, + "grad_norm": 0.00355795310393443, + "learning_rate": 9.793770139634802e-06, + "loss": 0.0, + "step": 1841 + }, + { + "epoch": 0.11870851324353934, + "grad_norm": 0.22162360267499878, + "learning_rate": 9.793054063730756e-06, + "loss": 0.0016, + "step": 1842 + }, + { + "epoch": 0.11877295869046851, + "grad_norm": 0.03565730231915416, + "learning_rate": 9.79233798782671e-06, + "loss": 0.0, + "step": 1843 + }, + { + "epoch": 0.1188374041373977, + "grad_norm": 0.04782365939084201, + "learning_rate": 9.791621911922664e-06, + "loss": 0.0, + "step": 1844 + }, + { + "epoch": 0.11890184958432687, + "grad_norm": 0.007612927308443903, + "learning_rate": 9.790905836018619e-06, + "loss": 0.0, + "step": 1845 + }, + { + "epoch": 0.11896629503125604, + "grad_norm": 0.010512277541804959, + "learning_rate": 9.790189760114573e-06, + "loss": 0.0001, + "step": 1846 + }, + { + "epoch": 0.11903074047818521, + "grad_norm": 0.01802613225231296, + "learning_rate": 9.789473684210527e-06, + "loss": 0.0001, + "step": 1847 + }, + { + "epoch": 0.1190951859251144, + "grad_norm": 0.002060216204765636, + "learning_rate": 9.788757608306482e-06, + "loss": 0.0, + "step": 1848 + }, + { + "epoch": 0.11915963137204356, + "grad_norm": 0.01830879044055786, + "learning_rate": 9.788041532402436e-06, + "loss": 0.0001, + "step": 1849 + }, + { + "epoch": 0.11922407681897274, + "grad_norm": 0.1223366844699642, + "learning_rate": 9.78732545649839e-06, + "loss": 0.002, + "step": 1850 + }, + { + "epoch": 0.11928852226590192, + "grad_norm": 0.2997939887607467, + "learning_rate": 9.786609380594345e-06, + "loss": 0.0004, + "step": 1851 + }, + { + "epoch": 0.11935296771283109, + "grad_norm": 0.10824277371729986, + "learning_rate": 9.785893304690299e-06, + "loss": 0.0003, + "step": 1852 + }, + { + "epoch": 0.11941741315976026, + "grad_norm": 0.0021208647960036446, + "learning_rate": 9.785177228786253e-06, + "loss": 0.0, + "step": 1853 + }, + { + "epoch": 0.11948185860668943, + "grad_norm": 0.29415234850131244, + "learning_rate": 9.784461152882206e-06, + "loss": 0.0007, + "step": 1854 + }, + { + "epoch": 0.11954630405361862, + "grad_norm": 0.9598414365134162, + "learning_rate": 9.78374507697816e-06, + "loss": 0.0098, + "step": 1855 + }, + { + "epoch": 0.11961074950054779, + "grad_norm": 0.0016752527904032157, + "learning_rate": 9.783029001074114e-06, + "loss": 0.0, + "step": 1856 + }, + { + "epoch": 0.11967519494747696, + "grad_norm": 0.0483249174057824, + "learning_rate": 9.782312925170069e-06, + "loss": 0.0001, + "step": 1857 + }, + { + "epoch": 0.11973964039440614, + "grad_norm": 0.03142188899828553, + "learning_rate": 9.781596849266023e-06, + "loss": 0.0003, + "step": 1858 + }, + { + "epoch": 0.11980408584133531, + "grad_norm": 0.2874638451772468, + "learning_rate": 9.780880773361977e-06, + "loss": 0.0006, + "step": 1859 + }, + { + "epoch": 0.11986853128826448, + "grad_norm": 0.005053723657460722, + "learning_rate": 9.780164697457932e-06, + "loss": 0.0, + "step": 1860 + }, + { + "epoch": 0.11993297673519365, + "grad_norm": 0.02870584304045045, + "learning_rate": 9.779448621553886e-06, + "loss": 0.0001, + "step": 1861 + }, + { + "epoch": 0.11999742218212284, + "grad_norm": 0.2748382656294078, + "learning_rate": 9.77873254564984e-06, + "loss": 0.0004, + "step": 1862 + }, + { + "epoch": 0.12006186762905201, + "grad_norm": 0.0008884232880952846, + "learning_rate": 9.778016469745793e-06, + "loss": 0.0, + "step": 1863 + }, + { + "epoch": 0.12012631307598118, + "grad_norm": 0.17966616545364283, + "learning_rate": 9.777300393841747e-06, + "loss": 0.0022, + "step": 1864 + }, + { + "epoch": 0.12019075852291036, + "grad_norm": 0.004776275188142645, + "learning_rate": 9.776584317937701e-06, + "loss": 0.0, + "step": 1865 + }, + { + "epoch": 0.12025520396983953, + "grad_norm": 0.006079849315427602, + "learning_rate": 9.775868242033656e-06, + "loss": 0.0, + "step": 1866 + }, + { + "epoch": 0.1203196494167687, + "grad_norm": 0.2494943765783425, + "learning_rate": 9.77515216612961e-06, + "loss": 0.0005, + "step": 1867 + }, + { + "epoch": 0.12038409486369787, + "grad_norm": 0.011233876711569475, + "learning_rate": 9.774436090225564e-06, + "loss": 0.0001, + "step": 1868 + }, + { + "epoch": 0.12044854031062706, + "grad_norm": 0.22918849086157178, + "learning_rate": 9.77372001432152e-06, + "loss": 0.0007, + "step": 1869 + }, + { + "epoch": 0.12051298575755623, + "grad_norm": 0.003232146333357443, + "learning_rate": 9.773003938417473e-06, + "loss": 0.0, + "step": 1870 + }, + { + "epoch": 0.1205774312044854, + "grad_norm": 0.09946201213585422, + "learning_rate": 9.772287862513427e-06, + "loss": 0.0011, + "step": 1871 + }, + { + "epoch": 0.12064187665141458, + "grad_norm": 0.02818442187642415, + "learning_rate": 9.771571786609381e-06, + "loss": 0.0002, + "step": 1872 + }, + { + "epoch": 0.12070632209834375, + "grad_norm": 0.010201301486194175, + "learning_rate": 9.770855710705336e-06, + "loss": 0.0, + "step": 1873 + }, + { + "epoch": 0.12077076754527293, + "grad_norm": 0.6826322714867648, + "learning_rate": 9.77013963480129e-06, + "loss": 0.0058, + "step": 1874 + }, + { + "epoch": 0.1208352129922021, + "grad_norm": 7.93762296335806e-05, + "learning_rate": 9.769423558897244e-06, + "loss": 0.0, + "step": 1875 + }, + { + "epoch": 0.12089965843913128, + "grad_norm": 0.03652186301096464, + "learning_rate": 9.768707482993199e-06, + "loss": 0.0004, + "step": 1876 + }, + { + "epoch": 0.12096410388606045, + "grad_norm": 0.06363982698805158, + "learning_rate": 9.767991407089153e-06, + "loss": 0.0002, + "step": 1877 + }, + { + "epoch": 0.12102854933298962, + "grad_norm": 0.0020954344021100178, + "learning_rate": 9.767275331185107e-06, + "loss": 0.0, + "step": 1878 + }, + { + "epoch": 0.1210929947799188, + "grad_norm": 0.17501861119930448, + "learning_rate": 9.76655925528106e-06, + "loss": 0.0009, + "step": 1879 + }, + { + "epoch": 0.12115744022684798, + "grad_norm": 0.004144092077334751, + "learning_rate": 9.765843179377014e-06, + "loss": 0.0, + "step": 1880 + }, + { + "epoch": 0.12122188567377715, + "grad_norm": 0.0020211306433941265, + "learning_rate": 9.765127103472968e-06, + "loss": 0.0, + "step": 1881 + }, + { + "epoch": 0.12128633112070632, + "grad_norm": 0.02949696401101104, + "learning_rate": 9.764411027568923e-06, + "loss": 0.0002, + "step": 1882 + }, + { + "epoch": 0.1213507765676355, + "grad_norm": 0.016619784508678393, + "learning_rate": 9.763694951664877e-06, + "loss": 0.0001, + "step": 1883 + }, + { + "epoch": 0.12141522201456467, + "grad_norm": 0.3003303690624513, + "learning_rate": 9.762978875760831e-06, + "loss": 0.0015, + "step": 1884 + }, + { + "epoch": 0.12147966746149384, + "grad_norm": 0.0013416382681100284, + "learning_rate": 9.762262799856786e-06, + "loss": 0.0, + "step": 1885 + }, + { + "epoch": 0.12154411290842301, + "grad_norm": 0.00362328786595532, + "learning_rate": 9.76154672395274e-06, + "loss": 0.0, + "step": 1886 + }, + { + "epoch": 0.1216085583553522, + "grad_norm": 0.20061469461587267, + "learning_rate": 9.760830648048694e-06, + "loss": 0.0008, + "step": 1887 + }, + { + "epoch": 0.12167300380228137, + "grad_norm": 0.1765857512431687, + "learning_rate": 9.760114572144649e-06, + "loss": 0.0007, + "step": 1888 + }, + { + "epoch": 0.12173744924921054, + "grad_norm": 0.009202730838186858, + "learning_rate": 9.759398496240601e-06, + "loss": 0.0001, + "step": 1889 + }, + { + "epoch": 0.12180189469613972, + "grad_norm": 0.024221015879938333, + "learning_rate": 9.758682420336555e-06, + "loss": 0.0001, + "step": 1890 + }, + { + "epoch": 0.1218663401430689, + "grad_norm": 0.0032962728768922566, + "learning_rate": 9.75796634443251e-06, + "loss": 0.0, + "step": 1891 + }, + { + "epoch": 0.12193078558999806, + "grad_norm": 0.0017656631596894676, + "learning_rate": 9.757250268528466e-06, + "loss": 0.0, + "step": 1892 + }, + { + "epoch": 0.12199523103692723, + "grad_norm": 0.29931956751586863, + "learning_rate": 9.75653419262442e-06, + "loss": 0.001, + "step": 1893 + }, + { + "epoch": 0.12205967648385642, + "grad_norm": 0.005788445500950225, + "learning_rate": 9.755818116720374e-06, + "loss": 0.0, + "step": 1894 + }, + { + "epoch": 0.12212412193078559, + "grad_norm": 0.0004621153606314104, + "learning_rate": 9.755102040816327e-06, + "loss": 0.0, + "step": 1895 + }, + { + "epoch": 0.12218856737771476, + "grad_norm": 0.0032908027163354404, + "learning_rate": 9.754385964912281e-06, + "loss": 0.0, + "step": 1896 + }, + { + "epoch": 0.12225301282464394, + "grad_norm": 0.011861358115373295, + "learning_rate": 9.753669889008236e-06, + "loss": 0.0, + "step": 1897 + }, + { + "epoch": 0.12231745827157312, + "grad_norm": 0.28728746872163846, + "learning_rate": 9.75295381310419e-06, + "loss": 0.001, + "step": 1898 + }, + { + "epoch": 0.12238190371850229, + "grad_norm": 0.05119925405332734, + "learning_rate": 9.752237737200144e-06, + "loss": 0.0001, + "step": 1899 + }, + { + "epoch": 0.12244634916543146, + "grad_norm": 0.027689745751122395, + "learning_rate": 9.751521661296098e-06, + "loss": 0.0001, + "step": 1900 + }, + { + "epoch": 0.12251079461236064, + "grad_norm": 0.013967656639211944, + "learning_rate": 9.750805585392053e-06, + "loss": 0.0001, + "step": 1901 + }, + { + "epoch": 0.12257524005928981, + "grad_norm": 0.006609898455584625, + "learning_rate": 9.750089509488007e-06, + "loss": 0.0, + "step": 1902 + }, + { + "epoch": 0.12263968550621898, + "grad_norm": 0.0014454099959909776, + "learning_rate": 9.749373433583961e-06, + "loss": 0.0, + "step": 1903 + }, + { + "epoch": 0.12270413095314817, + "grad_norm": 0.00026203836987910003, + "learning_rate": 9.748657357679916e-06, + "loss": 0.0, + "step": 1904 + }, + { + "epoch": 0.12276857640007734, + "grad_norm": 0.00271610028264705, + "learning_rate": 9.747941281775868e-06, + "loss": 0.0, + "step": 1905 + }, + { + "epoch": 0.12283302184700651, + "grad_norm": 0.0016812886013362202, + "learning_rate": 9.747225205871823e-06, + "loss": 0.0, + "step": 1906 + }, + { + "epoch": 0.12289746729393568, + "grad_norm": 0.061560284842779436, + "learning_rate": 9.746509129967777e-06, + "loss": 0.0002, + "step": 1907 + }, + { + "epoch": 0.12296191274086486, + "grad_norm": 0.003250485974073988, + "learning_rate": 9.745793054063731e-06, + "loss": 0.0, + "step": 1908 + }, + { + "epoch": 0.12302635818779403, + "grad_norm": 0.0006700169758031093, + "learning_rate": 9.745076978159685e-06, + "loss": 0.0, + "step": 1909 + }, + { + "epoch": 0.1230908036347232, + "grad_norm": 0.005113968592353677, + "learning_rate": 9.74436090225564e-06, + "loss": 0.0001, + "step": 1910 + }, + { + "epoch": 0.12315524908165239, + "grad_norm": 0.41192248477347315, + "learning_rate": 9.743644826351594e-06, + "loss": 0.0015, + "step": 1911 + }, + { + "epoch": 0.12321969452858156, + "grad_norm": 0.0045803669893567924, + "learning_rate": 9.742928750447548e-06, + "loss": 0.0, + "step": 1912 + }, + { + "epoch": 0.12328413997551073, + "grad_norm": 0.17900576010418873, + "learning_rate": 9.742212674543503e-06, + "loss": 0.0022, + "step": 1913 + }, + { + "epoch": 0.1233485854224399, + "grad_norm": 0.04272674418974187, + "learning_rate": 9.741496598639457e-06, + "loss": 0.0001, + "step": 1914 + }, + { + "epoch": 0.12341303086936908, + "grad_norm": 0.021601893328035547, + "learning_rate": 9.740780522735411e-06, + "loss": 0.0, + "step": 1915 + }, + { + "epoch": 0.12347747631629825, + "grad_norm": 0.13924543879510617, + "learning_rate": 9.740064446831366e-06, + "loss": 0.001, + "step": 1916 + }, + { + "epoch": 0.12354192176322742, + "grad_norm": 0.028301274749120395, + "learning_rate": 9.73934837092732e-06, + "loss": 0.0001, + "step": 1917 + }, + { + "epoch": 0.12360636721015661, + "grad_norm": 0.009873854772588045, + "learning_rate": 9.738632295023274e-06, + "loss": 0.0001, + "step": 1918 + }, + { + "epoch": 0.12367081265708578, + "grad_norm": 0.16220766557710695, + "learning_rate": 9.737916219119228e-06, + "loss": 0.0007, + "step": 1919 + }, + { + "epoch": 0.12373525810401495, + "grad_norm": 0.022465447563904575, + "learning_rate": 9.737200143215183e-06, + "loss": 0.0001, + "step": 1920 + }, + { + "epoch": 0.12379970355094412, + "grad_norm": 0.009806271616587365, + "learning_rate": 9.736484067311135e-06, + "loss": 0.0, + "step": 1921 + }, + { + "epoch": 0.1238641489978733, + "grad_norm": 0.05567401244931437, + "learning_rate": 9.73576799140709e-06, + "loss": 0.0002, + "step": 1922 + }, + { + "epoch": 0.12392859444480248, + "grad_norm": 0.052939701804537706, + "learning_rate": 9.735051915503044e-06, + "loss": 0.0002, + "step": 1923 + }, + { + "epoch": 0.12399303989173165, + "grad_norm": 0.001525405124781685, + "learning_rate": 9.734335839598998e-06, + "loss": 0.0, + "step": 1924 + }, + { + "epoch": 0.12405748533866082, + "grad_norm": 0.37714290288877417, + "learning_rate": 9.733619763694953e-06, + "loss": 0.0011, + "step": 1925 + }, + { + "epoch": 0.12412193078559, + "grad_norm": 0.001764385053678129, + "learning_rate": 9.732903687790907e-06, + "loss": 0.0, + "step": 1926 + }, + { + "epoch": 0.12418637623251917, + "grad_norm": 0.019501183135983383, + "learning_rate": 9.732187611886861e-06, + "loss": 0.0002, + "step": 1927 + }, + { + "epoch": 0.12425082167944834, + "grad_norm": 0.018751990192806996, + "learning_rate": 9.731471535982815e-06, + "loss": 0.0001, + "step": 1928 + }, + { + "epoch": 0.12431526712637753, + "grad_norm": 0.004955295051767424, + "learning_rate": 9.73075546007877e-06, + "loss": 0.0, + "step": 1929 + }, + { + "epoch": 0.1243797125733067, + "grad_norm": 0.061112311234522064, + "learning_rate": 9.730039384174724e-06, + "loss": 0.0002, + "step": 1930 + }, + { + "epoch": 0.12444415802023587, + "grad_norm": 0.004004600983625894, + "learning_rate": 9.729323308270677e-06, + "loss": 0.0, + "step": 1931 + }, + { + "epoch": 0.12450860346716504, + "grad_norm": 0.0003309675091962049, + "learning_rate": 9.728607232366631e-06, + "loss": 0.0, + "step": 1932 + }, + { + "epoch": 0.12457304891409422, + "grad_norm": 0.15170736542210217, + "learning_rate": 9.727891156462585e-06, + "loss": 0.002, + "step": 1933 + }, + { + "epoch": 0.1246374943610234, + "grad_norm": 1.4362513882968126, + "learning_rate": 9.72717508055854e-06, + "loss": 0.0162, + "step": 1934 + }, + { + "epoch": 0.12470193980795256, + "grad_norm": 0.014300368475316579, + "learning_rate": 9.726459004654494e-06, + "loss": 0.0001, + "step": 1935 + }, + { + "epoch": 0.12476638525488175, + "grad_norm": 0.004234241794280436, + "learning_rate": 9.725742928750448e-06, + "loss": 0.0001, + "step": 1936 + }, + { + "epoch": 0.12483083070181092, + "grad_norm": 0.015785880095632743, + "learning_rate": 9.725026852846402e-06, + "loss": 0.0, + "step": 1937 + }, + { + "epoch": 0.12489527614874009, + "grad_norm": 0.16290879479847703, + "learning_rate": 9.724310776942357e-06, + "loss": 0.0019, + "step": 1938 + }, + { + "epoch": 0.12495972159566926, + "grad_norm": 0.0871295187201113, + "learning_rate": 9.723594701038311e-06, + "loss": 0.0017, + "step": 1939 + }, + { + "epoch": 0.12502416704259844, + "grad_norm": 0.04819015484545837, + "learning_rate": 9.722878625134265e-06, + "loss": 0.0001, + "step": 1940 + }, + { + "epoch": 0.12508861248952763, + "grad_norm": 0.043515775119977985, + "learning_rate": 9.72216254923022e-06, + "loss": 0.0006, + "step": 1941 + }, + { + "epoch": 0.12515305793645679, + "grad_norm": 0.019120648631456776, + "learning_rate": 9.721446473326174e-06, + "loss": 0.0002, + "step": 1942 + }, + { + "epoch": 0.12521750338338597, + "grad_norm": 0.06639534145403339, + "learning_rate": 9.720730397422128e-06, + "loss": 0.0002, + "step": 1943 + }, + { + "epoch": 0.12528194883031513, + "grad_norm": 0.32793971915972786, + "learning_rate": 9.720014321518082e-06, + "loss": 0.0003, + "step": 1944 + }, + { + "epoch": 0.1253463942772443, + "grad_norm": 0.23585167454332714, + "learning_rate": 9.719298245614037e-06, + "loss": 0.0004, + "step": 1945 + }, + { + "epoch": 0.1254108397241735, + "grad_norm": 0.08747161219538542, + "learning_rate": 9.718582169709991e-06, + "loss": 0.0003, + "step": 1946 + }, + { + "epoch": 0.12547528517110265, + "grad_norm": 0.08837184298199861, + "learning_rate": 9.717866093805944e-06, + "loss": 0.0001, + "step": 1947 + }, + { + "epoch": 0.12553973061803184, + "grad_norm": 0.048392791842107145, + "learning_rate": 9.717150017901898e-06, + "loss": 0.0001, + "step": 1948 + }, + { + "epoch": 0.12560417606496102, + "grad_norm": 0.9592803684743428, + "learning_rate": 9.716433941997852e-06, + "loss": 0.0082, + "step": 1949 + }, + { + "epoch": 0.12566862151189018, + "grad_norm": 0.2817725519397007, + "learning_rate": 9.715717866093807e-06, + "loss": 0.0003, + "step": 1950 + }, + { + "epoch": 0.12573306695881936, + "grad_norm": 0.012663876833443062, + "learning_rate": 9.715001790189761e-06, + "loss": 0.0, + "step": 1951 + }, + { + "epoch": 0.12579751240574855, + "grad_norm": 0.05103851906930751, + "learning_rate": 9.714285714285715e-06, + "loss": 0.0016, + "step": 1952 + }, + { + "epoch": 0.1258619578526777, + "grad_norm": 0.07201781829296526, + "learning_rate": 9.71356963838167e-06, + "loss": 0.0003, + "step": 1953 + }, + { + "epoch": 0.1259264032996069, + "grad_norm": 0.0009689525530489202, + "learning_rate": 9.712853562477624e-06, + "loss": 0.0, + "step": 1954 + }, + { + "epoch": 0.12599084874653604, + "grad_norm": 0.0145900601196395, + "learning_rate": 9.712137486573578e-06, + "loss": 0.0, + "step": 1955 + }, + { + "epoch": 0.12605529419346523, + "grad_norm": 0.14103773266063874, + "learning_rate": 9.71142141066953e-06, + "loss": 0.0005, + "step": 1956 + }, + { + "epoch": 0.1261197396403944, + "grad_norm": 0.007242582761787885, + "learning_rate": 9.710705334765485e-06, + "loss": 0.0, + "step": 1957 + }, + { + "epoch": 0.12618418508732357, + "grad_norm": 2.3089804801462774, + "learning_rate": 9.70998925886144e-06, + "loss": 0.018, + "step": 1958 + }, + { + "epoch": 0.12624863053425275, + "grad_norm": 0.001684282938530845, + "learning_rate": 9.709273182957394e-06, + "loss": 0.0, + "step": 1959 + }, + { + "epoch": 0.12631307598118194, + "grad_norm": 0.008110944387793166, + "learning_rate": 9.708557107053348e-06, + "loss": 0.0, + "step": 1960 + }, + { + "epoch": 0.1263775214281111, + "grad_norm": 0.0012180645839539397, + "learning_rate": 9.707841031149302e-06, + "loss": 0.0, + "step": 1961 + }, + { + "epoch": 0.12644196687504028, + "grad_norm": 0.0032079103488066195, + "learning_rate": 9.707124955245258e-06, + "loss": 0.0, + "step": 1962 + }, + { + "epoch": 0.12650641232196946, + "grad_norm": 0.0015223787814444203, + "learning_rate": 9.70640887934121e-06, + "loss": 0.0, + "step": 1963 + }, + { + "epoch": 0.12657085776889862, + "grad_norm": 0.002616986152407543, + "learning_rate": 9.705692803437165e-06, + "loss": 0.0, + "step": 1964 + }, + { + "epoch": 0.1266353032158278, + "grad_norm": 0.45123696786549056, + "learning_rate": 9.70497672753312e-06, + "loss": 0.0022, + "step": 1965 + }, + { + "epoch": 0.126699748662757, + "grad_norm": 0.5187885166714238, + "learning_rate": 9.704260651629074e-06, + "loss": 0.0025, + "step": 1966 + }, + { + "epoch": 0.12676419410968615, + "grad_norm": 0.002532257006504168, + "learning_rate": 9.703544575725028e-06, + "loss": 0.0, + "step": 1967 + }, + { + "epoch": 0.12682863955661533, + "grad_norm": 0.0024266808451328583, + "learning_rate": 9.702828499820982e-06, + "loss": 0.0, + "step": 1968 + }, + { + "epoch": 0.1268930850035445, + "grad_norm": 0.000678778908986063, + "learning_rate": 9.702112423916937e-06, + "loss": 0.0, + "step": 1969 + }, + { + "epoch": 0.12695753045047367, + "grad_norm": 0.2541039650735298, + "learning_rate": 9.701396348012891e-06, + "loss": 0.002, + "step": 1970 + }, + { + "epoch": 0.12702197589740286, + "grad_norm": 0.05886083764835366, + "learning_rate": 9.700680272108845e-06, + "loss": 0.0005, + "step": 1971 + }, + { + "epoch": 0.127086421344332, + "grad_norm": 0.01571323952426794, + "learning_rate": 9.699964196204798e-06, + "loss": 0.0001, + "step": 1972 + }, + { + "epoch": 0.1271508667912612, + "grad_norm": 0.003961847090334322, + "learning_rate": 9.699248120300752e-06, + "loss": 0.0, + "step": 1973 + }, + { + "epoch": 0.12721531223819038, + "grad_norm": 0.027488595208986197, + "learning_rate": 9.698532044396706e-06, + "loss": 0.0, + "step": 1974 + }, + { + "epoch": 0.12727975768511954, + "grad_norm": 0.011537951536441094, + "learning_rate": 9.69781596849266e-06, + "loss": 0.0, + "step": 1975 + }, + { + "epoch": 0.12734420313204872, + "grad_norm": 0.06099060148991771, + "learning_rate": 9.697099892588615e-06, + "loss": 0.0001, + "step": 1976 + }, + { + "epoch": 0.1274086485789779, + "grad_norm": 0.0044298324954258495, + "learning_rate": 9.69638381668457e-06, + "loss": 0.0, + "step": 1977 + }, + { + "epoch": 0.12747309402590706, + "grad_norm": 0.03492540826460728, + "learning_rate": 9.695667740780524e-06, + "loss": 0.0001, + "step": 1978 + }, + { + "epoch": 0.12753753947283625, + "grad_norm": 0.01319119997833318, + "learning_rate": 9.694951664876478e-06, + "loss": 0.0, + "step": 1979 + }, + { + "epoch": 0.12760198491976543, + "grad_norm": 0.007561792083493985, + "learning_rate": 9.694235588972432e-06, + "loss": 0.0, + "step": 1980 + }, + { + "epoch": 0.1276664303666946, + "grad_norm": 0.04029629379038578, + "learning_rate": 9.693519513068386e-06, + "loss": 0.0001, + "step": 1981 + }, + { + "epoch": 0.12773087581362377, + "grad_norm": 0.0435667968924196, + "learning_rate": 9.692803437164339e-06, + "loss": 0.0002, + "step": 1982 + }, + { + "epoch": 0.12779532126055293, + "grad_norm": 0.010267197277295997, + "learning_rate": 9.692087361260293e-06, + "loss": 0.0, + "step": 1983 + }, + { + "epoch": 0.12785976670748211, + "grad_norm": 0.11801684485489015, + "learning_rate": 9.691371285356248e-06, + "loss": 0.0023, + "step": 1984 + }, + { + "epoch": 0.1279242121544113, + "grad_norm": 0.02908363021812742, + "learning_rate": 9.690655209452202e-06, + "loss": 0.0001, + "step": 1985 + }, + { + "epoch": 0.12798865760134046, + "grad_norm": 0.0020590699766240743, + "learning_rate": 9.689939133548158e-06, + "loss": 0.0, + "step": 1986 + }, + { + "epoch": 0.12805310304826964, + "grad_norm": 0.18389281811274666, + "learning_rate": 9.689223057644112e-06, + "loss": 0.0002, + "step": 1987 + }, + { + "epoch": 0.12811754849519882, + "grad_norm": 0.09317822238790244, + "learning_rate": 9.688506981740065e-06, + "loss": 0.002, + "step": 1988 + }, + { + "epoch": 0.12818199394212798, + "grad_norm": 0.2188158447077549, + "learning_rate": 9.687790905836019e-06, + "loss": 0.001, + "step": 1989 + }, + { + "epoch": 0.12824643938905717, + "grad_norm": 0.0652653566331668, + "learning_rate": 9.687074829931973e-06, + "loss": 0.0001, + "step": 1990 + }, + { + "epoch": 0.12831088483598635, + "grad_norm": 0.08241202614232288, + "learning_rate": 9.686358754027928e-06, + "loss": 0.0001, + "step": 1991 + }, + { + "epoch": 0.1283753302829155, + "grad_norm": 0.006286295574519581, + "learning_rate": 9.685642678123882e-06, + "loss": 0.0, + "step": 1992 + }, + { + "epoch": 0.1284397757298447, + "grad_norm": 0.003714464869485905, + "learning_rate": 9.684926602219836e-06, + "loss": 0.0, + "step": 1993 + }, + { + "epoch": 0.12850422117677385, + "grad_norm": 0.005759371136794642, + "learning_rate": 9.68421052631579e-06, + "loss": 0.0, + "step": 1994 + }, + { + "epoch": 0.12856866662370303, + "grad_norm": 0.005675051030252726, + "learning_rate": 9.683494450411745e-06, + "loss": 0.0001, + "step": 1995 + }, + { + "epoch": 0.12863311207063222, + "grad_norm": 0.0024021179085006966, + "learning_rate": 9.6827783745077e-06, + "loss": 0.0, + "step": 1996 + }, + { + "epoch": 0.12869755751756137, + "grad_norm": 0.0003785387330054233, + "learning_rate": 9.682062298603654e-06, + "loss": 0.0, + "step": 1997 + }, + { + "epoch": 0.12876200296449056, + "grad_norm": 0.029128929405783818, + "learning_rate": 9.681346222699606e-06, + "loss": 0.0003, + "step": 1998 + }, + { + "epoch": 0.12882644841141974, + "grad_norm": 0.00965094471811793, + "learning_rate": 9.68063014679556e-06, + "loss": 0.0001, + "step": 1999 + }, + { + "epoch": 0.1288908938583489, + "grad_norm": 0.1631243083836886, + "learning_rate": 9.679914070891515e-06, + "loss": 0.0004, + "step": 2000 + }, + { + "epoch": 0.12895533930527808, + "grad_norm": 0.0008463747064084646, + "learning_rate": 9.679197994987469e-06, + "loss": 0.0, + "step": 2001 + }, + { + "epoch": 0.12901978475220727, + "grad_norm": 0.16633632762422967, + "learning_rate": 9.678481919083423e-06, + "loss": 0.0017, + "step": 2002 + }, + { + "epoch": 0.12908423019913642, + "grad_norm": 0.010527829476286266, + "learning_rate": 9.677765843179378e-06, + "loss": 0.0, + "step": 2003 + }, + { + "epoch": 0.1291486756460656, + "grad_norm": 0.0005172988190096706, + "learning_rate": 9.677049767275332e-06, + "loss": 0.0, + "step": 2004 + }, + { + "epoch": 0.1292131210929948, + "grad_norm": 0.5475268974403349, + "learning_rate": 9.676333691371286e-06, + "loss": 0.0049, + "step": 2005 + }, + { + "epoch": 0.12927756653992395, + "grad_norm": 0.3089875231942968, + "learning_rate": 9.67561761546724e-06, + "loss": 0.0015, + "step": 2006 + }, + { + "epoch": 0.12934201198685313, + "grad_norm": 0.4836295297972049, + "learning_rate": 9.674901539563195e-06, + "loss": 0.0014, + "step": 2007 + }, + { + "epoch": 0.1294064574337823, + "grad_norm": 0.027169375141539498, + "learning_rate": 9.674185463659147e-06, + "loss": 0.0001, + "step": 2008 + }, + { + "epoch": 0.12947090288071147, + "grad_norm": 0.021970903803559733, + "learning_rate": 9.673469387755103e-06, + "loss": 0.0001, + "step": 2009 + }, + { + "epoch": 0.12953534832764066, + "grad_norm": 0.009162414864943546, + "learning_rate": 9.672753311851058e-06, + "loss": 0.0, + "step": 2010 + }, + { + "epoch": 0.12959979377456982, + "grad_norm": 0.11454378921360125, + "learning_rate": 9.672037235947012e-06, + "loss": 0.0001, + "step": 2011 + }, + { + "epoch": 0.129664239221499, + "grad_norm": 0.029784219172073287, + "learning_rate": 9.671321160042966e-06, + "loss": 0.0, + "step": 2012 + }, + { + "epoch": 0.12972868466842818, + "grad_norm": 0.023949376703751586, + "learning_rate": 9.67060508413892e-06, + "loss": 0.0001, + "step": 2013 + }, + { + "epoch": 0.12979313011535734, + "grad_norm": 0.002974615832058479, + "learning_rate": 9.669889008234873e-06, + "loss": 0.0, + "step": 2014 + }, + { + "epoch": 0.12985757556228653, + "grad_norm": 0.0378054906125581, + "learning_rate": 9.669172932330828e-06, + "loss": 0.0002, + "step": 2015 + }, + { + "epoch": 0.1299220210092157, + "grad_norm": 0.9699590275654907, + "learning_rate": 9.668456856426782e-06, + "loss": 0.0052, + "step": 2016 + }, + { + "epoch": 0.12998646645614487, + "grad_norm": 0.04729061845179302, + "learning_rate": 9.667740780522736e-06, + "loss": 0.0002, + "step": 2017 + }, + { + "epoch": 0.13005091190307405, + "grad_norm": 0.0011672871369493613, + "learning_rate": 9.66702470461869e-06, + "loss": 0.0, + "step": 2018 + }, + { + "epoch": 0.13011535735000324, + "grad_norm": 0.16779780560629792, + "learning_rate": 9.666308628714645e-06, + "loss": 0.0013, + "step": 2019 + }, + { + "epoch": 0.1301798027969324, + "grad_norm": 0.009769002881575378, + "learning_rate": 9.665592552810599e-06, + "loss": 0.0, + "step": 2020 + }, + { + "epoch": 0.13024424824386158, + "grad_norm": 0.06925341571206166, + "learning_rate": 9.664876476906553e-06, + "loss": 0.0016, + "step": 2021 + }, + { + "epoch": 0.13030869369079073, + "grad_norm": 0.028873011933198297, + "learning_rate": 9.664160401002508e-06, + "loss": 0.0002, + "step": 2022 + }, + { + "epoch": 0.13037313913771992, + "grad_norm": 0.030437929164553016, + "learning_rate": 9.663444325098462e-06, + "loss": 0.0003, + "step": 2023 + }, + { + "epoch": 0.1304375845846491, + "grad_norm": 0.04157236987937166, + "learning_rate": 9.662728249194415e-06, + "loss": 0.0, + "step": 2024 + }, + { + "epoch": 0.13050203003157826, + "grad_norm": 0.002376335667268474, + "learning_rate": 9.662012173290369e-06, + "loss": 0.0, + "step": 2025 + }, + { + "epoch": 0.13056647547850744, + "grad_norm": 0.4035950618520248, + "learning_rate": 9.661296097386323e-06, + "loss": 0.0029, + "step": 2026 + }, + { + "epoch": 0.13063092092543663, + "grad_norm": 0.027696990872225418, + "learning_rate": 9.660580021482277e-06, + "loss": 0.0, + "step": 2027 + }, + { + "epoch": 0.13069536637236578, + "grad_norm": 0.04223727765421684, + "learning_rate": 9.659863945578232e-06, + "loss": 0.0001, + "step": 2028 + }, + { + "epoch": 0.13075981181929497, + "grad_norm": 0.07407476864774445, + "learning_rate": 9.659147869674186e-06, + "loss": 0.0001, + "step": 2029 + }, + { + "epoch": 0.13082425726622415, + "grad_norm": 0.06935889911574292, + "learning_rate": 9.65843179377014e-06, + "loss": 0.0001, + "step": 2030 + }, + { + "epoch": 0.1308887027131533, + "grad_norm": 0.048697351332985216, + "learning_rate": 9.657715717866095e-06, + "loss": 0.0003, + "step": 2031 + }, + { + "epoch": 0.1309531481600825, + "grad_norm": 0.005297919283381317, + "learning_rate": 9.656999641962049e-06, + "loss": 0.0, + "step": 2032 + }, + { + "epoch": 0.13101759360701165, + "grad_norm": 0.026038987175857987, + "learning_rate": 9.656283566058003e-06, + "loss": 0.0, + "step": 2033 + }, + { + "epoch": 0.13108203905394084, + "grad_norm": 0.031187994357722387, + "learning_rate": 9.655567490153958e-06, + "loss": 0.0, + "step": 2034 + }, + { + "epoch": 0.13114648450087002, + "grad_norm": 0.0026731337132794813, + "learning_rate": 9.654851414249912e-06, + "loss": 0.0, + "step": 2035 + }, + { + "epoch": 0.13121092994779918, + "grad_norm": 0.031899373503944894, + "learning_rate": 9.654135338345866e-06, + "loss": 0.0, + "step": 2036 + }, + { + "epoch": 0.13127537539472836, + "grad_norm": 0.19949602017699414, + "learning_rate": 9.65341926244182e-06, + "loss": 0.0018, + "step": 2037 + }, + { + "epoch": 0.13133982084165755, + "grad_norm": 0.08790331325073492, + "learning_rate": 9.652703186537775e-06, + "loss": 0.0001, + "step": 2038 + }, + { + "epoch": 0.1314042662885867, + "grad_norm": 0.017596495982531747, + "learning_rate": 9.651987110633729e-06, + "loss": 0.0001, + "step": 2039 + }, + { + "epoch": 0.1314687117355159, + "grad_norm": 0.019189778384638312, + "learning_rate": 9.651271034729682e-06, + "loss": 0.0002, + "step": 2040 + }, + { + "epoch": 0.13153315718244507, + "grad_norm": 0.06314370843579555, + "learning_rate": 9.650554958825636e-06, + "loss": 0.0003, + "step": 2041 + }, + { + "epoch": 0.13159760262937423, + "grad_norm": 0.0006894959370470676, + "learning_rate": 9.64983888292159e-06, + "loss": 0.0, + "step": 2042 + }, + { + "epoch": 0.1316620480763034, + "grad_norm": 0.37931082451387943, + "learning_rate": 9.649122807017545e-06, + "loss": 0.0007, + "step": 2043 + }, + { + "epoch": 0.1317264935232326, + "grad_norm": 0.0030844319112572124, + "learning_rate": 9.648406731113499e-06, + "loss": 0.0, + "step": 2044 + }, + { + "epoch": 0.13179093897016175, + "grad_norm": 0.08177165554426978, + "learning_rate": 9.647690655209453e-06, + "loss": 0.0004, + "step": 2045 + }, + { + "epoch": 0.13185538441709094, + "grad_norm": 0.43350847797617365, + "learning_rate": 9.646974579305407e-06, + "loss": 0.0023, + "step": 2046 + }, + { + "epoch": 0.1319198298640201, + "grad_norm": 0.017477741474904874, + "learning_rate": 9.646258503401362e-06, + "loss": 0.0, + "step": 2047 + }, + { + "epoch": 0.13198427531094928, + "grad_norm": 0.002203850556521162, + "learning_rate": 9.645542427497316e-06, + "loss": 0.0, + "step": 2048 + }, + { + "epoch": 0.13204872075787846, + "grad_norm": 0.31950717965940695, + "learning_rate": 9.644826351593269e-06, + "loss": 0.0006, + "step": 2049 + }, + { + "epoch": 0.13211316620480762, + "grad_norm": 0.2171599295152496, + "learning_rate": 9.644110275689223e-06, + "loss": 0.0012, + "step": 2050 + }, + { + "epoch": 0.1321776116517368, + "grad_norm": 0.006761549558222412, + "learning_rate": 9.643394199785177e-06, + "loss": 0.0001, + "step": 2051 + }, + { + "epoch": 0.132242057098666, + "grad_norm": 0.21121347000074073, + "learning_rate": 9.642678123881131e-06, + "loss": 0.0016, + "step": 2052 + }, + { + "epoch": 0.13230650254559514, + "grad_norm": 0.00621073689854682, + "learning_rate": 9.641962047977086e-06, + "loss": 0.0001, + "step": 2053 + }, + { + "epoch": 0.13237094799252433, + "grad_norm": 0.32037854095344764, + "learning_rate": 9.64124597207304e-06, + "loss": 0.0011, + "step": 2054 + }, + { + "epoch": 0.1324353934394535, + "grad_norm": 0.009164569823797965, + "learning_rate": 9.640529896168994e-06, + "loss": 0.0, + "step": 2055 + }, + { + "epoch": 0.13249983888638267, + "grad_norm": 0.02221471650999578, + "learning_rate": 9.639813820264949e-06, + "loss": 0.0002, + "step": 2056 + }, + { + "epoch": 0.13256428433331185, + "grad_norm": 0.000575011763455413, + "learning_rate": 9.639097744360903e-06, + "loss": 0.0, + "step": 2057 + }, + { + "epoch": 0.13262872978024104, + "grad_norm": 0.010129757102013255, + "learning_rate": 9.638381668456857e-06, + "loss": 0.0, + "step": 2058 + }, + { + "epoch": 0.1326931752271702, + "grad_norm": 0.1409973473472684, + "learning_rate": 9.637665592552812e-06, + "loss": 0.0013, + "step": 2059 + }, + { + "epoch": 0.13275762067409938, + "grad_norm": 0.005983310707828044, + "learning_rate": 9.636949516648766e-06, + "loss": 0.0, + "step": 2060 + }, + { + "epoch": 0.13282206612102854, + "grad_norm": 0.0009344883651601945, + "learning_rate": 9.63623344074472e-06, + "loss": 0.0, + "step": 2061 + }, + { + "epoch": 0.13288651156795772, + "grad_norm": 0.003960576942543866, + "learning_rate": 9.635517364840674e-06, + "loss": 0.0, + "step": 2062 + }, + { + "epoch": 0.1329509570148869, + "grad_norm": 0.5569627866898234, + "learning_rate": 9.634801288936629e-06, + "loss": 0.0028, + "step": 2063 + }, + { + "epoch": 0.13301540246181606, + "grad_norm": 0.0047079077069493705, + "learning_rate": 9.634085213032583e-06, + "loss": 0.0, + "step": 2064 + }, + { + "epoch": 0.13307984790874525, + "grad_norm": 0.003906507967835101, + "learning_rate": 9.633369137128536e-06, + "loss": 0.0, + "step": 2065 + }, + { + "epoch": 0.13314429335567443, + "grad_norm": 0.11023906442304471, + "learning_rate": 9.63265306122449e-06, + "loss": 0.0003, + "step": 2066 + }, + { + "epoch": 0.1332087388026036, + "grad_norm": 0.0004294632581472271, + "learning_rate": 9.631936985320444e-06, + "loss": 0.0, + "step": 2067 + }, + { + "epoch": 0.13327318424953277, + "grad_norm": 0.11307068278807467, + "learning_rate": 9.631220909416399e-06, + "loss": 0.0021, + "step": 2068 + }, + { + "epoch": 0.13333762969646196, + "grad_norm": 0.0006063610247870296, + "learning_rate": 9.630504833512353e-06, + "loss": 0.0, + "step": 2069 + }, + { + "epoch": 0.1334020751433911, + "grad_norm": 0.0014840911368628803, + "learning_rate": 9.629788757608307e-06, + "loss": 0.0, + "step": 2070 + }, + { + "epoch": 0.1334665205903203, + "grad_norm": 0.6738283019134732, + "learning_rate": 9.629072681704261e-06, + "loss": 0.0026, + "step": 2071 + }, + { + "epoch": 0.13353096603724945, + "grad_norm": 0.0011470253664121449, + "learning_rate": 9.628356605800216e-06, + "loss": 0.0, + "step": 2072 + }, + { + "epoch": 0.13359541148417864, + "grad_norm": 0.0026127623637299136, + "learning_rate": 9.62764052989617e-06, + "loss": 0.0, + "step": 2073 + }, + { + "epoch": 0.13365985693110782, + "grad_norm": 0.0012740458006002587, + "learning_rate": 9.626924453992124e-06, + "loss": 0.0, + "step": 2074 + }, + { + "epoch": 0.13372430237803698, + "grad_norm": 0.01763503065907178, + "learning_rate": 9.626208378088077e-06, + "loss": 0.0001, + "step": 2075 + }, + { + "epoch": 0.13378874782496616, + "grad_norm": 0.023128139248136018, + "learning_rate": 9.625492302184031e-06, + "loss": 0.0003, + "step": 2076 + }, + { + "epoch": 0.13385319327189535, + "grad_norm": 0.027094712760084454, + "learning_rate": 9.624776226279986e-06, + "loss": 0.0, + "step": 2077 + }, + { + "epoch": 0.1339176387188245, + "grad_norm": 0.006780221983481411, + "learning_rate": 9.62406015037594e-06, + "loss": 0.0, + "step": 2078 + }, + { + "epoch": 0.1339820841657537, + "grad_norm": 0.03709572145604888, + "learning_rate": 9.623344074471896e-06, + "loss": 0.0, + "step": 2079 + }, + { + "epoch": 0.13404652961268287, + "grad_norm": 0.013222114372806895, + "learning_rate": 9.62262799856785e-06, + "loss": 0.0, + "step": 2080 + }, + { + "epoch": 0.13411097505961203, + "grad_norm": 2.5401217554602096, + "learning_rate": 9.621911922663803e-06, + "loss": 0.0209, + "step": 2081 + }, + { + "epoch": 0.13417542050654122, + "grad_norm": 1.0853759081919199, + "learning_rate": 9.621195846759757e-06, + "loss": 0.004, + "step": 2082 + }, + { + "epoch": 0.1342398659534704, + "grad_norm": 0.0014960894831067125, + "learning_rate": 9.620479770855711e-06, + "loss": 0.0, + "step": 2083 + }, + { + "epoch": 0.13430431140039956, + "grad_norm": 0.0010822836961904576, + "learning_rate": 9.619763694951666e-06, + "loss": 0.0, + "step": 2084 + }, + { + "epoch": 0.13436875684732874, + "grad_norm": 0.0020887246339200602, + "learning_rate": 9.61904761904762e-06, + "loss": 0.0, + "step": 2085 + }, + { + "epoch": 0.1344332022942579, + "grad_norm": 0.0006119661795616889, + "learning_rate": 9.618331543143574e-06, + "loss": 0.0, + "step": 2086 + }, + { + "epoch": 0.13449764774118708, + "grad_norm": 0.0343665176866361, + "learning_rate": 9.617615467239529e-06, + "loss": 0.0001, + "step": 2087 + }, + { + "epoch": 0.13456209318811627, + "grad_norm": 0.013401859452242893, + "learning_rate": 9.616899391335483e-06, + "loss": 0.0, + "step": 2088 + }, + { + "epoch": 0.13462653863504542, + "grad_norm": 0.012735135863470763, + "learning_rate": 9.616183315431437e-06, + "loss": 0.0, + "step": 2089 + }, + { + "epoch": 0.1346909840819746, + "grad_norm": 0.00041242381199152895, + "learning_rate": 9.615467239527391e-06, + "loss": 0.0, + "step": 2090 + }, + { + "epoch": 0.1347554295289038, + "grad_norm": 0.0021930814514787705, + "learning_rate": 9.614751163623344e-06, + "loss": 0.0, + "step": 2091 + }, + { + "epoch": 0.13481987497583295, + "grad_norm": 0.010113932262379114, + "learning_rate": 9.614035087719298e-06, + "loss": 0.0, + "step": 2092 + }, + { + "epoch": 0.13488432042276213, + "grad_norm": 0.002889841846447938, + "learning_rate": 9.613319011815253e-06, + "loss": 0.0, + "step": 2093 + }, + { + "epoch": 0.13494876586969132, + "grad_norm": 0.10021186453941479, + "learning_rate": 9.612602935911207e-06, + "loss": 0.0017, + "step": 2094 + }, + { + "epoch": 0.13501321131662047, + "grad_norm": 0.1701542427445515, + "learning_rate": 9.611886860007161e-06, + "loss": 0.0005, + "step": 2095 + }, + { + "epoch": 0.13507765676354966, + "grad_norm": 0.2863434546765848, + "learning_rate": 9.611170784103116e-06, + "loss": 0.0003, + "step": 2096 + }, + { + "epoch": 0.13514210221047884, + "grad_norm": 0.030204838470596046, + "learning_rate": 9.61045470819907e-06, + "loss": 0.0001, + "step": 2097 + }, + { + "epoch": 0.135206547657408, + "grad_norm": 0.0006739822506401262, + "learning_rate": 9.609738632295024e-06, + "loss": 0.0, + "step": 2098 + }, + { + "epoch": 0.13527099310433718, + "grad_norm": 0.011925152450066019, + "learning_rate": 9.609022556390978e-06, + "loss": 0.0, + "step": 2099 + }, + { + "epoch": 0.13533543855126634, + "grad_norm": 0.000670276235356618, + "learning_rate": 9.608306480486933e-06, + "loss": 0.0, + "step": 2100 + }, + { + "epoch": 0.13539988399819552, + "grad_norm": 0.0018891649124682022, + "learning_rate": 9.607590404582885e-06, + "loss": 0.0, + "step": 2101 + }, + { + "epoch": 0.1354643294451247, + "grad_norm": 0.0008358051574337161, + "learning_rate": 9.606874328678841e-06, + "loss": 0.0, + "step": 2102 + }, + { + "epoch": 0.13552877489205387, + "grad_norm": 0.17796909637015565, + "learning_rate": 9.606158252774796e-06, + "loss": 0.0005, + "step": 2103 + }, + { + "epoch": 0.13559322033898305, + "grad_norm": 0.0029781535719557023, + "learning_rate": 9.60544217687075e-06, + "loss": 0.0, + "step": 2104 + }, + { + "epoch": 0.13565766578591223, + "grad_norm": 0.0007821579397496209, + "learning_rate": 9.604726100966704e-06, + "loss": 0.0, + "step": 2105 + }, + { + "epoch": 0.1357221112328414, + "grad_norm": 0.013895512731706612, + "learning_rate": 9.604010025062659e-06, + "loss": 0.0001, + "step": 2106 + }, + { + "epoch": 0.13578655667977058, + "grad_norm": 0.0006325714668676574, + "learning_rate": 9.603293949158611e-06, + "loss": 0.0, + "step": 2107 + }, + { + "epoch": 0.13585100212669976, + "grad_norm": 0.0036075466188459538, + "learning_rate": 9.602577873254565e-06, + "loss": 0.0, + "step": 2108 + }, + { + "epoch": 0.13591544757362892, + "grad_norm": 0.00022546715718579325, + "learning_rate": 9.60186179735052e-06, + "loss": 0.0, + "step": 2109 + }, + { + "epoch": 0.1359798930205581, + "grad_norm": 0.0019036321791445633, + "learning_rate": 9.601145721446474e-06, + "loss": 0.0, + "step": 2110 + }, + { + "epoch": 0.13604433846748729, + "grad_norm": 0.0013767631086754703, + "learning_rate": 9.600429645542428e-06, + "loss": 0.0, + "step": 2111 + }, + { + "epoch": 0.13610878391441644, + "grad_norm": 0.00033106270899061087, + "learning_rate": 9.599713569638383e-06, + "loss": 0.0, + "step": 2112 + }, + { + "epoch": 0.13617322936134563, + "grad_norm": 0.0004757141807601016, + "learning_rate": 9.598997493734337e-06, + "loss": 0.0, + "step": 2113 + }, + { + "epoch": 0.13623767480827478, + "grad_norm": 0.0012730227358612794, + "learning_rate": 9.598281417830291e-06, + "loss": 0.0, + "step": 2114 + }, + { + "epoch": 0.13630212025520397, + "grad_norm": 0.006321125741840267, + "learning_rate": 9.597565341926246e-06, + "loss": 0.0001, + "step": 2115 + }, + { + "epoch": 0.13636656570213315, + "grad_norm": 0.007981766124111247, + "learning_rate": 9.5968492660222e-06, + "loss": 0.0001, + "step": 2116 + }, + { + "epoch": 0.1364310111490623, + "grad_norm": 0.0013743153367953734, + "learning_rate": 9.596133190118152e-06, + "loss": 0.0, + "step": 2117 + }, + { + "epoch": 0.1364954565959915, + "grad_norm": 0.009479398745795132, + "learning_rate": 9.595417114214107e-06, + "loss": 0.0, + "step": 2118 + }, + { + "epoch": 0.13655990204292068, + "grad_norm": 0.0025998528844112315, + "learning_rate": 9.594701038310061e-06, + "loss": 0.0, + "step": 2119 + }, + { + "epoch": 0.13662434748984983, + "grad_norm": 0.0014551499795753477, + "learning_rate": 9.593984962406015e-06, + "loss": 0.0, + "step": 2120 + }, + { + "epoch": 0.13668879293677902, + "grad_norm": 0.00018595876394929454, + "learning_rate": 9.59326888650197e-06, + "loss": 0.0, + "step": 2121 + }, + { + "epoch": 0.1367532383837082, + "grad_norm": 0.16831879403294095, + "learning_rate": 9.592552810597924e-06, + "loss": 0.0001, + "step": 2122 + }, + { + "epoch": 0.13681768383063736, + "grad_norm": 0.006065252590868405, + "learning_rate": 9.591836734693878e-06, + "loss": 0.0001, + "step": 2123 + }, + { + "epoch": 0.13688212927756654, + "grad_norm": 0.0008141178296848007, + "learning_rate": 9.591120658789833e-06, + "loss": 0.0, + "step": 2124 + }, + { + "epoch": 0.1369465747244957, + "grad_norm": 0.010431545831266202, + "learning_rate": 9.590404582885787e-06, + "loss": 0.0, + "step": 2125 + }, + { + "epoch": 0.13701102017142489, + "grad_norm": 0.00022965953498677982, + "learning_rate": 9.589688506981741e-06, + "loss": 0.0, + "step": 2126 + }, + { + "epoch": 0.13707546561835407, + "grad_norm": 0.03739440462698081, + "learning_rate": 9.588972431077695e-06, + "loss": 0.0006, + "step": 2127 + }, + { + "epoch": 0.13713991106528323, + "grad_norm": 0.0012059992764552644, + "learning_rate": 9.58825635517365e-06, + "loss": 0.0, + "step": 2128 + }, + { + "epoch": 0.1372043565122124, + "grad_norm": 0.003077806228310975, + "learning_rate": 9.587540279269604e-06, + "loss": 0.0, + "step": 2129 + }, + { + "epoch": 0.1372688019591416, + "grad_norm": 0.0038935344462661513, + "learning_rate": 9.586824203365558e-06, + "loss": 0.0001, + "step": 2130 + }, + { + "epoch": 0.13733324740607075, + "grad_norm": 0.3498434423366078, + "learning_rate": 9.586108127461513e-06, + "loss": 0.0024, + "step": 2131 + }, + { + "epoch": 0.13739769285299994, + "grad_norm": 0.0030082327199185107, + "learning_rate": 9.585392051557467e-06, + "loss": 0.0, + "step": 2132 + }, + { + "epoch": 0.13746213829992912, + "grad_norm": 0.001989814893872896, + "learning_rate": 9.58467597565342e-06, + "loss": 0.0, + "step": 2133 + }, + { + "epoch": 0.13752658374685828, + "grad_norm": 0.2112891392170814, + "learning_rate": 9.583959899749374e-06, + "loss": 0.0004, + "step": 2134 + }, + { + "epoch": 0.13759102919378746, + "grad_norm": 0.007960537454691344, + "learning_rate": 9.583243823845328e-06, + "loss": 0.0001, + "step": 2135 + }, + { + "epoch": 0.13765547464071665, + "grad_norm": 0.14093893824348863, + "learning_rate": 9.582527747941282e-06, + "loss": 0.0002, + "step": 2136 + }, + { + "epoch": 0.1377199200876458, + "grad_norm": 0.926997838025005, + "learning_rate": 9.581811672037237e-06, + "loss": 0.0055, + "step": 2137 + }, + { + "epoch": 0.137784365534575, + "grad_norm": 0.0007762803125597636, + "learning_rate": 9.581095596133191e-06, + "loss": 0.0, + "step": 2138 + }, + { + "epoch": 0.13784881098150414, + "grad_norm": 0.011034256644735584, + "learning_rate": 9.580379520229145e-06, + "loss": 0.0001, + "step": 2139 + }, + { + "epoch": 0.13791325642843333, + "grad_norm": 0.005050776046657039, + "learning_rate": 9.5796634443251e-06, + "loss": 0.0, + "step": 2140 + }, + { + "epoch": 0.1379777018753625, + "grad_norm": 0.0058777191061129405, + "learning_rate": 9.578947368421054e-06, + "loss": 0.0, + "step": 2141 + }, + { + "epoch": 0.13804214732229167, + "grad_norm": 0.00010401014356011279, + "learning_rate": 9.578231292517007e-06, + "loss": 0.0, + "step": 2142 + }, + { + "epoch": 0.13810659276922085, + "grad_norm": 0.010355138189235565, + "learning_rate": 9.57751521661296e-06, + "loss": 0.0, + "step": 2143 + }, + { + "epoch": 0.13817103821615004, + "grad_norm": 0.0004001475512964918, + "learning_rate": 9.576799140708915e-06, + "loss": 0.0, + "step": 2144 + }, + { + "epoch": 0.1382354836630792, + "grad_norm": 0.004463203340507725, + "learning_rate": 9.57608306480487e-06, + "loss": 0.0, + "step": 2145 + }, + { + "epoch": 0.13829992911000838, + "grad_norm": 0.08375990333559151, + "learning_rate": 9.575366988900824e-06, + "loss": 0.0013, + "step": 2146 + }, + { + "epoch": 0.13836437455693756, + "grad_norm": 0.001991547368177983, + "learning_rate": 9.574650912996778e-06, + "loss": 0.0, + "step": 2147 + }, + { + "epoch": 0.13842882000386672, + "grad_norm": 0.00025540746774049394, + "learning_rate": 9.573934837092732e-06, + "loss": 0.0, + "step": 2148 + }, + { + "epoch": 0.1384932654507959, + "grad_norm": 0.00028334922210477735, + "learning_rate": 9.573218761188687e-06, + "loss": 0.0, + "step": 2149 + }, + { + "epoch": 0.1385577108977251, + "grad_norm": 0.0002980895207097618, + "learning_rate": 9.572502685284641e-06, + "loss": 0.0, + "step": 2150 + }, + { + "epoch": 0.13862215634465425, + "grad_norm": 0.06858295254675982, + "learning_rate": 9.571786609380595e-06, + "loss": 0.0007, + "step": 2151 + }, + { + "epoch": 0.13868660179158343, + "grad_norm": 0.0037622125428383424, + "learning_rate": 9.57107053347655e-06, + "loss": 0.0, + "step": 2152 + }, + { + "epoch": 0.1387510472385126, + "grad_norm": 0.002992232614199599, + "learning_rate": 9.570354457572504e-06, + "loss": 0.0, + "step": 2153 + }, + { + "epoch": 0.13881549268544177, + "grad_norm": 0.0015270018171054526, + "learning_rate": 9.569638381668458e-06, + "loss": 0.0, + "step": 2154 + }, + { + "epoch": 0.13887993813237096, + "grad_norm": 0.00048099787247835995, + "learning_rate": 9.568922305764412e-06, + "loss": 0.0, + "step": 2155 + }, + { + "epoch": 0.1389443835793001, + "grad_norm": 0.06242042743630073, + "learning_rate": 9.568206229860367e-06, + "loss": 0.0008, + "step": 2156 + }, + { + "epoch": 0.1390088290262293, + "grad_norm": 0.010198910788331253, + "learning_rate": 9.567490153956321e-06, + "loss": 0.0, + "step": 2157 + }, + { + "epoch": 0.13907327447315848, + "grad_norm": 0.02498183349411272, + "learning_rate": 9.566774078052274e-06, + "loss": 0.0002, + "step": 2158 + }, + { + "epoch": 0.13913771992008764, + "grad_norm": 0.02871299698962203, + "learning_rate": 9.566058002148228e-06, + "loss": 0.0002, + "step": 2159 + }, + { + "epoch": 0.13920216536701682, + "grad_norm": 0.0027904745167431897, + "learning_rate": 9.565341926244182e-06, + "loss": 0.0, + "step": 2160 + }, + { + "epoch": 0.139266610813946, + "grad_norm": 0.0017900588858772886, + "learning_rate": 9.564625850340137e-06, + "loss": 0.0, + "step": 2161 + }, + { + "epoch": 0.13933105626087516, + "grad_norm": 0.34400681092622376, + "learning_rate": 9.56390977443609e-06, + "loss": 0.0009, + "step": 2162 + }, + { + "epoch": 0.13939550170780435, + "grad_norm": 0.0004665528583749067, + "learning_rate": 9.563193698532045e-06, + "loss": 0.0, + "step": 2163 + }, + { + "epoch": 0.1394599471547335, + "grad_norm": 0.0022528881939270448, + "learning_rate": 9.562477622628e-06, + "loss": 0.0, + "step": 2164 + }, + { + "epoch": 0.1395243926016627, + "grad_norm": 0.0747928905148953, + "learning_rate": 9.561761546723954e-06, + "loss": 0.0008, + "step": 2165 + }, + { + "epoch": 0.13958883804859187, + "grad_norm": 0.15587924242670487, + "learning_rate": 9.561045470819908e-06, + "loss": 0.0015, + "step": 2166 + }, + { + "epoch": 0.13965328349552103, + "grad_norm": 0.010952113106743318, + "learning_rate": 9.560329394915862e-06, + "loss": 0.0001, + "step": 2167 + }, + { + "epoch": 0.13971772894245021, + "grad_norm": 0.0052290386293551245, + "learning_rate": 9.559613319011815e-06, + "loss": 0.0, + "step": 2168 + }, + { + "epoch": 0.1397821743893794, + "grad_norm": 0.0009271938531524405, + "learning_rate": 9.55889724310777e-06, + "loss": 0.0, + "step": 2169 + }, + { + "epoch": 0.13984661983630856, + "grad_norm": 0.0012394748834099222, + "learning_rate": 9.558181167203723e-06, + "loss": 0.0, + "step": 2170 + }, + { + "epoch": 0.13991106528323774, + "grad_norm": 0.058789488562754094, + "learning_rate": 9.557465091299678e-06, + "loss": 0.0001, + "step": 2171 + }, + { + "epoch": 0.13997551073016692, + "grad_norm": 0.0008058793089597991, + "learning_rate": 9.556749015395634e-06, + "loss": 0.0, + "step": 2172 + }, + { + "epoch": 0.14003995617709608, + "grad_norm": 0.010173480068610822, + "learning_rate": 9.556032939491588e-06, + "loss": 0.0001, + "step": 2173 + }, + { + "epoch": 0.14010440162402527, + "grad_norm": 0.015670921228439428, + "learning_rate": 9.55531686358754e-06, + "loss": 0.0001, + "step": 2174 + }, + { + "epoch": 0.14016884707095445, + "grad_norm": 0.03238827058306233, + "learning_rate": 9.554600787683495e-06, + "loss": 0.0001, + "step": 2175 + }, + { + "epoch": 0.1402332925178836, + "grad_norm": 0.001430104595176344, + "learning_rate": 9.55388471177945e-06, + "loss": 0.0, + "step": 2176 + }, + { + "epoch": 0.1402977379648128, + "grad_norm": 0.22314276162412025, + "learning_rate": 9.553168635875404e-06, + "loss": 0.0009, + "step": 2177 + }, + { + "epoch": 0.14036218341174195, + "grad_norm": 0.003981575309131401, + "learning_rate": 9.552452559971358e-06, + "loss": 0.0, + "step": 2178 + }, + { + "epoch": 0.14042662885867113, + "grad_norm": 0.022705913856466316, + "learning_rate": 9.551736484067312e-06, + "loss": 0.0, + "step": 2179 + }, + { + "epoch": 0.14049107430560032, + "grad_norm": 0.02675406863132256, + "learning_rate": 9.551020408163266e-06, + "loss": 0.0004, + "step": 2180 + }, + { + "epoch": 0.14055551975252947, + "grad_norm": 0.0002129580119729682, + "learning_rate": 9.55030433225922e-06, + "loss": 0.0, + "step": 2181 + }, + { + "epoch": 0.14061996519945866, + "grad_norm": 0.4309991929595454, + "learning_rate": 9.549588256355175e-06, + "loss": 0.001, + "step": 2182 + }, + { + "epoch": 0.14068441064638784, + "grad_norm": 0.027090977677346518, + "learning_rate": 9.54887218045113e-06, + "loss": 0.0002, + "step": 2183 + }, + { + "epoch": 0.140748856093317, + "grad_norm": 0.015662161214944636, + "learning_rate": 9.548156104547082e-06, + "loss": 0.0002, + "step": 2184 + }, + { + "epoch": 0.14081330154024618, + "grad_norm": 0.2672868382237579, + "learning_rate": 9.547440028643036e-06, + "loss": 0.0017, + "step": 2185 + }, + { + "epoch": 0.14087774698717537, + "grad_norm": 0.42117580116917186, + "learning_rate": 9.54672395273899e-06, + "loss": 0.0019, + "step": 2186 + }, + { + "epoch": 0.14094219243410452, + "grad_norm": 0.0010824486543928827, + "learning_rate": 9.546007876834945e-06, + "loss": 0.0, + "step": 2187 + }, + { + "epoch": 0.1410066378810337, + "grad_norm": 0.002333835033309739, + "learning_rate": 9.5452918009309e-06, + "loss": 0.0, + "step": 2188 + }, + { + "epoch": 0.1410710833279629, + "grad_norm": 0.01005892812360604, + "learning_rate": 9.544575725026853e-06, + "loss": 0.0001, + "step": 2189 + }, + { + "epoch": 0.14113552877489205, + "grad_norm": 0.04455364954326522, + "learning_rate": 9.543859649122808e-06, + "loss": 0.0001, + "step": 2190 + }, + { + "epoch": 0.14119997422182123, + "grad_norm": 0.011728766129579472, + "learning_rate": 9.543143573218762e-06, + "loss": 0.0, + "step": 2191 + }, + { + "epoch": 0.1412644196687504, + "grad_norm": 0.04750135049264757, + "learning_rate": 9.542427497314716e-06, + "loss": 0.0001, + "step": 2192 + }, + { + "epoch": 0.14132886511567957, + "grad_norm": 0.10700909026667592, + "learning_rate": 9.54171142141067e-06, + "loss": 0.0001, + "step": 2193 + }, + { + "epoch": 0.14139331056260876, + "grad_norm": 0.0018928932543482945, + "learning_rate": 9.540995345506623e-06, + "loss": 0.0, + "step": 2194 + }, + { + "epoch": 0.14145775600953792, + "grad_norm": 0.00035078335624548354, + "learning_rate": 9.540279269602578e-06, + "loss": 0.0, + "step": 2195 + }, + { + "epoch": 0.1415222014564671, + "grad_norm": 0.003913708593869081, + "learning_rate": 9.539563193698534e-06, + "loss": 0.0, + "step": 2196 + }, + { + "epoch": 0.14158664690339628, + "grad_norm": 0.32054424044004987, + "learning_rate": 9.538847117794488e-06, + "loss": 0.0025, + "step": 2197 + }, + { + "epoch": 0.14165109235032544, + "grad_norm": 0.00016171468966313804, + "learning_rate": 9.538131041890442e-06, + "loss": 0.0, + "step": 2198 + }, + { + "epoch": 0.14171553779725463, + "grad_norm": 0.03721486801308035, + "learning_rate": 9.537414965986396e-06, + "loss": 0.0003, + "step": 2199 + }, + { + "epoch": 0.1417799832441838, + "grad_norm": 0.009083953664856316, + "learning_rate": 9.536698890082349e-06, + "loss": 0.0, + "step": 2200 + }, + { + "epoch": 0.14184442869111297, + "grad_norm": 0.005521489335318659, + "learning_rate": 9.535982814178303e-06, + "loss": 0.0, + "step": 2201 + }, + { + "epoch": 0.14190887413804215, + "grad_norm": 0.0010786474428082949, + "learning_rate": 9.535266738274258e-06, + "loss": 0.0, + "step": 2202 + }, + { + "epoch": 0.1419733195849713, + "grad_norm": 0.0008330341741979345, + "learning_rate": 9.534550662370212e-06, + "loss": 0.0, + "step": 2203 + }, + { + "epoch": 0.1420377650319005, + "grad_norm": 0.004503439156467866, + "learning_rate": 9.533834586466166e-06, + "loss": 0.0, + "step": 2204 + }, + { + "epoch": 0.14210221047882968, + "grad_norm": 0.1447155653550225, + "learning_rate": 9.53311851056212e-06, + "loss": 0.0003, + "step": 2205 + }, + { + "epoch": 0.14216665592575883, + "grad_norm": 0.0006731134985055816, + "learning_rate": 9.532402434658075e-06, + "loss": 0.0, + "step": 2206 + }, + { + "epoch": 0.14223110137268802, + "grad_norm": 0.000276659143474887, + "learning_rate": 9.53168635875403e-06, + "loss": 0.0, + "step": 2207 + }, + { + "epoch": 0.1422955468196172, + "grad_norm": 0.0007131897573369064, + "learning_rate": 9.530970282849983e-06, + "loss": 0.0, + "step": 2208 + }, + { + "epoch": 0.14235999226654636, + "grad_norm": 0.007994417837624694, + "learning_rate": 9.530254206945938e-06, + "loss": 0.0, + "step": 2209 + }, + { + "epoch": 0.14242443771347554, + "grad_norm": 0.181136117707174, + "learning_rate": 9.52953813104189e-06, + "loss": 0.0005, + "step": 2210 + }, + { + "epoch": 0.14248888316040473, + "grad_norm": 0.15367599099740947, + "learning_rate": 9.528822055137845e-06, + "loss": 0.0005, + "step": 2211 + }, + { + "epoch": 0.14255332860733388, + "grad_norm": 0.15235054783448743, + "learning_rate": 9.528105979233799e-06, + "loss": 0.0007, + "step": 2212 + }, + { + "epoch": 0.14261777405426307, + "grad_norm": 0.004490466231885834, + "learning_rate": 9.527389903329753e-06, + "loss": 0.0, + "step": 2213 + }, + { + "epoch": 0.14268221950119225, + "grad_norm": 0.1511576771537659, + "learning_rate": 9.526673827425708e-06, + "loss": 0.0017, + "step": 2214 + }, + { + "epoch": 0.1427466649481214, + "grad_norm": 0.013676781391319589, + "learning_rate": 9.525957751521662e-06, + "loss": 0.0001, + "step": 2215 + }, + { + "epoch": 0.1428111103950506, + "grad_norm": 0.06545610037074183, + "learning_rate": 9.525241675617616e-06, + "loss": 0.0005, + "step": 2216 + }, + { + "epoch": 0.14287555584197975, + "grad_norm": 0.9775896697611458, + "learning_rate": 9.52452559971357e-06, + "loss": 0.0047, + "step": 2217 + }, + { + "epoch": 0.14294000128890894, + "grad_norm": 0.00031478760827874825, + "learning_rate": 9.523809523809525e-06, + "loss": 0.0, + "step": 2218 + }, + { + "epoch": 0.14300444673583812, + "grad_norm": 0.030597664652693826, + "learning_rate": 9.523093447905479e-06, + "loss": 0.0002, + "step": 2219 + }, + { + "epoch": 0.14306889218276728, + "grad_norm": 1.7293522593468222, + "learning_rate": 9.522377372001433e-06, + "loss": 0.0008, + "step": 2220 + }, + { + "epoch": 0.14313333762969646, + "grad_norm": 0.01601536033865269, + "learning_rate": 9.521661296097388e-06, + "loss": 0.0, + "step": 2221 + }, + { + "epoch": 0.14319778307662565, + "grad_norm": 0.0006310703572563955, + "learning_rate": 9.520945220193342e-06, + "loss": 0.0, + "step": 2222 + }, + { + "epoch": 0.1432622285235548, + "grad_norm": 0.0005210939538044766, + "learning_rate": 9.520229144289296e-06, + "loss": 0.0, + "step": 2223 + }, + { + "epoch": 0.143326673970484, + "grad_norm": 0.018941119371520383, + "learning_rate": 9.51951306838525e-06, + "loss": 0.0002, + "step": 2224 + }, + { + "epoch": 0.14339111941741317, + "grad_norm": 0.05937819495828138, + "learning_rate": 9.518796992481205e-06, + "loss": 0.0001, + "step": 2225 + }, + { + "epoch": 0.14345556486434233, + "grad_norm": 0.018362872540066737, + "learning_rate": 9.518080916577157e-06, + "loss": 0.0001, + "step": 2226 + }, + { + "epoch": 0.1435200103112715, + "grad_norm": 0.01831749307485675, + "learning_rate": 9.517364840673112e-06, + "loss": 0.0001, + "step": 2227 + }, + { + "epoch": 0.1435844557582007, + "grad_norm": 0.006862687030695175, + "learning_rate": 9.516648764769066e-06, + "loss": 0.0, + "step": 2228 + }, + { + "epoch": 0.14364890120512985, + "grad_norm": 7.471194955517663e-05, + "learning_rate": 9.51593268886502e-06, + "loss": 0.0, + "step": 2229 + }, + { + "epoch": 0.14371334665205904, + "grad_norm": 0.003645056892950574, + "learning_rate": 9.515216612960975e-06, + "loss": 0.0, + "step": 2230 + }, + { + "epoch": 0.1437777920989882, + "grad_norm": 0.016415402507622183, + "learning_rate": 9.514500537056929e-06, + "loss": 0.0001, + "step": 2231 + }, + { + "epoch": 0.14384223754591738, + "grad_norm": 0.0016457805006108836, + "learning_rate": 9.513784461152883e-06, + "loss": 0.0, + "step": 2232 + }, + { + "epoch": 0.14390668299284656, + "grad_norm": 0.026616648264700046, + "learning_rate": 9.513068385248838e-06, + "loss": 0.0001, + "step": 2233 + }, + { + "epoch": 0.14397112843977572, + "grad_norm": 0.5394570109498802, + "learning_rate": 9.512352309344792e-06, + "loss": 0.0036, + "step": 2234 + }, + { + "epoch": 0.1440355738867049, + "grad_norm": 0.006396931689943378, + "learning_rate": 9.511636233440744e-06, + "loss": 0.0, + "step": 2235 + }, + { + "epoch": 0.1441000193336341, + "grad_norm": 0.012580438401703474, + "learning_rate": 9.510920157536699e-06, + "loss": 0.0, + "step": 2236 + }, + { + "epoch": 0.14416446478056325, + "grad_norm": 0.01920853147496425, + "learning_rate": 9.510204081632653e-06, + "loss": 0.0001, + "step": 2237 + }, + { + "epoch": 0.14422891022749243, + "grad_norm": 0.002245388255484627, + "learning_rate": 9.509488005728607e-06, + "loss": 0.0, + "step": 2238 + }, + { + "epoch": 0.14429335567442161, + "grad_norm": 0.579263982784339, + "learning_rate": 9.508771929824562e-06, + "loss": 0.0168, + "step": 2239 + }, + { + "epoch": 0.14435780112135077, + "grad_norm": 0.00028066203840411235, + "learning_rate": 9.508055853920516e-06, + "loss": 0.0, + "step": 2240 + }, + { + "epoch": 0.14442224656827995, + "grad_norm": 0.021214743917220127, + "learning_rate": 9.50733977801647e-06, + "loss": 0.0001, + "step": 2241 + }, + { + "epoch": 0.1444866920152091, + "grad_norm": 0.00032764501369088723, + "learning_rate": 9.506623702112425e-06, + "loss": 0.0, + "step": 2242 + }, + { + "epoch": 0.1445511374621383, + "grad_norm": 0.0058459261688533605, + "learning_rate": 9.505907626208379e-06, + "loss": 0.0, + "step": 2243 + }, + { + "epoch": 0.14461558290906748, + "grad_norm": 0.051741941735831465, + "learning_rate": 9.505191550304333e-06, + "loss": 0.0, + "step": 2244 + }, + { + "epoch": 0.14468002835599664, + "grad_norm": 0.32622149655365945, + "learning_rate": 9.504475474400287e-06, + "loss": 0.0003, + "step": 2245 + }, + { + "epoch": 0.14474447380292582, + "grad_norm": 0.055378686377618995, + "learning_rate": 9.503759398496242e-06, + "loss": 0.0001, + "step": 2246 + }, + { + "epoch": 0.144808919249855, + "grad_norm": 0.02396991245736209, + "learning_rate": 9.503043322592196e-06, + "loss": 0.0001, + "step": 2247 + }, + { + "epoch": 0.14487336469678416, + "grad_norm": 0.03345312463997792, + "learning_rate": 9.50232724668815e-06, + "loss": 0.0015, + "step": 2248 + }, + { + "epoch": 0.14493781014371335, + "grad_norm": 0.15419614497691245, + "learning_rate": 9.501611170784105e-06, + "loss": 0.002, + "step": 2249 + }, + { + "epoch": 0.14500225559064253, + "grad_norm": 0.25495189154503606, + "learning_rate": 9.500895094880059e-06, + "loss": 0.001, + "step": 2250 + }, + { + "epoch": 0.1450667010375717, + "grad_norm": 0.06784105322185756, + "learning_rate": 9.500179018976012e-06, + "loss": 0.0004, + "step": 2251 + }, + { + "epoch": 0.14513114648450087, + "grad_norm": 0.0008401214103608888, + "learning_rate": 9.499462943071966e-06, + "loss": 0.0, + "step": 2252 + }, + { + "epoch": 0.14519559193143006, + "grad_norm": 0.7134616185045286, + "learning_rate": 9.49874686716792e-06, + "loss": 0.0016, + "step": 2253 + }, + { + "epoch": 0.1452600373783592, + "grad_norm": 0.005298159616167539, + "learning_rate": 9.498030791263874e-06, + "loss": 0.0, + "step": 2254 + }, + { + "epoch": 0.1453244828252884, + "grad_norm": 0.24674281553149796, + "learning_rate": 9.497314715359829e-06, + "loss": 0.0005, + "step": 2255 + }, + { + "epoch": 0.14538892827221755, + "grad_norm": 0.015423563114479719, + "learning_rate": 9.496598639455783e-06, + "loss": 0.0001, + "step": 2256 + }, + { + "epoch": 0.14545337371914674, + "grad_norm": 0.014220395291028429, + "learning_rate": 9.495882563551737e-06, + "loss": 0.0001, + "step": 2257 + }, + { + "epoch": 0.14551781916607592, + "grad_norm": 0.0020908002801696174, + "learning_rate": 9.495166487647692e-06, + "loss": 0.0, + "step": 2258 + }, + { + "epoch": 0.14558226461300508, + "grad_norm": 0.004630563261824797, + "learning_rate": 9.494450411743646e-06, + "loss": 0.0, + "step": 2259 + }, + { + "epoch": 0.14564671005993426, + "grad_norm": 0.0005906543258460047, + "learning_rate": 9.4937343358396e-06, + "loss": 0.0, + "step": 2260 + }, + { + "epoch": 0.14571115550686345, + "grad_norm": 0.00957402495164633, + "learning_rate": 9.493018259935553e-06, + "loss": 0.0, + "step": 2261 + }, + { + "epoch": 0.1457756009537926, + "grad_norm": 0.014087719869458042, + "learning_rate": 9.492302184031507e-06, + "loss": 0.0, + "step": 2262 + }, + { + "epoch": 0.1458400464007218, + "grad_norm": 0.0029143890782602393, + "learning_rate": 9.491586108127461e-06, + "loss": 0.0, + "step": 2263 + }, + { + "epoch": 0.14590449184765097, + "grad_norm": 0.03503983481883921, + "learning_rate": 9.490870032223416e-06, + "loss": 0.0004, + "step": 2264 + }, + { + "epoch": 0.14596893729458013, + "grad_norm": 0.013767286296889813, + "learning_rate": 9.49015395631937e-06, + "loss": 0.0, + "step": 2265 + }, + { + "epoch": 0.14603338274150932, + "grad_norm": 0.25470119158679877, + "learning_rate": 9.489437880415326e-06, + "loss": 0.0005, + "step": 2266 + }, + { + "epoch": 0.1460978281884385, + "grad_norm": 0.007543495843438581, + "learning_rate": 9.488721804511279e-06, + "loss": 0.0, + "step": 2267 + }, + { + "epoch": 0.14616227363536766, + "grad_norm": 0.0020839204456079203, + "learning_rate": 9.488005728607233e-06, + "loss": 0.0, + "step": 2268 + }, + { + "epoch": 0.14622671908229684, + "grad_norm": 0.06732914003313965, + "learning_rate": 9.487289652703187e-06, + "loss": 0.0003, + "step": 2269 + }, + { + "epoch": 0.146291164529226, + "grad_norm": 0.0016212466885061447, + "learning_rate": 9.486573576799142e-06, + "loss": 0.0, + "step": 2270 + }, + { + "epoch": 0.14635560997615518, + "grad_norm": 0.3236607034805371, + "learning_rate": 9.485857500895096e-06, + "loss": 0.0045, + "step": 2271 + }, + { + "epoch": 0.14642005542308437, + "grad_norm": 0.004995574742021267, + "learning_rate": 9.48514142499105e-06, + "loss": 0.0, + "step": 2272 + }, + { + "epoch": 0.14648450087001352, + "grad_norm": 0.00117536297122479, + "learning_rate": 9.484425349087004e-06, + "loss": 0.0, + "step": 2273 + }, + { + "epoch": 0.1465489463169427, + "grad_norm": 0.0005086225519439412, + "learning_rate": 9.483709273182959e-06, + "loss": 0.0, + "step": 2274 + }, + { + "epoch": 0.1466133917638719, + "grad_norm": 0.0030946496454177484, + "learning_rate": 9.482993197278913e-06, + "loss": 0.0, + "step": 2275 + }, + { + "epoch": 0.14667783721080105, + "grad_norm": 0.0010062472442629099, + "learning_rate": 9.482277121374867e-06, + "loss": 0.0, + "step": 2276 + }, + { + "epoch": 0.14674228265773023, + "grad_norm": 0.03180384283377841, + "learning_rate": 9.48156104547082e-06, + "loss": 0.0002, + "step": 2277 + }, + { + "epoch": 0.14680672810465942, + "grad_norm": 0.21418460822901342, + "learning_rate": 9.480844969566774e-06, + "loss": 0.0015, + "step": 2278 + }, + { + "epoch": 0.14687117355158857, + "grad_norm": 0.010110387519656235, + "learning_rate": 9.480128893662729e-06, + "loss": 0.0, + "step": 2279 + }, + { + "epoch": 0.14693561899851776, + "grad_norm": 0.0013718162505616696, + "learning_rate": 9.479412817758683e-06, + "loss": 0.0, + "step": 2280 + }, + { + "epoch": 0.14700006444544694, + "grad_norm": 0.001147183938640018, + "learning_rate": 9.478696741854637e-06, + "loss": 0.0, + "step": 2281 + }, + { + "epoch": 0.1470645098923761, + "grad_norm": 0.0038179217062423933, + "learning_rate": 9.477980665950591e-06, + "loss": 0.0, + "step": 2282 + }, + { + "epoch": 0.14712895533930528, + "grad_norm": 0.16823613707940346, + "learning_rate": 9.477264590046546e-06, + "loss": 0.0001, + "step": 2283 + }, + { + "epoch": 0.14719340078623444, + "grad_norm": 0.0007041555804207547, + "learning_rate": 9.4765485141425e-06, + "loss": 0.0, + "step": 2284 + }, + { + "epoch": 0.14725784623316363, + "grad_norm": 0.17469604142040665, + "learning_rate": 9.475832438238454e-06, + "loss": 0.0006, + "step": 2285 + }, + { + "epoch": 0.1473222916800928, + "grad_norm": 0.07642142365060478, + "learning_rate": 9.475116362334409e-06, + "loss": 0.0002, + "step": 2286 + }, + { + "epoch": 0.14738673712702197, + "grad_norm": 0.026083890142809724, + "learning_rate": 9.474400286430361e-06, + "loss": 0.0003, + "step": 2287 + }, + { + "epoch": 0.14745118257395115, + "grad_norm": 0.0013347215920775026, + "learning_rate": 9.473684210526315e-06, + "loss": 0.0, + "step": 2288 + }, + { + "epoch": 0.14751562802088033, + "grad_norm": 0.0656069517862333, + "learning_rate": 9.472968134622271e-06, + "loss": 0.0001, + "step": 2289 + }, + { + "epoch": 0.1475800734678095, + "grad_norm": 0.0008470576602853886, + "learning_rate": 9.472252058718226e-06, + "loss": 0.0, + "step": 2290 + }, + { + "epoch": 0.14764451891473868, + "grad_norm": 0.16749845691760115, + "learning_rate": 9.47153598281418e-06, + "loss": 0.003, + "step": 2291 + }, + { + "epoch": 0.14770896436166786, + "grad_norm": 0.0016121071686281076, + "learning_rate": 9.470819906910134e-06, + "loss": 0.0, + "step": 2292 + }, + { + "epoch": 0.14777340980859702, + "grad_norm": 0.006441744083881955, + "learning_rate": 9.470103831006087e-06, + "loss": 0.0, + "step": 2293 + }, + { + "epoch": 0.1478378552555262, + "grad_norm": 0.016728643202810173, + "learning_rate": 9.469387755102041e-06, + "loss": 0.0001, + "step": 2294 + }, + { + "epoch": 0.14790230070245536, + "grad_norm": 0.005027691402007882, + "learning_rate": 9.468671679197996e-06, + "loss": 0.0, + "step": 2295 + }, + { + "epoch": 0.14796674614938454, + "grad_norm": 0.001770350858388834, + "learning_rate": 9.46795560329395e-06, + "loss": 0.0, + "step": 2296 + }, + { + "epoch": 0.14803119159631373, + "grad_norm": 0.00011038801131939273, + "learning_rate": 9.467239527389904e-06, + "loss": 0.0, + "step": 2297 + }, + { + "epoch": 0.14809563704324288, + "grad_norm": 0.30113663671343743, + "learning_rate": 9.466523451485858e-06, + "loss": 0.0011, + "step": 2298 + }, + { + "epoch": 0.14816008249017207, + "grad_norm": 0.0003501895384979152, + "learning_rate": 9.465807375581813e-06, + "loss": 0.0, + "step": 2299 + }, + { + "epoch": 0.14822452793710125, + "grad_norm": 0.001076580935332914, + "learning_rate": 9.465091299677767e-06, + "loss": 0.0, + "step": 2300 + }, + { + "epoch": 0.1482889733840304, + "grad_norm": 0.880960361418177, + "learning_rate": 9.464375223773721e-06, + "loss": 0.0051, + "step": 2301 + }, + { + "epoch": 0.1483534188309596, + "grad_norm": 0.03526416745467504, + "learning_rate": 9.463659147869676e-06, + "loss": 0.0002, + "step": 2302 + }, + { + "epoch": 0.14841786427788878, + "grad_norm": 0.0025750307660047872, + "learning_rate": 9.462943071965628e-06, + "loss": 0.0, + "step": 2303 + }, + { + "epoch": 0.14848230972481793, + "grad_norm": 0.004023243719837641, + "learning_rate": 9.462226996061583e-06, + "loss": 0.0, + "step": 2304 + }, + { + "epoch": 0.14854675517174712, + "grad_norm": 0.9786404284533876, + "learning_rate": 9.461510920157537e-06, + "loss": 0.0027, + "step": 2305 + }, + { + "epoch": 0.1486112006186763, + "grad_norm": 0.13937884764855718, + "learning_rate": 9.460794844253491e-06, + "loss": 0.0018, + "step": 2306 + }, + { + "epoch": 0.14867564606560546, + "grad_norm": 0.004105793329801603, + "learning_rate": 9.460078768349445e-06, + "loss": 0.0, + "step": 2307 + }, + { + "epoch": 0.14874009151253464, + "grad_norm": 0.0005642601189968466, + "learning_rate": 9.4593626924454e-06, + "loss": 0.0, + "step": 2308 + }, + { + "epoch": 0.1488045369594638, + "grad_norm": 0.041690372482371, + "learning_rate": 9.458646616541354e-06, + "loss": 0.0001, + "step": 2309 + }, + { + "epoch": 0.14886898240639299, + "grad_norm": 0.0004829795518174653, + "learning_rate": 9.457930540637308e-06, + "loss": 0.0, + "step": 2310 + }, + { + "epoch": 0.14893342785332217, + "grad_norm": 0.0004682333152799085, + "learning_rate": 9.457214464733263e-06, + "loss": 0.0, + "step": 2311 + }, + { + "epoch": 0.14899787330025133, + "grad_norm": 0.016004459520340634, + "learning_rate": 9.456498388829217e-06, + "loss": 0.0002, + "step": 2312 + }, + { + "epoch": 0.1490623187471805, + "grad_norm": 0.6353641212082908, + "learning_rate": 9.455782312925171e-06, + "loss": 0.0041, + "step": 2313 + }, + { + "epoch": 0.1491267641941097, + "grad_norm": 0.0006008079600321058, + "learning_rate": 9.455066237021126e-06, + "loss": 0.0, + "step": 2314 + }, + { + "epoch": 0.14919120964103885, + "grad_norm": 0.002256655415846708, + "learning_rate": 9.45435016111708e-06, + "loss": 0.0, + "step": 2315 + }, + { + "epoch": 0.14925565508796804, + "grad_norm": 0.00014758511457507473, + "learning_rate": 9.453634085213034e-06, + "loss": 0.0, + "step": 2316 + }, + { + "epoch": 0.14932010053489722, + "grad_norm": 0.006368467140729355, + "learning_rate": 9.452918009308988e-06, + "loss": 0.0, + "step": 2317 + }, + { + "epoch": 0.14938454598182638, + "grad_norm": 0.0032855975036902166, + "learning_rate": 9.452201933404943e-06, + "loss": 0.0, + "step": 2318 + }, + { + "epoch": 0.14944899142875556, + "grad_norm": 0.6131575149796049, + "learning_rate": 9.451485857500895e-06, + "loss": 0.003, + "step": 2319 + }, + { + "epoch": 0.14951343687568475, + "grad_norm": 0.275169933440258, + "learning_rate": 9.45076978159685e-06, + "loss": 0.0007, + "step": 2320 + }, + { + "epoch": 0.1495778823226139, + "grad_norm": 0.0023692457357848964, + "learning_rate": 9.450053705692804e-06, + "loss": 0.0, + "step": 2321 + }, + { + "epoch": 0.1496423277695431, + "grad_norm": 0.0025468429265798303, + "learning_rate": 9.449337629788758e-06, + "loss": 0.0, + "step": 2322 + }, + { + "epoch": 0.14970677321647224, + "grad_norm": 0.6888988525450286, + "learning_rate": 9.448621553884713e-06, + "loss": 0.0026, + "step": 2323 + }, + { + "epoch": 0.14977121866340143, + "grad_norm": 0.000541195102515391, + "learning_rate": 9.447905477980667e-06, + "loss": 0.0, + "step": 2324 + }, + { + "epoch": 0.1498356641103306, + "grad_norm": 0.04337376750890723, + "learning_rate": 9.447189402076621e-06, + "loss": 0.0001, + "step": 2325 + }, + { + "epoch": 0.14990010955725977, + "grad_norm": 0.05995612202794425, + "learning_rate": 9.446473326172575e-06, + "loss": 0.0002, + "step": 2326 + }, + { + "epoch": 0.14996455500418895, + "grad_norm": 0.014729528683343611, + "learning_rate": 9.44575725026853e-06, + "loss": 0.0001, + "step": 2327 + }, + { + "epoch": 0.15002900045111814, + "grad_norm": 0.015224141839539764, + "learning_rate": 9.445041174364482e-06, + "loss": 0.0, + "step": 2328 + }, + { + "epoch": 0.1500934458980473, + "grad_norm": 0.0020922033316262286, + "learning_rate": 9.444325098460437e-06, + "loss": 0.0, + "step": 2329 + }, + { + "epoch": 0.15015789134497648, + "grad_norm": 0.00976562052965062, + "learning_rate": 9.443609022556391e-06, + "loss": 0.0, + "step": 2330 + }, + { + "epoch": 0.15022233679190566, + "grad_norm": 0.003141049531346141, + "learning_rate": 9.442892946652345e-06, + "loss": 0.0, + "step": 2331 + }, + { + "epoch": 0.15028678223883482, + "grad_norm": 0.2375710487754487, + "learning_rate": 9.4421768707483e-06, + "loss": 0.0007, + "step": 2332 + }, + { + "epoch": 0.150351227685764, + "grad_norm": 0.10324509375902188, + "learning_rate": 9.441460794844254e-06, + "loss": 0.0003, + "step": 2333 + }, + { + "epoch": 0.15041567313269316, + "grad_norm": 0.011364228373043003, + "learning_rate": 9.440744718940208e-06, + "loss": 0.0, + "step": 2334 + }, + { + "epoch": 0.15048011857962235, + "grad_norm": 0.033959039268354946, + "learning_rate": 9.440028643036162e-06, + "loss": 0.0001, + "step": 2335 + }, + { + "epoch": 0.15054456402655153, + "grad_norm": 0.0010414789426565346, + "learning_rate": 9.439312567132117e-06, + "loss": 0.0, + "step": 2336 + }, + { + "epoch": 0.1506090094734807, + "grad_norm": 0.011555880377593744, + "learning_rate": 9.438596491228071e-06, + "loss": 0.0001, + "step": 2337 + }, + { + "epoch": 0.15067345492040987, + "grad_norm": 0.13693876274950872, + "learning_rate": 9.437880415324025e-06, + "loss": 0.0005, + "step": 2338 + }, + { + "epoch": 0.15073790036733906, + "grad_norm": 0.046467830614872876, + "learning_rate": 9.43716433941998e-06, + "loss": 0.0002, + "step": 2339 + }, + { + "epoch": 0.1508023458142682, + "grad_norm": 0.28248522299966256, + "learning_rate": 9.436448263515934e-06, + "loss": 0.0005, + "step": 2340 + }, + { + "epoch": 0.1508667912611974, + "grad_norm": 0.010665106642022313, + "learning_rate": 9.435732187611888e-06, + "loss": 0.0001, + "step": 2341 + }, + { + "epoch": 0.15093123670812658, + "grad_norm": 0.01410729256099583, + "learning_rate": 9.435016111707843e-06, + "loss": 0.0001, + "step": 2342 + }, + { + "epoch": 0.15099568215505574, + "grad_norm": 0.006377867812248754, + "learning_rate": 9.434300035803797e-06, + "loss": 0.0, + "step": 2343 + }, + { + "epoch": 0.15106012760198492, + "grad_norm": 0.3001965872068028, + "learning_rate": 9.43358395989975e-06, + "loss": 0.0005, + "step": 2344 + }, + { + "epoch": 0.1511245730489141, + "grad_norm": 0.0054143970512442385, + "learning_rate": 9.432867883995704e-06, + "loss": 0.0, + "step": 2345 + }, + { + "epoch": 0.15118901849584326, + "grad_norm": 0.003879576657613195, + "learning_rate": 9.432151808091658e-06, + "loss": 0.0, + "step": 2346 + }, + { + "epoch": 0.15125346394277245, + "grad_norm": 0.017826363177415604, + "learning_rate": 9.431435732187612e-06, + "loss": 0.0002, + "step": 2347 + }, + { + "epoch": 0.1513179093897016, + "grad_norm": 0.14413647606933994, + "learning_rate": 9.430719656283567e-06, + "loss": 0.0004, + "step": 2348 + }, + { + "epoch": 0.1513823548366308, + "grad_norm": 0.0006606705735611096, + "learning_rate": 9.430003580379521e-06, + "loss": 0.0, + "step": 2349 + }, + { + "epoch": 0.15144680028355997, + "grad_norm": 0.001281065290532309, + "learning_rate": 9.429287504475475e-06, + "loss": 0.0, + "step": 2350 + }, + { + "epoch": 0.15151124573048913, + "grad_norm": 0.002033793603108425, + "learning_rate": 9.42857142857143e-06, + "loss": 0.0, + "step": 2351 + }, + { + "epoch": 0.15157569117741831, + "grad_norm": 0.006509579136491605, + "learning_rate": 9.427855352667384e-06, + "loss": 0.0, + "step": 2352 + }, + { + "epoch": 0.1516401366243475, + "grad_norm": 0.015385489864271826, + "learning_rate": 9.427139276763338e-06, + "loss": 0.0001, + "step": 2353 + }, + { + "epoch": 0.15170458207127666, + "grad_norm": 0.00034424994771698977, + "learning_rate": 9.42642320085929e-06, + "loss": 0.0, + "step": 2354 + }, + { + "epoch": 0.15176902751820584, + "grad_norm": 0.0008804023377011956, + "learning_rate": 9.425707124955245e-06, + "loss": 0.0, + "step": 2355 + }, + { + "epoch": 0.15183347296513502, + "grad_norm": 0.547700177660738, + "learning_rate": 9.4249910490512e-06, + "loss": 0.0022, + "step": 2356 + }, + { + "epoch": 0.15189791841206418, + "grad_norm": 0.0032957801919053283, + "learning_rate": 9.424274973147154e-06, + "loss": 0.0, + "step": 2357 + }, + { + "epoch": 0.15196236385899337, + "grad_norm": 0.0027416642189764164, + "learning_rate": 9.423558897243108e-06, + "loss": 0.0, + "step": 2358 + }, + { + "epoch": 0.15202680930592255, + "grad_norm": 0.001389136393539497, + "learning_rate": 9.422842821339064e-06, + "loss": 0.0, + "step": 2359 + }, + { + "epoch": 0.1520912547528517, + "grad_norm": 0.0016453047623652915, + "learning_rate": 9.422126745435017e-06, + "loss": 0.0, + "step": 2360 + }, + { + "epoch": 0.1521557001997809, + "grad_norm": 0.003564651534562986, + "learning_rate": 9.42141066953097e-06, + "loss": 0.0, + "step": 2361 + }, + { + "epoch": 0.15222014564671005, + "grad_norm": 0.001581220919916646, + "learning_rate": 9.420694593626925e-06, + "loss": 0.0, + "step": 2362 + }, + { + "epoch": 0.15228459109363923, + "grad_norm": 1.166109780962107, + "learning_rate": 9.41997851772288e-06, + "loss": 0.0068, + "step": 2363 + }, + { + "epoch": 0.15234903654056842, + "grad_norm": 0.001095153365991885, + "learning_rate": 9.419262441818834e-06, + "loss": 0.0, + "step": 2364 + }, + { + "epoch": 0.15241348198749757, + "grad_norm": 0.00018614029556956357, + "learning_rate": 9.418546365914788e-06, + "loss": 0.0, + "step": 2365 + }, + { + "epoch": 0.15247792743442676, + "grad_norm": 0.1916720432757476, + "learning_rate": 9.417830290010742e-06, + "loss": 0.0003, + "step": 2366 + }, + { + "epoch": 0.15254237288135594, + "grad_norm": 0.17782862752708178, + "learning_rate": 9.417114214106697e-06, + "loss": 0.0004, + "step": 2367 + }, + { + "epoch": 0.1526068183282851, + "grad_norm": 0.01575983901171653, + "learning_rate": 9.416398138202651e-06, + "loss": 0.0, + "step": 2368 + }, + { + "epoch": 0.15267126377521428, + "grad_norm": 0.0023644075189573855, + "learning_rate": 9.415682062298605e-06, + "loss": 0.0, + "step": 2369 + }, + { + "epoch": 0.15273570922214347, + "grad_norm": 0.03693351516886743, + "learning_rate": 9.414965986394558e-06, + "loss": 0.0001, + "step": 2370 + }, + { + "epoch": 0.15280015466907262, + "grad_norm": 0.0012187265154810075, + "learning_rate": 9.414249910490512e-06, + "loss": 0.0, + "step": 2371 + }, + { + "epoch": 0.1528646001160018, + "grad_norm": 0.024398932537146707, + "learning_rate": 9.413533834586466e-06, + "loss": 0.0001, + "step": 2372 + }, + { + "epoch": 0.15292904556293097, + "grad_norm": 0.038167990425758816, + "learning_rate": 9.41281775868242e-06, + "loss": 0.0, + "step": 2373 + }, + { + "epoch": 0.15299349100986015, + "grad_norm": 0.0026435490808041713, + "learning_rate": 9.412101682778375e-06, + "loss": 0.0, + "step": 2374 + }, + { + "epoch": 0.15305793645678933, + "grad_norm": 0.06960011244006364, + "learning_rate": 9.41138560687433e-06, + "loss": 0.0001, + "step": 2375 + }, + { + "epoch": 0.1531223819037185, + "grad_norm": 0.0007201561743522177, + "learning_rate": 9.410669530970284e-06, + "loss": 0.0, + "step": 2376 + }, + { + "epoch": 0.15318682735064768, + "grad_norm": 0.003125982065041039, + "learning_rate": 9.409953455066238e-06, + "loss": 0.0, + "step": 2377 + }, + { + "epoch": 0.15325127279757686, + "grad_norm": 0.0042100056123061, + "learning_rate": 9.409237379162192e-06, + "loss": 0.0, + "step": 2378 + }, + { + "epoch": 0.15331571824450602, + "grad_norm": 0.004587416136859462, + "learning_rate": 9.408521303258147e-06, + "loss": 0.0, + "step": 2379 + }, + { + "epoch": 0.1533801636914352, + "grad_norm": 0.04717204972693149, + "learning_rate": 9.407805227354099e-06, + "loss": 0.0, + "step": 2380 + }, + { + "epoch": 0.15344460913836439, + "grad_norm": 0.04171196733747788, + "learning_rate": 9.407089151450053e-06, + "loss": 0.0001, + "step": 2381 + }, + { + "epoch": 0.15350905458529354, + "grad_norm": 0.17253016101069935, + "learning_rate": 9.406373075546008e-06, + "loss": 0.0005, + "step": 2382 + }, + { + "epoch": 0.15357350003222273, + "grad_norm": 0.03761734075604174, + "learning_rate": 9.405656999641964e-06, + "loss": 0.0, + "step": 2383 + }, + { + "epoch": 0.1536379454791519, + "grad_norm": 0.011965065388645795, + "learning_rate": 9.404940923737918e-06, + "loss": 0.0, + "step": 2384 + }, + { + "epoch": 0.15370239092608107, + "grad_norm": 0.04018939250144867, + "learning_rate": 9.404224847833872e-06, + "loss": 0.0, + "step": 2385 + }, + { + "epoch": 0.15376683637301025, + "grad_norm": 0.3395482345981454, + "learning_rate": 9.403508771929825e-06, + "loss": 0.0008, + "step": 2386 + }, + { + "epoch": 0.1538312818199394, + "grad_norm": 0.03146444998541529, + "learning_rate": 9.40279269602578e-06, + "loss": 0.0, + "step": 2387 + }, + { + "epoch": 0.1538957272668686, + "grad_norm": 0.01010570402105599, + "learning_rate": 9.402076620121734e-06, + "loss": 0.0, + "step": 2388 + }, + { + "epoch": 0.15396017271379778, + "grad_norm": 0.006316466860131675, + "learning_rate": 9.401360544217688e-06, + "loss": 0.0, + "step": 2389 + }, + { + "epoch": 0.15402461816072693, + "grad_norm": 0.004210654737544568, + "learning_rate": 9.400644468313642e-06, + "loss": 0.0, + "step": 2390 + }, + { + "epoch": 0.15408906360765612, + "grad_norm": 0.1352283370985708, + "learning_rate": 9.399928392409596e-06, + "loss": 0.0004, + "step": 2391 + }, + { + "epoch": 0.1541535090545853, + "grad_norm": 0.02761470179688728, + "learning_rate": 9.39921231650555e-06, + "loss": 0.0001, + "step": 2392 + }, + { + "epoch": 0.15421795450151446, + "grad_norm": 0.0005356983755468877, + "learning_rate": 9.398496240601505e-06, + "loss": 0.0, + "step": 2393 + }, + { + "epoch": 0.15428239994844364, + "grad_norm": 0.11308521952425876, + "learning_rate": 9.39778016469746e-06, + "loss": 0.0004, + "step": 2394 + }, + { + "epoch": 0.15434684539537283, + "grad_norm": 0.0003210339035201644, + "learning_rate": 9.397064088793414e-06, + "loss": 0.0, + "step": 2395 + }, + { + "epoch": 0.15441129084230198, + "grad_norm": 0.36937508378132805, + "learning_rate": 9.396348012889366e-06, + "loss": 0.0027, + "step": 2396 + }, + { + "epoch": 0.15447573628923117, + "grad_norm": 0.03054608444162845, + "learning_rate": 9.39563193698532e-06, + "loss": 0.0001, + "step": 2397 + }, + { + "epoch": 0.15454018173616035, + "grad_norm": 0.49067405346123016, + "learning_rate": 9.394915861081275e-06, + "loss": 0.0011, + "step": 2398 + }, + { + "epoch": 0.1546046271830895, + "grad_norm": 0.0164183152203303, + "learning_rate": 9.394199785177229e-06, + "loss": 0.0001, + "step": 2399 + }, + { + "epoch": 0.1546690726300187, + "grad_norm": 0.0048164873590324534, + "learning_rate": 9.393483709273183e-06, + "loss": 0.0, + "step": 2400 + }, + { + "epoch": 0.15473351807694785, + "grad_norm": 0.003437130484469497, + "learning_rate": 9.392767633369138e-06, + "loss": 0.0, + "step": 2401 + }, + { + "epoch": 0.15479796352387704, + "grad_norm": 1.1498831648148473, + "learning_rate": 9.392051557465092e-06, + "loss": 0.0075, + "step": 2402 + }, + { + "epoch": 0.15486240897080622, + "grad_norm": 0.5590278432647267, + "learning_rate": 9.391335481561046e-06, + "loss": 0.0039, + "step": 2403 + }, + { + "epoch": 0.15492685441773538, + "grad_norm": 0.006440467338636328, + "learning_rate": 9.390619405657e-06, + "loss": 0.0, + "step": 2404 + }, + { + "epoch": 0.15499129986466456, + "grad_norm": 0.005089887857211715, + "learning_rate": 9.389903329752955e-06, + "loss": 0.0, + "step": 2405 + }, + { + "epoch": 0.15505574531159375, + "grad_norm": 0.03226839406679151, + "learning_rate": 9.38918725384891e-06, + "loss": 0.0002, + "step": 2406 + }, + { + "epoch": 0.1551201907585229, + "grad_norm": 0.005760741049848393, + "learning_rate": 9.388471177944863e-06, + "loss": 0.0, + "step": 2407 + }, + { + "epoch": 0.1551846362054521, + "grad_norm": 0.002409811586602318, + "learning_rate": 9.387755102040818e-06, + "loss": 0.0, + "step": 2408 + }, + { + "epoch": 0.15524908165238127, + "grad_norm": 0.0002274626718670526, + "learning_rate": 9.387039026136772e-06, + "loss": 0.0, + "step": 2409 + }, + { + "epoch": 0.15531352709931043, + "grad_norm": 0.009568083954336773, + "learning_rate": 9.386322950232726e-06, + "loss": 0.0, + "step": 2410 + }, + { + "epoch": 0.1553779725462396, + "grad_norm": 0.0010595213282637791, + "learning_rate": 9.38560687432868e-06, + "loss": 0.0, + "step": 2411 + }, + { + "epoch": 0.15544241799316877, + "grad_norm": 0.0015494971480794625, + "learning_rate": 9.384890798424633e-06, + "loss": 0.0, + "step": 2412 + }, + { + "epoch": 0.15550686344009795, + "grad_norm": 0.027000724745569585, + "learning_rate": 9.384174722520588e-06, + "loss": 0.0001, + "step": 2413 + }, + { + "epoch": 0.15557130888702714, + "grad_norm": 0.00021975748346756332, + "learning_rate": 9.383458646616542e-06, + "loss": 0.0, + "step": 2414 + }, + { + "epoch": 0.1556357543339563, + "grad_norm": 0.22442388753739564, + "learning_rate": 9.382742570712496e-06, + "loss": 0.001, + "step": 2415 + }, + { + "epoch": 0.15570019978088548, + "grad_norm": 0.058095847062474275, + "learning_rate": 9.38202649480845e-06, + "loss": 0.0002, + "step": 2416 + }, + { + "epoch": 0.15576464522781466, + "grad_norm": 0.004947700956219011, + "learning_rate": 9.381310418904405e-06, + "loss": 0.0, + "step": 2417 + }, + { + "epoch": 0.15582909067474382, + "grad_norm": 0.013964975233892866, + "learning_rate": 9.380594343000359e-06, + "loss": 0.0, + "step": 2418 + }, + { + "epoch": 0.155893536121673, + "grad_norm": 0.00045088995588918313, + "learning_rate": 9.379878267096313e-06, + "loss": 0.0, + "step": 2419 + }, + { + "epoch": 0.1559579815686022, + "grad_norm": 0.009997687306932127, + "learning_rate": 9.379162191192268e-06, + "loss": 0.0, + "step": 2420 + }, + { + "epoch": 0.15602242701553135, + "grad_norm": 0.013743444217281438, + "learning_rate": 9.378446115288222e-06, + "loss": 0.0001, + "step": 2421 + }, + { + "epoch": 0.15608687246246053, + "grad_norm": 0.0011079386020701972, + "learning_rate": 9.377730039384175e-06, + "loss": 0.0, + "step": 2422 + }, + { + "epoch": 0.15615131790938971, + "grad_norm": 0.0005776427554553396, + "learning_rate": 9.377013963480129e-06, + "loss": 0.0, + "step": 2423 + }, + { + "epoch": 0.15621576335631887, + "grad_norm": 0.02744104327440318, + "learning_rate": 9.376297887576083e-06, + "loss": 0.0001, + "step": 2424 + }, + { + "epoch": 0.15628020880324806, + "grad_norm": 0.0002999203618423623, + "learning_rate": 9.375581811672037e-06, + "loss": 0.0, + "step": 2425 + }, + { + "epoch": 0.1563446542501772, + "grad_norm": 0.0001923817727009459, + "learning_rate": 9.374865735767992e-06, + "loss": 0.0, + "step": 2426 + }, + { + "epoch": 0.1564090996971064, + "grad_norm": 0.15707550729938025, + "learning_rate": 9.374149659863946e-06, + "loss": 0.0003, + "step": 2427 + }, + { + "epoch": 0.15647354514403558, + "grad_norm": 0.0005202558905786767, + "learning_rate": 9.3734335839599e-06, + "loss": 0.0, + "step": 2428 + }, + { + "epoch": 0.15653799059096474, + "grad_norm": 0.1341350078461987, + "learning_rate": 9.372717508055855e-06, + "loss": 0.0005, + "step": 2429 + }, + { + "epoch": 0.15660243603789392, + "grad_norm": 0.03295119511432381, + "learning_rate": 9.372001432151809e-06, + "loss": 0.0001, + "step": 2430 + }, + { + "epoch": 0.1566668814848231, + "grad_norm": 0.008991741543612479, + "learning_rate": 9.371285356247763e-06, + "loss": 0.0001, + "step": 2431 + }, + { + "epoch": 0.15673132693175226, + "grad_norm": 0.005882962626867871, + "learning_rate": 9.370569280343718e-06, + "loss": 0.0, + "step": 2432 + }, + { + "epoch": 0.15679577237868145, + "grad_norm": 0.14532828296553948, + "learning_rate": 9.369853204439672e-06, + "loss": 0.0001, + "step": 2433 + }, + { + "epoch": 0.15686021782561063, + "grad_norm": 0.001661131622482854, + "learning_rate": 9.369137128535626e-06, + "loss": 0.0, + "step": 2434 + }, + { + "epoch": 0.1569246632725398, + "grad_norm": 0.0013453870636462811, + "learning_rate": 9.36842105263158e-06, + "loss": 0.0, + "step": 2435 + }, + { + "epoch": 0.15698910871946897, + "grad_norm": 0.0007773121412426143, + "learning_rate": 9.367704976727535e-06, + "loss": 0.0, + "step": 2436 + }, + { + "epoch": 0.15705355416639816, + "grad_norm": 0.002945040172602348, + "learning_rate": 9.366988900823489e-06, + "loss": 0.0, + "step": 2437 + }, + { + "epoch": 0.1571179996133273, + "grad_norm": 0.007691798343803482, + "learning_rate": 9.366272824919442e-06, + "loss": 0.0, + "step": 2438 + }, + { + "epoch": 0.1571824450602565, + "grad_norm": 0.001754172241999642, + "learning_rate": 9.365556749015396e-06, + "loss": 0.0, + "step": 2439 + }, + { + "epoch": 0.15724689050718565, + "grad_norm": 0.039875313526018905, + "learning_rate": 9.36484067311135e-06, + "loss": 0.0004, + "step": 2440 + }, + { + "epoch": 0.15731133595411484, + "grad_norm": 0.15183961423677111, + "learning_rate": 9.364124597207305e-06, + "loss": 0.0005, + "step": 2441 + }, + { + "epoch": 0.15737578140104402, + "grad_norm": 0.32147288064350765, + "learning_rate": 9.363408521303259e-06, + "loss": 0.0019, + "step": 2442 + }, + { + "epoch": 0.15744022684797318, + "grad_norm": 0.015395113417899842, + "learning_rate": 9.362692445399213e-06, + "loss": 0.0, + "step": 2443 + }, + { + "epoch": 0.15750467229490236, + "grad_norm": 0.007149663212477922, + "learning_rate": 9.361976369495167e-06, + "loss": 0.0, + "step": 2444 + }, + { + "epoch": 0.15756911774183155, + "grad_norm": 0.006734754031812976, + "learning_rate": 9.361260293591122e-06, + "loss": 0.0, + "step": 2445 + }, + { + "epoch": 0.1576335631887607, + "grad_norm": 0.0014427696968621231, + "learning_rate": 9.360544217687076e-06, + "loss": 0.0, + "step": 2446 + }, + { + "epoch": 0.1576980086356899, + "grad_norm": 0.0067118994039908, + "learning_rate": 9.359828141783029e-06, + "loss": 0.0, + "step": 2447 + }, + { + "epoch": 0.15776245408261907, + "grad_norm": 0.003678528787548895, + "learning_rate": 9.359112065878983e-06, + "loss": 0.0, + "step": 2448 + }, + { + "epoch": 0.15782689952954823, + "grad_norm": 0.0029926002214066875, + "learning_rate": 9.358395989974937e-06, + "loss": 0.0, + "step": 2449 + }, + { + "epoch": 0.15789134497647742, + "grad_norm": 0.0005757762038000473, + "learning_rate": 9.357679914070892e-06, + "loss": 0.0, + "step": 2450 + }, + { + "epoch": 0.1579557904234066, + "grad_norm": 0.16343268918931092, + "learning_rate": 9.356963838166846e-06, + "loss": 0.0018, + "step": 2451 + }, + { + "epoch": 0.15802023587033576, + "grad_norm": 0.011417497251374545, + "learning_rate": 9.3562477622628e-06, + "loss": 0.0001, + "step": 2452 + }, + { + "epoch": 0.15808468131726494, + "grad_norm": 0.00034124654959428265, + "learning_rate": 9.355531686358756e-06, + "loss": 0.0, + "step": 2453 + }, + { + "epoch": 0.1581491267641941, + "grad_norm": 0.009350126488007813, + "learning_rate": 9.354815610454709e-06, + "loss": 0.0, + "step": 2454 + }, + { + "epoch": 0.15821357221112328, + "grad_norm": 0.012536311772587556, + "learning_rate": 9.354099534550663e-06, + "loss": 0.0, + "step": 2455 + }, + { + "epoch": 0.15827801765805247, + "grad_norm": 0.0006414442846759456, + "learning_rate": 9.353383458646617e-06, + "loss": 0.0, + "step": 2456 + }, + { + "epoch": 0.15834246310498162, + "grad_norm": 0.0014955118526876147, + "learning_rate": 9.352667382742572e-06, + "loss": 0.0, + "step": 2457 + }, + { + "epoch": 0.1584069085519108, + "grad_norm": 0.012644152882045408, + "learning_rate": 9.351951306838526e-06, + "loss": 0.0, + "step": 2458 + }, + { + "epoch": 0.15847135399884, + "grad_norm": 0.0022283322330553974, + "learning_rate": 9.35123523093448e-06, + "loss": 0.0, + "step": 2459 + }, + { + "epoch": 0.15853579944576915, + "grad_norm": 0.00216890313118297, + "learning_rate": 9.350519155030435e-06, + "loss": 0.0, + "step": 2460 + }, + { + "epoch": 0.15860024489269833, + "grad_norm": 4.773645306590904e-05, + "learning_rate": 9.349803079126389e-06, + "loss": 0.0, + "step": 2461 + }, + { + "epoch": 0.15866469033962752, + "grad_norm": 0.0026673034609326894, + "learning_rate": 9.349087003222343e-06, + "loss": 0.0, + "step": 2462 + }, + { + "epoch": 0.15872913578655667, + "grad_norm": 0.00012853098665182515, + "learning_rate": 9.348370927318296e-06, + "loss": 0.0, + "step": 2463 + }, + { + "epoch": 0.15879358123348586, + "grad_norm": 0.023990334793908007, + "learning_rate": 9.34765485141425e-06, + "loss": 0.0002, + "step": 2464 + }, + { + "epoch": 0.15885802668041502, + "grad_norm": 0.0015653462913017993, + "learning_rate": 9.346938775510204e-06, + "loss": 0.0, + "step": 2465 + }, + { + "epoch": 0.1589224721273442, + "grad_norm": 0.0017677698246296816, + "learning_rate": 9.346222699606159e-06, + "loss": 0.0, + "step": 2466 + }, + { + "epoch": 0.15898691757427338, + "grad_norm": 0.19936779468096666, + "learning_rate": 9.345506623702113e-06, + "loss": 0.0053, + "step": 2467 + }, + { + "epoch": 0.15905136302120254, + "grad_norm": 0.010805132866747108, + "learning_rate": 9.344790547798067e-06, + "loss": 0.0001, + "step": 2468 + }, + { + "epoch": 0.15911580846813173, + "grad_norm": 0.014871396083823452, + "learning_rate": 9.344074471894022e-06, + "loss": 0.0002, + "step": 2469 + }, + { + "epoch": 0.1591802539150609, + "grad_norm": 0.003370385419454828, + "learning_rate": 9.343358395989976e-06, + "loss": 0.0, + "step": 2470 + }, + { + "epoch": 0.15924469936199007, + "grad_norm": 0.0023136418019346424, + "learning_rate": 9.34264232008593e-06, + "loss": 0.0, + "step": 2471 + }, + { + "epoch": 0.15930914480891925, + "grad_norm": 0.006850854848157116, + "learning_rate": 9.341926244181884e-06, + "loss": 0.0001, + "step": 2472 + }, + { + "epoch": 0.15937359025584844, + "grad_norm": 0.011098353460658784, + "learning_rate": 9.341210168277837e-06, + "loss": 0.0, + "step": 2473 + }, + { + "epoch": 0.1594380357027776, + "grad_norm": 0.0040408311809586265, + "learning_rate": 9.340494092373791e-06, + "loss": 0.0, + "step": 2474 + }, + { + "epoch": 0.15950248114970678, + "grad_norm": 0.0017714230238855028, + "learning_rate": 9.339778016469746e-06, + "loss": 0.0, + "step": 2475 + }, + { + "epoch": 0.15956692659663596, + "grad_norm": 0.0035697117594498335, + "learning_rate": 9.339061940565702e-06, + "loss": 0.0, + "step": 2476 + }, + { + "epoch": 0.15963137204356512, + "grad_norm": 0.0014668561974660174, + "learning_rate": 9.338345864661656e-06, + "loss": 0.0, + "step": 2477 + }, + { + "epoch": 0.1596958174904943, + "grad_norm": 1.4915300288922788, + "learning_rate": 9.33762978875761e-06, + "loss": 0.008, + "step": 2478 + }, + { + "epoch": 0.15976026293742346, + "grad_norm": 0.0040878630786619, + "learning_rate": 9.336913712853563e-06, + "loss": 0.0, + "step": 2479 + }, + { + "epoch": 0.15982470838435264, + "grad_norm": 0.0013943551804445802, + "learning_rate": 9.336197636949517e-06, + "loss": 0.0, + "step": 2480 + }, + { + "epoch": 0.15988915383128183, + "grad_norm": 0.006055781747204741, + "learning_rate": 9.335481561045471e-06, + "loss": 0.0, + "step": 2481 + }, + { + "epoch": 0.15995359927821098, + "grad_norm": 0.02593562146286775, + "learning_rate": 9.334765485141426e-06, + "loss": 0.0001, + "step": 2482 + }, + { + "epoch": 0.16001804472514017, + "grad_norm": 0.00026559044513447465, + "learning_rate": 9.33404940923738e-06, + "loss": 0.0, + "step": 2483 + }, + { + "epoch": 0.16008249017206935, + "grad_norm": 0.0022454719980279157, + "learning_rate": 9.333333333333334e-06, + "loss": 0.0, + "step": 2484 + }, + { + "epoch": 0.1601469356189985, + "grad_norm": 0.019448823897386905, + "learning_rate": 9.332617257429289e-06, + "loss": 0.0001, + "step": 2485 + }, + { + "epoch": 0.1602113810659277, + "grad_norm": 0.002218050878402869, + "learning_rate": 9.331901181525243e-06, + "loss": 0.0, + "step": 2486 + }, + { + "epoch": 0.16027582651285688, + "grad_norm": 0.3381958283192187, + "learning_rate": 9.331185105621197e-06, + "loss": 0.0011, + "step": 2487 + }, + { + "epoch": 0.16034027195978603, + "grad_norm": 0.00022144595144902289, + "learning_rate": 9.330469029717152e-06, + "loss": 0.0, + "step": 2488 + }, + { + "epoch": 0.16040471740671522, + "grad_norm": 0.016463564310275253, + "learning_rate": 9.329752953813104e-06, + "loss": 0.0001, + "step": 2489 + }, + { + "epoch": 0.1604691628536444, + "grad_norm": 0.0029835897225247788, + "learning_rate": 9.329036877909058e-06, + "loss": 0.0, + "step": 2490 + }, + { + "epoch": 0.16053360830057356, + "grad_norm": 0.18282084405229299, + "learning_rate": 9.328320802005013e-06, + "loss": 0.0017, + "step": 2491 + }, + { + "epoch": 0.16059805374750274, + "grad_norm": 0.011583676702772308, + "learning_rate": 9.327604726100967e-06, + "loss": 0.0001, + "step": 2492 + }, + { + "epoch": 0.1606624991944319, + "grad_norm": 0.0034710967822639046, + "learning_rate": 9.326888650196921e-06, + "loss": 0.0, + "step": 2493 + }, + { + "epoch": 0.16072694464136109, + "grad_norm": 0.0011947749959884256, + "learning_rate": 9.326172574292876e-06, + "loss": 0.0, + "step": 2494 + }, + { + "epoch": 0.16079139008829027, + "grad_norm": 0.3944731943453859, + "learning_rate": 9.32545649838883e-06, + "loss": 0.0024, + "step": 2495 + }, + { + "epoch": 0.16085583553521943, + "grad_norm": 0.0018753792654615559, + "learning_rate": 9.324740422484784e-06, + "loss": 0.0, + "step": 2496 + }, + { + "epoch": 0.1609202809821486, + "grad_norm": 0.00033919158385829816, + "learning_rate": 9.324024346580739e-06, + "loss": 0.0, + "step": 2497 + }, + { + "epoch": 0.1609847264290778, + "grad_norm": 0.008896867719477038, + "learning_rate": 9.323308270676693e-06, + "loss": 0.0001, + "step": 2498 + }, + { + "epoch": 0.16104917187600695, + "grad_norm": 0.0010966606224628967, + "learning_rate": 9.322592194772647e-06, + "loss": 0.0, + "step": 2499 + }, + { + "epoch": 0.16111361732293614, + "grad_norm": 6.286197510243029e-05, + "learning_rate": 9.321876118868601e-06, + "loss": 0.0, + "step": 2500 + }, + { + "epoch": 0.16117806276986532, + "grad_norm": 0.012560768212650635, + "learning_rate": 9.321160042964556e-06, + "loss": 0.0001, + "step": 2501 + }, + { + "epoch": 0.16124250821679448, + "grad_norm": 0.01912257629322411, + "learning_rate": 9.32044396706051e-06, + "loss": 0.0002, + "step": 2502 + }, + { + "epoch": 0.16130695366372366, + "grad_norm": 0.016693878853747644, + "learning_rate": 9.319727891156464e-06, + "loss": 0.0001, + "step": 2503 + }, + { + "epoch": 0.16137139911065282, + "grad_norm": 0.07219035829834394, + "learning_rate": 9.319011815252419e-06, + "loss": 0.0002, + "step": 2504 + }, + { + "epoch": 0.161435844557582, + "grad_norm": 0.0012386043214729372, + "learning_rate": 9.318295739348371e-06, + "loss": 0.0, + "step": 2505 + }, + { + "epoch": 0.1615002900045112, + "grad_norm": 0.016528571301615474, + "learning_rate": 9.317579663444326e-06, + "loss": 0.0001, + "step": 2506 + }, + { + "epoch": 0.16156473545144034, + "grad_norm": 0.001469249498037701, + "learning_rate": 9.31686358754028e-06, + "loss": 0.0, + "step": 2507 + }, + { + "epoch": 0.16162918089836953, + "grad_norm": 0.034161891732316735, + "learning_rate": 9.316147511636234e-06, + "loss": 0.0001, + "step": 2508 + }, + { + "epoch": 0.1616936263452987, + "grad_norm": 0.4474646446177699, + "learning_rate": 9.315431435732188e-06, + "loss": 0.0014, + "step": 2509 + }, + { + "epoch": 0.16175807179222787, + "grad_norm": 0.00017682347512700295, + "learning_rate": 9.314715359828143e-06, + "loss": 0.0, + "step": 2510 + }, + { + "epoch": 0.16182251723915705, + "grad_norm": 0.0003216794260017332, + "learning_rate": 9.313999283924097e-06, + "loss": 0.0, + "step": 2511 + }, + { + "epoch": 0.16188696268608624, + "grad_norm": 0.14043387697255008, + "learning_rate": 9.313283208020051e-06, + "loss": 0.0005, + "step": 2512 + }, + { + "epoch": 0.1619514081330154, + "grad_norm": 0.005593976827827833, + "learning_rate": 9.312567132116006e-06, + "loss": 0.0, + "step": 2513 + }, + { + "epoch": 0.16201585357994458, + "grad_norm": 0.00858530863042198, + "learning_rate": 9.31185105621196e-06, + "loss": 0.0, + "step": 2514 + }, + { + "epoch": 0.16208029902687376, + "grad_norm": 0.21605553042851372, + "learning_rate": 9.311134980307913e-06, + "loss": 0.0015, + "step": 2515 + }, + { + "epoch": 0.16214474447380292, + "grad_norm": 0.5249323210916667, + "learning_rate": 9.310418904403867e-06, + "loss": 0.0019, + "step": 2516 + }, + { + "epoch": 0.1622091899207321, + "grad_norm": 0.0031300865219468605, + "learning_rate": 9.309702828499821e-06, + "loss": 0.0, + "step": 2517 + }, + { + "epoch": 0.16227363536766126, + "grad_norm": 0.016066443398272942, + "learning_rate": 9.308986752595775e-06, + "loss": 0.0002, + "step": 2518 + }, + { + "epoch": 0.16233808081459045, + "grad_norm": 0.011800571617004192, + "learning_rate": 9.30827067669173e-06, + "loss": 0.0001, + "step": 2519 + }, + { + "epoch": 0.16240252626151963, + "grad_norm": 0.006443131808993591, + "learning_rate": 9.307554600787684e-06, + "loss": 0.0, + "step": 2520 + }, + { + "epoch": 0.1624669717084488, + "grad_norm": 0.0002683456713499837, + "learning_rate": 9.306838524883638e-06, + "loss": 0.0, + "step": 2521 + }, + { + "epoch": 0.16253141715537797, + "grad_norm": 0.012876411028502148, + "learning_rate": 9.306122448979593e-06, + "loss": 0.0, + "step": 2522 + }, + { + "epoch": 0.16259586260230716, + "grad_norm": 0.052757710574246656, + "learning_rate": 9.305406373075547e-06, + "loss": 0.0, + "step": 2523 + }, + { + "epoch": 0.1626603080492363, + "grad_norm": 0.001491306592329016, + "learning_rate": 9.304690297171501e-06, + "loss": 0.0, + "step": 2524 + }, + { + "epoch": 0.1627247534961655, + "grad_norm": 0.03234166304160187, + "learning_rate": 9.303974221267455e-06, + "loss": 0.0002, + "step": 2525 + }, + { + "epoch": 0.16278919894309468, + "grad_norm": 0.003989245770081959, + "learning_rate": 9.30325814536341e-06, + "loss": 0.0, + "step": 2526 + }, + { + "epoch": 0.16285364439002384, + "grad_norm": 0.03451099249804787, + "learning_rate": 9.302542069459364e-06, + "loss": 0.0004, + "step": 2527 + }, + { + "epoch": 0.16291808983695302, + "grad_norm": 0.0022142978663875178, + "learning_rate": 9.301825993555318e-06, + "loss": 0.0, + "step": 2528 + }, + { + "epoch": 0.1629825352838822, + "grad_norm": 0.0008906830224453514, + "learning_rate": 9.301109917651273e-06, + "loss": 0.0, + "step": 2529 + }, + { + "epoch": 0.16304698073081136, + "grad_norm": 0.40542301956458016, + "learning_rate": 9.300393841747227e-06, + "loss": 0.0016, + "step": 2530 + }, + { + "epoch": 0.16311142617774055, + "grad_norm": 0.0012230303771541364, + "learning_rate": 9.29967776584318e-06, + "loss": 0.0, + "step": 2531 + }, + { + "epoch": 0.1631758716246697, + "grad_norm": 0.056274097549309655, + "learning_rate": 9.298961689939134e-06, + "loss": 0.0, + "step": 2532 + }, + { + "epoch": 0.1632403170715989, + "grad_norm": 0.001701698073510463, + "learning_rate": 9.298245614035088e-06, + "loss": 0.0, + "step": 2533 + }, + { + "epoch": 0.16330476251852807, + "grad_norm": 0.004243688258801596, + "learning_rate": 9.297529538131042e-06, + "loss": 0.0, + "step": 2534 + }, + { + "epoch": 0.16336920796545723, + "grad_norm": 0.0004493868624061053, + "learning_rate": 9.296813462226997e-06, + "loss": 0.0, + "step": 2535 + }, + { + "epoch": 0.16343365341238641, + "grad_norm": 0.15120959538283224, + "learning_rate": 9.296097386322951e-06, + "loss": 0.0004, + "step": 2536 + }, + { + "epoch": 0.1634980988593156, + "grad_norm": 0.012329401352514077, + "learning_rate": 9.295381310418905e-06, + "loss": 0.0001, + "step": 2537 + }, + { + "epoch": 0.16356254430624476, + "grad_norm": 0.006242194356227512, + "learning_rate": 9.29466523451486e-06, + "loss": 0.0, + "step": 2538 + }, + { + "epoch": 0.16362698975317394, + "grad_norm": 0.0004984472123917352, + "learning_rate": 9.293949158610814e-06, + "loss": 0.0, + "step": 2539 + }, + { + "epoch": 0.16369143520010312, + "grad_norm": 0.005776456899099466, + "learning_rate": 9.293233082706767e-06, + "loss": 0.0, + "step": 2540 + }, + { + "epoch": 0.16375588064703228, + "grad_norm": 0.0032904222645318234, + "learning_rate": 9.292517006802721e-06, + "loss": 0.0, + "step": 2541 + }, + { + "epoch": 0.16382032609396147, + "grad_norm": 0.005594085108149277, + "learning_rate": 9.291800930898675e-06, + "loss": 0.0, + "step": 2542 + }, + { + "epoch": 0.16388477154089062, + "grad_norm": 0.7958945806678982, + "learning_rate": 9.29108485499463e-06, + "loss": 0.0036, + "step": 2543 + }, + { + "epoch": 0.1639492169878198, + "grad_norm": 0.007064345315062998, + "learning_rate": 9.290368779090584e-06, + "loss": 0.0, + "step": 2544 + }, + { + "epoch": 0.164013662434749, + "grad_norm": 0.020122688256661223, + "learning_rate": 9.289652703186538e-06, + "loss": 0.0002, + "step": 2545 + }, + { + "epoch": 0.16407810788167815, + "grad_norm": 0.024931361317891115, + "learning_rate": 9.288936627282494e-06, + "loss": 0.0, + "step": 2546 + }, + { + "epoch": 0.16414255332860733, + "grad_norm": 0.2923833720903259, + "learning_rate": 9.288220551378447e-06, + "loss": 0.0008, + "step": 2547 + }, + { + "epoch": 0.16420699877553652, + "grad_norm": 0.002371580849569522, + "learning_rate": 9.287504475474401e-06, + "loss": 0.0, + "step": 2548 + }, + { + "epoch": 0.16427144422246567, + "grad_norm": 0.043302525215061886, + "learning_rate": 9.286788399570355e-06, + "loss": 0.0001, + "step": 2549 + }, + { + "epoch": 0.16433588966939486, + "grad_norm": 0.002335608467180129, + "learning_rate": 9.28607232366631e-06, + "loss": 0.0, + "step": 2550 + }, + { + "epoch": 0.16440033511632404, + "grad_norm": 0.06442228576493117, + "learning_rate": 9.285356247762264e-06, + "loss": 0.0006, + "step": 2551 + }, + { + "epoch": 0.1644647805632532, + "grad_norm": 0.0255342682065745, + "learning_rate": 9.284640171858218e-06, + "loss": 0.0002, + "step": 2552 + }, + { + "epoch": 0.16452922601018238, + "grad_norm": 0.023540083579128342, + "learning_rate": 9.283924095954172e-06, + "loss": 0.0001, + "step": 2553 + }, + { + "epoch": 0.16459367145711157, + "grad_norm": 0.0017502331113364701, + "learning_rate": 9.283208020050127e-06, + "loss": 0.0, + "step": 2554 + }, + { + "epoch": 0.16465811690404072, + "grad_norm": 0.0006908666154918732, + "learning_rate": 9.282491944146081e-06, + "loss": 0.0, + "step": 2555 + }, + { + "epoch": 0.1647225623509699, + "grad_norm": 0.00048313697026926197, + "learning_rate": 9.281775868242034e-06, + "loss": 0.0, + "step": 2556 + }, + { + "epoch": 0.16478700779789907, + "grad_norm": 0.0008612412057671483, + "learning_rate": 9.281059792337988e-06, + "loss": 0.0, + "step": 2557 + }, + { + "epoch": 0.16485145324482825, + "grad_norm": 0.6262135406738908, + "learning_rate": 9.280343716433942e-06, + "loss": 0.0034, + "step": 2558 + }, + { + "epoch": 0.16491589869175743, + "grad_norm": 0.0006356426223387895, + "learning_rate": 9.279627640529897e-06, + "loss": 0.0, + "step": 2559 + }, + { + "epoch": 0.1649803441386866, + "grad_norm": 0.001305826023262251, + "learning_rate": 9.278911564625851e-06, + "loss": 0.0, + "step": 2560 + }, + { + "epoch": 0.16504478958561578, + "grad_norm": 0.2166476208464855, + "learning_rate": 9.278195488721805e-06, + "loss": 0.0007, + "step": 2561 + }, + { + "epoch": 0.16510923503254496, + "grad_norm": 0.0017773730090488518, + "learning_rate": 9.27747941281776e-06, + "loss": 0.0, + "step": 2562 + }, + { + "epoch": 0.16517368047947412, + "grad_norm": 0.01864715096330182, + "learning_rate": 9.276763336913714e-06, + "loss": 0.0, + "step": 2563 + }, + { + "epoch": 0.1652381259264033, + "grad_norm": 0.0009938659133838495, + "learning_rate": 9.276047261009668e-06, + "loss": 0.0, + "step": 2564 + }, + { + "epoch": 0.16530257137333249, + "grad_norm": 0.1604910932287349, + "learning_rate": 9.275331185105622e-06, + "loss": 0.0001, + "step": 2565 + }, + { + "epoch": 0.16536701682026164, + "grad_norm": 0.013365406361419188, + "learning_rate": 9.274615109201575e-06, + "loss": 0.0, + "step": 2566 + }, + { + "epoch": 0.16543146226719083, + "grad_norm": 0.0015641780087289204, + "learning_rate": 9.27389903329753e-06, + "loss": 0.0, + "step": 2567 + }, + { + "epoch": 0.16549590771412, + "grad_norm": 0.0015147951019427674, + "learning_rate": 9.273182957393484e-06, + "loss": 0.0, + "step": 2568 + }, + { + "epoch": 0.16556035316104917, + "grad_norm": 0.003322727915939341, + "learning_rate": 9.27246688148944e-06, + "loss": 0.0, + "step": 2569 + }, + { + "epoch": 0.16562479860797835, + "grad_norm": 0.3481242640776877, + "learning_rate": 9.271750805585394e-06, + "loss": 0.0082, + "step": 2570 + }, + { + "epoch": 0.1656892440549075, + "grad_norm": 0.05511486226657862, + "learning_rate": 9.271034729681348e-06, + "loss": 0.0003, + "step": 2571 + }, + { + "epoch": 0.1657536895018367, + "grad_norm": 0.0013772392389835638, + "learning_rate": 9.2703186537773e-06, + "loss": 0.0, + "step": 2572 + }, + { + "epoch": 0.16581813494876588, + "grad_norm": 0.0028322900559294185, + "learning_rate": 9.269602577873255e-06, + "loss": 0.0, + "step": 2573 + }, + { + "epoch": 0.16588258039569503, + "grad_norm": 0.0008771691379030808, + "learning_rate": 9.26888650196921e-06, + "loss": 0.0, + "step": 2574 + }, + { + "epoch": 0.16594702584262422, + "grad_norm": 0.001837532065417171, + "learning_rate": 9.268170426065164e-06, + "loss": 0.0, + "step": 2575 + }, + { + "epoch": 0.1660114712895534, + "grad_norm": 0.011364499836188676, + "learning_rate": 9.267454350161118e-06, + "loss": 0.0, + "step": 2576 + }, + { + "epoch": 0.16607591673648256, + "grad_norm": 0.43601692392264785, + "learning_rate": 9.266738274257072e-06, + "loss": 0.0017, + "step": 2577 + }, + { + "epoch": 0.16614036218341174, + "grad_norm": 0.2836992981645201, + "learning_rate": 9.266022198353027e-06, + "loss": 0.0005, + "step": 2578 + }, + { + "epoch": 0.16620480763034093, + "grad_norm": 0.006381917270187372, + "learning_rate": 9.26530612244898e-06, + "loss": 0.0, + "step": 2579 + }, + { + "epoch": 0.16626925307727008, + "grad_norm": 0.0995379968224912, + "learning_rate": 9.264590046544935e-06, + "loss": 0.0001, + "step": 2580 + }, + { + "epoch": 0.16633369852419927, + "grad_norm": 0.005838220002381634, + "learning_rate": 9.26387397064089e-06, + "loss": 0.0, + "step": 2581 + }, + { + "epoch": 0.16639814397112843, + "grad_norm": 0.1734023075655848, + "learning_rate": 9.263157894736842e-06, + "loss": 0.0015, + "step": 2582 + }, + { + "epoch": 0.1664625894180576, + "grad_norm": 0.32543608770325944, + "learning_rate": 9.262441818832796e-06, + "loss": 0.001, + "step": 2583 + }, + { + "epoch": 0.1665270348649868, + "grad_norm": 0.035015456749274816, + "learning_rate": 9.26172574292875e-06, + "loss": 0.002, + "step": 2584 + }, + { + "epoch": 0.16659148031191595, + "grad_norm": 0.00900476722572773, + "learning_rate": 9.261009667024705e-06, + "loss": 0.0, + "step": 2585 + }, + { + "epoch": 0.16665592575884514, + "grad_norm": 0.0008566073306815689, + "learning_rate": 9.26029359112066e-06, + "loss": 0.0, + "step": 2586 + }, + { + "epoch": 0.16672037120577432, + "grad_norm": 0.4923978083746918, + "learning_rate": 9.259577515216614e-06, + "loss": 0.0022, + "step": 2587 + }, + { + "epoch": 0.16678481665270348, + "grad_norm": 0.0018108477937682785, + "learning_rate": 9.258861439312568e-06, + "loss": 0.0, + "step": 2588 + }, + { + "epoch": 0.16684926209963266, + "grad_norm": 0.004562858927769996, + "learning_rate": 9.258145363408522e-06, + "loss": 0.0, + "step": 2589 + }, + { + "epoch": 0.16691370754656185, + "grad_norm": 0.07446345657383704, + "learning_rate": 9.257429287504476e-06, + "loss": 0.0001, + "step": 2590 + }, + { + "epoch": 0.166978152993491, + "grad_norm": 0.0009006018433352618, + "learning_rate": 9.25671321160043e-06, + "loss": 0.0, + "step": 2591 + }, + { + "epoch": 0.1670425984404202, + "grad_norm": 0.007707075664561584, + "learning_rate": 9.255997135696383e-06, + "loss": 0.0, + "step": 2592 + }, + { + "epoch": 0.16710704388734937, + "grad_norm": 0.1405618313798484, + "learning_rate": 9.25528105979234e-06, + "loss": 0.0002, + "step": 2593 + }, + { + "epoch": 0.16717148933427853, + "grad_norm": 0.0008934174065720114, + "learning_rate": 9.254564983888294e-06, + "loss": 0.0, + "step": 2594 + }, + { + "epoch": 0.1672359347812077, + "grad_norm": 0.5161244256757117, + "learning_rate": 9.253848907984248e-06, + "loss": 0.0014, + "step": 2595 + }, + { + "epoch": 0.16730038022813687, + "grad_norm": 0.05120134815256295, + "learning_rate": 9.253132832080202e-06, + "loss": 0.0001, + "step": 2596 + }, + { + "epoch": 0.16736482567506605, + "grad_norm": 0.021043779453902272, + "learning_rate": 9.252416756176157e-06, + "loss": 0.0002, + "step": 2597 + }, + { + "epoch": 0.16742927112199524, + "grad_norm": 0.0009231523664426139, + "learning_rate": 9.251700680272109e-06, + "loss": 0.0, + "step": 2598 + }, + { + "epoch": 0.1674937165689244, + "grad_norm": 0.016473526227955518, + "learning_rate": 9.250984604368063e-06, + "loss": 0.0, + "step": 2599 + }, + { + "epoch": 0.16755816201585358, + "grad_norm": 0.0006848207539497617, + "learning_rate": 9.250268528464018e-06, + "loss": 0.0, + "step": 2600 + }, + { + "epoch": 0.16762260746278276, + "grad_norm": 0.001094208815293132, + "learning_rate": 9.249552452559972e-06, + "loss": 0.0, + "step": 2601 + }, + { + "epoch": 0.16768705290971192, + "grad_norm": 0.00046785349861243374, + "learning_rate": 9.248836376655926e-06, + "loss": 0.0, + "step": 2602 + }, + { + "epoch": 0.1677514983566411, + "grad_norm": 0.00014457733525141594, + "learning_rate": 9.24812030075188e-06, + "loss": 0.0, + "step": 2603 + }, + { + "epoch": 0.1678159438035703, + "grad_norm": 0.029550263242095656, + "learning_rate": 9.247404224847835e-06, + "loss": 0.0001, + "step": 2604 + }, + { + "epoch": 0.16788038925049945, + "grad_norm": 0.8862753948667788, + "learning_rate": 9.24668814894379e-06, + "loss": 0.003, + "step": 2605 + }, + { + "epoch": 0.16794483469742863, + "grad_norm": 0.011991181543029776, + "learning_rate": 9.245972073039744e-06, + "loss": 0.0001, + "step": 2606 + }, + { + "epoch": 0.16800928014435781, + "grad_norm": 0.05132832108348396, + "learning_rate": 9.245255997135698e-06, + "loss": 0.0016, + "step": 2607 + }, + { + "epoch": 0.16807372559128697, + "grad_norm": 0.01007424389617214, + "learning_rate": 9.24453992123165e-06, + "loss": 0.0001, + "step": 2608 + }, + { + "epoch": 0.16813817103821616, + "grad_norm": 0.3279116481888165, + "learning_rate": 9.243823845327605e-06, + "loss": 0.0008, + "step": 2609 + }, + { + "epoch": 0.1682026164851453, + "grad_norm": 0.0010222354990563067, + "learning_rate": 9.243107769423559e-06, + "loss": 0.0, + "step": 2610 + }, + { + "epoch": 0.1682670619320745, + "grad_norm": 0.0002879770515881767, + "learning_rate": 9.242391693519513e-06, + "loss": 0.0, + "step": 2611 + }, + { + "epoch": 0.16833150737900368, + "grad_norm": 0.04284265966657461, + "learning_rate": 9.241675617615468e-06, + "loss": 0.0004, + "step": 2612 + }, + { + "epoch": 0.16839595282593284, + "grad_norm": 0.02611223447159595, + "learning_rate": 9.240959541711422e-06, + "loss": 0.0004, + "step": 2613 + }, + { + "epoch": 0.16846039827286202, + "grad_norm": 0.000562746261733419, + "learning_rate": 9.240243465807376e-06, + "loss": 0.0, + "step": 2614 + }, + { + "epoch": 0.1685248437197912, + "grad_norm": 0.006530159238405348, + "learning_rate": 9.23952738990333e-06, + "loss": 0.0, + "step": 2615 + }, + { + "epoch": 0.16858928916672036, + "grad_norm": 0.004683470390970493, + "learning_rate": 9.238811313999285e-06, + "loss": 0.0, + "step": 2616 + }, + { + "epoch": 0.16865373461364955, + "grad_norm": 0.4705708583019044, + "learning_rate": 9.238095238095239e-06, + "loss": 0.0032, + "step": 2617 + }, + { + "epoch": 0.16871818006057873, + "grad_norm": 0.0005363429561250486, + "learning_rate": 9.237379162191193e-06, + "loss": 0.0, + "step": 2618 + }, + { + "epoch": 0.1687826255075079, + "grad_norm": 0.15183939342721997, + "learning_rate": 9.236663086287148e-06, + "loss": 0.0017, + "step": 2619 + }, + { + "epoch": 0.16884707095443707, + "grad_norm": 0.0001052157179584856, + "learning_rate": 9.235947010383102e-06, + "loss": 0.0, + "step": 2620 + }, + { + "epoch": 0.16891151640136626, + "grad_norm": 0.0007306970136187449, + "learning_rate": 9.235230934479056e-06, + "loss": 0.0, + "step": 2621 + }, + { + "epoch": 0.16897596184829541, + "grad_norm": 0.002508690093415649, + "learning_rate": 9.23451485857501e-06, + "loss": 0.0, + "step": 2622 + }, + { + "epoch": 0.1690404072952246, + "grad_norm": 0.0005834624117502399, + "learning_rate": 9.233798782670965e-06, + "loss": 0.0, + "step": 2623 + }, + { + "epoch": 0.16910485274215376, + "grad_norm": 0.002580928592817899, + "learning_rate": 9.233082706766918e-06, + "loss": 0.0, + "step": 2624 + }, + { + "epoch": 0.16916929818908294, + "grad_norm": 0.004280836334426163, + "learning_rate": 9.232366630862872e-06, + "loss": 0.0, + "step": 2625 + }, + { + "epoch": 0.16923374363601212, + "grad_norm": 0.00018424144724289227, + "learning_rate": 9.231650554958826e-06, + "loss": 0.0, + "step": 2626 + }, + { + "epoch": 0.16929818908294128, + "grad_norm": 0.0010581367877079988, + "learning_rate": 9.23093447905478e-06, + "loss": 0.0, + "step": 2627 + }, + { + "epoch": 0.16936263452987046, + "grad_norm": 0.0007131972095357654, + "learning_rate": 9.230218403150735e-06, + "loss": 0.0, + "step": 2628 + }, + { + "epoch": 0.16942707997679965, + "grad_norm": 0.00043519554939918065, + "learning_rate": 9.229502327246689e-06, + "loss": 0.0, + "step": 2629 + }, + { + "epoch": 0.1694915254237288, + "grad_norm": 0.001351474322390404, + "learning_rate": 9.228786251342643e-06, + "loss": 0.0, + "step": 2630 + }, + { + "epoch": 0.169555970870658, + "grad_norm": 0.020752774250390287, + "learning_rate": 9.228070175438598e-06, + "loss": 0.0002, + "step": 2631 + }, + { + "epoch": 0.16962041631758717, + "grad_norm": 0.018716232806307055, + "learning_rate": 9.227354099534552e-06, + "loss": 0.0001, + "step": 2632 + }, + { + "epoch": 0.16968486176451633, + "grad_norm": 0.00029643028839327425, + "learning_rate": 9.226638023630505e-06, + "loss": 0.0, + "step": 2633 + }, + { + "epoch": 0.16974930721144552, + "grad_norm": 0.0022362696824027137, + "learning_rate": 9.225921947726459e-06, + "loss": 0.0, + "step": 2634 + }, + { + "epoch": 0.16981375265837467, + "grad_norm": 0.00021475709016773186, + "learning_rate": 9.225205871822413e-06, + "loss": 0.0, + "step": 2635 + }, + { + "epoch": 0.16987819810530386, + "grad_norm": 0.16918964578636836, + "learning_rate": 9.224489795918367e-06, + "loss": 0.0019, + "step": 2636 + }, + { + "epoch": 0.16994264355223304, + "grad_norm": 0.12910555113432312, + "learning_rate": 9.223773720014322e-06, + "loss": 0.0002, + "step": 2637 + }, + { + "epoch": 0.1700070889991622, + "grad_norm": 0.024268443228424336, + "learning_rate": 9.223057644110276e-06, + "loss": 0.0001, + "step": 2638 + }, + { + "epoch": 0.17007153444609138, + "grad_norm": 0.0003196529914171969, + "learning_rate": 9.222341568206232e-06, + "loss": 0.0, + "step": 2639 + }, + { + "epoch": 0.17013597989302057, + "grad_norm": 0.09965735102900866, + "learning_rate": 9.221625492302185e-06, + "loss": 0.0038, + "step": 2640 + }, + { + "epoch": 0.17020042533994972, + "grad_norm": 0.009036427241455914, + "learning_rate": 9.220909416398139e-06, + "loss": 0.0001, + "step": 2641 + }, + { + "epoch": 0.1702648707868789, + "grad_norm": 0.00011327834905210328, + "learning_rate": 9.220193340494093e-06, + "loss": 0.0, + "step": 2642 + }, + { + "epoch": 0.1703293162338081, + "grad_norm": 0.00287235194554443, + "learning_rate": 9.219477264590047e-06, + "loss": 0.0, + "step": 2643 + }, + { + "epoch": 0.17039376168073725, + "grad_norm": 0.015226695642371184, + "learning_rate": 9.218761188686002e-06, + "loss": 0.0001, + "step": 2644 + }, + { + "epoch": 0.17045820712766643, + "grad_norm": 0.00020810014028015193, + "learning_rate": 9.218045112781956e-06, + "loss": 0.0, + "step": 2645 + }, + { + "epoch": 0.17052265257459562, + "grad_norm": 0.00020155726396008463, + "learning_rate": 9.21732903687791e-06, + "loss": 0.0, + "step": 2646 + }, + { + "epoch": 0.17058709802152477, + "grad_norm": 0.0010949986795731515, + "learning_rate": 9.216612960973865e-06, + "loss": 0.0, + "step": 2647 + }, + { + "epoch": 0.17065154346845396, + "grad_norm": 0.0076949646895825684, + "learning_rate": 9.215896885069819e-06, + "loss": 0.0001, + "step": 2648 + }, + { + "epoch": 0.17071598891538312, + "grad_norm": 0.001981525942460112, + "learning_rate": 9.215180809165772e-06, + "loss": 0.0, + "step": 2649 + }, + { + "epoch": 0.1707804343623123, + "grad_norm": 0.038440089378423435, + "learning_rate": 9.214464733261726e-06, + "loss": 0.0001, + "step": 2650 + }, + { + "epoch": 0.17084487980924148, + "grad_norm": 0.10907353425293582, + "learning_rate": 9.21374865735768e-06, + "loss": 0.0001, + "step": 2651 + }, + { + "epoch": 0.17090932525617064, + "grad_norm": 0.0063255364810587255, + "learning_rate": 9.213032581453634e-06, + "loss": 0.0, + "step": 2652 + }, + { + "epoch": 0.17097377070309983, + "grad_norm": 0.012754438453606867, + "learning_rate": 9.212316505549589e-06, + "loss": 0.0, + "step": 2653 + }, + { + "epoch": 0.171038216150029, + "grad_norm": 0.00026067457484949056, + "learning_rate": 9.211600429645543e-06, + "loss": 0.0, + "step": 2654 + }, + { + "epoch": 0.17110266159695817, + "grad_norm": 0.00021310395223525356, + "learning_rate": 9.210884353741497e-06, + "loss": 0.0, + "step": 2655 + }, + { + "epoch": 0.17116710704388735, + "grad_norm": 0.010862934374835192, + "learning_rate": 9.210168277837452e-06, + "loss": 0.0, + "step": 2656 + }, + { + "epoch": 0.17123155249081654, + "grad_norm": 0.06877897773526727, + "learning_rate": 9.209452201933406e-06, + "loss": 0.0002, + "step": 2657 + }, + { + "epoch": 0.1712959979377457, + "grad_norm": 0.0003992406670996045, + "learning_rate": 9.20873612602936e-06, + "loss": 0.0, + "step": 2658 + }, + { + "epoch": 0.17136044338467488, + "grad_norm": 0.000843099911044625, + "learning_rate": 9.208020050125313e-06, + "loss": 0.0, + "step": 2659 + }, + { + "epoch": 0.17142488883160406, + "grad_norm": 0.03777898066302125, + "learning_rate": 9.207303974221267e-06, + "loss": 0.0004, + "step": 2660 + }, + { + "epoch": 0.17148933427853322, + "grad_norm": 0.0013253862296127193, + "learning_rate": 9.206587898317221e-06, + "loss": 0.0, + "step": 2661 + }, + { + "epoch": 0.1715537797254624, + "grad_norm": 0.0019887129236439605, + "learning_rate": 9.205871822413176e-06, + "loss": 0.0, + "step": 2662 + }, + { + "epoch": 0.17161822517239156, + "grad_norm": 0.01141948088125074, + "learning_rate": 9.205155746509132e-06, + "loss": 0.0, + "step": 2663 + }, + { + "epoch": 0.17168267061932074, + "grad_norm": 0.007296598030753204, + "learning_rate": 9.204439670605086e-06, + "loss": 0.0, + "step": 2664 + }, + { + "epoch": 0.17174711606624993, + "grad_norm": 0.015167112561033686, + "learning_rate": 9.203723594701039e-06, + "loss": 0.0002, + "step": 2665 + }, + { + "epoch": 0.17181156151317908, + "grad_norm": 0.006666472705684092, + "learning_rate": 9.203007518796993e-06, + "loss": 0.0, + "step": 2666 + }, + { + "epoch": 0.17187600696010827, + "grad_norm": 0.005332642179530365, + "learning_rate": 9.202291442892947e-06, + "loss": 0.0, + "step": 2667 + }, + { + "epoch": 0.17194045240703745, + "grad_norm": 0.0003287436028234088, + "learning_rate": 9.201575366988902e-06, + "loss": 0.0, + "step": 2668 + }, + { + "epoch": 0.1720048978539666, + "grad_norm": 0.008185411046763031, + "learning_rate": 9.200859291084856e-06, + "loss": 0.0, + "step": 2669 + }, + { + "epoch": 0.1720693433008958, + "grad_norm": 0.0003072117989800363, + "learning_rate": 9.20014321518081e-06, + "loss": 0.0, + "step": 2670 + }, + { + "epoch": 0.17213378874782498, + "grad_norm": 0.14845676673531352, + "learning_rate": 9.199427139276764e-06, + "loss": 0.0001, + "step": 2671 + }, + { + "epoch": 0.17219823419475414, + "grad_norm": 0.0023122495109587056, + "learning_rate": 9.198711063372719e-06, + "loss": 0.0, + "step": 2672 + }, + { + "epoch": 0.17226267964168332, + "grad_norm": 2.9962439866178707, + "learning_rate": 9.197994987468673e-06, + "loss": 0.0269, + "step": 2673 + }, + { + "epoch": 0.17232712508861248, + "grad_norm": 0.040175158651624854, + "learning_rate": 9.197278911564627e-06, + "loss": 0.0002, + "step": 2674 + }, + { + "epoch": 0.17239157053554166, + "grad_norm": 0.0012490798280579728, + "learning_rate": 9.19656283566058e-06, + "loss": 0.0, + "step": 2675 + }, + { + "epoch": 0.17245601598247084, + "grad_norm": 0.003016418042880189, + "learning_rate": 9.195846759756534e-06, + "loss": 0.0, + "step": 2676 + }, + { + "epoch": 0.1725204614294, + "grad_norm": 0.003284682926061627, + "learning_rate": 9.195130683852489e-06, + "loss": 0.0, + "step": 2677 + }, + { + "epoch": 0.17258490687632919, + "grad_norm": 0.011853928536630667, + "learning_rate": 9.194414607948443e-06, + "loss": 0.0001, + "step": 2678 + }, + { + "epoch": 0.17264935232325837, + "grad_norm": 0.09941864921166386, + "learning_rate": 9.193698532044397e-06, + "loss": 0.0002, + "step": 2679 + }, + { + "epoch": 0.17271379777018753, + "grad_norm": 0.008067099418627759, + "learning_rate": 9.192982456140351e-06, + "loss": 0.0, + "step": 2680 + }, + { + "epoch": 0.1727782432171167, + "grad_norm": 0.5154138768623525, + "learning_rate": 9.192266380236306e-06, + "loss": 0.0166, + "step": 2681 + }, + { + "epoch": 0.1728426886640459, + "grad_norm": 0.01469303930545766, + "learning_rate": 9.19155030433226e-06, + "loss": 0.0001, + "step": 2682 + }, + { + "epoch": 0.17290713411097505, + "grad_norm": 0.220314544641192, + "learning_rate": 9.190834228428214e-06, + "loss": 0.006, + "step": 2683 + }, + { + "epoch": 0.17297157955790424, + "grad_norm": 0.09990222740725906, + "learning_rate": 9.190118152524169e-06, + "loss": 0.0004, + "step": 2684 + }, + { + "epoch": 0.17303602500483342, + "grad_norm": 0.00873880537834303, + "learning_rate": 9.189402076620121e-06, + "loss": 0.0, + "step": 2685 + }, + { + "epoch": 0.17310047045176258, + "grad_norm": 0.0012456913305398055, + "learning_rate": 9.188686000716077e-06, + "loss": 0.0, + "step": 2686 + }, + { + "epoch": 0.17316491589869176, + "grad_norm": 0.003980550458896421, + "learning_rate": 9.187969924812032e-06, + "loss": 0.0, + "step": 2687 + }, + { + "epoch": 0.17322936134562092, + "grad_norm": 0.24734072155041284, + "learning_rate": 9.187253848907986e-06, + "loss": 0.0062, + "step": 2688 + }, + { + "epoch": 0.1732938067925501, + "grad_norm": 0.0014917908984364223, + "learning_rate": 9.18653777300394e-06, + "loss": 0.0, + "step": 2689 + }, + { + "epoch": 0.1733582522394793, + "grad_norm": 0.018215838914686967, + "learning_rate": 9.185821697099894e-06, + "loss": 0.0, + "step": 2690 + }, + { + "epoch": 0.17342269768640844, + "grad_norm": 0.0007429930106887829, + "learning_rate": 9.185105621195847e-06, + "loss": 0.0, + "step": 2691 + }, + { + "epoch": 0.17348714313333763, + "grad_norm": 0.0001631497864257658, + "learning_rate": 9.184389545291801e-06, + "loss": 0.0, + "step": 2692 + }, + { + "epoch": 0.1735515885802668, + "grad_norm": 0.010403012017535249, + "learning_rate": 9.183673469387756e-06, + "loss": 0.0, + "step": 2693 + }, + { + "epoch": 0.17361603402719597, + "grad_norm": 0.008222731931976913, + "learning_rate": 9.18295739348371e-06, + "loss": 0.0001, + "step": 2694 + }, + { + "epoch": 0.17368047947412515, + "grad_norm": 0.03993690792998894, + "learning_rate": 9.182241317579664e-06, + "loss": 0.0001, + "step": 2695 + }, + { + "epoch": 0.17374492492105434, + "grad_norm": 0.00030802079541501006, + "learning_rate": 9.181525241675619e-06, + "loss": 0.0, + "step": 2696 + }, + { + "epoch": 0.1738093703679835, + "grad_norm": 0.005414459370388794, + "learning_rate": 9.180809165771573e-06, + "loss": 0.0, + "step": 2697 + }, + { + "epoch": 0.17387381581491268, + "grad_norm": 0.006613869105241104, + "learning_rate": 9.180093089867527e-06, + "loss": 0.0, + "step": 2698 + }, + { + "epoch": 0.17393826126184186, + "grad_norm": 0.3907094673384912, + "learning_rate": 9.179377013963481e-06, + "loss": 0.0019, + "step": 2699 + }, + { + "epoch": 0.17400270670877102, + "grad_norm": 0.009057081484954533, + "learning_rate": 9.178660938059436e-06, + "loss": 0.0001, + "step": 2700 + }, + { + "epoch": 0.1740671521557002, + "grad_norm": 0.7250289730333702, + "learning_rate": 9.177944862155388e-06, + "loss": 0.0027, + "step": 2701 + }, + { + "epoch": 0.17413159760262936, + "grad_norm": 0.005397337466009714, + "learning_rate": 9.177228786251343e-06, + "loss": 0.0001, + "step": 2702 + }, + { + "epoch": 0.17419604304955855, + "grad_norm": 0.0006209733526635416, + "learning_rate": 9.176512710347297e-06, + "loss": 0.0, + "step": 2703 + }, + { + "epoch": 0.17426048849648773, + "grad_norm": 0.04377721584989303, + "learning_rate": 9.175796634443251e-06, + "loss": 0.0002, + "step": 2704 + }, + { + "epoch": 0.1743249339434169, + "grad_norm": 0.0001704777087367316, + "learning_rate": 9.175080558539206e-06, + "loss": 0.0, + "step": 2705 + }, + { + "epoch": 0.17438937939034607, + "grad_norm": 0.002415898461971982, + "learning_rate": 9.17436448263516e-06, + "loss": 0.0, + "step": 2706 + }, + { + "epoch": 0.17445382483727526, + "grad_norm": 0.00010784267160302466, + "learning_rate": 9.173648406731114e-06, + "loss": 0.0, + "step": 2707 + }, + { + "epoch": 0.1745182702842044, + "grad_norm": 0.382636049048164, + "learning_rate": 9.172932330827068e-06, + "loss": 0.0106, + "step": 2708 + }, + { + "epoch": 0.1745827157311336, + "grad_norm": 0.0011183936260544516, + "learning_rate": 9.172216254923023e-06, + "loss": 0.0, + "step": 2709 + }, + { + "epoch": 0.17464716117806278, + "grad_norm": 0.0002463259427477652, + "learning_rate": 9.171500179018977e-06, + "loss": 0.0, + "step": 2710 + }, + { + "epoch": 0.17471160662499194, + "grad_norm": 1.1865234375, + "learning_rate": 9.170784103114931e-06, + "loss": 0.0072, + "step": 2711 + }, + { + "epoch": 0.17477605207192112, + "grad_norm": 0.002127015512866035, + "learning_rate": 9.170068027210886e-06, + "loss": 0.0, + "step": 2712 + }, + { + "epoch": 0.17484049751885028, + "grad_norm": 0.12030690802546253, + "learning_rate": 9.16935195130684e-06, + "loss": 0.0005, + "step": 2713 + }, + { + "epoch": 0.17490494296577946, + "grad_norm": 0.521378924391397, + "learning_rate": 9.168635875402794e-06, + "loss": 0.0034, + "step": 2714 + }, + { + "epoch": 0.17496938841270865, + "grad_norm": 0.018006203519038057, + "learning_rate": 9.167919799498749e-06, + "loss": 0.0001, + "step": 2715 + }, + { + "epoch": 0.1750338338596378, + "grad_norm": 0.004143970490399163, + "learning_rate": 9.167203723594703e-06, + "loss": 0.0, + "step": 2716 + }, + { + "epoch": 0.175098279306567, + "grad_norm": 0.00027283137921355467, + "learning_rate": 9.166487647690655e-06, + "loss": 0.0, + "step": 2717 + }, + { + "epoch": 0.17516272475349617, + "grad_norm": 0.0491949441081807, + "learning_rate": 9.16577157178661e-06, + "loss": 0.0001, + "step": 2718 + }, + { + "epoch": 0.17522717020042533, + "grad_norm": 0.0031770309749900405, + "learning_rate": 9.165055495882564e-06, + "loss": 0.0, + "step": 2719 + }, + { + "epoch": 0.17529161564735452, + "grad_norm": 0.10751967746263864, + "learning_rate": 9.164339419978518e-06, + "loss": 0.0005, + "step": 2720 + }, + { + "epoch": 0.1753560610942837, + "grad_norm": 0.14063707273371076, + "learning_rate": 9.163623344074473e-06, + "loss": 0.0003, + "step": 2721 + }, + { + "epoch": 0.17542050654121286, + "grad_norm": 0.0011253487254859079, + "learning_rate": 9.162907268170427e-06, + "loss": 0.0, + "step": 2722 + }, + { + "epoch": 0.17548495198814204, + "grad_norm": 0.030041388852838077, + "learning_rate": 9.162191192266381e-06, + "loss": 0.0001, + "step": 2723 + }, + { + "epoch": 0.17554939743507122, + "grad_norm": 0.0030749777231569345, + "learning_rate": 9.161475116362336e-06, + "loss": 0.0, + "step": 2724 + }, + { + "epoch": 0.17561384288200038, + "grad_norm": 0.01139113188354577, + "learning_rate": 9.16075904045829e-06, + "loss": 0.0001, + "step": 2725 + }, + { + "epoch": 0.17567828832892957, + "grad_norm": 0.005132686334298028, + "learning_rate": 9.160042964554242e-06, + "loss": 0.0, + "step": 2726 + }, + { + "epoch": 0.17574273377585872, + "grad_norm": 0.011252321098629682, + "learning_rate": 9.159326888650197e-06, + "loss": 0.0, + "step": 2727 + }, + { + "epoch": 0.1758071792227879, + "grad_norm": 0.0001379375049884014, + "learning_rate": 9.158610812746151e-06, + "loss": 0.0, + "step": 2728 + }, + { + "epoch": 0.1758716246697171, + "grad_norm": 0.009681077505664085, + "learning_rate": 9.157894736842105e-06, + "loss": 0.0001, + "step": 2729 + }, + { + "epoch": 0.17593607011664625, + "grad_norm": 0.005116961384670569, + "learning_rate": 9.15717866093806e-06, + "loss": 0.0, + "step": 2730 + }, + { + "epoch": 0.17600051556357543, + "grad_norm": 0.0266595739957785, + "learning_rate": 9.156462585034014e-06, + "loss": 0.0001, + "step": 2731 + }, + { + "epoch": 0.17606496101050462, + "grad_norm": 0.00023531374018092132, + "learning_rate": 9.155746509129968e-06, + "loss": 0.0, + "step": 2732 + }, + { + "epoch": 0.17612940645743377, + "grad_norm": 0.004908907718591901, + "learning_rate": 9.155030433225923e-06, + "loss": 0.0, + "step": 2733 + }, + { + "epoch": 0.17619385190436296, + "grad_norm": 0.0026669341550138548, + "learning_rate": 9.154314357321877e-06, + "loss": 0.0, + "step": 2734 + }, + { + "epoch": 0.17625829735129214, + "grad_norm": 0.024920427777757822, + "learning_rate": 9.153598281417831e-06, + "loss": 0.0001, + "step": 2735 + }, + { + "epoch": 0.1763227427982213, + "grad_norm": 0.0002157229860912765, + "learning_rate": 9.152882205513785e-06, + "loss": 0.0, + "step": 2736 + }, + { + "epoch": 0.17638718824515048, + "grad_norm": 0.003320399939081684, + "learning_rate": 9.15216612960974e-06, + "loss": 0.0, + "step": 2737 + }, + { + "epoch": 0.17645163369207967, + "grad_norm": 0.0021092644703280157, + "learning_rate": 9.151450053705694e-06, + "loss": 0.0, + "step": 2738 + }, + { + "epoch": 0.17651607913900882, + "grad_norm": 0.28572773821913267, + "learning_rate": 9.150733977801648e-06, + "loss": 0.0014, + "step": 2739 + }, + { + "epoch": 0.176580524585938, + "grad_norm": 0.007860353343515316, + "learning_rate": 9.150017901897603e-06, + "loss": 0.0, + "step": 2740 + }, + { + "epoch": 0.17664497003286717, + "grad_norm": 0.0074805734351772474, + "learning_rate": 9.149301825993557e-06, + "loss": 0.0001, + "step": 2741 + }, + { + "epoch": 0.17670941547979635, + "grad_norm": 0.00011039618974302341, + "learning_rate": 9.14858575008951e-06, + "loss": 0.0, + "step": 2742 + }, + { + "epoch": 0.17677386092672553, + "grad_norm": 0.0070548084467875975, + "learning_rate": 9.147869674185464e-06, + "loss": 0.0, + "step": 2743 + }, + { + "epoch": 0.1768383063736547, + "grad_norm": 0.010001704121558984, + "learning_rate": 9.147153598281418e-06, + "loss": 0.0, + "step": 2744 + }, + { + "epoch": 0.17690275182058388, + "grad_norm": 0.0019253162831213417, + "learning_rate": 9.146437522377372e-06, + "loss": 0.0, + "step": 2745 + }, + { + "epoch": 0.17696719726751306, + "grad_norm": 0.01767115100918315, + "learning_rate": 9.145721446473327e-06, + "loss": 0.0001, + "step": 2746 + }, + { + "epoch": 0.17703164271444222, + "grad_norm": 0.012779574033902407, + "learning_rate": 9.145005370569281e-06, + "loss": 0.0001, + "step": 2747 + }, + { + "epoch": 0.1770960881613714, + "grad_norm": 0.0015406080229237796, + "learning_rate": 9.144289294665235e-06, + "loss": 0.0, + "step": 2748 + }, + { + "epoch": 0.17716053360830059, + "grad_norm": 0.008029459760126534, + "learning_rate": 9.14357321876119e-06, + "loss": 0.0, + "step": 2749 + }, + { + "epoch": 0.17722497905522974, + "grad_norm": 0.007691325124199665, + "learning_rate": 9.142857142857144e-06, + "loss": 0.0, + "step": 2750 + }, + { + "epoch": 0.17728942450215893, + "grad_norm": 0.0007767056720782113, + "learning_rate": 9.142141066953098e-06, + "loss": 0.0, + "step": 2751 + }, + { + "epoch": 0.17735386994908808, + "grad_norm": 0.0006003394971469371, + "learning_rate": 9.14142499104905e-06, + "loss": 0.0, + "step": 2752 + }, + { + "epoch": 0.17741831539601727, + "grad_norm": 2.471258603101776, + "learning_rate": 9.140708915145005e-06, + "loss": 0.0177, + "step": 2753 + }, + { + "epoch": 0.17748276084294645, + "grad_norm": 0.022019753049725126, + "learning_rate": 9.13999283924096e-06, + "loss": 0.0001, + "step": 2754 + }, + { + "epoch": 0.1775472062898756, + "grad_norm": 0.42206641552426616, + "learning_rate": 9.139276763336914e-06, + "loss": 0.0028, + "step": 2755 + }, + { + "epoch": 0.1776116517368048, + "grad_norm": 0.0005502695640614431, + "learning_rate": 9.13856068743287e-06, + "loss": 0.0, + "step": 2756 + }, + { + "epoch": 0.17767609718373398, + "grad_norm": 0.01922958508482105, + "learning_rate": 9.137844611528824e-06, + "loss": 0.0, + "step": 2757 + }, + { + "epoch": 0.17774054263066313, + "grad_norm": 0.19847522624781685, + "learning_rate": 9.137128535624777e-06, + "loss": 0.0001, + "step": 2758 + }, + { + "epoch": 0.17780498807759232, + "grad_norm": 0.46543913466278036, + "learning_rate": 9.136412459720731e-06, + "loss": 0.0013, + "step": 2759 + }, + { + "epoch": 0.1778694335245215, + "grad_norm": 0.02188902143711718, + "learning_rate": 9.135696383816685e-06, + "loss": 0.0001, + "step": 2760 + }, + { + "epoch": 0.17793387897145066, + "grad_norm": 0.35200723492504954, + "learning_rate": 9.13498030791264e-06, + "loss": 0.0011, + "step": 2761 + }, + { + "epoch": 0.17799832441837984, + "grad_norm": 0.000621212590110667, + "learning_rate": 9.134264232008594e-06, + "loss": 0.0, + "step": 2762 + }, + { + "epoch": 0.17806276986530903, + "grad_norm": 0.0041229362491163645, + "learning_rate": 9.133548156104548e-06, + "loss": 0.0, + "step": 2763 + }, + { + "epoch": 0.17812721531223819, + "grad_norm": 0.1805057863321118, + "learning_rate": 9.132832080200502e-06, + "loss": 0.0005, + "step": 2764 + }, + { + "epoch": 0.17819166075916737, + "grad_norm": 0.04216859049336486, + "learning_rate": 9.132116004296457e-06, + "loss": 0.0005, + "step": 2765 + }, + { + "epoch": 0.17825610620609653, + "grad_norm": 0.007158825226122906, + "learning_rate": 9.131399928392411e-06, + "loss": 0.0, + "step": 2766 + }, + { + "epoch": 0.1783205516530257, + "grad_norm": 0.0007151610788651363, + "learning_rate": 9.130683852488365e-06, + "loss": 0.0, + "step": 2767 + }, + { + "epoch": 0.1783849970999549, + "grad_norm": 0.026471367081296403, + "learning_rate": 9.129967776584318e-06, + "loss": 0.0, + "step": 2768 + }, + { + "epoch": 0.17844944254688405, + "grad_norm": 0.028729129156446532, + "learning_rate": 9.129251700680272e-06, + "loss": 0.0001, + "step": 2769 + }, + { + "epoch": 0.17851388799381324, + "grad_norm": 0.05488665115941958, + "learning_rate": 9.128535624776226e-06, + "loss": 0.0001, + "step": 2770 + }, + { + "epoch": 0.17857833344074242, + "grad_norm": 0.06607381270845136, + "learning_rate": 9.12781954887218e-06, + "loss": 0.0002, + "step": 2771 + }, + { + "epoch": 0.17864277888767158, + "grad_norm": 0.04787404155176431, + "learning_rate": 9.127103472968135e-06, + "loss": 0.0001, + "step": 2772 + }, + { + "epoch": 0.17870722433460076, + "grad_norm": 0.10605978049032681, + "learning_rate": 9.12638739706409e-06, + "loss": 0.0003, + "step": 2773 + }, + { + "epoch": 0.17877166978152995, + "grad_norm": 0.0007944870144507999, + "learning_rate": 9.125671321160044e-06, + "loss": 0.0, + "step": 2774 + }, + { + "epoch": 0.1788361152284591, + "grad_norm": 0.00026383111149123895, + "learning_rate": 9.124955245255998e-06, + "loss": 0.0, + "step": 2775 + }, + { + "epoch": 0.1789005606753883, + "grad_norm": 0.00323695762220021, + "learning_rate": 9.124239169351952e-06, + "loss": 0.0, + "step": 2776 + }, + { + "epoch": 0.17896500612231747, + "grad_norm": 0.12140674287801527, + "learning_rate": 9.123523093447907e-06, + "loss": 0.0002, + "step": 2777 + }, + { + "epoch": 0.17902945156924663, + "grad_norm": 0.05398408619186501, + "learning_rate": 9.12280701754386e-06, + "loss": 0.0001, + "step": 2778 + }, + { + "epoch": 0.1790938970161758, + "grad_norm": 0.0087486923606527, + "learning_rate": 9.122090941639813e-06, + "loss": 0.0001, + "step": 2779 + }, + { + "epoch": 0.17915834246310497, + "grad_norm": 0.0002230895636351413, + "learning_rate": 9.12137486573577e-06, + "loss": 0.0, + "step": 2780 + }, + { + "epoch": 0.17922278791003415, + "grad_norm": 0.33086147690502815, + "learning_rate": 9.120658789831724e-06, + "loss": 0.0008, + "step": 2781 + }, + { + "epoch": 0.17928723335696334, + "grad_norm": 0.909170935370485, + "learning_rate": 9.119942713927678e-06, + "loss": 0.0027, + "step": 2782 + }, + { + "epoch": 0.1793516788038925, + "grad_norm": 0.0024790891799992814, + "learning_rate": 9.119226638023632e-06, + "loss": 0.0, + "step": 2783 + }, + { + "epoch": 0.17941612425082168, + "grad_norm": 0.10431613976366509, + "learning_rate": 9.118510562119585e-06, + "loss": 0.0004, + "step": 2784 + }, + { + "epoch": 0.17948056969775086, + "grad_norm": 0.012409698026434208, + "learning_rate": 9.11779448621554e-06, + "loss": 0.0, + "step": 2785 + }, + { + "epoch": 0.17954501514468002, + "grad_norm": 0.2690126294650971, + "learning_rate": 9.117078410311494e-06, + "loss": 0.0038, + "step": 2786 + }, + { + "epoch": 0.1796094605916092, + "grad_norm": 0.0033128166764828226, + "learning_rate": 9.116362334407448e-06, + "loss": 0.0, + "step": 2787 + }, + { + "epoch": 0.1796739060385384, + "grad_norm": 0.0009229370739567911, + "learning_rate": 9.115646258503402e-06, + "loss": 0.0, + "step": 2788 + }, + { + "epoch": 0.17973835148546755, + "grad_norm": 0.012281191385820282, + "learning_rate": 9.114930182599356e-06, + "loss": 0.0, + "step": 2789 + }, + { + "epoch": 0.17980279693239673, + "grad_norm": 0.0010946180994970097, + "learning_rate": 9.11421410669531e-06, + "loss": 0.0, + "step": 2790 + }, + { + "epoch": 0.1798672423793259, + "grad_norm": 0.0022886774893961163, + "learning_rate": 9.113498030791265e-06, + "loss": 0.0, + "step": 2791 + }, + { + "epoch": 0.17993168782625507, + "grad_norm": 0.18050407336452123, + "learning_rate": 9.11278195488722e-06, + "loss": 0.0004, + "step": 2792 + }, + { + "epoch": 0.17999613327318426, + "grad_norm": 0.00776095822730694, + "learning_rate": 9.112065878983174e-06, + "loss": 0.0, + "step": 2793 + }, + { + "epoch": 0.1800605787201134, + "grad_norm": 0.010017379040207797, + "learning_rate": 9.111349803079126e-06, + "loss": 0.0, + "step": 2794 + }, + { + "epoch": 0.1801250241670426, + "grad_norm": 0.06924958627733009, + "learning_rate": 9.11063372717508e-06, + "loss": 0.0003, + "step": 2795 + }, + { + "epoch": 0.18018946961397178, + "grad_norm": 0.2812996264226042, + "learning_rate": 9.109917651271035e-06, + "loss": 0.0004, + "step": 2796 + }, + { + "epoch": 0.18025391506090094, + "grad_norm": 0.0031109287427372747, + "learning_rate": 9.10920157536699e-06, + "loss": 0.0, + "step": 2797 + }, + { + "epoch": 0.18031836050783012, + "grad_norm": 0.005299891749785947, + "learning_rate": 9.108485499462943e-06, + "loss": 0.0, + "step": 2798 + }, + { + "epoch": 0.1803828059547593, + "grad_norm": 0.159811229772372, + "learning_rate": 9.107769423558898e-06, + "loss": 0.0094, + "step": 2799 + }, + { + "epoch": 0.18044725140168846, + "grad_norm": 0.3035861307779333, + "learning_rate": 9.107053347654852e-06, + "loss": 0.0011, + "step": 2800 + }, + { + "epoch": 0.18051169684861765, + "grad_norm": 0.10506831640956556, + "learning_rate": 9.106337271750806e-06, + "loss": 0.0003, + "step": 2801 + }, + { + "epoch": 0.18057614229554683, + "grad_norm": 0.06075173568196115, + "learning_rate": 9.10562119584676e-06, + "loss": 0.0001, + "step": 2802 + }, + { + "epoch": 0.180640587742476, + "grad_norm": 0.011675944604708841, + "learning_rate": 9.104905119942715e-06, + "loss": 0.0, + "step": 2803 + }, + { + "epoch": 0.18070503318940517, + "grad_norm": 0.017210122448086982, + "learning_rate": 9.10418904403867e-06, + "loss": 0.0001, + "step": 2804 + }, + { + "epoch": 0.18076947863633433, + "grad_norm": 0.0010844863697502473, + "learning_rate": 9.103472968134624e-06, + "loss": 0.0, + "step": 2805 + }, + { + "epoch": 0.18083392408326351, + "grad_norm": 0.03426975746377976, + "learning_rate": 9.102756892230578e-06, + "loss": 0.0, + "step": 2806 + }, + { + "epoch": 0.1808983695301927, + "grad_norm": 0.0514928133710475, + "learning_rate": 9.102040816326532e-06, + "loss": 0.0001, + "step": 2807 + }, + { + "epoch": 0.18096281497712186, + "grad_norm": 0.00034102055632519174, + "learning_rate": 9.101324740422486e-06, + "loss": 0.0, + "step": 2808 + }, + { + "epoch": 0.18102726042405104, + "grad_norm": 0.007900137649701421, + "learning_rate": 9.10060866451844e-06, + "loss": 0.0, + "step": 2809 + }, + { + "epoch": 0.18109170587098022, + "grad_norm": 0.0012440836350956035, + "learning_rate": 9.099892588614393e-06, + "loss": 0.0, + "step": 2810 + }, + { + "epoch": 0.18115615131790938, + "grad_norm": 0.0045855459787961725, + "learning_rate": 9.099176512710348e-06, + "loss": 0.0, + "step": 2811 + }, + { + "epoch": 0.18122059676483857, + "grad_norm": 0.1941159407275899, + "learning_rate": 9.098460436806302e-06, + "loss": 0.0007, + "step": 2812 + }, + { + "epoch": 0.18128504221176775, + "grad_norm": 0.025967662808676583, + "learning_rate": 9.097744360902256e-06, + "loss": 0.0001, + "step": 2813 + }, + { + "epoch": 0.1813494876586969, + "grad_norm": 0.0331286074506034, + "learning_rate": 9.09702828499821e-06, + "loss": 0.0001, + "step": 2814 + }, + { + "epoch": 0.1814139331056261, + "grad_norm": 0.08495830115250962, + "learning_rate": 9.096312209094165e-06, + "loss": 0.0022, + "step": 2815 + }, + { + "epoch": 0.18147837855255528, + "grad_norm": 0.00813836807265155, + "learning_rate": 9.095596133190119e-06, + "loss": 0.0, + "step": 2816 + }, + { + "epoch": 0.18154282399948443, + "grad_norm": 0.008071176914842531, + "learning_rate": 9.094880057286073e-06, + "loss": 0.0, + "step": 2817 + }, + { + "epoch": 0.18160726944641362, + "grad_norm": 0.03360537068790458, + "learning_rate": 9.094163981382028e-06, + "loss": 0.0, + "step": 2818 + }, + { + "epoch": 0.18167171489334277, + "grad_norm": 3.9930071502552744, + "learning_rate": 9.09344790547798e-06, + "loss": 0.0428, + "step": 2819 + }, + { + "epoch": 0.18173616034027196, + "grad_norm": 0.3067133750386272, + "learning_rate": 9.092731829573935e-06, + "loss": 0.0021, + "step": 2820 + }, + { + "epoch": 0.18180060578720114, + "grad_norm": 0.09692007100777007, + "learning_rate": 9.092015753669889e-06, + "loss": 0.0003, + "step": 2821 + }, + { + "epoch": 0.1818650512341303, + "grad_norm": 0.008326567539547143, + "learning_rate": 9.091299677765843e-06, + "loss": 0.0, + "step": 2822 + }, + { + "epoch": 0.18192949668105948, + "grad_norm": 0.09641887182616365, + "learning_rate": 9.090583601861798e-06, + "loss": 0.0011, + "step": 2823 + }, + { + "epoch": 0.18199394212798867, + "grad_norm": 0.007352684257062721, + "learning_rate": 9.089867525957752e-06, + "loss": 0.0, + "step": 2824 + }, + { + "epoch": 0.18205838757491782, + "grad_norm": 0.030274536320525708, + "learning_rate": 9.089151450053706e-06, + "loss": 0.0001, + "step": 2825 + }, + { + "epoch": 0.182122833021847, + "grad_norm": 0.0006278500122715883, + "learning_rate": 9.08843537414966e-06, + "loss": 0.0, + "step": 2826 + }, + { + "epoch": 0.1821872784687762, + "grad_norm": 0.0030603911056034895, + "learning_rate": 9.087719298245615e-06, + "loss": 0.0, + "step": 2827 + }, + { + "epoch": 0.18225172391570535, + "grad_norm": 0.0004505991303399384, + "learning_rate": 9.087003222341569e-06, + "loss": 0.0, + "step": 2828 + }, + { + "epoch": 0.18231616936263453, + "grad_norm": 0.017218593559380928, + "learning_rate": 9.086287146437523e-06, + "loss": 0.0001, + "step": 2829 + }, + { + "epoch": 0.18238061480956372, + "grad_norm": 0.0021886677003908977, + "learning_rate": 9.085571070533478e-06, + "loss": 0.0, + "step": 2830 + }, + { + "epoch": 0.18244506025649287, + "grad_norm": 0.0004787494491278347, + "learning_rate": 9.084854994629432e-06, + "loss": 0.0, + "step": 2831 + }, + { + "epoch": 0.18250950570342206, + "grad_norm": 0.0060502932594086204, + "learning_rate": 9.084138918725386e-06, + "loss": 0.0, + "step": 2832 + }, + { + "epoch": 0.18257395115035122, + "grad_norm": 0.00035090977070178326, + "learning_rate": 9.08342284282134e-06, + "loss": 0.0, + "step": 2833 + }, + { + "epoch": 0.1826383965972804, + "grad_norm": 0.12514816384262056, + "learning_rate": 9.082706766917295e-06, + "loss": 0.0004, + "step": 2834 + }, + { + "epoch": 0.18270284204420958, + "grad_norm": 0.3302171515942961, + "learning_rate": 9.081990691013247e-06, + "loss": 0.0015, + "step": 2835 + }, + { + "epoch": 0.18276728749113874, + "grad_norm": 0.012892377663801916, + "learning_rate": 9.081274615109202e-06, + "loss": 0.0001, + "step": 2836 + }, + { + "epoch": 0.18283173293806793, + "grad_norm": 0.1393849681809212, + "learning_rate": 9.080558539205156e-06, + "loss": 0.0001, + "step": 2837 + }, + { + "epoch": 0.1828961783849971, + "grad_norm": 0.004492926812105762, + "learning_rate": 9.07984246330111e-06, + "loss": 0.0, + "step": 2838 + }, + { + "epoch": 0.18296062383192627, + "grad_norm": 0.03560397375172943, + "learning_rate": 9.079126387397065e-06, + "loss": 0.0, + "step": 2839 + }, + { + "epoch": 0.18302506927885545, + "grad_norm": 0.32795772375842486, + "learning_rate": 9.078410311493019e-06, + "loss": 0.0007, + "step": 2840 + }, + { + "epoch": 0.18308951472578464, + "grad_norm": 0.01319255954466059, + "learning_rate": 9.077694235588973e-06, + "loss": 0.0, + "step": 2841 + }, + { + "epoch": 0.1831539601727138, + "grad_norm": 0.1890710931196515, + "learning_rate": 9.076978159684928e-06, + "loss": 0.0016, + "step": 2842 + }, + { + "epoch": 0.18321840561964298, + "grad_norm": 0.019405285126641924, + "learning_rate": 9.076262083780882e-06, + "loss": 0.0001, + "step": 2843 + }, + { + "epoch": 0.18328285106657213, + "grad_norm": 0.000751944682332039, + "learning_rate": 9.075546007876836e-06, + "loss": 0.0, + "step": 2844 + }, + { + "epoch": 0.18334729651350132, + "grad_norm": 0.3132701562643847, + "learning_rate": 9.074829931972789e-06, + "loss": 0.0006, + "step": 2845 + }, + { + "epoch": 0.1834117419604305, + "grad_norm": 0.000840455758111162, + "learning_rate": 9.074113856068743e-06, + "loss": 0.0, + "step": 2846 + }, + { + "epoch": 0.18347618740735966, + "grad_norm": 0.010120712128541575, + "learning_rate": 9.073397780164697e-06, + "loss": 0.0001, + "step": 2847 + }, + { + "epoch": 0.18354063285428884, + "grad_norm": 0.009966635973211107, + "learning_rate": 9.072681704260652e-06, + "loss": 0.0001, + "step": 2848 + }, + { + "epoch": 0.18360507830121803, + "grad_norm": 0.006565756711698041, + "learning_rate": 9.071965628356606e-06, + "loss": 0.0, + "step": 2849 + }, + { + "epoch": 0.18366952374814718, + "grad_norm": 0.0005553512116047497, + "learning_rate": 9.071249552452562e-06, + "loss": 0.0, + "step": 2850 + }, + { + "epoch": 0.18373396919507637, + "grad_norm": 0.00028353594343350657, + "learning_rate": 9.070533476548515e-06, + "loss": 0.0, + "step": 2851 + }, + { + "epoch": 0.18379841464200555, + "grad_norm": 0.20578208174425175, + "learning_rate": 9.069817400644469e-06, + "loss": 0.0014, + "step": 2852 + }, + { + "epoch": 0.1838628600889347, + "grad_norm": 0.04055908694973615, + "learning_rate": 9.069101324740423e-06, + "loss": 0.0002, + "step": 2853 + }, + { + "epoch": 0.1839273055358639, + "grad_norm": 0.06899265144614615, + "learning_rate": 9.068385248836377e-06, + "loss": 0.0007, + "step": 2854 + }, + { + "epoch": 0.18399175098279308, + "grad_norm": 0.0006986279102526963, + "learning_rate": 9.067669172932332e-06, + "loss": 0.0, + "step": 2855 + }, + { + "epoch": 0.18405619642972224, + "grad_norm": 0.00019407767512415275, + "learning_rate": 9.066953097028286e-06, + "loss": 0.0, + "step": 2856 + }, + { + "epoch": 0.18412064187665142, + "grad_norm": 0.15103341496607176, + "learning_rate": 9.06623702112424e-06, + "loss": 0.0008, + "step": 2857 + }, + { + "epoch": 0.18418508732358058, + "grad_norm": 0.12748822634696041, + "learning_rate": 9.065520945220195e-06, + "loss": 0.0002, + "step": 2858 + }, + { + "epoch": 0.18424953277050976, + "grad_norm": 0.010185844246403907, + "learning_rate": 9.064804869316149e-06, + "loss": 0.0, + "step": 2859 + }, + { + "epoch": 0.18431397821743895, + "grad_norm": 0.0005492418436434324, + "learning_rate": 9.064088793412103e-06, + "loss": 0.0, + "step": 2860 + }, + { + "epoch": 0.1843784236643681, + "grad_norm": 0.06054778444468528, + "learning_rate": 9.063372717508056e-06, + "loss": 0.0002, + "step": 2861 + }, + { + "epoch": 0.1844428691112973, + "grad_norm": 0.00015870494723684052, + "learning_rate": 9.06265664160401e-06, + "loss": 0.0, + "step": 2862 + }, + { + "epoch": 0.18450731455822647, + "grad_norm": 0.2531298430886071, + "learning_rate": 9.061940565699964e-06, + "loss": 0.0008, + "step": 2863 + }, + { + "epoch": 0.18457176000515563, + "grad_norm": 0.00035488257119721176, + "learning_rate": 9.061224489795919e-06, + "loss": 0.0, + "step": 2864 + }, + { + "epoch": 0.1846362054520848, + "grad_norm": 0.001592378038638373, + "learning_rate": 9.060508413891873e-06, + "loss": 0.0, + "step": 2865 + }, + { + "epoch": 0.184700650899014, + "grad_norm": 0.005239850862371177, + "learning_rate": 9.059792337987827e-06, + "loss": 0.0, + "step": 2866 + }, + { + "epoch": 0.18476509634594315, + "grad_norm": 0.0012565288853653319, + "learning_rate": 9.059076262083782e-06, + "loss": 0.0, + "step": 2867 + }, + { + "epoch": 0.18482954179287234, + "grad_norm": 0.003256885058237763, + "learning_rate": 9.058360186179736e-06, + "loss": 0.0, + "step": 2868 + }, + { + "epoch": 0.18489398723980152, + "grad_norm": 0.0013340724323056313, + "learning_rate": 9.05764411027569e-06, + "loss": 0.0, + "step": 2869 + }, + { + "epoch": 0.18495843268673068, + "grad_norm": 0.009623622364578288, + "learning_rate": 9.056928034371644e-06, + "loss": 0.0001, + "step": 2870 + }, + { + "epoch": 0.18502287813365986, + "grad_norm": 0.02283470135952233, + "learning_rate": 9.056211958467597e-06, + "loss": 0.0, + "step": 2871 + }, + { + "epoch": 0.18508732358058902, + "grad_norm": 3.1352344007944293, + "learning_rate": 9.055495882563551e-06, + "loss": 0.0312, + "step": 2872 + }, + { + "epoch": 0.1851517690275182, + "grad_norm": 0.1283916257810219, + "learning_rate": 9.054779806659507e-06, + "loss": 0.0022, + "step": 2873 + }, + { + "epoch": 0.1852162144744474, + "grad_norm": 0.0017243622373718673, + "learning_rate": 9.054063730755462e-06, + "loss": 0.0, + "step": 2874 + }, + { + "epoch": 0.18528065992137654, + "grad_norm": 0.02626256695329013, + "learning_rate": 9.053347654851416e-06, + "loss": 0.0001, + "step": 2875 + }, + { + "epoch": 0.18534510536830573, + "grad_norm": 1.8560834970519415, + "learning_rate": 9.05263157894737e-06, + "loss": 0.0055, + "step": 2876 + }, + { + "epoch": 0.1854095508152349, + "grad_norm": 0.22453528301728987, + "learning_rate": 9.051915503043323e-06, + "loss": 0.0003, + "step": 2877 + }, + { + "epoch": 0.18547399626216407, + "grad_norm": 0.01902833171278348, + "learning_rate": 9.051199427139277e-06, + "loss": 0.0001, + "step": 2878 + }, + { + "epoch": 0.18553844170909325, + "grad_norm": 0.0015275143263481297, + "learning_rate": 9.050483351235231e-06, + "loss": 0.0, + "step": 2879 + }, + { + "epoch": 0.18560288715602244, + "grad_norm": 0.0001940454087729942, + "learning_rate": 9.049767275331186e-06, + "loss": 0.0, + "step": 2880 + }, + { + "epoch": 0.1856673326029516, + "grad_norm": 0.00048513116384772117, + "learning_rate": 9.04905119942714e-06, + "loss": 0.0, + "step": 2881 + }, + { + "epoch": 0.18573177804988078, + "grad_norm": 0.0037608151201876908, + "learning_rate": 9.048335123523094e-06, + "loss": 0.0, + "step": 2882 + }, + { + "epoch": 0.18579622349680994, + "grad_norm": 1.1987838702145488, + "learning_rate": 9.047619047619049e-06, + "loss": 0.001, + "step": 2883 + }, + { + "epoch": 0.18586066894373912, + "grad_norm": 0.010770438174594095, + "learning_rate": 9.046902971715003e-06, + "loss": 0.0001, + "step": 2884 + }, + { + "epoch": 0.1859251143906683, + "grad_norm": 0.003791891232412315, + "learning_rate": 9.046186895810957e-06, + "loss": 0.0, + "step": 2885 + }, + { + "epoch": 0.18598955983759746, + "grad_norm": 1.4171702761376253, + "learning_rate": 9.045470819906912e-06, + "loss": 0.0036, + "step": 2886 + }, + { + "epoch": 0.18605400528452665, + "grad_norm": 0.013678521963010951, + "learning_rate": 9.044754744002864e-06, + "loss": 0.0, + "step": 2887 + }, + { + "epoch": 0.18611845073145583, + "grad_norm": 0.2775792210281082, + "learning_rate": 9.044038668098818e-06, + "loss": 0.0021, + "step": 2888 + }, + { + "epoch": 0.186182896178385, + "grad_norm": 0.021857381142348316, + "learning_rate": 9.043322592194773e-06, + "loss": 0.0001, + "step": 2889 + }, + { + "epoch": 0.18624734162531417, + "grad_norm": 0.10352626332013336, + "learning_rate": 9.042606516290727e-06, + "loss": 0.0002, + "step": 2890 + }, + { + "epoch": 0.18631178707224336, + "grad_norm": 0.043887566667352385, + "learning_rate": 9.041890440386681e-06, + "loss": 0.0001, + "step": 2891 + }, + { + "epoch": 0.1863762325191725, + "grad_norm": 0.00426374508594024, + "learning_rate": 9.041174364482636e-06, + "loss": 0.0, + "step": 2892 + }, + { + "epoch": 0.1864406779661017, + "grad_norm": 0.0017327150241980725, + "learning_rate": 9.04045828857859e-06, + "loss": 0.0, + "step": 2893 + }, + { + "epoch": 0.18650512341303088, + "grad_norm": 0.16207233258521805, + "learning_rate": 9.039742212674544e-06, + "loss": 0.0003, + "step": 2894 + }, + { + "epoch": 0.18656956885996004, + "grad_norm": 0.08029956264882238, + "learning_rate": 9.039026136770499e-06, + "loss": 0.0002, + "step": 2895 + }, + { + "epoch": 0.18663401430688922, + "grad_norm": 0.002952531018276622, + "learning_rate": 9.038310060866453e-06, + "loss": 0.0, + "step": 2896 + }, + { + "epoch": 0.18669845975381838, + "grad_norm": 0.001077306109601978, + "learning_rate": 9.037593984962407e-06, + "loss": 0.0, + "step": 2897 + }, + { + "epoch": 0.18676290520074756, + "grad_norm": 0.5327763508814157, + "learning_rate": 9.036877909058361e-06, + "loss": 0.0018, + "step": 2898 + }, + { + "epoch": 0.18682735064767675, + "grad_norm": 0.0011653264573898313, + "learning_rate": 9.036161833154316e-06, + "loss": 0.0, + "step": 2899 + }, + { + "epoch": 0.1868917960946059, + "grad_norm": 0.010833147865541404, + "learning_rate": 9.03544575725027e-06, + "loss": 0.0001, + "step": 2900 + }, + { + "epoch": 0.1869562415415351, + "grad_norm": 0.0035407077531820256, + "learning_rate": 9.034729681346224e-06, + "loss": 0.0, + "step": 2901 + }, + { + "epoch": 0.18702068698846427, + "grad_norm": 0.19864257068351776, + "learning_rate": 9.034013605442179e-06, + "loss": 0.0003, + "step": 2902 + }, + { + "epoch": 0.18708513243539343, + "grad_norm": 0.5965458646506104, + "learning_rate": 9.033297529538131e-06, + "loss": 0.0033, + "step": 2903 + }, + { + "epoch": 0.18714957788232262, + "grad_norm": 0.0016764956381874533, + "learning_rate": 9.032581453634086e-06, + "loss": 0.0, + "step": 2904 + }, + { + "epoch": 0.1872140233292518, + "grad_norm": 0.3784683095611159, + "learning_rate": 9.03186537773004e-06, + "loss": 0.0007, + "step": 2905 + }, + { + "epoch": 0.18727846877618096, + "grad_norm": 0.005355622180015852, + "learning_rate": 9.031149301825994e-06, + "loss": 0.0, + "step": 2906 + }, + { + "epoch": 0.18734291422311014, + "grad_norm": 0.1916487189541943, + "learning_rate": 9.030433225921948e-06, + "loss": 0.0003, + "step": 2907 + }, + { + "epoch": 0.18740735967003933, + "grad_norm": 0.08662085151423458, + "learning_rate": 9.029717150017903e-06, + "loss": 0.0006, + "step": 2908 + }, + { + "epoch": 0.18747180511696848, + "grad_norm": 0.1926151557278316, + "learning_rate": 9.029001074113857e-06, + "loss": 0.0004, + "step": 2909 + }, + { + "epoch": 0.18753625056389767, + "grad_norm": 0.3587466842551305, + "learning_rate": 9.028284998209811e-06, + "loss": 0.003, + "step": 2910 + }, + { + "epoch": 0.18760069601082682, + "grad_norm": 0.01522610071722062, + "learning_rate": 9.027568922305766e-06, + "loss": 0.0001, + "step": 2911 + }, + { + "epoch": 0.187665141457756, + "grad_norm": 0.09025906889595814, + "learning_rate": 9.026852846401718e-06, + "loss": 0.001, + "step": 2912 + }, + { + "epoch": 0.1877295869046852, + "grad_norm": 0.0005104981904117869, + "learning_rate": 9.026136770497673e-06, + "loss": 0.0, + "step": 2913 + }, + { + "epoch": 0.18779403235161435, + "grad_norm": 0.009975611318601715, + "learning_rate": 9.025420694593627e-06, + "loss": 0.0, + "step": 2914 + }, + { + "epoch": 0.18785847779854353, + "grad_norm": 0.04623598655275114, + "learning_rate": 9.024704618689581e-06, + "loss": 0.0002, + "step": 2915 + }, + { + "epoch": 0.18792292324547272, + "grad_norm": 0.003555617732486728, + "learning_rate": 9.023988542785535e-06, + "loss": 0.0, + "step": 2916 + }, + { + "epoch": 0.18798736869240187, + "grad_norm": 0.018560679936225576, + "learning_rate": 9.02327246688149e-06, + "loss": 0.0, + "step": 2917 + }, + { + "epoch": 0.18805181413933106, + "grad_norm": 0.43325214848147997, + "learning_rate": 9.022556390977444e-06, + "loss": 0.0021, + "step": 2918 + }, + { + "epoch": 0.18811625958626024, + "grad_norm": 0.07998140821266131, + "learning_rate": 9.021840315073398e-06, + "loss": 0.0017, + "step": 2919 + }, + { + "epoch": 0.1881807050331894, + "grad_norm": 0.07237012820470437, + "learning_rate": 9.021124239169353e-06, + "loss": 0.0002, + "step": 2920 + }, + { + "epoch": 0.18824515048011858, + "grad_norm": 0.000759668939913165, + "learning_rate": 9.020408163265307e-06, + "loss": 0.0, + "step": 2921 + }, + { + "epoch": 0.18830959592704774, + "grad_norm": 0.18338005331275695, + "learning_rate": 9.019692087361261e-06, + "loss": 0.0006, + "step": 2922 + }, + { + "epoch": 0.18837404137397692, + "grad_norm": 0.2098501933593441, + "learning_rate": 9.018976011457216e-06, + "loss": 0.0005, + "step": 2923 + }, + { + "epoch": 0.1884384868209061, + "grad_norm": 0.0018100893660102637, + "learning_rate": 9.01825993555317e-06, + "loss": 0.0, + "step": 2924 + }, + { + "epoch": 0.18850293226783527, + "grad_norm": 0.0450810406327063, + "learning_rate": 9.017543859649124e-06, + "loss": 0.0001, + "step": 2925 + }, + { + "epoch": 0.18856737771476445, + "grad_norm": 0.014848699673825443, + "learning_rate": 9.016827783745078e-06, + "loss": 0.0, + "step": 2926 + }, + { + "epoch": 0.18863182316169363, + "grad_norm": 0.015089833418442411, + "learning_rate": 9.016111707841033e-06, + "loss": 0.0, + "step": 2927 + }, + { + "epoch": 0.1886962686086228, + "grad_norm": 0.37162824586463794, + "learning_rate": 9.015395631936985e-06, + "loss": 0.003, + "step": 2928 + }, + { + "epoch": 0.18876071405555198, + "grad_norm": 0.00679900766430204, + "learning_rate": 9.01467955603294e-06, + "loss": 0.0, + "step": 2929 + }, + { + "epoch": 0.18882515950248116, + "grad_norm": 0.19478660354221924, + "learning_rate": 9.013963480128894e-06, + "loss": 0.0007, + "step": 2930 + }, + { + "epoch": 0.18888960494941032, + "grad_norm": 0.08228354880526026, + "learning_rate": 9.013247404224848e-06, + "loss": 0.0003, + "step": 2931 + }, + { + "epoch": 0.1889540503963395, + "grad_norm": 0.07801816609421663, + "learning_rate": 9.012531328320803e-06, + "loss": 0.0002, + "step": 2932 + }, + { + "epoch": 0.18901849584326869, + "grad_norm": 0.015425874953560122, + "learning_rate": 9.011815252416757e-06, + "loss": 0.0, + "step": 2933 + }, + { + "epoch": 0.18908294129019784, + "grad_norm": 0.12362967331985299, + "learning_rate": 9.011099176512711e-06, + "loss": 0.0001, + "step": 2934 + }, + { + "epoch": 0.18914738673712703, + "grad_norm": 0.002316840415064825, + "learning_rate": 9.010383100608665e-06, + "loss": 0.0, + "step": 2935 + }, + { + "epoch": 0.18921183218405618, + "grad_norm": 0.0053035649105632595, + "learning_rate": 9.00966702470462e-06, + "loss": 0.0, + "step": 2936 + }, + { + "epoch": 0.18927627763098537, + "grad_norm": 0.016294097918252204, + "learning_rate": 9.008950948800574e-06, + "loss": 0.0, + "step": 2937 + }, + { + "epoch": 0.18934072307791455, + "grad_norm": 0.008288165719355206, + "learning_rate": 9.008234872896527e-06, + "loss": 0.0, + "step": 2938 + }, + { + "epoch": 0.1894051685248437, + "grad_norm": 0.19698208553230082, + "learning_rate": 9.007518796992481e-06, + "loss": 0.0014, + "step": 2939 + }, + { + "epoch": 0.1894696139717729, + "grad_norm": 0.003667401671959796, + "learning_rate": 9.006802721088435e-06, + "loss": 0.0, + "step": 2940 + }, + { + "epoch": 0.18953405941870208, + "grad_norm": 0.002796827125301907, + "learning_rate": 9.00608664518439e-06, + "loss": 0.0, + "step": 2941 + }, + { + "epoch": 0.18959850486563123, + "grad_norm": 0.3992818972816783, + "learning_rate": 9.005370569280344e-06, + "loss": 0.0002, + "step": 2942 + }, + { + "epoch": 0.18966295031256042, + "grad_norm": 0.05893500611468803, + "learning_rate": 9.0046544933763e-06, + "loss": 0.0004, + "step": 2943 + }, + { + "epoch": 0.1897273957594896, + "grad_norm": 0.002527947056167966, + "learning_rate": 9.003938417472252e-06, + "loss": 0.0, + "step": 2944 + }, + { + "epoch": 0.18979184120641876, + "grad_norm": 0.03283716045698447, + "learning_rate": 9.003222341568207e-06, + "loss": 0.0001, + "step": 2945 + }, + { + "epoch": 0.18985628665334794, + "grad_norm": 0.003239810900464001, + "learning_rate": 9.002506265664161e-06, + "loss": 0.0, + "step": 2946 + }, + { + "epoch": 0.18992073210027713, + "grad_norm": 0.034203012205389266, + "learning_rate": 9.001790189760115e-06, + "loss": 0.0003, + "step": 2947 + }, + { + "epoch": 0.18998517754720629, + "grad_norm": 0.01083160668170668, + "learning_rate": 9.00107411385607e-06, + "loss": 0.0, + "step": 2948 + }, + { + "epoch": 0.19004962299413547, + "grad_norm": 0.007940915211813882, + "learning_rate": 9.000358037952024e-06, + "loss": 0.0, + "step": 2949 + }, + { + "epoch": 0.19011406844106463, + "grad_norm": 0.1084319956272911, + "learning_rate": 8.999641962047978e-06, + "loss": 0.0008, + "step": 2950 + }, + { + "epoch": 0.1901785138879938, + "grad_norm": 0.01166456268566552, + "learning_rate": 8.998925886143933e-06, + "loss": 0.0, + "step": 2951 + }, + { + "epoch": 0.190242959334923, + "grad_norm": 0.009255105740318771, + "learning_rate": 8.998209810239887e-06, + "loss": 0.0, + "step": 2952 + }, + { + "epoch": 0.19030740478185215, + "grad_norm": 0.046569376752111036, + "learning_rate": 8.997493734335841e-06, + "loss": 0.0001, + "step": 2953 + }, + { + "epoch": 0.19037185022878134, + "grad_norm": 0.01405700880200295, + "learning_rate": 8.996777658431794e-06, + "loss": 0.0001, + "step": 2954 + }, + { + "epoch": 0.19043629567571052, + "grad_norm": 0.01268672480723258, + "learning_rate": 8.996061582527748e-06, + "loss": 0.0, + "step": 2955 + }, + { + "epoch": 0.19050074112263968, + "grad_norm": 0.002450229581765563, + "learning_rate": 8.995345506623702e-06, + "loss": 0.0, + "step": 2956 + }, + { + "epoch": 0.19056518656956886, + "grad_norm": 0.0003489092380548279, + "learning_rate": 8.994629430719657e-06, + "loss": 0.0, + "step": 2957 + }, + { + "epoch": 0.19062963201649805, + "grad_norm": 0.0029454670887810084, + "learning_rate": 8.993913354815611e-06, + "loss": 0.0, + "step": 2958 + }, + { + "epoch": 0.1906940774634272, + "grad_norm": 0.1718268218574252, + "learning_rate": 8.993197278911565e-06, + "loss": 0.0018, + "step": 2959 + }, + { + "epoch": 0.1907585229103564, + "grad_norm": 0.11376702629245902, + "learning_rate": 8.99248120300752e-06, + "loss": 0.0003, + "step": 2960 + }, + { + "epoch": 0.19082296835728554, + "grad_norm": 0.0007795738133778679, + "learning_rate": 8.991765127103474e-06, + "loss": 0.0, + "step": 2961 + }, + { + "epoch": 0.19088741380421473, + "grad_norm": 1.4979985713009711, + "learning_rate": 8.991049051199428e-06, + "loss": 0.0109, + "step": 2962 + }, + { + "epoch": 0.1909518592511439, + "grad_norm": 0.8103406896663958, + "learning_rate": 8.990332975295382e-06, + "loss": 0.0002, + "step": 2963 + }, + { + "epoch": 0.19101630469807307, + "grad_norm": 0.02842256055845789, + "learning_rate": 8.989616899391335e-06, + "loss": 0.0001, + "step": 2964 + }, + { + "epoch": 0.19108075014500225, + "grad_norm": 0.020678844417695182, + "learning_rate": 8.98890082348729e-06, + "loss": 0.0002, + "step": 2965 + }, + { + "epoch": 0.19114519559193144, + "grad_norm": 0.0028593214061483157, + "learning_rate": 8.988184747583245e-06, + "loss": 0.0, + "step": 2966 + }, + { + "epoch": 0.1912096410388606, + "grad_norm": 0.009822979528012175, + "learning_rate": 8.9874686716792e-06, + "loss": 0.0, + "step": 2967 + }, + { + "epoch": 0.19127408648578978, + "grad_norm": 0.022687103734926797, + "learning_rate": 8.986752595775154e-06, + "loss": 0.0001, + "step": 2968 + }, + { + "epoch": 0.19133853193271896, + "grad_norm": 0.06149599999979963, + "learning_rate": 8.986036519871108e-06, + "loss": 0.0002, + "step": 2969 + }, + { + "epoch": 0.19140297737964812, + "grad_norm": 1.2065422771333003, + "learning_rate": 8.98532044396706e-06, + "loss": 0.0034, + "step": 2970 + }, + { + "epoch": 0.1914674228265773, + "grad_norm": 0.018639562538012196, + "learning_rate": 8.984604368063015e-06, + "loss": 0.0, + "step": 2971 + }, + { + "epoch": 0.1915318682735065, + "grad_norm": 0.01474286627896713, + "learning_rate": 8.98388829215897e-06, + "loss": 0.0001, + "step": 2972 + }, + { + "epoch": 0.19159631372043565, + "grad_norm": 1.5631704036642082, + "learning_rate": 8.983172216254924e-06, + "loss": 0.0107, + "step": 2973 + }, + { + "epoch": 0.19166075916736483, + "grad_norm": 0.00574327599457403, + "learning_rate": 8.982456140350878e-06, + "loss": 0.0001, + "step": 2974 + }, + { + "epoch": 0.191725204614294, + "grad_norm": 0.23094776449504884, + "learning_rate": 8.981740064446832e-06, + "loss": 0.0028, + "step": 2975 + }, + { + "epoch": 0.19178965006122317, + "grad_norm": 0.0028454445462574494, + "learning_rate": 8.981023988542787e-06, + "loss": 0.0, + "step": 2976 + }, + { + "epoch": 0.19185409550815236, + "grad_norm": 0.004666714035464535, + "learning_rate": 8.980307912638741e-06, + "loss": 0.0, + "step": 2977 + }, + { + "epoch": 0.1919185409550815, + "grad_norm": 0.004573597119030054, + "learning_rate": 8.979591836734695e-06, + "loss": 0.0, + "step": 2978 + }, + { + "epoch": 0.1919829864020107, + "grad_norm": 0.011798074217064835, + "learning_rate": 8.97887576083065e-06, + "loss": 0.0, + "step": 2979 + }, + { + "epoch": 0.19204743184893988, + "grad_norm": 0.0004367517808790769, + "learning_rate": 8.978159684926602e-06, + "loss": 0.0, + "step": 2980 + }, + { + "epoch": 0.19211187729586904, + "grad_norm": 0.0033874185502569326, + "learning_rate": 8.977443609022556e-06, + "loss": 0.0, + "step": 2981 + }, + { + "epoch": 0.19217632274279822, + "grad_norm": 0.003283052887576775, + "learning_rate": 8.97672753311851e-06, + "loss": 0.0, + "step": 2982 + }, + { + "epoch": 0.1922407681897274, + "grad_norm": 0.19926155789775196, + "learning_rate": 8.976011457214465e-06, + "loss": 0.0006, + "step": 2983 + }, + { + "epoch": 0.19230521363665656, + "grad_norm": 0.004192863160750905, + "learning_rate": 8.97529538131042e-06, + "loss": 0.0, + "step": 2984 + }, + { + "epoch": 0.19236965908358575, + "grad_norm": 0.00110007929077268, + "learning_rate": 8.974579305406374e-06, + "loss": 0.0, + "step": 2985 + }, + { + "epoch": 0.19243410453051493, + "grad_norm": 0.039425082084022116, + "learning_rate": 8.973863229502328e-06, + "loss": 0.0001, + "step": 2986 + }, + { + "epoch": 0.1924985499774441, + "grad_norm": 0.00043550038157316295, + "learning_rate": 8.973147153598282e-06, + "loss": 0.0, + "step": 2987 + }, + { + "epoch": 0.19256299542437327, + "grad_norm": 0.05183727983491678, + "learning_rate": 8.972431077694236e-06, + "loss": 0.0016, + "step": 2988 + }, + { + "epoch": 0.19262744087130243, + "grad_norm": 0.15039922020860932, + "learning_rate": 8.97171500179019e-06, + "loss": 0.0019, + "step": 2989 + }, + { + "epoch": 0.19269188631823161, + "grad_norm": 0.0006762037012585999, + "learning_rate": 8.970998925886145e-06, + "loss": 0.0, + "step": 2990 + }, + { + "epoch": 0.1927563317651608, + "grad_norm": 0.10825269799498864, + "learning_rate": 8.9702828499821e-06, + "loss": 0.0011, + "step": 2991 + }, + { + "epoch": 0.19282077721208996, + "grad_norm": 0.3270196347969351, + "learning_rate": 8.969566774078054e-06, + "loss": 0.0011, + "step": 2992 + }, + { + "epoch": 0.19288522265901914, + "grad_norm": 0.007802911038854596, + "learning_rate": 8.968850698174008e-06, + "loss": 0.0, + "step": 2993 + }, + { + "epoch": 0.19294966810594832, + "grad_norm": 0.0008345035135385656, + "learning_rate": 8.968134622269962e-06, + "loss": 0.0, + "step": 2994 + }, + { + "epoch": 0.19301411355287748, + "grad_norm": 0.08876152359166628, + "learning_rate": 8.967418546365917e-06, + "loss": 0.0001, + "step": 2995 + }, + { + "epoch": 0.19307855899980667, + "grad_norm": 0.1668909055741231, + "learning_rate": 8.96670247046187e-06, + "loss": 0.0005, + "step": 2996 + }, + { + "epoch": 0.19314300444673585, + "grad_norm": 0.060401569099000595, + "learning_rate": 8.965986394557823e-06, + "loss": 0.0003, + "step": 2997 + }, + { + "epoch": 0.193207449893665, + "grad_norm": 0.0018142782884662798, + "learning_rate": 8.965270318653778e-06, + "loss": 0.0, + "step": 2998 + }, + { + "epoch": 0.1932718953405942, + "grad_norm": 0.01917432223078058, + "learning_rate": 8.964554242749732e-06, + "loss": 0.0001, + "step": 2999 + }, + { + "epoch": 0.19333634078752338, + "grad_norm": 0.00013875258473411826, + "learning_rate": 8.963838166845686e-06, + "loss": 0.0, + "step": 3000 + }, + { + "epoch": 0.19340078623445253, + "grad_norm": 0.11576714445206707, + "learning_rate": 8.96312209094164e-06, + "loss": 0.0028, + "step": 3001 + }, + { + "epoch": 0.19346523168138172, + "grad_norm": 0.6482484840760784, + "learning_rate": 8.962406015037595e-06, + "loss": 0.0014, + "step": 3002 + }, + { + "epoch": 0.19352967712831087, + "grad_norm": 0.6482484840760784, + "learning_rate": 8.962406015037595e-06, + "loss": 0.001, + "step": 3003 + }, + { + "epoch": 0.19359412257524006, + "grad_norm": 0.0007944590031801373, + "learning_rate": 8.96168993913355e-06, + "loss": 0.0, + "step": 3004 + }, + { + "epoch": 0.19365856802216924, + "grad_norm": 0.003729115719289283, + "learning_rate": 8.960973863229504e-06, + "loss": 0.0, + "step": 3005 + }, + { + "epoch": 0.1937230134690984, + "grad_norm": 0.0008843086516153837, + "learning_rate": 8.960257787325458e-06, + "loss": 0.0, + "step": 3006 + }, + { + "epoch": 0.19378745891602758, + "grad_norm": 0.21990022366392237, + "learning_rate": 8.95954171142141e-06, + "loss": 0.0021, + "step": 3007 + }, + { + "epoch": 0.19385190436295677, + "grad_norm": 0.15541553592994967, + "learning_rate": 8.958825635517365e-06, + "loss": 0.0018, + "step": 3008 + }, + { + "epoch": 0.19391634980988592, + "grad_norm": 0.02511120376754912, + "learning_rate": 8.958109559613319e-06, + "loss": 0.0001, + "step": 3009 + }, + { + "epoch": 0.1939807952568151, + "grad_norm": 0.001173436761283878, + "learning_rate": 8.957393483709273e-06, + "loss": 0.0, + "step": 3010 + }, + { + "epoch": 0.1940452407037443, + "grad_norm": 0.014118849072544383, + "learning_rate": 8.956677407805228e-06, + "loss": 0.0001, + "step": 3011 + }, + { + "epoch": 0.19410968615067345, + "grad_norm": 0.02563359621196255, + "learning_rate": 8.955961331901182e-06, + "loss": 0.0015, + "step": 3012 + }, + { + "epoch": 0.19417413159760263, + "grad_norm": 0.0006445896627271484, + "learning_rate": 8.955245255997136e-06, + "loss": 0.0, + "step": 3013 + }, + { + "epoch": 0.1942385770445318, + "grad_norm": 0.008495704221291287, + "learning_rate": 8.95452918009309e-06, + "loss": 0.0001, + "step": 3014 + }, + { + "epoch": 0.19430302249146097, + "grad_norm": 0.9487614727206138, + "learning_rate": 8.953813104189045e-06, + "loss": 0.0081, + "step": 3015 + }, + { + "epoch": 0.19436746793839016, + "grad_norm": 0.0034573415451793093, + "learning_rate": 8.953097028285e-06, + "loss": 0.0001, + "step": 3016 + }, + { + "epoch": 0.19443191338531932, + "grad_norm": 0.024194100837835554, + "learning_rate": 8.952380952380953e-06, + "loss": 0.0002, + "step": 3017 + }, + { + "epoch": 0.1944963588322485, + "grad_norm": 0.08266217977115524, + "learning_rate": 8.951664876476908e-06, + "loss": 0.0002, + "step": 3018 + }, + { + "epoch": 0.19456080427917768, + "grad_norm": 0.05253149672988689, + "learning_rate": 8.950948800572862e-06, + "loss": 0.0001, + "step": 3019 + }, + { + "epoch": 0.19462524972610684, + "grad_norm": 0.26598516057325405, + "learning_rate": 8.950232724668816e-06, + "loss": 0.0012, + "step": 3020 + }, + { + "epoch": 0.19468969517303603, + "grad_norm": 0.001171713922511211, + "learning_rate": 8.94951664876477e-06, + "loss": 0.0, + "step": 3021 + }, + { + "epoch": 0.1947541406199652, + "grad_norm": 0.006140044099780373, + "learning_rate": 8.948800572860725e-06, + "loss": 0.0, + "step": 3022 + }, + { + "epoch": 0.19481858606689437, + "grad_norm": 0.17568814673045194, + "learning_rate": 8.948084496956678e-06, + "loss": 0.0008, + "step": 3023 + }, + { + "epoch": 0.19488303151382355, + "grad_norm": 0.0013122715834712388, + "learning_rate": 8.947368421052632e-06, + "loss": 0.0, + "step": 3024 + }, + { + "epoch": 0.19494747696075274, + "grad_norm": 0.004215928276184871, + "learning_rate": 8.946652345148586e-06, + "loss": 0.0, + "step": 3025 + }, + { + "epoch": 0.1950119224076819, + "grad_norm": 0.17655713718844582, + "learning_rate": 8.94593626924454e-06, + "loss": 0.0013, + "step": 3026 + }, + { + "epoch": 0.19507636785461108, + "grad_norm": 0.0023065439930651447, + "learning_rate": 8.945220193340495e-06, + "loss": 0.0, + "step": 3027 + }, + { + "epoch": 0.19514081330154023, + "grad_norm": 0.00540466461391632, + "learning_rate": 8.944504117436449e-06, + "loss": 0.0, + "step": 3028 + }, + { + "epoch": 0.19520525874846942, + "grad_norm": 0.0007297223900245063, + "learning_rate": 8.943788041532403e-06, + "loss": 0.0, + "step": 3029 + }, + { + "epoch": 0.1952697041953986, + "grad_norm": 0.05844247682053747, + "learning_rate": 8.943071965628358e-06, + "loss": 0.0003, + "step": 3030 + }, + { + "epoch": 0.19533414964232776, + "grad_norm": 0.31681117297173256, + "learning_rate": 8.942355889724312e-06, + "loss": 0.001, + "step": 3031 + }, + { + "epoch": 0.19539859508925694, + "grad_norm": 0.28899940884754083, + "learning_rate": 8.941639813820265e-06, + "loss": 0.0007, + "step": 3032 + }, + { + "epoch": 0.19546304053618613, + "grad_norm": 0.0033367080062562983, + "learning_rate": 8.940923737916219e-06, + "loss": 0.0, + "step": 3033 + }, + { + "epoch": 0.19552748598311528, + "grad_norm": 0.09525066093311885, + "learning_rate": 8.940207662012173e-06, + "loss": 0.0007, + "step": 3034 + }, + { + "epoch": 0.19559193143004447, + "grad_norm": 0.00012517123703986328, + "learning_rate": 8.939491586108127e-06, + "loss": 0.0, + "step": 3035 + }, + { + "epoch": 0.19565637687697365, + "grad_norm": 0.05815377551468865, + "learning_rate": 8.938775510204082e-06, + "loss": 0.0003, + "step": 3036 + }, + { + "epoch": 0.1957208223239028, + "grad_norm": 0.0007073388646762897, + "learning_rate": 8.938059434300038e-06, + "loss": 0.0, + "step": 3037 + }, + { + "epoch": 0.195785267770832, + "grad_norm": 0.015940901006851475, + "learning_rate": 8.937343358395992e-06, + "loss": 0.0001, + "step": 3038 + }, + { + "epoch": 0.19584971321776118, + "grad_norm": 0.0006422838104322162, + "learning_rate": 8.936627282491945e-06, + "loss": 0.0, + "step": 3039 + }, + { + "epoch": 0.19591415866469034, + "grad_norm": 0.010320363651081372, + "learning_rate": 8.935911206587899e-06, + "loss": 0.0, + "step": 3040 + }, + { + "epoch": 0.19597860411161952, + "grad_norm": 0.016090234798197458, + "learning_rate": 8.935195130683853e-06, + "loss": 0.0001, + "step": 3041 + }, + { + "epoch": 0.19604304955854868, + "grad_norm": 0.35970697411549507, + "learning_rate": 8.934479054779808e-06, + "loss": 0.0013, + "step": 3042 + }, + { + "epoch": 0.19610749500547786, + "grad_norm": 0.0001557431387520329, + "learning_rate": 8.933762978875762e-06, + "loss": 0.0, + "step": 3043 + }, + { + "epoch": 0.19617194045240705, + "grad_norm": 0.001276511107407491, + "learning_rate": 8.933046902971716e-06, + "loss": 0.0, + "step": 3044 + }, + { + "epoch": 0.1962363858993362, + "grad_norm": 0.0005064007813871364, + "learning_rate": 8.93233082706767e-06, + "loss": 0.0, + "step": 3045 + }, + { + "epoch": 0.1963008313462654, + "grad_norm": 0.006093401575577427, + "learning_rate": 8.931614751163625e-06, + "loss": 0.0001, + "step": 3046 + }, + { + "epoch": 0.19636527679319457, + "grad_norm": 0.11769669691171146, + "learning_rate": 8.930898675259579e-06, + "loss": 0.0003, + "step": 3047 + }, + { + "epoch": 0.19642972224012373, + "grad_norm": 0.0010719782400106074, + "learning_rate": 8.930182599355532e-06, + "loss": 0.0, + "step": 3048 + }, + { + "epoch": 0.1964941676870529, + "grad_norm": 0.0005714457417295161, + "learning_rate": 8.929466523451486e-06, + "loss": 0.0, + "step": 3049 + }, + { + "epoch": 0.1965586131339821, + "grad_norm": 0.4581438037557388, + "learning_rate": 8.92875044754744e-06, + "loss": 0.0014, + "step": 3050 + }, + { + "epoch": 0.19662305858091125, + "grad_norm": 0.0068193888884800335, + "learning_rate": 8.928034371643395e-06, + "loss": 0.0001, + "step": 3051 + }, + { + "epoch": 0.19668750402784044, + "grad_norm": 0.02994317793845591, + "learning_rate": 8.927318295739349e-06, + "loss": 0.0, + "step": 3052 + }, + { + "epoch": 0.1967519494747696, + "grad_norm": 0.014345473768226343, + "learning_rate": 8.926602219835303e-06, + "loss": 0.0001, + "step": 3053 + }, + { + "epoch": 0.19681639492169878, + "grad_norm": 0.0005049755597789164, + "learning_rate": 8.925886143931257e-06, + "loss": 0.0, + "step": 3054 + }, + { + "epoch": 0.19688084036862796, + "grad_norm": 0.2326998450872154, + "learning_rate": 8.925170068027212e-06, + "loss": 0.0002, + "step": 3055 + }, + { + "epoch": 0.19694528581555712, + "grad_norm": 0.2033664753568645, + "learning_rate": 8.924453992123166e-06, + "loss": 0.0006, + "step": 3056 + }, + { + "epoch": 0.1970097312624863, + "grad_norm": 0.06406544280271197, + "learning_rate": 8.92373791621912e-06, + "loss": 0.0002, + "step": 3057 + }, + { + "epoch": 0.1970741767094155, + "grad_norm": 0.01635489248533111, + "learning_rate": 8.923021840315073e-06, + "loss": 0.0001, + "step": 3058 + }, + { + "epoch": 0.19713862215634465, + "grad_norm": 0.0020399615266398115, + "learning_rate": 8.922305764411027e-06, + "loss": 0.0, + "step": 3059 + }, + { + "epoch": 0.19720306760327383, + "grad_norm": 0.007834388015339783, + "learning_rate": 8.921589688506982e-06, + "loss": 0.0, + "step": 3060 + }, + { + "epoch": 0.197267513050203, + "grad_norm": 0.0030011656472259296, + "learning_rate": 8.920873612602938e-06, + "loss": 0.0, + "step": 3061 + }, + { + "epoch": 0.19733195849713217, + "grad_norm": 0.010005728413372437, + "learning_rate": 8.920157536698892e-06, + "loss": 0.0001, + "step": 3062 + }, + { + "epoch": 0.19739640394406135, + "grad_norm": 0.0002145815971270866, + "learning_rate": 8.919441460794846e-06, + "loss": 0.0, + "step": 3063 + }, + { + "epoch": 0.19746084939099054, + "grad_norm": 0.11205250164730646, + "learning_rate": 8.918725384890799e-06, + "loss": 0.0019, + "step": 3064 + }, + { + "epoch": 0.1975252948379197, + "grad_norm": 0.02060544100981016, + "learning_rate": 8.918009308986753e-06, + "loss": 0.0, + "step": 3065 + }, + { + "epoch": 0.19758974028484888, + "grad_norm": 0.001616753284672874, + "learning_rate": 8.917293233082707e-06, + "loss": 0.0, + "step": 3066 + }, + { + "epoch": 0.19765418573177804, + "grad_norm": 0.005112549724463105, + "learning_rate": 8.916577157178662e-06, + "loss": 0.0, + "step": 3067 + }, + { + "epoch": 0.19771863117870722, + "grad_norm": 0.003087953257347784, + "learning_rate": 8.915861081274616e-06, + "loss": 0.0, + "step": 3068 + }, + { + "epoch": 0.1977830766256364, + "grad_norm": 0.012052220127453446, + "learning_rate": 8.91514500537057e-06, + "loss": 0.0001, + "step": 3069 + }, + { + "epoch": 0.19784752207256556, + "grad_norm": 0.0009411924538636635, + "learning_rate": 8.914428929466525e-06, + "loss": 0.0, + "step": 3070 + }, + { + "epoch": 0.19791196751949475, + "grad_norm": 0.018949288941459586, + "learning_rate": 8.913712853562479e-06, + "loss": 0.0002, + "step": 3071 + }, + { + "epoch": 0.19797641296642393, + "grad_norm": 0.011494391843931702, + "learning_rate": 8.912996777658433e-06, + "loss": 0.0, + "step": 3072 + }, + { + "epoch": 0.1980408584133531, + "grad_norm": 0.0031116002090642793, + "learning_rate": 8.912280701754387e-06, + "loss": 0.0, + "step": 3073 + }, + { + "epoch": 0.19810530386028227, + "grad_norm": 0.022021763285966188, + "learning_rate": 8.91156462585034e-06, + "loss": 0.0001, + "step": 3074 + }, + { + "epoch": 0.19816974930721146, + "grad_norm": 0.007751726489644879, + "learning_rate": 8.910848549946294e-06, + "loss": 0.0, + "step": 3075 + }, + { + "epoch": 0.1982341947541406, + "grad_norm": 0.0032938459913306323, + "learning_rate": 8.910132474042249e-06, + "loss": 0.0, + "step": 3076 + }, + { + "epoch": 0.1982986402010698, + "grad_norm": 0.026446706021640256, + "learning_rate": 8.909416398138203e-06, + "loss": 0.0003, + "step": 3077 + }, + { + "epoch": 0.19836308564799898, + "grad_norm": 0.0027147322304744165, + "learning_rate": 8.908700322234157e-06, + "loss": 0.0, + "step": 3078 + }, + { + "epoch": 0.19842753109492814, + "grad_norm": 0.03446864700507342, + "learning_rate": 8.907984246330112e-06, + "loss": 0.0, + "step": 3079 + }, + { + "epoch": 0.19849197654185732, + "grad_norm": 0.002107508255067804, + "learning_rate": 8.907268170426066e-06, + "loss": 0.0, + "step": 3080 + }, + { + "epoch": 0.19855642198878648, + "grad_norm": 0.0015772077216705193, + "learning_rate": 8.90655209452202e-06, + "loss": 0.0, + "step": 3081 + }, + { + "epoch": 0.19862086743571566, + "grad_norm": 0.0014874691914585742, + "learning_rate": 8.905836018617974e-06, + "loss": 0.0, + "step": 3082 + }, + { + "epoch": 0.19868531288264485, + "grad_norm": 0.0007431047392924327, + "learning_rate": 8.905119942713929e-06, + "loss": 0.0, + "step": 3083 + }, + { + "epoch": 0.198749758329574, + "grad_norm": 0.0018363159310733638, + "learning_rate": 8.904403866809883e-06, + "loss": 0.0, + "step": 3084 + }, + { + "epoch": 0.1988142037765032, + "grad_norm": 0.0013460365521944676, + "learning_rate": 8.903687790905837e-06, + "loss": 0.0, + "step": 3085 + }, + { + "epoch": 0.19887864922343237, + "grad_norm": 0.15260297166684111, + "learning_rate": 8.902971715001792e-06, + "loss": 0.0019, + "step": 3086 + }, + { + "epoch": 0.19894309467036153, + "grad_norm": 0.005164714457936164, + "learning_rate": 8.902255639097746e-06, + "loss": 0.0, + "step": 3087 + }, + { + "epoch": 0.19900754011729072, + "grad_norm": 0.005644038945726389, + "learning_rate": 8.9015395631937e-06, + "loss": 0.0, + "step": 3088 + }, + { + "epoch": 0.1990719855642199, + "grad_norm": 0.05408689312074812, + "learning_rate": 8.900823487289655e-06, + "loss": 0.0005, + "step": 3089 + }, + { + "epoch": 0.19913643101114906, + "grad_norm": 0.016265788623832402, + "learning_rate": 8.900107411385607e-06, + "loss": 0.0, + "step": 3090 + }, + { + "epoch": 0.19920087645807824, + "grad_norm": 0.2519519865025074, + "learning_rate": 8.899391335481561e-06, + "loss": 0.0026, + "step": 3091 + }, + { + "epoch": 0.1992653219050074, + "grad_norm": 0.019179351740819434, + "learning_rate": 8.898675259577516e-06, + "loss": 0.0003, + "step": 3092 + }, + { + "epoch": 0.19932976735193658, + "grad_norm": 0.00027986388418838086, + "learning_rate": 8.89795918367347e-06, + "loss": 0.0, + "step": 3093 + }, + { + "epoch": 0.19939421279886577, + "grad_norm": 0.015353708675322387, + "learning_rate": 8.897243107769424e-06, + "loss": 0.0, + "step": 3094 + }, + { + "epoch": 0.19945865824579492, + "grad_norm": 0.0026218758351446818, + "learning_rate": 8.896527031865379e-06, + "loss": 0.0, + "step": 3095 + }, + { + "epoch": 0.1995231036927241, + "grad_norm": 0.0033979739761200935, + "learning_rate": 8.895810955961333e-06, + "loss": 0.0, + "step": 3096 + }, + { + "epoch": 0.1995875491396533, + "grad_norm": 0.18854867217886892, + "learning_rate": 8.895094880057287e-06, + "loss": 0.0007, + "step": 3097 + }, + { + "epoch": 0.19965199458658245, + "grad_norm": 0.20573892851899744, + "learning_rate": 8.894378804153241e-06, + "loss": 0.0006, + "step": 3098 + }, + { + "epoch": 0.19971644003351163, + "grad_norm": 0.497828072749145, + "learning_rate": 8.893662728249196e-06, + "loss": 0.0014, + "step": 3099 + }, + { + "epoch": 0.19978088548044082, + "grad_norm": 0.00453579094805275, + "learning_rate": 8.892946652345148e-06, + "loss": 0.0, + "step": 3100 + }, + { + "epoch": 0.19984533092736997, + "grad_norm": 0.005429255738304247, + "learning_rate": 8.892230576441103e-06, + "loss": 0.0, + "step": 3101 + }, + { + "epoch": 0.19990977637429916, + "grad_norm": 0.0028107191266686154, + "learning_rate": 8.891514500537057e-06, + "loss": 0.0, + "step": 3102 + }, + { + "epoch": 0.19997422182122834, + "grad_norm": 0.001090017515992669, + "learning_rate": 8.890798424633011e-06, + "loss": 0.0, + "step": 3103 + }, + { + "epoch": 0.2000386672681575, + "grad_norm": 0.005462728398524046, + "learning_rate": 8.890082348728966e-06, + "loss": 0.0001, + "step": 3104 + }, + { + "epoch": 0.20010311271508668, + "grad_norm": 0.21584340658087367, + "learning_rate": 8.88936627282492e-06, + "loss": 0.0011, + "step": 3105 + }, + { + "epoch": 0.20016755816201584, + "grad_norm": 0.026409045302834108, + "learning_rate": 8.888650196920874e-06, + "loss": 0.0001, + "step": 3106 + }, + { + "epoch": 0.20023200360894503, + "grad_norm": 0.00022268280929360672, + "learning_rate": 8.887934121016828e-06, + "loss": 0.0, + "step": 3107 + }, + { + "epoch": 0.2002964490558742, + "grad_norm": 0.7561233808588724, + "learning_rate": 8.887218045112783e-06, + "loss": 0.0056, + "step": 3108 + }, + { + "epoch": 0.20036089450280337, + "grad_norm": 0.0008315596068303258, + "learning_rate": 8.886501969208737e-06, + "loss": 0.0, + "step": 3109 + }, + { + "epoch": 0.20042533994973255, + "grad_norm": 0.03906717868442174, + "learning_rate": 8.885785893304691e-06, + "loss": 0.0001, + "step": 3110 + }, + { + "epoch": 0.20048978539666173, + "grad_norm": 0.004649140480185962, + "learning_rate": 8.885069817400646e-06, + "loss": 0.0, + "step": 3111 + }, + { + "epoch": 0.2005542308435909, + "grad_norm": 7.197497173441826e-05, + "learning_rate": 8.8843537414966e-06, + "loss": 0.0, + "step": 3112 + }, + { + "epoch": 0.20061867629052008, + "grad_norm": 0.01971261560067173, + "learning_rate": 8.883637665592554e-06, + "loss": 0.0, + "step": 3113 + }, + { + "epoch": 0.20068312173744926, + "grad_norm": 0.008505374211518214, + "learning_rate": 8.882921589688509e-06, + "loss": 0.0, + "step": 3114 + }, + { + "epoch": 0.20074756718437842, + "grad_norm": 0.0005514684611620498, + "learning_rate": 8.882205513784463e-06, + "loss": 0.0, + "step": 3115 + }, + { + "epoch": 0.2008120126313076, + "grad_norm": 0.3043897714692487, + "learning_rate": 8.881489437880415e-06, + "loss": 0.0014, + "step": 3116 + }, + { + "epoch": 0.20087645807823679, + "grad_norm": 0.0006227491004178154, + "learning_rate": 8.88077336197637e-06, + "loss": 0.0, + "step": 3117 + }, + { + "epoch": 0.20094090352516594, + "grad_norm": 0.2678906919268471, + "learning_rate": 8.880057286072324e-06, + "loss": 0.0005, + "step": 3118 + }, + { + "epoch": 0.20100534897209513, + "grad_norm": 0.015165462236630976, + "learning_rate": 8.879341210168278e-06, + "loss": 0.0, + "step": 3119 + }, + { + "epoch": 0.20106979441902428, + "grad_norm": 2.0520360976719396, + "learning_rate": 8.878625134264233e-06, + "loss": 0.0165, + "step": 3120 + }, + { + "epoch": 0.20113423986595347, + "grad_norm": 0.35805703099009856, + "learning_rate": 8.877909058360187e-06, + "loss": 0.0012, + "step": 3121 + }, + { + "epoch": 0.20119868531288265, + "grad_norm": 0.17738543831285275, + "learning_rate": 8.877192982456141e-06, + "loss": 0.0019, + "step": 3122 + }, + { + "epoch": 0.2012631307598118, + "grad_norm": 0.00017845454458807985, + "learning_rate": 8.876476906552096e-06, + "loss": 0.0, + "step": 3123 + }, + { + "epoch": 0.201327576206741, + "grad_norm": 0.1073608615304861, + "learning_rate": 8.87576083064805e-06, + "loss": 0.0004, + "step": 3124 + }, + { + "epoch": 0.20139202165367018, + "grad_norm": 0.0004145412500641842, + "learning_rate": 8.875044754744002e-06, + "loss": 0.0, + "step": 3125 + }, + { + "epoch": 0.20145646710059933, + "grad_norm": 0.003809234403064291, + "learning_rate": 8.874328678839957e-06, + "loss": 0.0001, + "step": 3126 + }, + { + "epoch": 0.20152091254752852, + "grad_norm": 0.00033783976806183987, + "learning_rate": 8.873612602935911e-06, + "loss": 0.0, + "step": 3127 + }, + { + "epoch": 0.2015853579944577, + "grad_norm": 0.013849292965756455, + "learning_rate": 8.872896527031865e-06, + "loss": 0.0, + "step": 3128 + }, + { + "epoch": 0.20164980344138686, + "grad_norm": 0.045313850949965165, + "learning_rate": 8.87218045112782e-06, + "loss": 0.0002, + "step": 3129 + }, + { + "epoch": 0.20171424888831604, + "grad_norm": 0.236568006038209, + "learning_rate": 8.871464375223774e-06, + "loss": 0.0003, + "step": 3130 + }, + { + "epoch": 0.2017786943352452, + "grad_norm": 0.0038200420844738477, + "learning_rate": 8.87074829931973e-06, + "loss": 0.0, + "step": 3131 + }, + { + "epoch": 0.20184313978217439, + "grad_norm": 0.014478036074284993, + "learning_rate": 8.870032223415683e-06, + "loss": 0.0001, + "step": 3132 + }, + { + "epoch": 0.20190758522910357, + "grad_norm": 0.015151909350715245, + "learning_rate": 8.869316147511637e-06, + "loss": 0.0, + "step": 3133 + }, + { + "epoch": 0.20197203067603273, + "grad_norm": 0.009762932361485831, + "learning_rate": 8.868600071607591e-06, + "loss": 0.0001, + "step": 3134 + }, + { + "epoch": 0.2020364761229619, + "grad_norm": 0.00021053239735446382, + "learning_rate": 8.867883995703545e-06, + "loss": 0.0, + "step": 3135 + }, + { + "epoch": 0.2021009215698911, + "grad_norm": 0.44029179548069913, + "learning_rate": 8.8671679197995e-06, + "loss": 0.0016, + "step": 3136 + }, + { + "epoch": 0.20216536701682025, + "grad_norm": 0.0019125566904534606, + "learning_rate": 8.866451843895454e-06, + "loss": 0.0, + "step": 3137 + }, + { + "epoch": 0.20222981246374944, + "grad_norm": 0.0030594121784513393, + "learning_rate": 8.865735767991408e-06, + "loss": 0.0, + "step": 3138 + }, + { + "epoch": 0.20229425791067862, + "grad_norm": 0.002322376722079305, + "learning_rate": 8.865019692087363e-06, + "loss": 0.0, + "step": 3139 + }, + { + "epoch": 0.20235870335760778, + "grad_norm": 0.025830919381848335, + "learning_rate": 8.864303616183317e-06, + "loss": 0.0001, + "step": 3140 + }, + { + "epoch": 0.20242314880453696, + "grad_norm": 0.004267439275289735, + "learning_rate": 8.86358754027927e-06, + "loss": 0.0, + "step": 3141 + }, + { + "epoch": 0.20248759425146615, + "grad_norm": 0.0005659775357358226, + "learning_rate": 8.862871464375224e-06, + "loss": 0.0, + "step": 3142 + }, + { + "epoch": 0.2025520396983953, + "grad_norm": 0.03815054914874814, + "learning_rate": 8.862155388471178e-06, + "loss": 0.0003, + "step": 3143 + }, + { + "epoch": 0.2026164851453245, + "grad_norm": 0.3565567469487967, + "learning_rate": 8.861439312567132e-06, + "loss": 0.0031, + "step": 3144 + }, + { + "epoch": 0.20268093059225364, + "grad_norm": 0.040210932144915466, + "learning_rate": 8.860723236663087e-06, + "loss": 0.0002, + "step": 3145 + }, + { + "epoch": 0.20274537603918283, + "grad_norm": 0.41041854464235217, + "learning_rate": 8.860007160759041e-06, + "loss": 0.0008, + "step": 3146 + }, + { + "epoch": 0.202809821486112, + "grad_norm": 0.25541055057218365, + "learning_rate": 8.859291084854995e-06, + "loss": 0.001, + "step": 3147 + }, + { + "epoch": 0.20287426693304117, + "grad_norm": 0.010501430279528583, + "learning_rate": 8.85857500895095e-06, + "loss": 0.0001, + "step": 3148 + }, + { + "epoch": 0.20293871237997035, + "grad_norm": 0.003696393179009786, + "learning_rate": 8.857858933046904e-06, + "loss": 0.0, + "step": 3149 + }, + { + "epoch": 0.20300315782689954, + "grad_norm": 0.003631618092946609, + "learning_rate": 8.857142857142858e-06, + "loss": 0.0, + "step": 3150 + }, + { + "epoch": 0.2030676032738287, + "grad_norm": 0.00258595354685201, + "learning_rate": 8.856426781238811e-06, + "loss": 0.0, + "step": 3151 + }, + { + "epoch": 0.20313204872075788, + "grad_norm": 0.0008821024753706213, + "learning_rate": 8.855710705334765e-06, + "loss": 0.0, + "step": 3152 + }, + { + "epoch": 0.20319649416768706, + "grad_norm": 0.3704561075782924, + "learning_rate": 8.85499462943072e-06, + "loss": 0.0043, + "step": 3153 + }, + { + "epoch": 0.20326093961461622, + "grad_norm": 0.014684063583619683, + "learning_rate": 8.854278553526675e-06, + "loss": 0.0, + "step": 3154 + }, + { + "epoch": 0.2033253850615454, + "grad_norm": 0.006794537516212787, + "learning_rate": 8.85356247762263e-06, + "loss": 0.0, + "step": 3155 + }, + { + "epoch": 0.2033898305084746, + "grad_norm": 1.0445839023372976, + "learning_rate": 8.852846401718584e-06, + "loss": 0.0022, + "step": 3156 + }, + { + "epoch": 0.20345427595540375, + "grad_norm": 0.004375746475512265, + "learning_rate": 8.852130325814537e-06, + "loss": 0.0, + "step": 3157 + }, + { + "epoch": 0.20351872140233293, + "grad_norm": 0.15032337592505507, + "learning_rate": 8.851414249910491e-06, + "loss": 0.0002, + "step": 3158 + }, + { + "epoch": 0.2035831668492621, + "grad_norm": 0.0040612406359037175, + "learning_rate": 8.850698174006445e-06, + "loss": 0.0, + "step": 3159 + }, + { + "epoch": 0.20364761229619127, + "grad_norm": 0.0018185464603422453, + "learning_rate": 8.8499820981024e-06, + "loss": 0.0, + "step": 3160 + }, + { + "epoch": 0.20371205774312046, + "grad_norm": 0.11170526669906726, + "learning_rate": 8.849266022198354e-06, + "loss": 0.0005, + "step": 3161 + }, + { + "epoch": 0.2037765031900496, + "grad_norm": 0.1659282005347964, + "learning_rate": 8.848549946294308e-06, + "loss": 0.0016, + "step": 3162 + }, + { + "epoch": 0.2038409486369788, + "grad_norm": 0.016492687346802256, + "learning_rate": 8.847833870390262e-06, + "loss": 0.0001, + "step": 3163 + }, + { + "epoch": 0.20390539408390798, + "grad_norm": 0.9390481563748461, + "learning_rate": 8.847117794486217e-06, + "loss": 0.0031, + "step": 3164 + }, + { + "epoch": 0.20396983953083714, + "grad_norm": 0.0022533603744609204, + "learning_rate": 8.846401718582171e-06, + "loss": 0.0, + "step": 3165 + }, + { + "epoch": 0.20403428497776632, + "grad_norm": 0.07041904205568279, + "learning_rate": 8.845685642678125e-06, + "loss": 0.0001, + "step": 3166 + }, + { + "epoch": 0.2040987304246955, + "grad_norm": 0.2668981613989854, + "learning_rate": 8.844969566774078e-06, + "loss": 0.0052, + "step": 3167 + }, + { + "epoch": 0.20416317587162466, + "grad_norm": 0.027716524548335512, + "learning_rate": 8.844253490870032e-06, + "loss": 0.0, + "step": 3168 + }, + { + "epoch": 0.20422762131855385, + "grad_norm": 0.003601240274471943, + "learning_rate": 8.843537414965987e-06, + "loss": 0.0, + "step": 3169 + }, + { + "epoch": 0.20429206676548303, + "grad_norm": 0.012017656279619291, + "learning_rate": 8.84282133906194e-06, + "loss": 0.0, + "step": 3170 + }, + { + "epoch": 0.2043565122124122, + "grad_norm": 0.02095705244991158, + "learning_rate": 8.842105263157895e-06, + "loss": 0.0, + "step": 3171 + }, + { + "epoch": 0.20442095765934137, + "grad_norm": 0.040138181546046925, + "learning_rate": 8.84138918725385e-06, + "loss": 0.0, + "step": 3172 + }, + { + "epoch": 0.20448540310627053, + "grad_norm": 0.009901321405853671, + "learning_rate": 8.840673111349804e-06, + "loss": 0.0, + "step": 3173 + }, + { + "epoch": 0.20454984855319971, + "grad_norm": 0.007040968629763532, + "learning_rate": 8.839957035445758e-06, + "loss": 0.0, + "step": 3174 + }, + { + "epoch": 0.2046142940001289, + "grad_norm": 0.014848915765067815, + "learning_rate": 8.839240959541712e-06, + "loss": 0.0, + "step": 3175 + }, + { + "epoch": 0.20467873944705806, + "grad_norm": 0.23642315018145912, + "learning_rate": 8.838524883637667e-06, + "loss": 0.0008, + "step": 3176 + }, + { + "epoch": 0.20474318489398724, + "grad_norm": 0.0029172454636086998, + "learning_rate": 8.83780880773362e-06, + "loss": 0.0, + "step": 3177 + }, + { + "epoch": 0.20480763034091642, + "grad_norm": 0.10179572652957916, + "learning_rate": 8.837092731829575e-06, + "loss": 0.0002, + "step": 3178 + }, + { + "epoch": 0.20487207578784558, + "grad_norm": 0.24513314487750718, + "learning_rate": 8.83637665592553e-06, + "loss": 0.0041, + "step": 3179 + }, + { + "epoch": 0.20493652123477477, + "grad_norm": 0.012036311721826174, + "learning_rate": 8.835660580021484e-06, + "loss": 0.0, + "step": 3180 + }, + { + "epoch": 0.20500096668170395, + "grad_norm": 0.2696918479020835, + "learning_rate": 8.834944504117438e-06, + "loss": 0.0005, + "step": 3181 + }, + { + "epoch": 0.2050654121286331, + "grad_norm": 0.0003936673405125266, + "learning_rate": 8.834228428213392e-06, + "loss": 0.0, + "step": 3182 + }, + { + "epoch": 0.2051298575755623, + "grad_norm": 0.058986318316246796, + "learning_rate": 8.833512352309345e-06, + "loss": 0.0, + "step": 3183 + }, + { + "epoch": 0.20519430302249145, + "grad_norm": 0.13323453158189, + "learning_rate": 8.8327962764053e-06, + "loss": 0.0013, + "step": 3184 + }, + { + "epoch": 0.20525874846942063, + "grad_norm": 1.0706027674551744, + "learning_rate": 8.832080200501254e-06, + "loss": 0.0052, + "step": 3185 + }, + { + "epoch": 0.20532319391634982, + "grad_norm": 0.04365196867916086, + "learning_rate": 8.831364124597208e-06, + "loss": 0.0001, + "step": 3186 + }, + { + "epoch": 0.20538763936327897, + "grad_norm": 0.00400724211170088, + "learning_rate": 8.830648048693162e-06, + "loss": 0.0, + "step": 3187 + }, + { + "epoch": 0.20545208481020816, + "grad_norm": 0.25079876728614287, + "learning_rate": 8.829931972789117e-06, + "loss": 0.0051, + "step": 3188 + }, + { + "epoch": 0.20551653025713734, + "grad_norm": 0.6116119554626651, + "learning_rate": 8.82921589688507e-06, + "loss": 0.002, + "step": 3189 + }, + { + "epoch": 0.2055809757040665, + "grad_norm": 0.9003768555752997, + "learning_rate": 8.828499820981025e-06, + "loss": 0.0081, + "step": 3190 + }, + { + "epoch": 0.20564542115099568, + "grad_norm": 0.14866784067739025, + "learning_rate": 8.82778374507698e-06, + "loss": 0.0005, + "step": 3191 + }, + { + "epoch": 0.20570986659792487, + "grad_norm": 0.004681531754656857, + "learning_rate": 8.827067669172934e-06, + "loss": 0.0001, + "step": 3192 + }, + { + "epoch": 0.20577431204485402, + "grad_norm": 0.0016474154499777846, + "learning_rate": 8.826351593268886e-06, + "loss": 0.0, + "step": 3193 + }, + { + "epoch": 0.2058387574917832, + "grad_norm": 0.7216092334678865, + "learning_rate": 8.82563551736484e-06, + "loss": 0.0047, + "step": 3194 + }, + { + "epoch": 0.2059032029387124, + "grad_norm": 0.046842534027833414, + "learning_rate": 8.824919441460795e-06, + "loss": 0.0, + "step": 3195 + }, + { + "epoch": 0.20596764838564155, + "grad_norm": 0.008319201874511975, + "learning_rate": 8.82420336555675e-06, + "loss": 0.0, + "step": 3196 + }, + { + "epoch": 0.20603209383257073, + "grad_norm": 0.07627724121336131, + "learning_rate": 8.823487289652704e-06, + "loss": 0.0001, + "step": 3197 + }, + { + "epoch": 0.2060965392794999, + "grad_norm": 0.04090758410395465, + "learning_rate": 8.822771213748658e-06, + "loss": 0.0001, + "step": 3198 + }, + { + "epoch": 0.20616098472642908, + "grad_norm": 0.004577226024324245, + "learning_rate": 8.822055137844612e-06, + "loss": 0.0, + "step": 3199 + }, + { + "epoch": 0.20622543017335826, + "grad_norm": 0.5695110071506763, + "learning_rate": 8.821339061940566e-06, + "loss": 0.0015, + "step": 3200 + }, + { + "epoch": 0.20628987562028742, + "grad_norm": 0.20783832970335034, + "learning_rate": 8.82062298603652e-06, + "loss": 0.0007, + "step": 3201 + }, + { + "epoch": 0.2063543210672166, + "grad_norm": 0.07589570440082365, + "learning_rate": 8.819906910132475e-06, + "loss": 0.0003, + "step": 3202 + }, + { + "epoch": 0.20641876651414579, + "grad_norm": 0.025595976228796117, + "learning_rate": 8.81919083422843e-06, + "loss": 0.0001, + "step": 3203 + }, + { + "epoch": 0.20648321196107494, + "grad_norm": 0.008649726581106711, + "learning_rate": 8.818474758324384e-06, + "loss": 0.0001, + "step": 3204 + }, + { + "epoch": 0.20654765740800413, + "grad_norm": 0.019401712300670126, + "learning_rate": 8.817758682420338e-06, + "loss": 0.0001, + "step": 3205 + }, + { + "epoch": 0.2066121028549333, + "grad_norm": 0.018649523956299447, + "learning_rate": 8.817042606516292e-06, + "loss": 0.0, + "step": 3206 + }, + { + "epoch": 0.20667654830186247, + "grad_norm": 0.004175074407233036, + "learning_rate": 8.816326530612247e-06, + "loss": 0.0, + "step": 3207 + }, + { + "epoch": 0.20674099374879165, + "grad_norm": 0.001537868439082998, + "learning_rate": 8.8156104547082e-06, + "loss": 0.0, + "step": 3208 + }, + { + "epoch": 0.20680543919572084, + "grad_norm": 0.004367193650683198, + "learning_rate": 8.814894378804153e-06, + "loss": 0.0, + "step": 3209 + }, + { + "epoch": 0.20686988464265, + "grad_norm": 0.0008591282411366313, + "learning_rate": 8.814178302900108e-06, + "loss": 0.0, + "step": 3210 + }, + { + "epoch": 0.20693433008957918, + "grad_norm": 0.029196901412854644, + "learning_rate": 8.813462226996062e-06, + "loss": 0.0001, + "step": 3211 + }, + { + "epoch": 0.20699877553650833, + "grad_norm": 0.014151467748943703, + "learning_rate": 8.812746151092016e-06, + "loss": 0.0, + "step": 3212 + }, + { + "epoch": 0.20706322098343752, + "grad_norm": 0.0005395895476555126, + "learning_rate": 8.81203007518797e-06, + "loss": 0.0, + "step": 3213 + }, + { + "epoch": 0.2071276664303667, + "grad_norm": 0.1063896173481837, + "learning_rate": 8.811313999283925e-06, + "loss": 0.0008, + "step": 3214 + }, + { + "epoch": 0.20719211187729586, + "grad_norm": 0.04907908321108309, + "learning_rate": 8.81059792337988e-06, + "loss": 0.0001, + "step": 3215 + }, + { + "epoch": 0.20725655732422504, + "grad_norm": 0.0009487456876486122, + "learning_rate": 8.809881847475833e-06, + "loss": 0.0, + "step": 3216 + }, + { + "epoch": 0.20732100277115423, + "grad_norm": 0.012295272892583854, + "learning_rate": 8.809165771571788e-06, + "loss": 0.0, + "step": 3217 + }, + { + "epoch": 0.20738544821808338, + "grad_norm": 0.003048664850237199, + "learning_rate": 8.80844969566774e-06, + "loss": 0.0, + "step": 3218 + }, + { + "epoch": 0.20744989366501257, + "grad_norm": 0.007495688176306054, + "learning_rate": 8.807733619763695e-06, + "loss": 0.0001, + "step": 3219 + }, + { + "epoch": 0.20751433911194175, + "grad_norm": 0.0035060227618026463, + "learning_rate": 8.807017543859649e-06, + "loss": 0.0, + "step": 3220 + }, + { + "epoch": 0.2075787845588709, + "grad_norm": 0.03656436499960002, + "learning_rate": 8.806301467955603e-06, + "loss": 0.0002, + "step": 3221 + }, + { + "epoch": 0.2076432300058001, + "grad_norm": 0.29249599460157977, + "learning_rate": 8.805585392051558e-06, + "loss": 0.0006, + "step": 3222 + }, + { + "epoch": 0.20770767545272925, + "grad_norm": 0.011994526837623365, + "learning_rate": 8.804869316147512e-06, + "loss": 0.0, + "step": 3223 + }, + { + "epoch": 0.20777212089965844, + "grad_norm": 0.004782641801488116, + "learning_rate": 8.804153240243468e-06, + "loss": 0.0, + "step": 3224 + }, + { + "epoch": 0.20783656634658762, + "grad_norm": 0.04384486055700967, + "learning_rate": 8.80343716433942e-06, + "loss": 0.0001, + "step": 3225 + }, + { + "epoch": 0.20790101179351678, + "grad_norm": 0.0033315971745620817, + "learning_rate": 8.802721088435375e-06, + "loss": 0.0, + "step": 3226 + }, + { + "epoch": 0.20796545724044596, + "grad_norm": 0.016471005833125225, + "learning_rate": 8.802005012531329e-06, + "loss": 0.0, + "step": 3227 + }, + { + "epoch": 0.20802990268737515, + "grad_norm": 0.008197529248738967, + "learning_rate": 8.801288936627283e-06, + "loss": 0.0001, + "step": 3228 + }, + { + "epoch": 0.2080943481343043, + "grad_norm": 0.012575679376179708, + "learning_rate": 8.800572860723238e-06, + "loss": 0.0, + "step": 3229 + }, + { + "epoch": 0.2081587935812335, + "grad_norm": 0.0986802487508774, + "learning_rate": 8.799856784819192e-06, + "loss": 0.0021, + "step": 3230 + }, + { + "epoch": 0.20822323902816267, + "grad_norm": 0.0008527289712501518, + "learning_rate": 8.799140708915146e-06, + "loss": 0.0, + "step": 3231 + }, + { + "epoch": 0.20828768447509183, + "grad_norm": 0.9886804912648364, + "learning_rate": 8.7984246330111e-06, + "loss": 0.0021, + "step": 3232 + }, + { + "epoch": 0.208352129922021, + "grad_norm": 0.016484567955490047, + "learning_rate": 8.797708557107055e-06, + "loss": 0.0002, + "step": 3233 + }, + { + "epoch": 0.2084165753689502, + "grad_norm": 0.28071698703188924, + "learning_rate": 8.796992481203007e-06, + "loss": 0.0022, + "step": 3234 + }, + { + "epoch": 0.20848102081587935, + "grad_norm": 0.021459780780179235, + "learning_rate": 8.796276405298962e-06, + "loss": 0.0002, + "step": 3235 + }, + { + "epoch": 0.20854546626280854, + "grad_norm": 0.058684126375482114, + "learning_rate": 8.795560329394916e-06, + "loss": 0.0017, + "step": 3236 + }, + { + "epoch": 0.2086099117097377, + "grad_norm": 0.0011527392584699309, + "learning_rate": 8.79484425349087e-06, + "loss": 0.0, + "step": 3237 + }, + { + "epoch": 0.20867435715666688, + "grad_norm": 0.0020617141823999893, + "learning_rate": 8.794128177586825e-06, + "loss": 0.0, + "step": 3238 + }, + { + "epoch": 0.20873880260359606, + "grad_norm": 0.2818069506547426, + "learning_rate": 8.793412101682779e-06, + "loss": 0.0003, + "step": 3239 + }, + { + "epoch": 0.20880324805052522, + "grad_norm": 0.0039035938963142623, + "learning_rate": 8.792696025778733e-06, + "loss": 0.0, + "step": 3240 + }, + { + "epoch": 0.2088676934974544, + "grad_norm": 0.2977870433530856, + "learning_rate": 8.791979949874688e-06, + "loss": 0.0011, + "step": 3241 + }, + { + "epoch": 0.2089321389443836, + "grad_norm": 0.0770880788983326, + "learning_rate": 8.791263873970642e-06, + "loss": 0.0003, + "step": 3242 + }, + { + "epoch": 0.20899658439131275, + "grad_norm": 2.4195809605252174, + "learning_rate": 8.790547798066596e-06, + "loss": 0.0161, + "step": 3243 + }, + { + "epoch": 0.20906102983824193, + "grad_norm": 0.0061660864423312214, + "learning_rate": 8.789831722162549e-06, + "loss": 0.0, + "step": 3244 + }, + { + "epoch": 0.20912547528517111, + "grad_norm": 0.12889428516693582, + "learning_rate": 8.789115646258503e-06, + "loss": 0.0001, + "step": 3245 + }, + { + "epoch": 0.20918992073210027, + "grad_norm": 0.09151028984381641, + "learning_rate": 8.788399570354457e-06, + "loss": 0.0001, + "step": 3246 + }, + { + "epoch": 0.20925436617902946, + "grad_norm": 0.009779283910463333, + "learning_rate": 8.787683494450412e-06, + "loss": 0.0, + "step": 3247 + }, + { + "epoch": 0.20931881162595864, + "grad_norm": 0.006560546257411386, + "learning_rate": 8.786967418546368e-06, + "loss": 0.0, + "step": 3248 + }, + { + "epoch": 0.2093832570728878, + "grad_norm": 0.0038019288472966287, + "learning_rate": 8.786251342642322e-06, + "loss": 0.0, + "step": 3249 + }, + { + "epoch": 0.20944770251981698, + "grad_norm": 0.09019723577914596, + "learning_rate": 8.785535266738275e-06, + "loss": 0.0029, + "step": 3250 + }, + { + "epoch": 0.20951214796674614, + "grad_norm": 0.050902314027495524, + "learning_rate": 8.784819190834229e-06, + "loss": 0.0001, + "step": 3251 + }, + { + "epoch": 0.20957659341367532, + "grad_norm": 0.006432140117759434, + "learning_rate": 8.784103114930183e-06, + "loss": 0.0001, + "step": 3252 + }, + { + "epoch": 0.2096410388606045, + "grad_norm": 0.006941493151334226, + "learning_rate": 8.783387039026137e-06, + "loss": 0.0, + "step": 3253 + }, + { + "epoch": 0.20970548430753366, + "grad_norm": 0.13453681844557985, + "learning_rate": 8.782670963122092e-06, + "loss": 0.0007, + "step": 3254 + }, + { + "epoch": 0.20976992975446285, + "grad_norm": 0.16346047845287467, + "learning_rate": 8.781954887218046e-06, + "loss": 0.0008, + "step": 3255 + }, + { + "epoch": 0.20983437520139203, + "grad_norm": 0.0006596375563727548, + "learning_rate": 8.781238811314e-06, + "loss": 0.0, + "step": 3256 + }, + { + "epoch": 0.2098988206483212, + "grad_norm": 0.004409057604210094, + "learning_rate": 8.780522735409955e-06, + "loss": 0.0, + "step": 3257 + }, + { + "epoch": 0.20996326609525037, + "grad_norm": 0.003006464597436745, + "learning_rate": 8.779806659505909e-06, + "loss": 0.0, + "step": 3258 + }, + { + "epoch": 0.21002771154217956, + "grad_norm": 2.8408788922178685, + "learning_rate": 8.779090583601863e-06, + "loss": 0.0237, + "step": 3259 + }, + { + "epoch": 0.2100921569891087, + "grad_norm": 0.01016413326544834, + "learning_rate": 8.778374507697816e-06, + "loss": 0.0, + "step": 3260 + }, + { + "epoch": 0.2101566024360379, + "grad_norm": 0.01772877014468529, + "learning_rate": 8.77765843179377e-06, + "loss": 0.0, + "step": 3261 + }, + { + "epoch": 0.21022104788296705, + "grad_norm": 0.0024843355972861882, + "learning_rate": 8.776942355889724e-06, + "loss": 0.0, + "step": 3262 + }, + { + "epoch": 0.21028549332989624, + "grad_norm": 0.016083136451063594, + "learning_rate": 8.776226279985679e-06, + "loss": 0.0001, + "step": 3263 + }, + { + "epoch": 0.21034993877682542, + "grad_norm": 0.006143627681656301, + "learning_rate": 8.775510204081633e-06, + "loss": 0.0, + "step": 3264 + }, + { + "epoch": 0.21041438422375458, + "grad_norm": 0.6915285034718149, + "learning_rate": 8.774794128177587e-06, + "loss": 0.0009, + "step": 3265 + }, + { + "epoch": 0.21047882967068376, + "grad_norm": 0.003211719254704211, + "learning_rate": 8.774078052273542e-06, + "loss": 0.0, + "step": 3266 + }, + { + "epoch": 0.21054327511761295, + "grad_norm": 0.04889199609424895, + "learning_rate": 8.773361976369496e-06, + "loss": 0.0003, + "step": 3267 + }, + { + "epoch": 0.2106077205645421, + "grad_norm": 0.005836996667072234, + "learning_rate": 8.77264590046545e-06, + "loss": 0.0, + "step": 3268 + }, + { + "epoch": 0.2106721660114713, + "grad_norm": 0.04504886589983157, + "learning_rate": 8.771929824561405e-06, + "loss": 0.0002, + "step": 3269 + }, + { + "epoch": 0.21073661145840047, + "grad_norm": 0.007049011440276931, + "learning_rate": 8.771213748657357e-06, + "loss": 0.0, + "step": 3270 + }, + { + "epoch": 0.21080105690532963, + "grad_norm": 0.01300277264543531, + "learning_rate": 8.770497672753313e-06, + "loss": 0.0, + "step": 3271 + }, + { + "epoch": 0.21086550235225882, + "grad_norm": 0.09114491995853359, + "learning_rate": 8.769781596849267e-06, + "loss": 0.0004, + "step": 3272 + }, + { + "epoch": 0.210929947799188, + "grad_norm": 0.20353056693551833, + "learning_rate": 8.769065520945222e-06, + "loss": 0.0003, + "step": 3273 + }, + { + "epoch": 0.21099439324611716, + "grad_norm": 0.7785442417855192, + "learning_rate": 8.768349445041176e-06, + "loss": 0.0016, + "step": 3274 + }, + { + "epoch": 0.21105883869304634, + "grad_norm": 0.979300330893555, + "learning_rate": 8.76763336913713e-06, + "loss": 0.0006, + "step": 3275 + }, + { + "epoch": 0.2111232841399755, + "grad_norm": 0.004026414319159687, + "learning_rate": 8.766917293233083e-06, + "loss": 0.0, + "step": 3276 + }, + { + "epoch": 0.21118772958690468, + "grad_norm": 1.9066745097770412, + "learning_rate": 8.766201217329037e-06, + "loss": 0.0091, + "step": 3277 + }, + { + "epoch": 0.21125217503383387, + "grad_norm": 0.02170203435620058, + "learning_rate": 8.765485141424992e-06, + "loss": 0.0001, + "step": 3278 + }, + { + "epoch": 0.21131662048076302, + "grad_norm": 0.001396143696547249, + "learning_rate": 8.764769065520946e-06, + "loss": 0.0, + "step": 3279 + }, + { + "epoch": 0.2113810659276922, + "grad_norm": 0.07585580675491908, + "learning_rate": 8.7640529896169e-06, + "loss": 0.0002, + "step": 3280 + }, + { + "epoch": 0.2114455113746214, + "grad_norm": 0.05296037073165522, + "learning_rate": 8.763336913712854e-06, + "loss": 0.0001, + "step": 3281 + }, + { + "epoch": 0.21150995682155055, + "grad_norm": 0.00801547689700731, + "learning_rate": 8.762620837808809e-06, + "loss": 0.0, + "step": 3282 + }, + { + "epoch": 0.21157440226847973, + "grad_norm": 0.05558661669894557, + "learning_rate": 8.761904761904763e-06, + "loss": 0.0001, + "step": 3283 + }, + { + "epoch": 0.21163884771540892, + "grad_norm": 0.03368207035083663, + "learning_rate": 8.761188686000717e-06, + "loss": 0.0, + "step": 3284 + }, + { + "epoch": 0.21170329316233807, + "grad_norm": 0.16717958253313203, + "learning_rate": 8.760472610096672e-06, + "loss": 0.0005, + "step": 3285 + }, + { + "epoch": 0.21176773860926726, + "grad_norm": 0.20278401364321336, + "learning_rate": 8.759756534192624e-06, + "loss": 0.002, + "step": 3286 + }, + { + "epoch": 0.21183218405619644, + "grad_norm": 0.03134582958365876, + "learning_rate": 8.759040458288579e-06, + "loss": 0.0002, + "step": 3287 + }, + { + "epoch": 0.2118966295031256, + "grad_norm": 0.003064029577034968, + "learning_rate": 8.758324382384533e-06, + "loss": 0.0, + "step": 3288 + }, + { + "epoch": 0.21196107495005478, + "grad_norm": 0.013964800171850807, + "learning_rate": 8.757608306480487e-06, + "loss": 0.0001, + "step": 3289 + }, + { + "epoch": 0.21202552039698394, + "grad_norm": 0.3455218362415116, + "learning_rate": 8.756892230576441e-06, + "loss": 0.0008, + "step": 3290 + }, + { + "epoch": 0.21208996584391313, + "grad_norm": 0.1274534928593735, + "learning_rate": 8.756176154672396e-06, + "loss": 0.0003, + "step": 3291 + }, + { + "epoch": 0.2121544112908423, + "grad_norm": 0.02204250914795714, + "learning_rate": 8.75546007876835e-06, + "loss": 0.0001, + "step": 3292 + }, + { + "epoch": 0.21221885673777147, + "grad_norm": 0.0020332618231904947, + "learning_rate": 8.754744002864304e-06, + "loss": 0.0, + "step": 3293 + }, + { + "epoch": 0.21228330218470065, + "grad_norm": 0.010261200257673677, + "learning_rate": 8.754027926960259e-06, + "loss": 0.0, + "step": 3294 + }, + { + "epoch": 0.21234774763162984, + "grad_norm": 0.005997398083936662, + "learning_rate": 8.753311851056213e-06, + "loss": 0.0, + "step": 3295 + }, + { + "epoch": 0.212412193078559, + "grad_norm": 0.007660184481136306, + "learning_rate": 8.752595775152167e-06, + "loss": 0.0, + "step": 3296 + }, + { + "epoch": 0.21247663852548818, + "grad_norm": 0.038827415225720155, + "learning_rate": 8.751879699248122e-06, + "loss": 0.0, + "step": 3297 + }, + { + "epoch": 0.21254108397241736, + "grad_norm": 0.33644458291898005, + "learning_rate": 8.751163623344076e-06, + "loss": 0.0024, + "step": 3298 + }, + { + "epoch": 0.21260552941934652, + "grad_norm": 0.017198641204569848, + "learning_rate": 8.75044754744003e-06, + "loss": 0.0001, + "step": 3299 + }, + { + "epoch": 0.2126699748662757, + "grad_norm": 0.15987451125906116, + "learning_rate": 8.749731471535984e-06, + "loss": 0.0014, + "step": 3300 + }, + { + "epoch": 0.21273442031320486, + "grad_norm": 0.007898780452521827, + "learning_rate": 8.749015395631939e-06, + "loss": 0.0, + "step": 3301 + }, + { + "epoch": 0.21279886576013404, + "grad_norm": 0.2562249607552893, + "learning_rate": 8.748299319727891e-06, + "loss": 0.003, + "step": 3302 + }, + { + "epoch": 0.21286331120706323, + "grad_norm": 0.0024193017311194913, + "learning_rate": 8.747583243823846e-06, + "loss": 0.0, + "step": 3303 + }, + { + "epoch": 0.21292775665399238, + "grad_norm": 0.005256035814212943, + "learning_rate": 8.7468671679198e-06, + "loss": 0.0, + "step": 3304 + }, + { + "epoch": 0.21299220210092157, + "grad_norm": 0.012534387630057374, + "learning_rate": 8.746151092015754e-06, + "loss": 0.0, + "step": 3305 + }, + { + "epoch": 0.21305664754785075, + "grad_norm": 0.002335593572439098, + "learning_rate": 8.745435016111709e-06, + "loss": 0.0, + "step": 3306 + }, + { + "epoch": 0.2131210929947799, + "grad_norm": 3.6138689872499303, + "learning_rate": 8.744718940207663e-06, + "loss": 0.0088, + "step": 3307 + }, + { + "epoch": 0.2131855384417091, + "grad_norm": 0.021828677739768283, + "learning_rate": 8.744002864303617e-06, + "loss": 0.0001, + "step": 3308 + }, + { + "epoch": 0.21324998388863828, + "grad_norm": 0.008283227528941124, + "learning_rate": 8.743286788399571e-06, + "loss": 0.0, + "step": 3309 + }, + { + "epoch": 0.21331442933556743, + "grad_norm": 0.05812750326426742, + "learning_rate": 8.742570712495526e-06, + "loss": 0.0006, + "step": 3310 + }, + { + "epoch": 0.21337887478249662, + "grad_norm": 0.016621163486587685, + "learning_rate": 8.741854636591478e-06, + "loss": 0.0, + "step": 3311 + }, + { + "epoch": 0.2134433202294258, + "grad_norm": 0.00017105550687940947, + "learning_rate": 8.741138560687433e-06, + "loss": 0.0, + "step": 3312 + }, + { + "epoch": 0.21350776567635496, + "grad_norm": 0.6441706949390169, + "learning_rate": 8.740422484783387e-06, + "loss": 0.0009, + "step": 3313 + }, + { + "epoch": 0.21357221112328414, + "grad_norm": 0.04369000739798586, + "learning_rate": 8.739706408879341e-06, + "loss": 0.0, + "step": 3314 + }, + { + "epoch": 0.2136366565702133, + "grad_norm": 0.09402632719574942, + "learning_rate": 8.738990332975296e-06, + "loss": 0.0003, + "step": 3315 + }, + { + "epoch": 0.21370110201714249, + "grad_norm": 0.05016807446108145, + "learning_rate": 8.73827425707125e-06, + "loss": 0.0001, + "step": 3316 + }, + { + "epoch": 0.21376554746407167, + "grad_norm": 0.010129704308617241, + "learning_rate": 8.737558181167204e-06, + "loss": 0.0, + "step": 3317 + }, + { + "epoch": 0.21382999291100083, + "grad_norm": 0.06499619789764004, + "learning_rate": 8.736842105263158e-06, + "loss": 0.0001, + "step": 3318 + }, + { + "epoch": 0.21389443835793, + "grad_norm": 0.005310078119373596, + "learning_rate": 8.736126029359113e-06, + "loss": 0.0, + "step": 3319 + }, + { + "epoch": 0.2139588838048592, + "grad_norm": 0.0006348497178582359, + "learning_rate": 8.735409953455067e-06, + "loss": 0.0, + "step": 3320 + }, + { + "epoch": 0.21402332925178835, + "grad_norm": 0.00472366162278607, + "learning_rate": 8.734693877551021e-06, + "loss": 0.0, + "step": 3321 + }, + { + "epoch": 0.21408777469871754, + "grad_norm": 0.015531686896680753, + "learning_rate": 8.733977801646976e-06, + "loss": 0.0002, + "step": 3322 + }, + { + "epoch": 0.21415222014564672, + "grad_norm": 0.02151993293429727, + "learning_rate": 8.73326172574293e-06, + "loss": 0.0002, + "step": 3323 + }, + { + "epoch": 0.21421666559257588, + "grad_norm": 0.010721432144462448, + "learning_rate": 8.732545649838884e-06, + "loss": 0.0, + "step": 3324 + }, + { + "epoch": 0.21428111103950506, + "grad_norm": 0.07421909194164812, + "learning_rate": 8.731829573934839e-06, + "loss": 0.0034, + "step": 3325 + }, + { + "epoch": 0.21434555648643425, + "grad_norm": 0.003490125792385133, + "learning_rate": 8.731113498030793e-06, + "loss": 0.0, + "step": 3326 + }, + { + "epoch": 0.2144100019333634, + "grad_norm": 0.6003774389582377, + "learning_rate": 8.730397422126745e-06, + "loss": 0.003, + "step": 3327 + }, + { + "epoch": 0.2144744473802926, + "grad_norm": 0.0031710861298442842, + "learning_rate": 8.7296813462227e-06, + "loss": 0.0, + "step": 3328 + }, + { + "epoch": 0.21453889282722174, + "grad_norm": 0.006062084484470189, + "learning_rate": 8.728965270318654e-06, + "loss": 0.0, + "step": 3329 + }, + { + "epoch": 0.21460333827415093, + "grad_norm": 0.0046390183140608615, + "learning_rate": 8.728249194414608e-06, + "loss": 0.0, + "step": 3330 + }, + { + "epoch": 0.2146677837210801, + "grad_norm": 0.0028126984205885065, + "learning_rate": 8.727533118510563e-06, + "loss": 0.0, + "step": 3331 + }, + { + "epoch": 0.21473222916800927, + "grad_norm": 0.33433982271918034, + "learning_rate": 8.726817042606517e-06, + "loss": 0.0026, + "step": 3332 + }, + { + "epoch": 0.21479667461493845, + "grad_norm": 0.09134759936800937, + "learning_rate": 8.726100966702471e-06, + "loss": 0.0004, + "step": 3333 + }, + { + "epoch": 0.21486112006186764, + "grad_norm": 0.10058142364892066, + "learning_rate": 8.725384890798425e-06, + "loss": 0.0001, + "step": 3334 + }, + { + "epoch": 0.2149255655087968, + "grad_norm": 0.0016853936002382036, + "learning_rate": 8.72466881489438e-06, + "loss": 0.0, + "step": 3335 + }, + { + "epoch": 0.21499001095572598, + "grad_norm": 0.15968984634818126, + "learning_rate": 8.723952738990334e-06, + "loss": 0.0004, + "step": 3336 + }, + { + "epoch": 0.21505445640265516, + "grad_norm": 0.014139192268035285, + "learning_rate": 8.723236663086287e-06, + "loss": 0.0, + "step": 3337 + }, + { + "epoch": 0.21511890184958432, + "grad_norm": 0.0609662983623595, + "learning_rate": 8.722520587182241e-06, + "loss": 0.0001, + "step": 3338 + }, + { + "epoch": 0.2151833472965135, + "grad_norm": 0.00030584942007946705, + "learning_rate": 8.721804511278195e-06, + "loss": 0.0, + "step": 3339 + }, + { + "epoch": 0.2152477927434427, + "grad_norm": 0.010851115075375938, + "learning_rate": 8.72108843537415e-06, + "loss": 0.0, + "step": 3340 + }, + { + "epoch": 0.21531223819037185, + "grad_norm": 0.0025828937431415975, + "learning_rate": 8.720372359470106e-06, + "loss": 0.0, + "step": 3341 + }, + { + "epoch": 0.21537668363730103, + "grad_norm": 0.27412838166825926, + "learning_rate": 8.71965628356606e-06, + "loss": 0.0002, + "step": 3342 + }, + { + "epoch": 0.2154411290842302, + "grad_norm": 0.0014394433601773147, + "learning_rate": 8.718940207662012e-06, + "loss": 0.0, + "step": 3343 + }, + { + "epoch": 0.21550557453115937, + "grad_norm": 0.03302873013551384, + "learning_rate": 8.718224131757967e-06, + "loss": 0.0001, + "step": 3344 + }, + { + "epoch": 0.21557001997808856, + "grad_norm": 0.003994157454168405, + "learning_rate": 8.717508055853921e-06, + "loss": 0.0, + "step": 3345 + }, + { + "epoch": 0.2156344654250177, + "grad_norm": 0.03668763155929853, + "learning_rate": 8.716791979949875e-06, + "loss": 0.0001, + "step": 3346 + }, + { + "epoch": 0.2156989108719469, + "grad_norm": 0.0035984155267912584, + "learning_rate": 8.71607590404583e-06, + "loss": 0.0, + "step": 3347 + }, + { + "epoch": 0.21576335631887608, + "grad_norm": 0.000645077782702887, + "learning_rate": 8.715359828141784e-06, + "loss": 0.0, + "step": 3348 + }, + { + "epoch": 0.21582780176580524, + "grad_norm": 0.0007157999193808737, + "learning_rate": 8.714643752237738e-06, + "loss": 0.0, + "step": 3349 + }, + { + "epoch": 0.21589224721273442, + "grad_norm": 0.08779469924341486, + "learning_rate": 8.713927676333693e-06, + "loss": 0.0007, + "step": 3350 + }, + { + "epoch": 0.2159566926596636, + "grad_norm": 0.0009734634672225255, + "learning_rate": 8.713211600429647e-06, + "loss": 0.0, + "step": 3351 + }, + { + "epoch": 0.21602113810659276, + "grad_norm": 0.009129763667514236, + "learning_rate": 8.712495524525601e-06, + "loss": 0.0001, + "step": 3352 + }, + { + "epoch": 0.21608558355352195, + "grad_norm": 0.0015141456217384983, + "learning_rate": 8.711779448621554e-06, + "loss": 0.0, + "step": 3353 + }, + { + "epoch": 0.2161500290004511, + "grad_norm": 0.009750178906905728, + "learning_rate": 8.711063372717508e-06, + "loss": 0.0, + "step": 3354 + }, + { + "epoch": 0.2162144744473803, + "grad_norm": 0.7822016070286729, + "learning_rate": 8.710347296813462e-06, + "loss": 0.0092, + "step": 3355 + }, + { + "epoch": 0.21627891989430947, + "grad_norm": 0.0008353497719524981, + "learning_rate": 8.709631220909417e-06, + "loss": 0.0, + "step": 3356 + }, + { + "epoch": 0.21634336534123863, + "grad_norm": 0.0837409934184648, + "learning_rate": 8.708915145005371e-06, + "loss": 0.0001, + "step": 3357 + }, + { + "epoch": 0.21640781078816781, + "grad_norm": 0.022414822248022617, + "learning_rate": 8.708199069101325e-06, + "loss": 0.0001, + "step": 3358 + }, + { + "epoch": 0.216472256235097, + "grad_norm": 0.11345686946837005, + "learning_rate": 8.70748299319728e-06, + "loss": 0.0001, + "step": 3359 + }, + { + "epoch": 0.21653670168202616, + "grad_norm": 0.08448661016414556, + "learning_rate": 8.706766917293234e-06, + "loss": 0.0009, + "step": 3360 + }, + { + "epoch": 0.21660114712895534, + "grad_norm": 0.0220006539202053, + "learning_rate": 8.706050841389188e-06, + "loss": 0.0, + "step": 3361 + }, + { + "epoch": 0.21666559257588452, + "grad_norm": 0.08631784358443949, + "learning_rate": 8.705334765485142e-06, + "loss": 0.0001, + "step": 3362 + }, + { + "epoch": 0.21673003802281368, + "grad_norm": 0.029033055171980918, + "learning_rate": 8.704618689581095e-06, + "loss": 0.0001, + "step": 3363 + }, + { + "epoch": 0.21679448346974287, + "grad_norm": 0.4350533533262354, + "learning_rate": 8.703902613677051e-06, + "loss": 0.0039, + "step": 3364 + }, + { + "epoch": 0.21685892891667205, + "grad_norm": 0.001498889728926459, + "learning_rate": 8.703186537773005e-06, + "loss": 0.0, + "step": 3365 + }, + { + "epoch": 0.2169233743636012, + "grad_norm": 0.010148400387620866, + "learning_rate": 8.70247046186896e-06, + "loss": 0.0, + "step": 3366 + }, + { + "epoch": 0.2169878198105304, + "grad_norm": 0.007033440864080341, + "learning_rate": 8.701754385964914e-06, + "loss": 0.0, + "step": 3367 + }, + { + "epoch": 0.21705226525745955, + "grad_norm": 0.05006730273660623, + "learning_rate": 8.701038310060868e-06, + "loss": 0.0001, + "step": 3368 + }, + { + "epoch": 0.21711671070438873, + "grad_norm": 0.007402992324705263, + "learning_rate": 8.700322234156821e-06, + "loss": 0.0, + "step": 3369 + }, + { + "epoch": 0.21718115615131792, + "grad_norm": 0.0057464255085265354, + "learning_rate": 8.699606158252775e-06, + "loss": 0.0, + "step": 3370 + }, + { + "epoch": 0.21724560159824707, + "grad_norm": 0.05918935218428574, + "learning_rate": 8.69889008234873e-06, + "loss": 0.0003, + "step": 3371 + }, + { + "epoch": 0.21731004704517626, + "grad_norm": 0.025653915972399525, + "learning_rate": 8.698174006444684e-06, + "loss": 0.0002, + "step": 3372 + }, + { + "epoch": 0.21737449249210544, + "grad_norm": 0.012820261677368707, + "learning_rate": 8.697457930540638e-06, + "loss": 0.0002, + "step": 3373 + }, + { + "epoch": 0.2174389379390346, + "grad_norm": 0.003222630195440105, + "learning_rate": 8.696741854636592e-06, + "loss": 0.0, + "step": 3374 + }, + { + "epoch": 0.21750338338596378, + "grad_norm": 0.005536295847970089, + "learning_rate": 8.696025778732547e-06, + "loss": 0.0, + "step": 3375 + }, + { + "epoch": 0.21756782883289297, + "grad_norm": 0.0475632780772114, + "learning_rate": 8.695309702828501e-06, + "loss": 0.0001, + "step": 3376 + }, + { + "epoch": 0.21763227427982212, + "grad_norm": 0.013851927429286565, + "learning_rate": 8.694593626924455e-06, + "loss": 0.0, + "step": 3377 + }, + { + "epoch": 0.2176967197267513, + "grad_norm": 0.006665526372697656, + "learning_rate": 8.69387755102041e-06, + "loss": 0.0, + "step": 3378 + }, + { + "epoch": 0.2177611651736805, + "grad_norm": 0.0003302123249551213, + "learning_rate": 8.693161475116362e-06, + "loss": 0.0, + "step": 3379 + }, + { + "epoch": 0.21782561062060965, + "grad_norm": 0.04042210473255954, + "learning_rate": 8.692445399212316e-06, + "loss": 0.0, + "step": 3380 + }, + { + "epoch": 0.21789005606753883, + "grad_norm": 0.032240357119762836, + "learning_rate": 8.69172932330827e-06, + "loss": 0.0001, + "step": 3381 + }, + { + "epoch": 0.217954501514468, + "grad_norm": 0.028356624417666038, + "learning_rate": 8.691013247404225e-06, + "loss": 0.0001, + "step": 3382 + }, + { + "epoch": 0.21801894696139718, + "grad_norm": 0.24312603511442854, + "learning_rate": 8.69029717150018e-06, + "loss": 0.0007, + "step": 3383 + }, + { + "epoch": 0.21808339240832636, + "grad_norm": 0.06490065794875387, + "learning_rate": 8.689581095596134e-06, + "loss": 0.0001, + "step": 3384 + }, + { + "epoch": 0.21814783785525552, + "grad_norm": 0.0003474384346755857, + "learning_rate": 8.688865019692088e-06, + "loss": 0.0, + "step": 3385 + }, + { + "epoch": 0.2182122833021847, + "grad_norm": 0.010435808402401832, + "learning_rate": 8.688148943788042e-06, + "loss": 0.0001, + "step": 3386 + }, + { + "epoch": 0.21827672874911389, + "grad_norm": 0.10267596869027987, + "learning_rate": 8.687432867883997e-06, + "loss": 0.0004, + "step": 3387 + }, + { + "epoch": 0.21834117419604304, + "grad_norm": 0.00680259145453217, + "learning_rate": 8.686716791979951e-06, + "loss": 0.0, + "step": 3388 + }, + { + "epoch": 0.21840561964297223, + "grad_norm": 0.0007357830666917632, + "learning_rate": 8.686000716075905e-06, + "loss": 0.0, + "step": 3389 + }, + { + "epoch": 0.2184700650899014, + "grad_norm": 0.017961030447330713, + "learning_rate": 8.68528464017186e-06, + "loss": 0.0, + "step": 3390 + }, + { + "epoch": 0.21853451053683057, + "grad_norm": 0.015245958178808318, + "learning_rate": 8.684568564267814e-06, + "loss": 0.0, + "step": 3391 + }, + { + "epoch": 0.21859895598375975, + "grad_norm": 0.2522366576990323, + "learning_rate": 8.683852488363768e-06, + "loss": 0.0013, + "step": 3392 + }, + { + "epoch": 0.2186634014306889, + "grad_norm": 0.004980891866569009, + "learning_rate": 8.683136412459722e-06, + "loss": 0.0, + "step": 3393 + }, + { + "epoch": 0.2187278468776181, + "grad_norm": 0.005101799462129407, + "learning_rate": 8.682420336555677e-06, + "loss": 0.0, + "step": 3394 + }, + { + "epoch": 0.21879229232454728, + "grad_norm": 0.0034149603184703468, + "learning_rate": 8.68170426065163e-06, + "loss": 0.0, + "step": 3395 + }, + { + "epoch": 0.21885673777147643, + "grad_norm": 0.03652058159275285, + "learning_rate": 8.680988184747584e-06, + "loss": 0.0002, + "step": 3396 + }, + { + "epoch": 0.21892118321840562, + "grad_norm": 0.007019698589223098, + "learning_rate": 8.680272108843538e-06, + "loss": 0.0, + "step": 3397 + }, + { + "epoch": 0.2189856286653348, + "grad_norm": 0.0005208419070086863, + "learning_rate": 8.679556032939492e-06, + "loss": 0.0, + "step": 3398 + }, + { + "epoch": 0.21905007411226396, + "grad_norm": 0.022129151887111145, + "learning_rate": 8.678839957035446e-06, + "loss": 0.0, + "step": 3399 + }, + { + "epoch": 0.21911451955919314, + "grad_norm": 0.008741674890565838, + "learning_rate": 8.6781238811314e-06, + "loss": 0.0, + "step": 3400 + }, + { + "epoch": 0.21917896500612233, + "grad_norm": 0.0028274312509968943, + "learning_rate": 8.677407805227355e-06, + "loss": 0.0, + "step": 3401 + }, + { + "epoch": 0.21924341045305148, + "grad_norm": 0.027357096054692588, + "learning_rate": 8.67669172932331e-06, + "loss": 0.0001, + "step": 3402 + }, + { + "epoch": 0.21930785589998067, + "grad_norm": 9.549562120148108e-05, + "learning_rate": 8.675975653419264e-06, + "loss": 0.0, + "step": 3403 + }, + { + "epoch": 0.21937230134690985, + "grad_norm": 0.00035971321640775695, + "learning_rate": 8.675259577515216e-06, + "loss": 0.0, + "step": 3404 + }, + { + "epoch": 0.219436746793839, + "grad_norm": 0.001148672309388114, + "learning_rate": 8.67454350161117e-06, + "loss": 0.0, + "step": 3405 + }, + { + "epoch": 0.2195011922407682, + "grad_norm": 0.0022274004349375665, + "learning_rate": 8.673827425707125e-06, + "loss": 0.0, + "step": 3406 + }, + { + "epoch": 0.21956563768769735, + "grad_norm": 0.001988663302904296, + "learning_rate": 8.673111349803079e-06, + "loss": 0.0, + "step": 3407 + }, + { + "epoch": 0.21963008313462654, + "grad_norm": 0.004393998501194764, + "learning_rate": 8.672395273899033e-06, + "loss": 0.0, + "step": 3408 + }, + { + "epoch": 0.21969452858155572, + "grad_norm": 0.026292318215849746, + "learning_rate": 8.671679197994988e-06, + "loss": 0.0, + "step": 3409 + }, + { + "epoch": 0.21975897402848488, + "grad_norm": 0.037110821168728526, + "learning_rate": 8.670963122090942e-06, + "loss": 0.0004, + "step": 3410 + }, + { + "epoch": 0.21982341947541406, + "grad_norm": 0.0004869931471118294, + "learning_rate": 8.670247046186896e-06, + "loss": 0.0, + "step": 3411 + }, + { + "epoch": 0.21988786492234325, + "grad_norm": 0.035249077579844044, + "learning_rate": 8.66953097028285e-06, + "loss": 0.0003, + "step": 3412 + }, + { + "epoch": 0.2199523103692724, + "grad_norm": 0.001970382648606595, + "learning_rate": 8.668814894378805e-06, + "loss": 0.0, + "step": 3413 + }, + { + "epoch": 0.2200167558162016, + "grad_norm": 0.011623401973596743, + "learning_rate": 8.66809881847476e-06, + "loss": 0.0001, + "step": 3414 + }, + { + "epoch": 0.22008120126313077, + "grad_norm": 0.00034764643982887803, + "learning_rate": 8.667382742570714e-06, + "loss": 0.0, + "step": 3415 + }, + { + "epoch": 0.22014564671005993, + "grad_norm": 8.975251794623639e-05, + "learning_rate": 8.666666666666668e-06, + "loss": 0.0, + "step": 3416 + }, + { + "epoch": 0.2202100921569891, + "grad_norm": 0.07822764627206331, + "learning_rate": 8.665950590762622e-06, + "loss": 0.0007, + "step": 3417 + }, + { + "epoch": 0.2202745376039183, + "grad_norm": 0.020361744152594136, + "learning_rate": 8.665234514858576e-06, + "loss": 0.0001, + "step": 3418 + }, + { + "epoch": 0.22033898305084745, + "grad_norm": 0.031516272508776165, + "learning_rate": 8.66451843895453e-06, + "loss": 0.0001, + "step": 3419 + }, + { + "epoch": 0.22040342849777664, + "grad_norm": 0.0008974531050145935, + "learning_rate": 8.663802363050483e-06, + "loss": 0.0, + "step": 3420 + }, + { + "epoch": 0.2204678739447058, + "grad_norm": 0.16912508093612774, + "learning_rate": 8.663086287146438e-06, + "loss": 0.0003, + "step": 3421 + }, + { + "epoch": 0.22053231939163498, + "grad_norm": 0.00018829353512949963, + "learning_rate": 8.662370211242392e-06, + "loss": 0.0, + "step": 3422 + }, + { + "epoch": 0.22059676483856416, + "grad_norm": 0.00521659484946839, + "learning_rate": 8.661654135338346e-06, + "loss": 0.0, + "step": 3423 + }, + { + "epoch": 0.22066121028549332, + "grad_norm": 0.35939284985206355, + "learning_rate": 8.6609380594343e-06, + "loss": 0.0027, + "step": 3424 + }, + { + "epoch": 0.2207256557324225, + "grad_norm": 0.07403550301307933, + "learning_rate": 8.660221983530255e-06, + "loss": 0.0009, + "step": 3425 + }, + { + "epoch": 0.2207901011793517, + "grad_norm": 0.004889930625470678, + "learning_rate": 8.659505907626209e-06, + "loss": 0.0, + "step": 3426 + }, + { + "epoch": 0.22085454662628085, + "grad_norm": 0.003421520565400845, + "learning_rate": 8.658789831722163e-06, + "loss": 0.0, + "step": 3427 + }, + { + "epoch": 0.22091899207321003, + "grad_norm": 0.0053065978274310565, + "learning_rate": 8.658073755818118e-06, + "loss": 0.0, + "step": 3428 + }, + { + "epoch": 0.22098343752013921, + "grad_norm": 0.013211383273678805, + "learning_rate": 8.657357679914072e-06, + "loss": 0.0001, + "step": 3429 + }, + { + "epoch": 0.22104788296706837, + "grad_norm": 0.0009982449528573667, + "learning_rate": 8.656641604010025e-06, + "loss": 0.0, + "step": 3430 + }, + { + "epoch": 0.22111232841399756, + "grad_norm": 0.0015086585130352167, + "learning_rate": 8.655925528105979e-06, + "loss": 0.0, + "step": 3431 + }, + { + "epoch": 0.2211767738609267, + "grad_norm": 0.02614516921378581, + "learning_rate": 8.655209452201933e-06, + "loss": 0.0001, + "step": 3432 + }, + { + "epoch": 0.2212412193078559, + "grad_norm": 0.018234096510467625, + "learning_rate": 8.654493376297888e-06, + "loss": 0.0001, + "step": 3433 + }, + { + "epoch": 0.22130566475478508, + "grad_norm": 0.0011630213225405184, + "learning_rate": 8.653777300393844e-06, + "loss": 0.0, + "step": 3434 + }, + { + "epoch": 0.22137011020171424, + "grad_norm": 0.7110074187051323, + "learning_rate": 8.653061224489798e-06, + "loss": 0.0061, + "step": 3435 + }, + { + "epoch": 0.22143455564864342, + "grad_norm": 0.00033229507672846427, + "learning_rate": 8.65234514858575e-06, + "loss": 0.0, + "step": 3436 + }, + { + "epoch": 0.2214990010955726, + "grad_norm": 0.002534487956548706, + "learning_rate": 8.651629072681705e-06, + "loss": 0.0, + "step": 3437 + }, + { + "epoch": 0.22156344654250176, + "grad_norm": 0.17261034087608992, + "learning_rate": 8.650912996777659e-06, + "loss": 0.0003, + "step": 3438 + }, + { + "epoch": 0.22162789198943095, + "grad_norm": 0.002774698501918162, + "learning_rate": 8.650196920873613e-06, + "loss": 0.0, + "step": 3439 + }, + { + "epoch": 0.22169233743636013, + "grad_norm": 0.008339194776038943, + "learning_rate": 8.649480844969568e-06, + "loss": 0.0, + "step": 3440 + }, + { + "epoch": 0.2217567828832893, + "grad_norm": 0.004585672715958108, + "learning_rate": 8.648764769065522e-06, + "loss": 0.0, + "step": 3441 + }, + { + "epoch": 0.22182122833021847, + "grad_norm": 0.0030527512602284202, + "learning_rate": 8.648048693161476e-06, + "loss": 0.0, + "step": 3442 + }, + { + "epoch": 0.22188567377714766, + "grad_norm": 0.0939224861790215, + "learning_rate": 8.64733261725743e-06, + "loss": 0.0001, + "step": 3443 + }, + { + "epoch": 0.22195011922407681, + "grad_norm": 0.003773801458241794, + "learning_rate": 8.646616541353385e-06, + "loss": 0.0, + "step": 3444 + }, + { + "epoch": 0.222014564671006, + "grad_norm": 0.019486883556046962, + "learning_rate": 8.645900465449339e-06, + "loss": 0.0, + "step": 3445 + }, + { + "epoch": 0.22207901011793515, + "grad_norm": 0.07323750163029638, + "learning_rate": 8.645184389545292e-06, + "loss": 0.0005, + "step": 3446 + }, + { + "epoch": 0.22214345556486434, + "grad_norm": 0.00814745306731119, + "learning_rate": 8.644468313641246e-06, + "loss": 0.0, + "step": 3447 + }, + { + "epoch": 0.22220790101179352, + "grad_norm": 0.29745805104008016, + "learning_rate": 8.6437522377372e-06, + "loss": 0.0002, + "step": 3448 + }, + { + "epoch": 0.22227234645872268, + "grad_norm": 0.010189280976991384, + "learning_rate": 8.643036161833155e-06, + "loss": 0.0, + "step": 3449 + }, + { + "epoch": 0.22233679190565186, + "grad_norm": 0.350625219243027, + "learning_rate": 8.642320085929109e-06, + "loss": 0.0011, + "step": 3450 + }, + { + "epoch": 0.22240123735258105, + "grad_norm": 0.09586779868305924, + "learning_rate": 8.641604010025063e-06, + "loss": 0.0014, + "step": 3451 + }, + { + "epoch": 0.2224656827995102, + "grad_norm": 0.002273792676528464, + "learning_rate": 8.640887934121017e-06, + "loss": 0.0, + "step": 3452 + }, + { + "epoch": 0.2225301282464394, + "grad_norm": 0.0036304762924196463, + "learning_rate": 8.640171858216972e-06, + "loss": 0.0, + "step": 3453 + }, + { + "epoch": 0.22259457369336857, + "grad_norm": 0.24947871309025466, + "learning_rate": 8.639455782312926e-06, + "loss": 0.0037, + "step": 3454 + }, + { + "epoch": 0.22265901914029773, + "grad_norm": 0.00028613456653341616, + "learning_rate": 8.63873970640888e-06, + "loss": 0.0, + "step": 3455 + }, + { + "epoch": 0.22272346458722692, + "grad_norm": 7.548515570305155e-05, + "learning_rate": 8.638023630504833e-06, + "loss": 0.0, + "step": 3456 + }, + { + "epoch": 0.2227879100341561, + "grad_norm": 0.07466611177857914, + "learning_rate": 8.637307554600787e-06, + "loss": 0.0002, + "step": 3457 + }, + { + "epoch": 0.22285235548108526, + "grad_norm": 0.0011913602007814256, + "learning_rate": 8.636591478696743e-06, + "loss": 0.0, + "step": 3458 + }, + { + "epoch": 0.22291680092801444, + "grad_norm": 0.0005066447257724092, + "learning_rate": 8.635875402792698e-06, + "loss": 0.0, + "step": 3459 + }, + { + "epoch": 0.2229812463749436, + "grad_norm": 0.00195445982760958, + "learning_rate": 8.635159326888652e-06, + "loss": 0.0, + "step": 3460 + }, + { + "epoch": 0.22304569182187278, + "grad_norm": 0.00460945396853242, + "learning_rate": 8.634443250984606e-06, + "loss": 0.0, + "step": 3461 + }, + { + "epoch": 0.22311013726880197, + "grad_norm": 0.22631486151336772, + "learning_rate": 8.633727175080559e-06, + "loss": 0.0005, + "step": 3462 + }, + { + "epoch": 0.22317458271573112, + "grad_norm": 1.5542394097752636, + "learning_rate": 8.633011099176513e-06, + "loss": 0.0056, + "step": 3463 + }, + { + "epoch": 0.2232390281626603, + "grad_norm": 0.0003889234814934518, + "learning_rate": 8.632295023272467e-06, + "loss": 0.0, + "step": 3464 + }, + { + "epoch": 0.2233034736095895, + "grad_norm": 0.014474523316253513, + "learning_rate": 8.631578947368422e-06, + "loss": 0.0001, + "step": 3465 + }, + { + "epoch": 0.22336791905651865, + "grad_norm": 0.000770039970009564, + "learning_rate": 8.630862871464376e-06, + "loss": 0.0, + "step": 3466 + }, + { + "epoch": 0.22343236450344783, + "grad_norm": 0.02406613506883519, + "learning_rate": 8.63014679556033e-06, + "loss": 0.0002, + "step": 3467 + }, + { + "epoch": 0.22349680995037702, + "grad_norm": 0.8807060632696435, + "learning_rate": 8.629430719656285e-06, + "loss": 0.0038, + "step": 3468 + }, + { + "epoch": 0.22356125539730617, + "grad_norm": 0.010200052529701138, + "learning_rate": 8.628714643752239e-06, + "loss": 0.0001, + "step": 3469 + }, + { + "epoch": 0.22362570084423536, + "grad_norm": 0.004726596178967311, + "learning_rate": 8.627998567848193e-06, + "loss": 0.0001, + "step": 3470 + }, + { + "epoch": 0.22369014629116452, + "grad_norm": 0.0035062183509143477, + "learning_rate": 8.627282491944147e-06, + "loss": 0.0, + "step": 3471 + }, + { + "epoch": 0.2237545917380937, + "grad_norm": 0.006413737365501235, + "learning_rate": 8.6265664160401e-06, + "loss": 0.0, + "step": 3472 + }, + { + "epoch": 0.22381903718502288, + "grad_norm": 0.17002202513660772, + "learning_rate": 8.625850340136054e-06, + "loss": 0.002, + "step": 3473 + }, + { + "epoch": 0.22388348263195204, + "grad_norm": 0.000330794695075691, + "learning_rate": 8.625134264232009e-06, + "loss": 0.0, + "step": 3474 + }, + { + "epoch": 0.22394792807888123, + "grad_norm": 0.2625006482706694, + "learning_rate": 8.624418188327963e-06, + "loss": 0.0024, + "step": 3475 + }, + { + "epoch": 0.2240123735258104, + "grad_norm": 0.00048218474812608737, + "learning_rate": 8.623702112423917e-06, + "loss": 0.0, + "step": 3476 + }, + { + "epoch": 0.22407681897273957, + "grad_norm": 1.2268953297000005, + "learning_rate": 8.622986036519872e-06, + "loss": 0.0004, + "step": 3477 + }, + { + "epoch": 0.22414126441966875, + "grad_norm": 0.03838107792094849, + "learning_rate": 8.622269960615826e-06, + "loss": 0.0, + "step": 3478 + }, + { + "epoch": 0.22420570986659794, + "grad_norm": 0.0007060704934618854, + "learning_rate": 8.62155388471178e-06, + "loss": 0.0, + "step": 3479 + }, + { + "epoch": 0.2242701553135271, + "grad_norm": 0.01650770562641032, + "learning_rate": 8.620837808807734e-06, + "loss": 0.0, + "step": 3480 + }, + { + "epoch": 0.22433460076045628, + "grad_norm": 0.18594924064241675, + "learning_rate": 8.620121732903689e-06, + "loss": 0.0019, + "step": 3481 + }, + { + "epoch": 0.22439904620738546, + "grad_norm": 0.0001218993788027957, + "learning_rate": 8.619405656999643e-06, + "loss": 0.0, + "step": 3482 + }, + { + "epoch": 0.22446349165431462, + "grad_norm": 0.012246611324123326, + "learning_rate": 8.618689581095597e-06, + "loss": 0.0, + "step": 3483 + }, + { + "epoch": 0.2245279371012438, + "grad_norm": 0.0007658951102295896, + "learning_rate": 8.617973505191552e-06, + "loss": 0.0, + "step": 3484 + }, + { + "epoch": 0.22459238254817296, + "grad_norm": 0.024163193440480502, + "learning_rate": 8.617257429287506e-06, + "loss": 0.0002, + "step": 3485 + }, + { + "epoch": 0.22465682799510214, + "grad_norm": 0.00452679815357398, + "learning_rate": 8.61654135338346e-06, + "loss": 0.0, + "step": 3486 + }, + { + "epoch": 0.22472127344203133, + "grad_norm": 0.11612035923873511, + "learning_rate": 8.615825277479415e-06, + "loss": 0.0006, + "step": 3487 + }, + { + "epoch": 0.22478571888896048, + "grad_norm": 0.040972251040244025, + "learning_rate": 8.615109201575367e-06, + "loss": 0.0, + "step": 3488 + }, + { + "epoch": 0.22485016433588967, + "grad_norm": 0.001675342434363501, + "learning_rate": 8.614393125671321e-06, + "loss": 0.0, + "step": 3489 + }, + { + "epoch": 0.22491460978281885, + "grad_norm": 0.002689598627667504, + "learning_rate": 8.613677049767276e-06, + "loss": 0.0, + "step": 3490 + }, + { + "epoch": 0.224979055229748, + "grad_norm": 0.0001554998765619099, + "learning_rate": 8.61296097386323e-06, + "loss": 0.0, + "step": 3491 + }, + { + "epoch": 0.2250435006766772, + "grad_norm": 0.0007973969882462369, + "learning_rate": 8.612244897959184e-06, + "loss": 0.0, + "step": 3492 + }, + { + "epoch": 0.22510794612360638, + "grad_norm": 0.00010330225671837098, + "learning_rate": 8.611528822055139e-06, + "loss": 0.0, + "step": 3493 + }, + { + "epoch": 0.22517239157053553, + "grad_norm": 0.00965204197384073, + "learning_rate": 8.610812746151093e-06, + "loss": 0.0, + "step": 3494 + }, + { + "epoch": 0.22523683701746472, + "grad_norm": 0.28994619764491436, + "learning_rate": 8.610096670247047e-06, + "loss": 0.0006, + "step": 3495 + }, + { + "epoch": 0.2253012824643939, + "grad_norm": 0.45118794268115947, + "learning_rate": 8.609380594343002e-06, + "loss": 0.0011, + "step": 3496 + }, + { + "epoch": 0.22536572791132306, + "grad_norm": 0.056652606321714157, + "learning_rate": 8.608664518438954e-06, + "loss": 0.0001, + "step": 3497 + }, + { + "epoch": 0.22543017335825224, + "grad_norm": 0.002017892148967793, + "learning_rate": 8.607948442534908e-06, + "loss": 0.0, + "step": 3498 + }, + { + "epoch": 0.2254946188051814, + "grad_norm": 0.005166001572256319, + "learning_rate": 8.607232366630863e-06, + "loss": 0.0, + "step": 3499 + }, + { + "epoch": 0.22555906425211059, + "grad_norm": 0.002040933558769715, + "learning_rate": 8.606516290726817e-06, + "loss": 0.0, + "step": 3500 + }, + { + "epoch": 0.22562350969903977, + "grad_norm": 0.0007145762792561211, + "learning_rate": 8.605800214822771e-06, + "loss": 0.0, + "step": 3501 + }, + { + "epoch": 0.22568795514596893, + "grad_norm": 0.0016717906006389666, + "learning_rate": 8.605084138918726e-06, + "loss": 0.0, + "step": 3502 + }, + { + "epoch": 0.2257524005928981, + "grad_norm": 0.05478180477274337, + "learning_rate": 8.60436806301468e-06, + "loss": 0.0001, + "step": 3503 + }, + { + "epoch": 0.2258168460398273, + "grad_norm": 0.009749687123417545, + "learning_rate": 8.603651987110634e-06, + "loss": 0.0, + "step": 3504 + }, + { + "epoch": 0.22588129148675645, + "grad_norm": 0.023990730276947616, + "learning_rate": 8.602935911206589e-06, + "loss": 0.0, + "step": 3505 + }, + { + "epoch": 0.22594573693368564, + "grad_norm": 0.07996477268825748, + "learning_rate": 8.602219835302543e-06, + "loss": 0.0016, + "step": 3506 + }, + { + "epoch": 0.22601018238061482, + "grad_norm": 0.0019798777230370587, + "learning_rate": 8.601503759398497e-06, + "loss": 0.0, + "step": 3507 + }, + { + "epoch": 0.22607462782754398, + "grad_norm": 0.0003005708285917906, + "learning_rate": 8.600787683494451e-06, + "loss": 0.0, + "step": 3508 + }, + { + "epoch": 0.22613907327447316, + "grad_norm": 0.05497608294365255, + "learning_rate": 8.600071607590406e-06, + "loss": 0.0001, + "step": 3509 + }, + { + "epoch": 0.22620351872140232, + "grad_norm": 0.00847252292242908, + "learning_rate": 8.59935553168636e-06, + "loss": 0.0, + "step": 3510 + }, + { + "epoch": 0.2262679641683315, + "grad_norm": 0.0018614056583017003, + "learning_rate": 8.598639455782314e-06, + "loss": 0.0, + "step": 3511 + }, + { + "epoch": 0.2263324096152607, + "grad_norm": 0.005697144685217808, + "learning_rate": 8.597923379878269e-06, + "loss": 0.0, + "step": 3512 + }, + { + "epoch": 0.22639685506218984, + "grad_norm": 0.01307997836978058, + "learning_rate": 8.597207303974223e-06, + "loss": 0.0001, + "step": 3513 + }, + { + "epoch": 0.22646130050911903, + "grad_norm": 0.1307518360599829, + "learning_rate": 8.596491228070176e-06, + "loss": 0.0004, + "step": 3514 + }, + { + "epoch": 0.2265257459560482, + "grad_norm": 0.016693761174956682, + "learning_rate": 8.59577515216613e-06, + "loss": 0.0, + "step": 3515 + }, + { + "epoch": 0.22659019140297737, + "grad_norm": 0.033633965786489854, + "learning_rate": 8.595059076262084e-06, + "loss": 0.0001, + "step": 3516 + }, + { + "epoch": 0.22665463684990655, + "grad_norm": 0.034370924494014785, + "learning_rate": 8.594343000358038e-06, + "loss": 0.0, + "step": 3517 + }, + { + "epoch": 0.22671908229683574, + "grad_norm": 0.4692173535367172, + "learning_rate": 8.593626924453993e-06, + "loss": 0.002, + "step": 3518 + }, + { + "epoch": 0.2267835277437649, + "grad_norm": 0.16348664515859737, + "learning_rate": 8.592910848549947e-06, + "loss": 0.0003, + "step": 3519 + }, + { + "epoch": 0.22684797319069408, + "grad_norm": 0.02663825689310431, + "learning_rate": 8.592194772645901e-06, + "loss": 0.0002, + "step": 3520 + }, + { + "epoch": 0.22691241863762326, + "grad_norm": 0.23243357524753472, + "learning_rate": 8.591478696741856e-06, + "loss": 0.0002, + "step": 3521 + }, + { + "epoch": 0.22697686408455242, + "grad_norm": 0.00017325221716987894, + "learning_rate": 8.59076262083781e-06, + "loss": 0.0, + "step": 3522 + }, + { + "epoch": 0.2270413095314816, + "grad_norm": 0.0010475463938276882, + "learning_rate": 8.590046544933763e-06, + "loss": 0.0, + "step": 3523 + }, + { + "epoch": 0.22710575497841076, + "grad_norm": 0.0006956761215428136, + "learning_rate": 8.589330469029717e-06, + "loss": 0.0, + "step": 3524 + }, + { + "epoch": 0.22717020042533995, + "grad_norm": 0.026205720830062656, + "learning_rate": 8.588614393125671e-06, + "loss": 0.0003, + "step": 3525 + }, + { + "epoch": 0.22723464587226913, + "grad_norm": 0.007105951163493245, + "learning_rate": 8.587898317221625e-06, + "loss": 0.0, + "step": 3526 + }, + { + "epoch": 0.2272990913191983, + "grad_norm": 0.0004057145569787648, + "learning_rate": 8.58718224131758e-06, + "loss": 0.0, + "step": 3527 + }, + { + "epoch": 0.22736353676612747, + "grad_norm": 0.02197380725177132, + "learning_rate": 8.586466165413536e-06, + "loss": 0.0001, + "step": 3528 + }, + { + "epoch": 0.22742798221305666, + "grad_norm": 0.001348719214099292, + "learning_rate": 8.58575008950949e-06, + "loss": 0.0, + "step": 3529 + }, + { + "epoch": 0.2274924276599858, + "grad_norm": 0.010772461928954678, + "learning_rate": 8.585034013605443e-06, + "loss": 0.0001, + "step": 3530 + }, + { + "epoch": 0.227556873106915, + "grad_norm": 0.08330517966318159, + "learning_rate": 8.584317937701397e-06, + "loss": 0.0019, + "step": 3531 + }, + { + "epoch": 0.22762131855384418, + "grad_norm": 0.19010628847131272, + "learning_rate": 8.583601861797351e-06, + "loss": 0.0027, + "step": 3532 + }, + { + "epoch": 0.22768576400077334, + "grad_norm": 0.0011382736201224818, + "learning_rate": 8.582885785893306e-06, + "loss": 0.0, + "step": 3533 + }, + { + "epoch": 0.22775020944770252, + "grad_norm": 0.01510820616315032, + "learning_rate": 8.58216970998926e-06, + "loss": 0.0001, + "step": 3534 + }, + { + "epoch": 0.2278146548946317, + "grad_norm": 0.0022582360408176425, + "learning_rate": 8.581453634085214e-06, + "loss": 0.0, + "step": 3535 + }, + { + "epoch": 0.22787910034156086, + "grad_norm": 0.0024001167201978904, + "learning_rate": 8.580737558181168e-06, + "loss": 0.0, + "step": 3536 + }, + { + "epoch": 0.22794354578849005, + "grad_norm": 0.00014460807293517424, + "learning_rate": 8.580021482277123e-06, + "loss": 0.0, + "step": 3537 + }, + { + "epoch": 0.2280079912354192, + "grad_norm": 0.004371144356167792, + "learning_rate": 8.579305406373077e-06, + "loss": 0.0, + "step": 3538 + }, + { + "epoch": 0.2280724366823484, + "grad_norm": 0.12752316946926834, + "learning_rate": 8.57858933046903e-06, + "loss": 0.0012, + "step": 3539 + }, + { + "epoch": 0.22813688212927757, + "grad_norm": 6.635877081708909e-05, + "learning_rate": 8.577873254564984e-06, + "loss": 0.0, + "step": 3540 + }, + { + "epoch": 0.22820132757620673, + "grad_norm": 0.035709400460215875, + "learning_rate": 8.577157178660938e-06, + "loss": 0.0, + "step": 3541 + }, + { + "epoch": 0.22826577302313591, + "grad_norm": 0.043633021594940764, + "learning_rate": 8.576441102756893e-06, + "loss": 0.0, + "step": 3542 + }, + { + "epoch": 0.2283302184700651, + "grad_norm": 0.198884992954439, + "learning_rate": 8.575725026852847e-06, + "loss": 0.0002, + "step": 3543 + }, + { + "epoch": 0.22839466391699426, + "grad_norm": 0.03614280375678468, + "learning_rate": 8.575008950948801e-06, + "loss": 0.0001, + "step": 3544 + }, + { + "epoch": 0.22845910936392344, + "grad_norm": 0.0005592311449027554, + "learning_rate": 8.574292875044755e-06, + "loss": 0.0, + "step": 3545 + }, + { + "epoch": 0.22852355481085262, + "grad_norm": 0.0407157248262875, + "learning_rate": 8.57357679914071e-06, + "loss": 0.0001, + "step": 3546 + }, + { + "epoch": 0.22858800025778178, + "grad_norm": 0.04273274208048958, + "learning_rate": 8.572860723236664e-06, + "loss": 0.0001, + "step": 3547 + }, + { + "epoch": 0.22865244570471097, + "grad_norm": 0.00011219605860635525, + "learning_rate": 8.572144647332618e-06, + "loss": 0.0, + "step": 3548 + }, + { + "epoch": 0.22871689115164015, + "grad_norm": 0.0011935116431631536, + "learning_rate": 8.571428571428571e-06, + "loss": 0.0, + "step": 3549 + }, + { + "epoch": 0.2287813365985693, + "grad_norm": 0.041032109467459874, + "learning_rate": 8.570712495524525e-06, + "loss": 0.0005, + "step": 3550 + }, + { + "epoch": 0.2288457820454985, + "grad_norm": 0.0033587200385650448, + "learning_rate": 8.569996419620481e-06, + "loss": 0.0, + "step": 3551 + }, + { + "epoch": 0.22891022749242765, + "grad_norm": 0.25359755604274065, + "learning_rate": 8.569280343716436e-06, + "loss": 0.0005, + "step": 3552 + }, + { + "epoch": 0.22897467293935683, + "grad_norm": 0.00030970704197332206, + "learning_rate": 8.56856426781239e-06, + "loss": 0.0, + "step": 3553 + }, + { + "epoch": 0.22903911838628602, + "grad_norm": 0.003074699240476695, + "learning_rate": 8.567848191908344e-06, + "loss": 0.0, + "step": 3554 + }, + { + "epoch": 0.22910356383321517, + "grad_norm": 0.04174109869575229, + "learning_rate": 8.567132116004297e-06, + "loss": 0.0001, + "step": 3555 + }, + { + "epoch": 0.22916800928014436, + "grad_norm": 0.00035973275162782593, + "learning_rate": 8.566416040100251e-06, + "loss": 0.0, + "step": 3556 + }, + { + "epoch": 0.22923245472707354, + "grad_norm": 0.17487531971851106, + "learning_rate": 8.565699964196205e-06, + "loss": 0.0078, + "step": 3557 + }, + { + "epoch": 0.2292969001740027, + "grad_norm": 0.0021126300294926044, + "learning_rate": 8.56498388829216e-06, + "loss": 0.0, + "step": 3558 + }, + { + "epoch": 0.22936134562093188, + "grad_norm": 0.0005282738112423214, + "learning_rate": 8.564267812388114e-06, + "loss": 0.0, + "step": 3559 + }, + { + "epoch": 0.22942579106786107, + "grad_norm": 0.09196037584755035, + "learning_rate": 8.563551736484068e-06, + "loss": 0.0008, + "step": 3560 + }, + { + "epoch": 0.22949023651479022, + "grad_norm": 0.0020641696227278188, + "learning_rate": 8.562835660580023e-06, + "loss": 0.0, + "step": 3561 + }, + { + "epoch": 0.2295546819617194, + "grad_norm": 0.0005468917117046053, + "learning_rate": 8.562119584675977e-06, + "loss": 0.0, + "step": 3562 + }, + { + "epoch": 0.22961912740864857, + "grad_norm": 0.1214281701890994, + "learning_rate": 8.561403508771931e-06, + "loss": 0.0002, + "step": 3563 + }, + { + "epoch": 0.22968357285557775, + "grad_norm": 0.027091342936798053, + "learning_rate": 8.560687432867885e-06, + "loss": 0.0001, + "step": 3564 + }, + { + "epoch": 0.22974801830250693, + "grad_norm": 0.003703475196088253, + "learning_rate": 8.559971356963838e-06, + "loss": 0.0, + "step": 3565 + }, + { + "epoch": 0.2298124637494361, + "grad_norm": 0.0004199587784974058, + "learning_rate": 8.559255281059792e-06, + "loss": 0.0, + "step": 3566 + }, + { + "epoch": 0.22987690919636528, + "grad_norm": 2.4043078821272714, + "learning_rate": 8.558539205155747e-06, + "loss": 0.0121, + "step": 3567 + }, + { + "epoch": 0.22994135464329446, + "grad_norm": 0.00202259349133905, + "learning_rate": 8.557823129251701e-06, + "loss": 0.0, + "step": 3568 + }, + { + "epoch": 0.23000580009022362, + "grad_norm": 0.3955692008143024, + "learning_rate": 8.557107053347655e-06, + "loss": 0.0015, + "step": 3569 + }, + { + "epoch": 0.2300702455371528, + "grad_norm": 0.10374625174088346, + "learning_rate": 8.55639097744361e-06, + "loss": 0.0004, + "step": 3570 + }, + { + "epoch": 0.23013469098408199, + "grad_norm": 0.0004160911240873582, + "learning_rate": 8.555674901539564e-06, + "loss": 0.0, + "step": 3571 + }, + { + "epoch": 0.23019913643101114, + "grad_norm": 0.38638667095974427, + "learning_rate": 8.554958825635518e-06, + "loss": 0.0014, + "step": 3572 + }, + { + "epoch": 0.23026358187794033, + "grad_norm": 0.0017868331589009291, + "learning_rate": 8.554242749731472e-06, + "loss": 0.0, + "step": 3573 + }, + { + "epoch": 0.2303280273248695, + "grad_norm": 0.0008067980443222023, + "learning_rate": 8.553526673827427e-06, + "loss": 0.0, + "step": 3574 + }, + { + "epoch": 0.23039247277179867, + "grad_norm": 0.14120487685539893, + "learning_rate": 8.552810597923381e-06, + "loss": 0.0002, + "step": 3575 + }, + { + "epoch": 0.23045691821872785, + "grad_norm": 0.01688292958076883, + "learning_rate": 8.552094522019335e-06, + "loss": 0.0001, + "step": 3576 + }, + { + "epoch": 0.230521363665657, + "grad_norm": 0.016395282165946354, + "learning_rate": 8.55137844611529e-06, + "loss": 0.0, + "step": 3577 + }, + { + "epoch": 0.2305858091125862, + "grad_norm": 0.0002731364490621126, + "learning_rate": 8.550662370211244e-06, + "loss": 0.0, + "step": 3578 + }, + { + "epoch": 0.23065025455951538, + "grad_norm": 0.0024728902830418137, + "learning_rate": 8.549946294307198e-06, + "loss": 0.0, + "step": 3579 + }, + { + "epoch": 0.23071470000644453, + "grad_norm": 0.0009555167639653805, + "learning_rate": 8.549230218403152e-06, + "loss": 0.0, + "step": 3580 + }, + { + "epoch": 0.23077914545337372, + "grad_norm": 0.10064114749816049, + "learning_rate": 8.548514142499105e-06, + "loss": 0.0001, + "step": 3581 + }, + { + "epoch": 0.2308435909003029, + "grad_norm": 0.10534812510748648, + "learning_rate": 8.54779806659506e-06, + "loss": 0.0002, + "step": 3582 + }, + { + "epoch": 0.23090803634723206, + "grad_norm": 0.003143158927519917, + "learning_rate": 8.547081990691014e-06, + "loss": 0.0, + "step": 3583 + }, + { + "epoch": 0.23097248179416124, + "grad_norm": 0.0013283154727609836, + "learning_rate": 8.546365914786968e-06, + "loss": 0.0, + "step": 3584 + }, + { + "epoch": 0.23103692724109043, + "grad_norm": 0.0009492733557983951, + "learning_rate": 8.545649838882922e-06, + "loss": 0.0, + "step": 3585 + }, + { + "epoch": 0.23110137268801959, + "grad_norm": 0.000137084553934003, + "learning_rate": 8.544933762978877e-06, + "loss": 0.0, + "step": 3586 + }, + { + "epoch": 0.23116581813494877, + "grad_norm": 0.013543951428487857, + "learning_rate": 8.544217687074831e-06, + "loss": 0.0001, + "step": 3587 + }, + { + "epoch": 0.23123026358187795, + "grad_norm": 0.004368855218247446, + "learning_rate": 8.543501611170785e-06, + "loss": 0.0, + "step": 3588 + }, + { + "epoch": 0.2312947090288071, + "grad_norm": 0.08383096778560901, + "learning_rate": 8.54278553526674e-06, + "loss": 0.0001, + "step": 3589 + }, + { + "epoch": 0.2313591544757363, + "grad_norm": 0.014596576080533902, + "learning_rate": 8.542069459362694e-06, + "loss": 0.0001, + "step": 3590 + }, + { + "epoch": 0.23142359992266545, + "grad_norm": 0.003054285641641687, + "learning_rate": 8.541353383458646e-06, + "loss": 0.0, + "step": 3591 + }, + { + "epoch": 0.23148804536959464, + "grad_norm": 0.010529457621076216, + "learning_rate": 8.5406373075546e-06, + "loss": 0.0001, + "step": 3592 + }, + { + "epoch": 0.23155249081652382, + "grad_norm": 0.03386443048252686, + "learning_rate": 8.539921231650555e-06, + "loss": 0.0001, + "step": 3593 + }, + { + "epoch": 0.23161693626345298, + "grad_norm": 0.000382320509569171, + "learning_rate": 8.53920515574651e-06, + "loss": 0.0, + "step": 3594 + }, + { + "epoch": 0.23168138171038216, + "grad_norm": 0.03725659818404199, + "learning_rate": 8.538489079842464e-06, + "loss": 0.0, + "step": 3595 + }, + { + "epoch": 0.23174582715731135, + "grad_norm": 0.002715356890662232, + "learning_rate": 8.537773003938418e-06, + "loss": 0.0, + "step": 3596 + }, + { + "epoch": 0.2318102726042405, + "grad_norm": 0.0005005922412607236, + "learning_rate": 8.537056928034372e-06, + "loss": 0.0, + "step": 3597 + }, + { + "epoch": 0.2318747180511697, + "grad_norm": 0.23895372802173373, + "learning_rate": 8.536340852130326e-06, + "loss": 0.0005, + "step": 3598 + }, + { + "epoch": 0.23193916349809887, + "grad_norm": 0.0035535297376758264, + "learning_rate": 8.53562477622628e-06, + "loss": 0.0, + "step": 3599 + }, + { + "epoch": 0.23200360894502803, + "grad_norm": 1.192982066141991, + "learning_rate": 8.534908700322235e-06, + "loss": 0.0016, + "step": 3600 + }, + { + "epoch": 0.2320680543919572, + "grad_norm": 0.0006909154871493346, + "learning_rate": 8.53419262441819e-06, + "loss": 0.0, + "step": 3601 + }, + { + "epoch": 0.23213249983888637, + "grad_norm": 0.003394634640801154, + "learning_rate": 8.533476548514144e-06, + "loss": 0.0, + "step": 3602 + }, + { + "epoch": 0.23219694528581555, + "grad_norm": 0.16565397364526332, + "learning_rate": 8.532760472610098e-06, + "loss": 0.0029, + "step": 3603 + }, + { + "epoch": 0.23226139073274474, + "grad_norm": 0.0007924288655583169, + "learning_rate": 8.532044396706052e-06, + "loss": 0.0, + "step": 3604 + }, + { + "epoch": 0.2323258361796739, + "grad_norm": 0.0008002846613981609, + "learning_rate": 8.531328320802007e-06, + "loss": 0.0, + "step": 3605 + }, + { + "epoch": 0.23239028162660308, + "grad_norm": 0.028553978107853775, + "learning_rate": 8.530612244897961e-06, + "loss": 0.0001, + "step": 3606 + }, + { + "epoch": 0.23245472707353226, + "grad_norm": 0.018178990149575903, + "learning_rate": 8.529896168993913e-06, + "loss": 0.0001, + "step": 3607 + }, + { + "epoch": 0.23251917252046142, + "grad_norm": 0.026175255165739573, + "learning_rate": 8.529180093089868e-06, + "loss": 0.0, + "step": 3608 + }, + { + "epoch": 0.2325836179673906, + "grad_norm": 0.00863108323449065, + "learning_rate": 8.528464017185822e-06, + "loss": 0.0, + "step": 3609 + }, + { + "epoch": 0.2326480634143198, + "grad_norm": 0.00367795649645988, + "learning_rate": 8.527747941281776e-06, + "loss": 0.0, + "step": 3610 + }, + { + "epoch": 0.23271250886124895, + "grad_norm": 0.023703811777085085, + "learning_rate": 8.52703186537773e-06, + "loss": 0.0001, + "step": 3611 + }, + { + "epoch": 0.23277695430817813, + "grad_norm": 0.41825952843291414, + "learning_rate": 8.526315789473685e-06, + "loss": 0.0023, + "step": 3612 + }, + { + "epoch": 0.23284139975510731, + "grad_norm": 0.0002994700953356274, + "learning_rate": 8.52559971356964e-06, + "loss": 0.0, + "step": 3613 + }, + { + "epoch": 0.23290584520203647, + "grad_norm": 0.03925458351539499, + "learning_rate": 8.524883637665594e-06, + "loss": 0.0004, + "step": 3614 + }, + { + "epoch": 0.23297029064896566, + "grad_norm": 0.010272126599741443, + "learning_rate": 8.524167561761548e-06, + "loss": 0.0001, + "step": 3615 + }, + { + "epoch": 0.2330347360958948, + "grad_norm": 0.0008049833577190571, + "learning_rate": 8.5234514858575e-06, + "loss": 0.0, + "step": 3616 + }, + { + "epoch": 0.233099181542824, + "grad_norm": 0.00012036471779414499, + "learning_rate": 8.522735409953455e-06, + "loss": 0.0, + "step": 3617 + }, + { + "epoch": 0.23316362698975318, + "grad_norm": 0.0011984086577387977, + "learning_rate": 8.522019334049409e-06, + "loss": 0.0, + "step": 3618 + }, + { + "epoch": 0.23322807243668234, + "grad_norm": 0.0835934790491009, + "learning_rate": 8.521303258145363e-06, + "loss": 0.0006, + "step": 3619 + }, + { + "epoch": 0.23329251788361152, + "grad_norm": 0.08836004941212419, + "learning_rate": 8.520587182241318e-06, + "loss": 0.0002, + "step": 3620 + }, + { + "epoch": 0.2333569633305407, + "grad_norm": 0.18029412205686446, + "learning_rate": 8.519871106337274e-06, + "loss": 0.0038, + "step": 3621 + }, + { + "epoch": 0.23342140877746986, + "grad_norm": 0.00026738501476959226, + "learning_rate": 8.519155030433228e-06, + "loss": 0.0, + "step": 3622 + }, + { + "epoch": 0.23348585422439905, + "grad_norm": 0.005370139991002869, + "learning_rate": 8.51843895452918e-06, + "loss": 0.0001, + "step": 3623 + }, + { + "epoch": 0.23355029967132823, + "grad_norm": 0.2452207171892442, + "learning_rate": 8.517722878625135e-06, + "loss": 0.0005, + "step": 3624 + }, + { + "epoch": 0.2336147451182574, + "grad_norm": 0.0010621327473918718, + "learning_rate": 8.517006802721089e-06, + "loss": 0.0, + "step": 3625 + }, + { + "epoch": 0.23367919056518657, + "grad_norm": 0.00016653019881507862, + "learning_rate": 8.516290726817043e-06, + "loss": 0.0, + "step": 3626 + }, + { + "epoch": 0.23374363601211576, + "grad_norm": 0.06785625871955342, + "learning_rate": 8.515574650912998e-06, + "loss": 0.0005, + "step": 3627 + }, + { + "epoch": 0.23380808145904491, + "grad_norm": 1.7781309962381047, + "learning_rate": 8.514858575008952e-06, + "loss": 0.0043, + "step": 3628 + }, + { + "epoch": 0.2338725269059741, + "grad_norm": 0.00018354352976572647, + "learning_rate": 8.514142499104906e-06, + "loss": 0.0, + "step": 3629 + }, + { + "epoch": 0.23393697235290326, + "grad_norm": 0.0003587856822891483, + "learning_rate": 8.51342642320086e-06, + "loss": 0.0, + "step": 3630 + }, + { + "epoch": 0.23400141779983244, + "grad_norm": 0.003481401644091111, + "learning_rate": 8.512710347296815e-06, + "loss": 0.0, + "step": 3631 + }, + { + "epoch": 0.23406586324676162, + "grad_norm": 0.0014548914336011787, + "learning_rate": 8.511994271392768e-06, + "loss": 0.0, + "step": 3632 + }, + { + "epoch": 0.23413030869369078, + "grad_norm": 0.02106593887574402, + "learning_rate": 8.511278195488722e-06, + "loss": 0.0001, + "step": 3633 + }, + { + "epoch": 0.23419475414061997, + "grad_norm": 0.1377033136626864, + "learning_rate": 8.510562119584676e-06, + "loss": 0.002, + "step": 3634 + }, + { + "epoch": 0.23425919958754915, + "grad_norm": 0.04813451217798247, + "learning_rate": 8.50984604368063e-06, + "loss": 0.0002, + "step": 3635 + }, + { + "epoch": 0.2343236450344783, + "grad_norm": 0.012981266824628336, + "learning_rate": 8.509129967776585e-06, + "loss": 0.0, + "step": 3636 + }, + { + "epoch": 0.2343880904814075, + "grad_norm": 0.12003216797220177, + "learning_rate": 8.508413891872539e-06, + "loss": 0.0001, + "step": 3637 + }, + { + "epoch": 0.23445253592833667, + "grad_norm": 0.003240766907488695, + "learning_rate": 8.507697815968493e-06, + "loss": 0.0, + "step": 3638 + }, + { + "epoch": 0.23451698137526583, + "grad_norm": 0.07065803673474678, + "learning_rate": 8.506981740064448e-06, + "loss": 0.0003, + "step": 3639 + }, + { + "epoch": 0.23458142682219502, + "grad_norm": 0.8096147071804877, + "learning_rate": 8.506265664160402e-06, + "loss": 0.0021, + "step": 3640 + }, + { + "epoch": 0.23464587226912417, + "grad_norm": 0.00021237418484407522, + "learning_rate": 8.505549588256356e-06, + "loss": 0.0, + "step": 3641 + }, + { + "epoch": 0.23471031771605336, + "grad_norm": 0.002760565859295997, + "learning_rate": 8.504833512352309e-06, + "loss": 0.0, + "step": 3642 + }, + { + "epoch": 0.23477476316298254, + "grad_norm": 0.004335590145388964, + "learning_rate": 8.504117436448263e-06, + "loss": 0.0001, + "step": 3643 + }, + { + "epoch": 0.2348392086099117, + "grad_norm": 0.3139593026011861, + "learning_rate": 8.503401360544217e-06, + "loss": 0.0005, + "step": 3644 + }, + { + "epoch": 0.23490365405684088, + "grad_norm": 0.15569056856024935, + "learning_rate": 8.502685284640173e-06, + "loss": 0.0021, + "step": 3645 + }, + { + "epoch": 0.23496809950377007, + "grad_norm": 0.0906748991919936, + "learning_rate": 8.501969208736128e-06, + "loss": 0.0001, + "step": 3646 + }, + { + "epoch": 0.23503254495069922, + "grad_norm": 0.014439834483945311, + "learning_rate": 8.501253132832082e-06, + "loss": 0.0001, + "step": 3647 + }, + { + "epoch": 0.2350969903976284, + "grad_norm": 0.019942436114608407, + "learning_rate": 8.500537056928035e-06, + "loss": 0.0001, + "step": 3648 + }, + { + "epoch": 0.2351614358445576, + "grad_norm": 0.0053793905060603225, + "learning_rate": 8.499820981023989e-06, + "loss": 0.0, + "step": 3649 + }, + { + "epoch": 0.23522588129148675, + "grad_norm": 0.00658721510723763, + "learning_rate": 8.499104905119943e-06, + "loss": 0.0001, + "step": 3650 + }, + { + "epoch": 0.23529032673841593, + "grad_norm": 0.0024141843857270616, + "learning_rate": 8.498388829215898e-06, + "loss": 0.0, + "step": 3651 + }, + { + "epoch": 0.23535477218534512, + "grad_norm": 0.00412149132628881, + "learning_rate": 8.497672753311852e-06, + "loss": 0.0, + "step": 3652 + }, + { + "epoch": 0.23541921763227427, + "grad_norm": 0.0017195526767637631, + "learning_rate": 8.496956677407806e-06, + "loss": 0.0, + "step": 3653 + }, + { + "epoch": 0.23548366307920346, + "grad_norm": 0.0001600318501965816, + "learning_rate": 8.49624060150376e-06, + "loss": 0.0, + "step": 3654 + }, + { + "epoch": 0.23554810852613262, + "grad_norm": 0.0015997134786524389, + "learning_rate": 8.495524525599715e-06, + "loss": 0.0, + "step": 3655 + }, + { + "epoch": 0.2356125539730618, + "grad_norm": 0.00019996158198574133, + "learning_rate": 8.494808449695669e-06, + "loss": 0.0, + "step": 3656 + }, + { + "epoch": 0.23567699941999098, + "grad_norm": 0.022086918620967465, + "learning_rate": 8.494092373791623e-06, + "loss": 0.0001, + "step": 3657 + }, + { + "epoch": 0.23574144486692014, + "grad_norm": 0.010196585178414033, + "learning_rate": 8.493376297887576e-06, + "loss": 0.0, + "step": 3658 + }, + { + "epoch": 0.23580589031384933, + "grad_norm": 0.006909177117970666, + "learning_rate": 8.49266022198353e-06, + "loss": 0.0, + "step": 3659 + }, + { + "epoch": 0.2358703357607785, + "grad_norm": 2.2550082099579014, + "learning_rate": 8.491944146079485e-06, + "loss": 0.015, + "step": 3660 + }, + { + "epoch": 0.23593478120770767, + "grad_norm": 0.023382369159834774, + "learning_rate": 8.491228070175439e-06, + "loss": 0.0002, + "step": 3661 + }, + { + "epoch": 0.23599922665463685, + "grad_norm": 0.6735246278908694, + "learning_rate": 8.490511994271393e-06, + "loss": 0.0021, + "step": 3662 + }, + { + "epoch": 0.23606367210156604, + "grad_norm": 0.0016992829250492205, + "learning_rate": 8.489795918367347e-06, + "loss": 0.0, + "step": 3663 + }, + { + "epoch": 0.2361281175484952, + "grad_norm": 0.14514165931232095, + "learning_rate": 8.489079842463302e-06, + "loss": 0.0003, + "step": 3664 + }, + { + "epoch": 0.23619256299542438, + "grad_norm": 0.056902135808290705, + "learning_rate": 8.488363766559256e-06, + "loss": 0.0006, + "step": 3665 + }, + { + "epoch": 0.23625700844235356, + "grad_norm": 0.00035760265499879115, + "learning_rate": 8.48764769065521e-06, + "loss": 0.0, + "step": 3666 + }, + { + "epoch": 0.23632145388928272, + "grad_norm": 0.015011496814008625, + "learning_rate": 8.486931614751165e-06, + "loss": 0.0001, + "step": 3667 + }, + { + "epoch": 0.2363858993362119, + "grad_norm": 0.00026337167364298936, + "learning_rate": 8.486215538847119e-06, + "loss": 0.0, + "step": 3668 + }, + { + "epoch": 0.23645034478314106, + "grad_norm": 0.0029167551721233755, + "learning_rate": 8.485499462943073e-06, + "loss": 0.0, + "step": 3669 + }, + { + "epoch": 0.23651479023007024, + "grad_norm": 0.01064145374267631, + "learning_rate": 8.484783387039028e-06, + "loss": 0.0, + "step": 3670 + }, + { + "epoch": 0.23657923567699943, + "grad_norm": 0.002452641484903878, + "learning_rate": 8.484067311134982e-06, + "loss": 0.0, + "step": 3671 + }, + { + "epoch": 0.23664368112392858, + "grad_norm": 0.009032862405537249, + "learning_rate": 8.483351235230936e-06, + "loss": 0.0001, + "step": 3672 + }, + { + "epoch": 0.23670812657085777, + "grad_norm": 0.000362237704258534, + "learning_rate": 8.48263515932689e-06, + "loss": 0.0, + "step": 3673 + }, + { + "epoch": 0.23677257201778695, + "grad_norm": 0.0007830415697379507, + "learning_rate": 8.481919083422843e-06, + "loss": 0.0, + "step": 3674 + }, + { + "epoch": 0.2368370174647161, + "grad_norm": 0.0008006848452688169, + "learning_rate": 8.481203007518797e-06, + "loss": 0.0, + "step": 3675 + }, + { + "epoch": 0.2369014629116453, + "grad_norm": 1.025715803628338, + "learning_rate": 8.480486931614752e-06, + "loss": 0.005, + "step": 3676 + }, + { + "epoch": 0.23696590835857448, + "grad_norm": 0.06137980239797591, + "learning_rate": 8.479770855710706e-06, + "loss": 0.0002, + "step": 3677 + }, + { + "epoch": 0.23703035380550364, + "grad_norm": 0.37055178769912805, + "learning_rate": 8.47905477980666e-06, + "loss": 0.0035, + "step": 3678 + }, + { + "epoch": 0.23709479925243282, + "grad_norm": 0.00016133238319453695, + "learning_rate": 8.478338703902615e-06, + "loss": 0.0, + "step": 3679 + }, + { + "epoch": 0.23715924469936198, + "grad_norm": 0.45889901524037224, + "learning_rate": 8.477622627998569e-06, + "loss": 0.0006, + "step": 3680 + }, + { + "epoch": 0.23722369014629116, + "grad_norm": 0.04829375159957358, + "learning_rate": 8.476906552094523e-06, + "loss": 0.0001, + "step": 3681 + }, + { + "epoch": 0.23728813559322035, + "grad_norm": 0.04483302025827112, + "learning_rate": 8.476190476190477e-06, + "loss": 0.0, + "step": 3682 + }, + { + "epoch": 0.2373525810401495, + "grad_norm": 0.006915118970124431, + "learning_rate": 8.475474400286432e-06, + "loss": 0.0015, + "step": 3683 + }, + { + "epoch": 0.2374170264870787, + "grad_norm": 0.029743070065202375, + "learning_rate": 8.474758324382384e-06, + "loss": 0.0001, + "step": 3684 + }, + { + "epoch": 0.23748147193400787, + "grad_norm": 0.010346678082478153, + "learning_rate": 8.474042248478339e-06, + "loss": 0.0001, + "step": 3685 + }, + { + "epoch": 0.23754591738093703, + "grad_norm": 0.003808709571335096, + "learning_rate": 8.473326172574293e-06, + "loss": 0.0, + "step": 3686 + }, + { + "epoch": 0.2376103628278662, + "grad_norm": 0.03439312438102102, + "learning_rate": 8.472610096670247e-06, + "loss": 0.0001, + "step": 3687 + }, + { + "epoch": 0.2376748082747954, + "grad_norm": 0.0006371055023281539, + "learning_rate": 8.471894020766201e-06, + "loss": 0.0, + "step": 3688 + }, + { + "epoch": 0.23773925372172455, + "grad_norm": 0.008961421343413054, + "learning_rate": 8.471177944862156e-06, + "loss": 0.0, + "step": 3689 + }, + { + "epoch": 0.23780369916865374, + "grad_norm": 0.01069256581449394, + "learning_rate": 8.47046186895811e-06, + "loss": 0.0, + "step": 3690 + }, + { + "epoch": 0.23786814461558292, + "grad_norm": 0.07071569525956135, + "learning_rate": 8.469745793054064e-06, + "loss": 0.0005, + "step": 3691 + }, + { + "epoch": 0.23793259006251208, + "grad_norm": 0.001481707000711012, + "learning_rate": 8.469029717150019e-06, + "loss": 0.0, + "step": 3692 + }, + { + "epoch": 0.23799703550944126, + "grad_norm": 0.02562734064927472, + "learning_rate": 8.468313641245973e-06, + "loss": 0.0004, + "step": 3693 + }, + { + "epoch": 0.23806148095637042, + "grad_norm": 0.009852533383060125, + "learning_rate": 8.467597565341927e-06, + "loss": 0.0, + "step": 3694 + }, + { + "epoch": 0.2381259264032996, + "grad_norm": 0.024841071931428666, + "learning_rate": 8.466881489437882e-06, + "loss": 0.0, + "step": 3695 + }, + { + "epoch": 0.2381903718502288, + "grad_norm": 0.0019675953712899397, + "learning_rate": 8.466165413533836e-06, + "loss": 0.0, + "step": 3696 + }, + { + "epoch": 0.23825481729715794, + "grad_norm": 0.005357394982959551, + "learning_rate": 8.46544933762979e-06, + "loss": 0.0001, + "step": 3697 + }, + { + "epoch": 0.23831926274408713, + "grad_norm": 0.018464397355722775, + "learning_rate": 8.464733261725744e-06, + "loss": 0.0002, + "step": 3698 + }, + { + "epoch": 0.2383837081910163, + "grad_norm": 0.030809908730295932, + "learning_rate": 8.464017185821699e-06, + "loss": 0.0004, + "step": 3699 + }, + { + "epoch": 0.23844815363794547, + "grad_norm": 0.0016479319719454062, + "learning_rate": 8.463301109917651e-06, + "loss": 0.0, + "step": 3700 + }, + { + "epoch": 0.23851259908487465, + "grad_norm": 0.0053921943411062425, + "learning_rate": 8.462585034013606e-06, + "loss": 0.0, + "step": 3701 + }, + { + "epoch": 0.23857704453180384, + "grad_norm": 0.010372835272828807, + "learning_rate": 8.46186895810956e-06, + "loss": 0.0, + "step": 3702 + }, + { + "epoch": 0.238641489978733, + "grad_norm": 0.009654674728061838, + "learning_rate": 8.461152882205514e-06, + "loss": 0.0, + "step": 3703 + }, + { + "epoch": 0.23870593542566218, + "grad_norm": 0.0007935941029292266, + "learning_rate": 8.460436806301469e-06, + "loss": 0.0, + "step": 3704 + }, + { + "epoch": 0.23877038087259136, + "grad_norm": 0.0005360464652635607, + "learning_rate": 8.459720730397423e-06, + "loss": 0.0, + "step": 3705 + }, + { + "epoch": 0.23883482631952052, + "grad_norm": 0.0045528072264818704, + "learning_rate": 8.459004654493377e-06, + "loss": 0.0, + "step": 3706 + }, + { + "epoch": 0.2388992717664497, + "grad_norm": 0.0004918566041346934, + "learning_rate": 8.458288578589331e-06, + "loss": 0.0, + "step": 3707 + }, + { + "epoch": 0.23896371721337886, + "grad_norm": 0.01280604837715533, + "learning_rate": 8.457572502685286e-06, + "loss": 0.0, + "step": 3708 + }, + { + "epoch": 0.23902816266030805, + "grad_norm": 0.014364710962126858, + "learning_rate": 8.456856426781238e-06, + "loss": 0.0001, + "step": 3709 + }, + { + "epoch": 0.23909260810723723, + "grad_norm": 0.002629268415838148, + "learning_rate": 8.456140350877193e-06, + "loss": 0.0, + "step": 3710 + }, + { + "epoch": 0.2391570535541664, + "grad_norm": 0.07447493721494977, + "learning_rate": 8.455424274973147e-06, + "loss": 0.0008, + "step": 3711 + }, + { + "epoch": 0.23922149900109557, + "grad_norm": 0.5730290447183728, + "learning_rate": 8.454708199069101e-06, + "loss": 0.0059, + "step": 3712 + }, + { + "epoch": 0.23928594444802476, + "grad_norm": 0.0012635572436632849, + "learning_rate": 8.453992123165056e-06, + "loss": 0.0, + "step": 3713 + }, + { + "epoch": 0.2393503898949539, + "grad_norm": 0.024047417080047616, + "learning_rate": 8.45327604726101e-06, + "loss": 0.0, + "step": 3714 + }, + { + "epoch": 0.2394148353418831, + "grad_norm": 0.2183628402735261, + "learning_rate": 8.452559971356966e-06, + "loss": 0.0009, + "step": 3715 + }, + { + "epoch": 0.23947928078881228, + "grad_norm": 0.0006938746252782697, + "learning_rate": 8.451843895452918e-06, + "loss": 0.0, + "step": 3716 + }, + { + "epoch": 0.23954372623574144, + "grad_norm": 0.0038200120856466476, + "learning_rate": 8.451127819548873e-06, + "loss": 0.0, + "step": 3717 + }, + { + "epoch": 0.23960817168267062, + "grad_norm": 0.21408100848688394, + "learning_rate": 8.450411743644827e-06, + "loss": 0.0116, + "step": 3718 + }, + { + "epoch": 0.2396726171295998, + "grad_norm": 0.0025670123515598884, + "learning_rate": 8.449695667740781e-06, + "loss": 0.0, + "step": 3719 + }, + { + "epoch": 0.23973706257652896, + "grad_norm": 0.017562171034058886, + "learning_rate": 8.448979591836736e-06, + "loss": 0.0, + "step": 3720 + }, + { + "epoch": 0.23980150802345815, + "grad_norm": 0.0006715029546785951, + "learning_rate": 8.44826351593269e-06, + "loss": 0.0, + "step": 3721 + }, + { + "epoch": 0.2398659534703873, + "grad_norm": 0.03602618355161491, + "learning_rate": 8.447547440028644e-06, + "loss": 0.0001, + "step": 3722 + }, + { + "epoch": 0.2399303989173165, + "grad_norm": 0.018207445749766946, + "learning_rate": 8.446831364124599e-06, + "loss": 0.0001, + "step": 3723 + }, + { + "epoch": 0.23999484436424567, + "grad_norm": 0.12231537824269463, + "learning_rate": 8.446115288220553e-06, + "loss": 0.0002, + "step": 3724 + }, + { + "epoch": 0.24005928981117483, + "grad_norm": 0.11532271765071904, + "learning_rate": 8.445399212316505e-06, + "loss": 0.0019, + "step": 3725 + }, + { + "epoch": 0.24012373525810402, + "grad_norm": 2.264262664560703, + "learning_rate": 8.44468313641246e-06, + "loss": 0.0274, + "step": 3726 + }, + { + "epoch": 0.2401881807050332, + "grad_norm": 0.0012899088975698734, + "learning_rate": 8.443967060508414e-06, + "loss": 0.0, + "step": 3727 + }, + { + "epoch": 0.24025262615196236, + "grad_norm": 0.01366843452761101, + "learning_rate": 8.443250984604368e-06, + "loss": 0.0, + "step": 3728 + }, + { + "epoch": 0.24031707159889154, + "grad_norm": 0.15249607222806652, + "learning_rate": 8.442534908700323e-06, + "loss": 0.0006, + "step": 3729 + }, + { + "epoch": 0.24038151704582073, + "grad_norm": 0.021124642403800617, + "learning_rate": 8.441818832796277e-06, + "loss": 0.0, + "step": 3730 + }, + { + "epoch": 0.24044596249274988, + "grad_norm": 0.12230660647504508, + "learning_rate": 8.441102756892231e-06, + "loss": 0.0011, + "step": 3731 + }, + { + "epoch": 0.24051040793967907, + "grad_norm": 0.9040274496763755, + "learning_rate": 8.440386680988186e-06, + "loss": 0.0071, + "step": 3732 + }, + { + "epoch": 0.24057485338660822, + "grad_norm": 0.0016440136482255797, + "learning_rate": 8.43967060508414e-06, + "loss": 0.0, + "step": 3733 + }, + { + "epoch": 0.2406392988335374, + "grad_norm": 0.003047586958538098, + "learning_rate": 8.438954529180094e-06, + "loss": 0.0, + "step": 3734 + }, + { + "epoch": 0.2407037442804666, + "grad_norm": 0.0009673710376870563, + "learning_rate": 8.438238453276047e-06, + "loss": 0.0, + "step": 3735 + }, + { + "epoch": 0.24076818972739575, + "grad_norm": 0.016850687581852576, + "learning_rate": 8.437522377372001e-06, + "loss": 0.0001, + "step": 3736 + }, + { + "epoch": 0.24083263517432493, + "grad_norm": 0.007526774476757326, + "learning_rate": 8.436806301467955e-06, + "loss": 0.0001, + "step": 3737 + }, + { + "epoch": 0.24089708062125412, + "grad_norm": 0.13067564235859247, + "learning_rate": 8.436090225563911e-06, + "loss": 0.0003, + "step": 3738 + }, + { + "epoch": 0.24096152606818327, + "grad_norm": 0.10502113619162073, + "learning_rate": 8.435374149659866e-06, + "loss": 0.0004, + "step": 3739 + }, + { + "epoch": 0.24102597151511246, + "grad_norm": 0.05960794534318693, + "learning_rate": 8.43465807375582e-06, + "loss": 0.0001, + "step": 3740 + }, + { + "epoch": 0.24109041696204164, + "grad_norm": 0.04238541013913969, + "learning_rate": 8.433941997851773e-06, + "loss": 0.0003, + "step": 3741 + }, + { + "epoch": 0.2411548624089708, + "grad_norm": 0.018773961960864012, + "learning_rate": 8.433225921947727e-06, + "loss": 0.0016, + "step": 3742 + }, + { + "epoch": 0.24121930785589998, + "grad_norm": 0.002479640335606827, + "learning_rate": 8.432509846043681e-06, + "loss": 0.0, + "step": 3743 + }, + { + "epoch": 0.24128375330282917, + "grad_norm": 0.06248413383738307, + "learning_rate": 8.431793770139635e-06, + "loss": 0.0001, + "step": 3744 + }, + { + "epoch": 0.24134819874975832, + "grad_norm": 0.0005678648591910123, + "learning_rate": 8.43107769423559e-06, + "loss": 0.0, + "step": 3745 + }, + { + "epoch": 0.2414126441966875, + "grad_norm": 0.0013594156974299353, + "learning_rate": 8.430361618331544e-06, + "loss": 0.0, + "step": 3746 + }, + { + "epoch": 0.24147708964361667, + "grad_norm": 0.0005139733092100992, + "learning_rate": 8.429645542427498e-06, + "loss": 0.0, + "step": 3747 + }, + { + "epoch": 0.24154153509054585, + "grad_norm": 0.08670038037296418, + "learning_rate": 8.428929466523453e-06, + "loss": 0.0001, + "step": 3748 + }, + { + "epoch": 0.24160598053747503, + "grad_norm": 0.8153278218675962, + "learning_rate": 8.428213390619407e-06, + "loss": 0.0049, + "step": 3749 + }, + { + "epoch": 0.2416704259844042, + "grad_norm": 0.0035986476689472626, + "learning_rate": 8.427497314715361e-06, + "loss": 0.0, + "step": 3750 + }, + { + "epoch": 0.24173487143133338, + "grad_norm": 0.00044391004078189354, + "learning_rate": 8.426781238811314e-06, + "loss": 0.0, + "step": 3751 + }, + { + "epoch": 0.24179931687826256, + "grad_norm": 0.01262247356549068, + "learning_rate": 8.426065162907268e-06, + "loss": 0.0001, + "step": 3752 + }, + { + "epoch": 0.24186376232519172, + "grad_norm": 0.02262652431529532, + "learning_rate": 8.425349087003222e-06, + "loss": 0.0, + "step": 3753 + }, + { + "epoch": 0.2419282077721209, + "grad_norm": 0.0008362116585841875, + "learning_rate": 8.424633011099177e-06, + "loss": 0.0, + "step": 3754 + }, + { + "epoch": 0.24199265321905009, + "grad_norm": 0.005311226408071963, + "learning_rate": 8.423916935195131e-06, + "loss": 0.0, + "step": 3755 + }, + { + "epoch": 0.24205709866597924, + "grad_norm": 7.897391349834373e-05, + "learning_rate": 8.423200859291085e-06, + "loss": 0.0, + "step": 3756 + }, + { + "epoch": 0.24212154411290843, + "grad_norm": 0.007048575324203542, + "learning_rate": 8.42248478338704e-06, + "loss": 0.0001, + "step": 3757 + }, + { + "epoch": 0.2421859895598376, + "grad_norm": 0.002582242938985846, + "learning_rate": 8.421768707482994e-06, + "loss": 0.0, + "step": 3758 + }, + { + "epoch": 0.24225043500676677, + "grad_norm": 0.0048435534680228, + "learning_rate": 8.421052631578948e-06, + "loss": 0.0, + "step": 3759 + }, + { + "epoch": 0.24231488045369595, + "grad_norm": 0.4563282507589461, + "learning_rate": 8.420336555674903e-06, + "loss": 0.0009, + "step": 3760 + }, + { + "epoch": 0.2423793259006251, + "grad_norm": 0.002779418637291888, + "learning_rate": 8.419620479770857e-06, + "loss": 0.0, + "step": 3761 + }, + { + "epoch": 0.2424437713475543, + "grad_norm": 0.0027071330977519806, + "learning_rate": 8.418904403866811e-06, + "loss": 0.0, + "step": 3762 + }, + { + "epoch": 0.24250821679448348, + "grad_norm": 0.010813946842943904, + "learning_rate": 8.418188327962765e-06, + "loss": 0.0001, + "step": 3763 + }, + { + "epoch": 0.24257266224141263, + "grad_norm": 0.4170509314868395, + "learning_rate": 8.41747225205872e-06, + "loss": 0.0019, + "step": 3764 + }, + { + "epoch": 0.24263710768834182, + "grad_norm": 0.005691707930402684, + "learning_rate": 8.416756176154674e-06, + "loss": 0.0, + "step": 3765 + }, + { + "epoch": 0.242701553135271, + "grad_norm": 0.03813354713701013, + "learning_rate": 8.416040100250628e-06, + "loss": 0.0004, + "step": 3766 + }, + { + "epoch": 0.24276599858220016, + "grad_norm": 0.032524933022695664, + "learning_rate": 8.415324024346581e-06, + "loss": 0.0003, + "step": 3767 + }, + { + "epoch": 0.24283044402912934, + "grad_norm": 0.004065155547747669, + "learning_rate": 8.414607948442535e-06, + "loss": 0.0, + "step": 3768 + }, + { + "epoch": 0.24289488947605853, + "grad_norm": 0.25707170231896603, + "learning_rate": 8.41389187253849e-06, + "loss": 0.0003, + "step": 3769 + }, + { + "epoch": 0.24295933492298769, + "grad_norm": 0.9013399427931535, + "learning_rate": 8.413175796634444e-06, + "loss": 0.0075, + "step": 3770 + }, + { + "epoch": 0.24302378036991687, + "grad_norm": 0.0021442310344541708, + "learning_rate": 8.412459720730398e-06, + "loss": 0.0, + "step": 3771 + }, + { + "epoch": 0.24308822581684603, + "grad_norm": 0.10145635743766157, + "learning_rate": 8.411743644826352e-06, + "loss": 0.0005, + "step": 3772 + }, + { + "epoch": 0.2431526712637752, + "grad_norm": 0.0016105101427803788, + "learning_rate": 8.411027568922307e-06, + "loss": 0.0, + "step": 3773 + }, + { + "epoch": 0.2432171167107044, + "grad_norm": 0.00012493315542511565, + "learning_rate": 8.410311493018261e-06, + "loss": 0.0, + "step": 3774 + }, + { + "epoch": 0.24328156215763355, + "grad_norm": 0.019223677016477023, + "learning_rate": 8.409595417114215e-06, + "loss": 0.0001, + "step": 3775 + }, + { + "epoch": 0.24334600760456274, + "grad_norm": 0.005873850051817299, + "learning_rate": 8.40887934121017e-06, + "loss": 0.0001, + "step": 3776 + }, + { + "epoch": 0.24341045305149192, + "grad_norm": 0.011179190813958233, + "learning_rate": 8.408163265306122e-06, + "loss": 0.0001, + "step": 3777 + }, + { + "epoch": 0.24347489849842108, + "grad_norm": 0.0015477369522274402, + "learning_rate": 8.407447189402077e-06, + "loss": 0.0, + "step": 3778 + }, + { + "epoch": 0.24353934394535026, + "grad_norm": 0.0017133007265412168, + "learning_rate": 8.40673111349803e-06, + "loss": 0.0, + "step": 3779 + }, + { + "epoch": 0.24360378939227945, + "grad_norm": 0.0008757344192784564, + "learning_rate": 8.406015037593985e-06, + "loss": 0.0, + "step": 3780 + }, + { + "epoch": 0.2436682348392086, + "grad_norm": 0.007403871914327963, + "learning_rate": 8.40529896168994e-06, + "loss": 0.0, + "step": 3781 + }, + { + "epoch": 0.2437326802861378, + "grad_norm": 0.010201067541391566, + "learning_rate": 8.404582885785894e-06, + "loss": 0.0, + "step": 3782 + }, + { + "epoch": 0.24379712573306697, + "grad_norm": 0.015937982305306662, + "learning_rate": 8.403866809881848e-06, + "loss": 0.0, + "step": 3783 + }, + { + "epoch": 0.24386157117999613, + "grad_norm": 0.028991708253557905, + "learning_rate": 8.403150733977802e-06, + "loss": 0.0002, + "step": 3784 + }, + { + "epoch": 0.2439260166269253, + "grad_norm": 0.04578634280546854, + "learning_rate": 8.402434658073757e-06, + "loss": 0.0003, + "step": 3785 + }, + { + "epoch": 0.24399046207385447, + "grad_norm": 0.003052409221744512, + "learning_rate": 8.401718582169711e-06, + "loss": 0.0, + "step": 3786 + }, + { + "epoch": 0.24405490752078365, + "grad_norm": 0.010366611251185617, + "learning_rate": 8.401002506265665e-06, + "loss": 0.0, + "step": 3787 + }, + { + "epoch": 0.24411935296771284, + "grad_norm": 0.012989847995797284, + "learning_rate": 8.40028643036162e-06, + "loss": 0.0001, + "step": 3788 + }, + { + "epoch": 0.244183798414642, + "grad_norm": 1.509528493086036, + "learning_rate": 8.399570354457574e-06, + "loss": 0.0121, + "step": 3789 + }, + { + "epoch": 0.24424824386157118, + "grad_norm": 0.13255768762843395, + "learning_rate": 8.398854278553528e-06, + "loss": 0.0015, + "step": 3790 + }, + { + "epoch": 0.24431268930850036, + "grad_norm": 0.07067210243967802, + "learning_rate": 8.398138202649482e-06, + "loss": 0.0003, + "step": 3791 + }, + { + "epoch": 0.24437713475542952, + "grad_norm": 0.014960046766105765, + "learning_rate": 8.397422126745437e-06, + "loss": 0.0, + "step": 3792 + }, + { + "epoch": 0.2444415802023587, + "grad_norm": 0.008710688093756856, + "learning_rate": 8.39670605084139e-06, + "loss": 0.0, + "step": 3793 + }, + { + "epoch": 0.2445060256492879, + "grad_norm": 0.255980528473747, + "learning_rate": 8.395989974937344e-06, + "loss": 0.0017, + "step": 3794 + }, + { + "epoch": 0.24457047109621705, + "grad_norm": 0.00026442442359102594, + "learning_rate": 8.395273899033298e-06, + "loss": 0.0, + "step": 3795 + }, + { + "epoch": 0.24463491654314623, + "grad_norm": 0.3572974900567217, + "learning_rate": 8.394557823129252e-06, + "loss": 0.0004, + "step": 3796 + }, + { + "epoch": 0.24469936199007541, + "grad_norm": 0.04774931035227209, + "learning_rate": 8.393841747225207e-06, + "loss": 0.0007, + "step": 3797 + }, + { + "epoch": 0.24476380743700457, + "grad_norm": 0.10715923279977019, + "learning_rate": 8.39312567132116e-06, + "loss": 0.0005, + "step": 3798 + }, + { + "epoch": 0.24482825288393376, + "grad_norm": 0.0012820152532673366, + "learning_rate": 8.392409595417115e-06, + "loss": 0.0, + "step": 3799 + }, + { + "epoch": 0.2448926983308629, + "grad_norm": 0.3422857221960594, + "learning_rate": 8.39169351951307e-06, + "loss": 0.0016, + "step": 3800 + }, + { + "epoch": 0.2449571437777921, + "grad_norm": 0.004484513715100537, + "learning_rate": 8.390977443609024e-06, + "loss": 0.0, + "step": 3801 + }, + { + "epoch": 0.24502158922472128, + "grad_norm": 1.0571975791421366, + "learning_rate": 8.390261367704976e-06, + "loss": 0.0039, + "step": 3802 + }, + { + "epoch": 0.24508603467165044, + "grad_norm": 0.0014573274750767575, + "learning_rate": 8.38954529180093e-06, + "loss": 0.0, + "step": 3803 + }, + { + "epoch": 0.24515048011857962, + "grad_norm": 0.018570197831693735, + "learning_rate": 8.388829215896885e-06, + "loss": 0.0, + "step": 3804 + }, + { + "epoch": 0.2452149255655088, + "grad_norm": 0.0013457453855705087, + "learning_rate": 8.38811313999284e-06, + "loss": 0.0, + "step": 3805 + }, + { + "epoch": 0.24527937101243796, + "grad_norm": 0.0009432171359150775, + "learning_rate": 8.387397064088793e-06, + "loss": 0.0, + "step": 3806 + }, + { + "epoch": 0.24534381645936715, + "grad_norm": 0.04670797332971043, + "learning_rate": 8.386680988184748e-06, + "loss": 0.0003, + "step": 3807 + }, + { + "epoch": 0.24540826190629633, + "grad_norm": 0.0033869906812700606, + "learning_rate": 8.385964912280704e-06, + "loss": 0.0, + "step": 3808 + }, + { + "epoch": 0.2454727073532255, + "grad_norm": 0.001629549809419056, + "learning_rate": 8.385248836376656e-06, + "loss": 0.0, + "step": 3809 + }, + { + "epoch": 0.24553715280015467, + "grad_norm": 0.0008565285923683144, + "learning_rate": 8.38453276047261e-06, + "loss": 0.0, + "step": 3810 + }, + { + "epoch": 0.24560159824708383, + "grad_norm": 0.07435944046824011, + "learning_rate": 8.383816684568565e-06, + "loss": 0.0002, + "step": 3811 + }, + { + "epoch": 0.24566604369401301, + "grad_norm": 1.455835009863066, + "learning_rate": 8.38310060866452e-06, + "loss": 0.0065, + "step": 3812 + }, + { + "epoch": 0.2457304891409422, + "grad_norm": 0.02158769033318039, + "learning_rate": 8.382384532760474e-06, + "loss": 0.0, + "step": 3813 + }, + { + "epoch": 0.24579493458787136, + "grad_norm": 0.009037092296314405, + "learning_rate": 8.381668456856428e-06, + "loss": 0.0, + "step": 3814 + }, + { + "epoch": 0.24585938003480054, + "grad_norm": 0.01045907662154638, + "learning_rate": 8.380952380952382e-06, + "loss": 0.0, + "step": 3815 + }, + { + "epoch": 0.24592382548172972, + "grad_norm": 0.2750826120917977, + "learning_rate": 8.380236305048336e-06, + "loss": 0.001, + "step": 3816 + }, + { + "epoch": 0.24598827092865888, + "grad_norm": 0.00485543121189977, + "learning_rate": 8.37952022914429e-06, + "loss": 0.0, + "step": 3817 + }, + { + "epoch": 0.24605271637558807, + "grad_norm": 0.0069404249703027745, + "learning_rate": 8.378804153240243e-06, + "loss": 0.0, + "step": 3818 + }, + { + "epoch": 0.24611716182251725, + "grad_norm": 0.0008645888421194356, + "learning_rate": 8.378088077336198e-06, + "loss": 0.0, + "step": 3819 + }, + { + "epoch": 0.2461816072694464, + "grad_norm": 0.0006523125352911542, + "learning_rate": 8.377372001432152e-06, + "loss": 0.0, + "step": 3820 + }, + { + "epoch": 0.2462460527163756, + "grad_norm": 0.022340756216589642, + "learning_rate": 8.376655925528106e-06, + "loss": 0.0003, + "step": 3821 + }, + { + "epoch": 0.24631049816330478, + "grad_norm": 0.02572042481418643, + "learning_rate": 8.37593984962406e-06, + "loss": 0.0001, + "step": 3822 + }, + { + "epoch": 0.24637494361023393, + "grad_norm": 0.05793174450618309, + "learning_rate": 8.375223773720015e-06, + "loss": 0.0001, + "step": 3823 + }, + { + "epoch": 0.24643938905716312, + "grad_norm": 0.0013580941737972847, + "learning_rate": 8.37450769781597e-06, + "loss": 0.0, + "step": 3824 + }, + { + "epoch": 0.24650383450409227, + "grad_norm": 0.023467460744772406, + "learning_rate": 8.373791621911923e-06, + "loss": 0.0001, + "step": 3825 + }, + { + "epoch": 0.24656827995102146, + "grad_norm": 0.002280545682215033, + "learning_rate": 8.373075546007878e-06, + "loss": 0.0, + "step": 3826 + }, + { + "epoch": 0.24663272539795064, + "grad_norm": 0.20302869274283905, + "learning_rate": 8.372359470103832e-06, + "loss": 0.0019, + "step": 3827 + }, + { + "epoch": 0.2466971708448798, + "grad_norm": 2.142446703431541, + "learning_rate": 8.371643394199785e-06, + "loss": 0.0132, + "step": 3828 + }, + { + "epoch": 0.24676161629180898, + "grad_norm": 0.11213382494207665, + "learning_rate": 8.370927318295739e-06, + "loss": 0.0018, + "step": 3829 + }, + { + "epoch": 0.24682606173873817, + "grad_norm": 0.5988444067924492, + "learning_rate": 8.370211242391693e-06, + "loss": 0.0016, + "step": 3830 + }, + { + "epoch": 0.24689050718566732, + "grad_norm": 0.002282277636076149, + "learning_rate": 8.36949516648765e-06, + "loss": 0.0, + "step": 3831 + }, + { + "epoch": 0.2469549526325965, + "grad_norm": 0.017599929260292843, + "learning_rate": 8.368779090583604e-06, + "loss": 0.0002, + "step": 3832 + }, + { + "epoch": 0.2470193980795257, + "grad_norm": 8.669601616320829e-05, + "learning_rate": 8.368063014679558e-06, + "loss": 0.0, + "step": 3833 + }, + { + "epoch": 0.24708384352645485, + "grad_norm": 1.5981672071508441, + "learning_rate": 8.36734693877551e-06, + "loss": 0.0037, + "step": 3834 + }, + { + "epoch": 0.24714828897338403, + "grad_norm": 0.008489611550036908, + "learning_rate": 8.366630862871465e-06, + "loss": 0.0001, + "step": 3835 + }, + { + "epoch": 0.24721273442031322, + "grad_norm": 0.003644974177941349, + "learning_rate": 8.365914786967419e-06, + "loss": 0.0, + "step": 3836 + }, + { + "epoch": 0.24727717986724237, + "grad_norm": 0.003976308666016474, + "learning_rate": 8.365198711063373e-06, + "loss": 0.0, + "step": 3837 + }, + { + "epoch": 0.24734162531417156, + "grad_norm": 0.27547633413259537, + "learning_rate": 8.364482635159328e-06, + "loss": 0.0092, + "step": 3838 + }, + { + "epoch": 0.24740607076110072, + "grad_norm": 0.0015722282850001267, + "learning_rate": 8.363766559255282e-06, + "loss": 0.0, + "step": 3839 + }, + { + "epoch": 0.2474705162080299, + "grad_norm": 0.005728081698721112, + "learning_rate": 8.363050483351236e-06, + "loss": 0.0, + "step": 3840 + }, + { + "epoch": 0.24753496165495908, + "grad_norm": 0.019088249596522798, + "learning_rate": 8.36233440744719e-06, + "loss": 0.0002, + "step": 3841 + }, + { + "epoch": 0.24759940710188824, + "grad_norm": 0.13053990981847335, + "learning_rate": 8.361618331543145e-06, + "loss": 0.0004, + "step": 3842 + }, + { + "epoch": 0.24766385254881743, + "grad_norm": 0.022335887407941846, + "learning_rate": 8.3609022556391e-06, + "loss": 0.0002, + "step": 3843 + }, + { + "epoch": 0.2477282979957466, + "grad_norm": 0.029563943186087686, + "learning_rate": 8.360186179735052e-06, + "loss": 0.0001, + "step": 3844 + }, + { + "epoch": 0.24779274344267577, + "grad_norm": 0.009087314706417401, + "learning_rate": 8.359470103831006e-06, + "loss": 0.0, + "step": 3845 + }, + { + "epoch": 0.24785718888960495, + "grad_norm": 0.00653539308431923, + "learning_rate": 8.35875402792696e-06, + "loss": 0.0, + "step": 3846 + }, + { + "epoch": 0.24792163433653414, + "grad_norm": 0.4713887646308475, + "learning_rate": 8.358037952022915e-06, + "loss": 0.0011, + "step": 3847 + }, + { + "epoch": 0.2479860797834633, + "grad_norm": 0.024816317722805743, + "learning_rate": 8.357321876118869e-06, + "loss": 0.0001, + "step": 3848 + }, + { + "epoch": 0.24805052523039248, + "grad_norm": 0.038245842028925114, + "learning_rate": 8.356605800214823e-06, + "loss": 0.0005, + "step": 3849 + }, + { + "epoch": 0.24811497067732163, + "grad_norm": 0.1631073794859977, + "learning_rate": 8.355889724310778e-06, + "loss": 0.0007, + "step": 3850 + }, + { + "epoch": 0.24817941612425082, + "grad_norm": 0.12254443599798615, + "learning_rate": 8.355173648406732e-06, + "loss": 0.0006, + "step": 3851 + }, + { + "epoch": 0.24824386157118, + "grad_norm": 0.000737590175483497, + "learning_rate": 8.354457572502686e-06, + "loss": 0.0, + "step": 3852 + }, + { + "epoch": 0.24830830701810916, + "grad_norm": 0.0006945422121030539, + "learning_rate": 8.35374149659864e-06, + "loss": 0.0, + "step": 3853 + }, + { + "epoch": 0.24837275246503834, + "grad_norm": 0.019502762791774902, + "learning_rate": 8.353025420694593e-06, + "loss": 0.0003, + "step": 3854 + }, + { + "epoch": 0.24843719791196753, + "grad_norm": 0.08053178332273953, + "learning_rate": 8.352309344790549e-06, + "loss": 0.0009, + "step": 3855 + }, + { + "epoch": 0.24850164335889668, + "grad_norm": 0.14025133875090617, + "learning_rate": 8.351593268886503e-06, + "loss": 0.0012, + "step": 3856 + }, + { + "epoch": 0.24856608880582587, + "grad_norm": 0.006439356158143003, + "learning_rate": 8.350877192982458e-06, + "loss": 0.0, + "step": 3857 + }, + { + "epoch": 0.24863053425275505, + "grad_norm": 0.00029574026453269123, + "learning_rate": 8.350161117078412e-06, + "loss": 0.0, + "step": 3858 + }, + { + "epoch": 0.2486949796996842, + "grad_norm": 0.004987156108916005, + "learning_rate": 8.349445041174366e-06, + "loss": 0.0, + "step": 3859 + }, + { + "epoch": 0.2487594251466134, + "grad_norm": 0.015322867790270124, + "learning_rate": 8.348728965270319e-06, + "loss": 0.0, + "step": 3860 + }, + { + "epoch": 0.24882387059354258, + "grad_norm": 0.0010423756801406121, + "learning_rate": 8.348012889366273e-06, + "loss": 0.0, + "step": 3861 + }, + { + "epoch": 0.24888831604047174, + "grad_norm": 0.38380098203053353, + "learning_rate": 8.347296813462227e-06, + "loss": 0.0018, + "step": 3862 + }, + { + "epoch": 0.24895276148740092, + "grad_norm": 0.0022487493770144092, + "learning_rate": 8.346580737558182e-06, + "loss": 0.0, + "step": 3863 + }, + { + "epoch": 0.24901720693433008, + "grad_norm": 0.021213837093024088, + "learning_rate": 8.345864661654136e-06, + "loss": 0.0001, + "step": 3864 + }, + { + "epoch": 0.24908165238125926, + "grad_norm": 0.0027843664534250444, + "learning_rate": 8.34514858575009e-06, + "loss": 0.0, + "step": 3865 + }, + { + "epoch": 0.24914609782818845, + "grad_norm": 0.13952617361567596, + "learning_rate": 8.344432509846045e-06, + "loss": 0.0002, + "step": 3866 + }, + { + "epoch": 0.2492105432751176, + "grad_norm": 0.0010491756332098327, + "learning_rate": 8.343716433941999e-06, + "loss": 0.0, + "step": 3867 + }, + { + "epoch": 0.2492749887220468, + "grad_norm": 0.05011367783332657, + "learning_rate": 8.343000358037953e-06, + "loss": 0.0002, + "step": 3868 + }, + { + "epoch": 0.24933943416897597, + "grad_norm": 0.4646252671662538, + "learning_rate": 8.342284282133908e-06, + "loss": 0.0106, + "step": 3869 + }, + { + "epoch": 0.24940387961590513, + "grad_norm": 0.00023720217135674875, + "learning_rate": 8.34156820622986e-06, + "loss": 0.0, + "step": 3870 + }, + { + "epoch": 0.2494683250628343, + "grad_norm": 0.03269248525753471, + "learning_rate": 8.340852130325814e-06, + "loss": 0.0001, + "step": 3871 + }, + { + "epoch": 0.2495327705097635, + "grad_norm": 0.0042010391733460955, + "learning_rate": 8.340136054421769e-06, + "loss": 0.0, + "step": 3872 + }, + { + "epoch": 0.24959721595669265, + "grad_norm": 0.0717919685100648, + "learning_rate": 8.339419978517723e-06, + "loss": 0.0002, + "step": 3873 + }, + { + "epoch": 0.24966166140362184, + "grad_norm": 0.00015835011294514, + "learning_rate": 8.338703902613677e-06, + "loss": 0.0, + "step": 3874 + }, + { + "epoch": 0.24972610685055102, + "grad_norm": 0.013157551152446972, + "learning_rate": 8.337987826709632e-06, + "loss": 0.0, + "step": 3875 + }, + { + "epoch": 0.24979055229748018, + "grad_norm": 0.0015622204880051153, + "learning_rate": 8.337271750805586e-06, + "loss": 0.0, + "step": 3876 + }, + { + "epoch": 0.24985499774440936, + "grad_norm": 0.03520397086657459, + "learning_rate": 8.33655567490154e-06, + "loss": 0.0004, + "step": 3877 + }, + { + "epoch": 0.24991944319133852, + "grad_norm": 0.02710640237915254, + "learning_rate": 8.335839598997495e-06, + "loss": 0.0001, + "step": 3878 + }, + { + "epoch": 0.2499838886382677, + "grad_norm": 0.0010845842766640747, + "learning_rate": 8.335123523093449e-06, + "loss": 0.0, + "step": 3879 + }, + { + "epoch": 0.2500483340851969, + "grad_norm": 0.18885238021353518, + "learning_rate": 8.334407447189403e-06, + "loss": 0.0005, + "step": 3880 + }, + { + "epoch": 0.2501127795321261, + "grad_norm": 0.14326937212325316, + "learning_rate": 8.333691371285357e-06, + "loss": 0.0007, + "step": 3881 + }, + { + "epoch": 0.25017722497905526, + "grad_norm": 0.17603865426681994, + "learning_rate": 8.332975295381312e-06, + "loss": 0.0005, + "step": 3882 + }, + { + "epoch": 0.2502416704259844, + "grad_norm": 0.08609897712323253, + "learning_rate": 8.332259219477266e-06, + "loss": 0.0003, + "step": 3883 + }, + { + "epoch": 0.25030611587291357, + "grad_norm": 0.007551340900007415, + "learning_rate": 8.33154314357322e-06, + "loss": 0.0, + "step": 3884 + }, + { + "epoch": 0.25037056131984275, + "grad_norm": 0.057125706868368396, + "learning_rate": 8.330827067669175e-06, + "loss": 0.0001, + "step": 3885 + }, + { + "epoch": 0.25043500676677194, + "grad_norm": 0.0034849557503283534, + "learning_rate": 8.330110991765127e-06, + "loss": 0.0, + "step": 3886 + }, + { + "epoch": 0.2504994522137011, + "grad_norm": 0.0025515581732295125, + "learning_rate": 8.329394915861082e-06, + "loss": 0.0, + "step": 3887 + }, + { + "epoch": 0.25056389766063025, + "grad_norm": 0.0006230279157326317, + "learning_rate": 8.328678839957036e-06, + "loss": 0.0, + "step": 3888 + }, + { + "epoch": 0.25062834310755944, + "grad_norm": 0.01758640478425065, + "learning_rate": 8.32796276405299e-06, + "loss": 0.0002, + "step": 3889 + }, + { + "epoch": 0.2506927885544886, + "grad_norm": 0.46716078933857036, + "learning_rate": 8.327246688148944e-06, + "loss": 0.0034, + "step": 3890 + }, + { + "epoch": 0.2507572340014178, + "grad_norm": 0.067406871204759, + "learning_rate": 8.326530612244899e-06, + "loss": 0.0001, + "step": 3891 + }, + { + "epoch": 0.250821679448347, + "grad_norm": 0.0006241835739047378, + "learning_rate": 8.325814536340853e-06, + "loss": 0.0, + "step": 3892 + }, + { + "epoch": 0.2508861248952762, + "grad_norm": 0.31971812344868983, + "learning_rate": 8.325098460436807e-06, + "loss": 0.0009, + "step": 3893 + }, + { + "epoch": 0.2509505703422053, + "grad_norm": 0.028947342519578857, + "learning_rate": 8.324382384532762e-06, + "loss": 0.0002, + "step": 3894 + }, + { + "epoch": 0.2510150157891345, + "grad_norm": 0.004368482565717253, + "learning_rate": 8.323666308628714e-06, + "loss": 0.0, + "step": 3895 + }, + { + "epoch": 0.2510794612360637, + "grad_norm": 0.010188551518575773, + "learning_rate": 8.322950232724669e-06, + "loss": 0.0, + "step": 3896 + }, + { + "epoch": 0.25114390668299286, + "grad_norm": 0.16556811322935083, + "learning_rate": 8.322234156820623e-06, + "loss": 0.0008, + "step": 3897 + }, + { + "epoch": 0.25120835212992204, + "grad_norm": 0.04164728894373619, + "learning_rate": 8.321518080916577e-06, + "loss": 0.0001, + "step": 3898 + }, + { + "epoch": 0.25127279757685117, + "grad_norm": 0.002195380129568669, + "learning_rate": 8.320802005012531e-06, + "loss": 0.0, + "step": 3899 + }, + { + "epoch": 0.25133724302378035, + "grad_norm": 0.001985210248564425, + "learning_rate": 8.320085929108486e-06, + "loss": 0.0, + "step": 3900 + }, + { + "epoch": 0.25140168847070954, + "grad_norm": 0.001143081809185654, + "learning_rate": 8.319369853204442e-06, + "loss": 0.0, + "step": 3901 + }, + { + "epoch": 0.2514661339176387, + "grad_norm": 0.001505042883211427, + "learning_rate": 8.318653777300394e-06, + "loss": 0.0, + "step": 3902 + }, + { + "epoch": 0.2515305793645679, + "grad_norm": 0.07406394901749401, + "learning_rate": 8.317937701396349e-06, + "loss": 0.0006, + "step": 3903 + }, + { + "epoch": 0.2515950248114971, + "grad_norm": 0.0056360193079743005, + "learning_rate": 8.317221625492303e-06, + "loss": 0.0, + "step": 3904 + }, + { + "epoch": 0.2516594702584262, + "grad_norm": 0.061877072578334864, + "learning_rate": 8.316505549588257e-06, + "loss": 0.0002, + "step": 3905 + }, + { + "epoch": 0.2517239157053554, + "grad_norm": 0.1699128915889434, + "learning_rate": 8.315789473684212e-06, + "loss": 0.0005, + "step": 3906 + }, + { + "epoch": 0.2517883611522846, + "grad_norm": 0.00045409563031198523, + "learning_rate": 8.315073397780166e-06, + "loss": 0.0, + "step": 3907 + }, + { + "epoch": 0.2518528065992138, + "grad_norm": 0.025290433653521596, + "learning_rate": 8.31435732187612e-06, + "loss": 0.0, + "step": 3908 + }, + { + "epoch": 0.25191725204614296, + "grad_norm": 0.0014953556261391822, + "learning_rate": 8.313641245972074e-06, + "loss": 0.0, + "step": 3909 + }, + { + "epoch": 0.2519816974930721, + "grad_norm": 0.8381777895944582, + "learning_rate": 8.312925170068029e-06, + "loss": 0.0056, + "step": 3910 + }, + { + "epoch": 0.25204614294000127, + "grad_norm": 0.018405931304765668, + "learning_rate": 8.312209094163981e-06, + "loss": 0.0, + "step": 3911 + }, + { + "epoch": 0.25211058838693046, + "grad_norm": 0.11354576666309285, + "learning_rate": 8.311493018259936e-06, + "loss": 0.0002, + "step": 3912 + }, + { + "epoch": 0.25217503383385964, + "grad_norm": 0.053129490844862515, + "learning_rate": 8.31077694235589e-06, + "loss": 0.0001, + "step": 3913 + }, + { + "epoch": 0.2522394792807888, + "grad_norm": 0.0058663139011346475, + "learning_rate": 8.310060866451844e-06, + "loss": 0.0, + "step": 3914 + }, + { + "epoch": 0.252303924727718, + "grad_norm": 0.08415443369084287, + "learning_rate": 8.309344790547799e-06, + "loss": 0.0001, + "step": 3915 + }, + { + "epoch": 0.25236837017464714, + "grad_norm": 0.47324517501410757, + "learning_rate": 8.308628714643753e-06, + "loss": 0.0034, + "step": 3916 + }, + { + "epoch": 0.2524328156215763, + "grad_norm": 0.00036435772833055455, + "learning_rate": 8.307912638739707e-06, + "loss": 0.0, + "step": 3917 + }, + { + "epoch": 0.2524972610685055, + "grad_norm": 0.0016095858469143312, + "learning_rate": 8.307196562835661e-06, + "loss": 0.0, + "step": 3918 + }, + { + "epoch": 0.2525617065154347, + "grad_norm": 0.680525427646642, + "learning_rate": 8.306480486931616e-06, + "loss": 0.0049, + "step": 3919 + }, + { + "epoch": 0.2526261519623639, + "grad_norm": 0.0160279352790546, + "learning_rate": 8.30576441102757e-06, + "loss": 0.0, + "step": 3920 + }, + { + "epoch": 0.25269059740929306, + "grad_norm": 0.04248723436608822, + "learning_rate": 8.305048335123523e-06, + "loss": 0.0001, + "step": 3921 + }, + { + "epoch": 0.2527550428562222, + "grad_norm": 0.009271066669047413, + "learning_rate": 8.304332259219477e-06, + "loss": 0.0, + "step": 3922 + }, + { + "epoch": 0.2528194883031514, + "grad_norm": 0.0045926226252857045, + "learning_rate": 8.303616183315431e-06, + "loss": 0.0, + "step": 3923 + }, + { + "epoch": 0.25288393375008056, + "grad_norm": 0.0683818303419785, + "learning_rate": 8.302900107411385e-06, + "loss": 0.0001, + "step": 3924 + }, + { + "epoch": 0.25294837919700974, + "grad_norm": 0.4241263926590266, + "learning_rate": 8.302184031507341e-06, + "loss": 0.0016, + "step": 3925 + }, + { + "epoch": 0.2530128246439389, + "grad_norm": 0.002698765370629597, + "learning_rate": 8.301467955603296e-06, + "loss": 0.0, + "step": 3926 + }, + { + "epoch": 0.25307727009086806, + "grad_norm": 0.021524961969480675, + "learning_rate": 8.300751879699248e-06, + "loss": 0.0, + "step": 3927 + }, + { + "epoch": 0.25314171553779724, + "grad_norm": 0.0322740450649284, + "learning_rate": 8.300035803795203e-06, + "loss": 0.0, + "step": 3928 + }, + { + "epoch": 0.2532061609847264, + "grad_norm": 0.018648881771948716, + "learning_rate": 8.299319727891157e-06, + "loss": 0.0, + "step": 3929 + }, + { + "epoch": 0.2532706064316556, + "grad_norm": 0.11561774475944939, + "learning_rate": 8.298603651987111e-06, + "loss": 0.0001, + "step": 3930 + }, + { + "epoch": 0.2533350518785848, + "grad_norm": 0.0038557643494287752, + "learning_rate": 8.297887576083066e-06, + "loss": 0.0, + "step": 3931 + }, + { + "epoch": 0.253399497325514, + "grad_norm": 0.010070178706289873, + "learning_rate": 8.29717150017902e-06, + "loss": 0.0, + "step": 3932 + }, + { + "epoch": 0.2534639427724431, + "grad_norm": 1.0039694205866754, + "learning_rate": 8.296455424274974e-06, + "loss": 0.0032, + "step": 3933 + }, + { + "epoch": 0.2535283882193723, + "grad_norm": 0.06947501096791438, + "learning_rate": 8.295739348370928e-06, + "loss": 0.0, + "step": 3934 + }, + { + "epoch": 0.2535928336663015, + "grad_norm": 0.0042569530407342215, + "learning_rate": 8.295023272466883e-06, + "loss": 0.0, + "step": 3935 + }, + { + "epoch": 0.25365727911323066, + "grad_norm": 0.0014290376859672905, + "learning_rate": 8.294307196562837e-06, + "loss": 0.0, + "step": 3936 + }, + { + "epoch": 0.25372172456015984, + "grad_norm": 0.03599591192270969, + "learning_rate": 8.29359112065879e-06, + "loss": 0.0001, + "step": 3937 + }, + { + "epoch": 0.253786170007089, + "grad_norm": 0.03868331751936182, + "learning_rate": 8.292875044754744e-06, + "loss": 0.0, + "step": 3938 + }, + { + "epoch": 0.25385061545401816, + "grad_norm": 0.019385192551703966, + "learning_rate": 8.292158968850698e-06, + "loss": 0.0, + "step": 3939 + }, + { + "epoch": 0.25391506090094734, + "grad_norm": 0.002924765983032503, + "learning_rate": 8.291442892946653e-06, + "loss": 0.0, + "step": 3940 + }, + { + "epoch": 0.2539795063478765, + "grad_norm": 0.0067314597492258285, + "learning_rate": 8.290726817042607e-06, + "loss": 0.0, + "step": 3941 + }, + { + "epoch": 0.2540439517948057, + "grad_norm": 0.004637551407635732, + "learning_rate": 8.290010741138561e-06, + "loss": 0.0, + "step": 3942 + }, + { + "epoch": 0.2541083972417349, + "grad_norm": 0.0018488212029575274, + "learning_rate": 8.289294665234515e-06, + "loss": 0.0, + "step": 3943 + }, + { + "epoch": 0.254172842688664, + "grad_norm": 0.060456514226417284, + "learning_rate": 8.28857858933047e-06, + "loss": 0.0002, + "step": 3944 + }, + { + "epoch": 0.2542372881355932, + "grad_norm": 0.020665116807158542, + "learning_rate": 8.287862513426424e-06, + "loss": 0.0001, + "step": 3945 + }, + { + "epoch": 0.2543017335825224, + "grad_norm": 0.1538017089756088, + "learning_rate": 8.287146437522378e-06, + "loss": 0.0014, + "step": 3946 + }, + { + "epoch": 0.2543661790294516, + "grad_norm": 4.183620420739566, + "learning_rate": 8.286430361618331e-06, + "loss": 0.0425, + "step": 3947 + }, + { + "epoch": 0.25443062447638076, + "grad_norm": 0.2073996343380915, + "learning_rate": 8.285714285714287e-06, + "loss": 0.0004, + "step": 3948 + }, + { + "epoch": 0.2544950699233099, + "grad_norm": 0.020734861291480815, + "learning_rate": 8.284998209810241e-06, + "loss": 0.0016, + "step": 3949 + }, + { + "epoch": 0.2545595153702391, + "grad_norm": 0.0010731652208121033, + "learning_rate": 8.284282133906196e-06, + "loss": 0.0, + "step": 3950 + }, + { + "epoch": 0.25462396081716826, + "grad_norm": 0.0006424096037444866, + "learning_rate": 8.28356605800215e-06, + "loss": 0.0, + "step": 3951 + }, + { + "epoch": 0.25468840626409744, + "grad_norm": 0.003079481410634368, + "learning_rate": 8.282849982098104e-06, + "loss": 0.0, + "step": 3952 + }, + { + "epoch": 0.25475285171102663, + "grad_norm": 0.23498132479301437, + "learning_rate": 8.282133906194057e-06, + "loss": 0.0006, + "step": 3953 + }, + { + "epoch": 0.2548172971579558, + "grad_norm": 0.013251046524471814, + "learning_rate": 8.281417830290011e-06, + "loss": 0.0, + "step": 3954 + }, + { + "epoch": 0.25488174260488494, + "grad_norm": 0.005255771405272541, + "learning_rate": 8.280701754385965e-06, + "loss": 0.0, + "step": 3955 + }, + { + "epoch": 0.2549461880518141, + "grad_norm": 0.005244493955979896, + "learning_rate": 8.27998567848192e-06, + "loss": 0.0, + "step": 3956 + }, + { + "epoch": 0.2550106334987433, + "grad_norm": 0.0006158653952264748, + "learning_rate": 8.279269602577874e-06, + "loss": 0.0, + "step": 3957 + }, + { + "epoch": 0.2550750789456725, + "grad_norm": 0.00035900625196237423, + "learning_rate": 8.278553526673828e-06, + "loss": 0.0, + "step": 3958 + }, + { + "epoch": 0.2551395243926017, + "grad_norm": 0.008736003219196615, + "learning_rate": 8.277837450769783e-06, + "loss": 0.0001, + "step": 3959 + }, + { + "epoch": 0.25520396983953086, + "grad_norm": 0.0017198369442972868, + "learning_rate": 8.277121374865737e-06, + "loss": 0.0, + "step": 3960 + }, + { + "epoch": 0.25526841528646, + "grad_norm": 0.000357485762814059, + "learning_rate": 8.276405298961691e-06, + "loss": 0.0, + "step": 3961 + }, + { + "epoch": 0.2553328607333892, + "grad_norm": 0.004416114733046556, + "learning_rate": 8.275689223057645e-06, + "loss": 0.0, + "step": 3962 + }, + { + "epoch": 0.25539730618031836, + "grad_norm": 0.05105998267534113, + "learning_rate": 8.274973147153598e-06, + "loss": 0.0001, + "step": 3963 + }, + { + "epoch": 0.25546175162724755, + "grad_norm": 2.68921207630397, + "learning_rate": 8.274257071249552e-06, + "loss": 0.0131, + "step": 3964 + }, + { + "epoch": 0.25552619707417673, + "grad_norm": 0.0021671550621980688, + "learning_rate": 8.273540995345507e-06, + "loss": 0.0, + "step": 3965 + }, + { + "epoch": 0.25559064252110586, + "grad_norm": 0.047147107595446595, + "learning_rate": 8.272824919441461e-06, + "loss": 0.0004, + "step": 3966 + }, + { + "epoch": 0.25565508796803504, + "grad_norm": 0.0007389136439259691, + "learning_rate": 8.272108843537415e-06, + "loss": 0.0, + "step": 3967 + }, + { + "epoch": 0.25571953341496423, + "grad_norm": 0.0011897303535312082, + "learning_rate": 8.27139276763337e-06, + "loss": 0.0, + "step": 3968 + }, + { + "epoch": 0.2557839788618934, + "grad_norm": 0.0008199297088428887, + "learning_rate": 8.270676691729324e-06, + "loss": 0.0, + "step": 3969 + }, + { + "epoch": 0.2558484243088226, + "grad_norm": 0.526262460456898, + "learning_rate": 8.269960615825278e-06, + "loss": 0.0039, + "step": 3970 + }, + { + "epoch": 0.2559128697557518, + "grad_norm": 0.005483051269792115, + "learning_rate": 8.269244539921232e-06, + "loss": 0.0, + "step": 3971 + }, + { + "epoch": 0.2559773152026809, + "grad_norm": 0.020236496196260945, + "learning_rate": 8.268528464017187e-06, + "loss": 0.0, + "step": 3972 + }, + { + "epoch": 0.2560417606496101, + "grad_norm": 0.22438319054480052, + "learning_rate": 8.267812388113141e-06, + "loss": 0.0021, + "step": 3973 + }, + { + "epoch": 0.2561062060965393, + "grad_norm": 0.1771382826278181, + "learning_rate": 8.267096312209095e-06, + "loss": 0.0002, + "step": 3974 + }, + { + "epoch": 0.25617065154346846, + "grad_norm": 0.047601454923286886, + "learning_rate": 8.26638023630505e-06, + "loss": 0.0001, + "step": 3975 + }, + { + "epoch": 0.25623509699039765, + "grad_norm": 0.06528823065675737, + "learning_rate": 8.265664160401004e-06, + "loss": 0.0001, + "step": 3976 + }, + { + "epoch": 0.2562995424373268, + "grad_norm": 0.021962709957251546, + "learning_rate": 8.264948084496958e-06, + "loss": 0.0, + "step": 3977 + }, + { + "epoch": 0.25636398788425596, + "grad_norm": 0.011551023631307096, + "learning_rate": 8.264232008592913e-06, + "loss": 0.0, + "step": 3978 + }, + { + "epoch": 0.25642843333118515, + "grad_norm": 0.0026298561406031358, + "learning_rate": 8.263515932688865e-06, + "loss": 0.0, + "step": 3979 + }, + { + "epoch": 0.25649287877811433, + "grad_norm": 0.002949573749046438, + "learning_rate": 8.26279985678482e-06, + "loss": 0.0, + "step": 3980 + }, + { + "epoch": 0.2565573242250435, + "grad_norm": 0.0030800614041981695, + "learning_rate": 8.262083780880774e-06, + "loss": 0.0, + "step": 3981 + }, + { + "epoch": 0.2566217696719727, + "grad_norm": 0.02935547035373371, + "learning_rate": 8.261367704976728e-06, + "loss": 0.0003, + "step": 3982 + }, + { + "epoch": 0.25668621511890183, + "grad_norm": 0.018741440340363177, + "learning_rate": 8.260651629072682e-06, + "loss": 0.0001, + "step": 3983 + }, + { + "epoch": 0.256750660565831, + "grad_norm": 0.002872186655947733, + "learning_rate": 8.259935553168637e-06, + "loss": 0.0, + "step": 3984 + }, + { + "epoch": 0.2568151060127602, + "grad_norm": 0.054660745871351675, + "learning_rate": 8.259219477264591e-06, + "loss": 0.0002, + "step": 3985 + }, + { + "epoch": 0.2568795514596894, + "grad_norm": 0.0025098992277484064, + "learning_rate": 8.258503401360545e-06, + "loss": 0.0, + "step": 3986 + }, + { + "epoch": 0.25694399690661857, + "grad_norm": 0.11741832015214243, + "learning_rate": 8.2577873254565e-06, + "loss": 0.0006, + "step": 3987 + }, + { + "epoch": 0.2570084423535477, + "grad_norm": 0.008282736051549501, + "learning_rate": 8.257071249552452e-06, + "loss": 0.0, + "step": 3988 + }, + { + "epoch": 0.2570728878004769, + "grad_norm": 0.05705696246490212, + "learning_rate": 8.256355173648406e-06, + "loss": 0.0001, + "step": 3989 + }, + { + "epoch": 0.25713733324740606, + "grad_norm": 0.0049394262891318905, + "learning_rate": 8.25563909774436e-06, + "loss": 0.0, + "step": 3990 + }, + { + "epoch": 0.25720177869433525, + "grad_norm": 0.04868299851585092, + "learning_rate": 8.254923021840315e-06, + "loss": 0.0001, + "step": 3991 + }, + { + "epoch": 0.25726622414126443, + "grad_norm": 0.0073026446153531, + "learning_rate": 8.25420694593627e-06, + "loss": 0.0, + "step": 3992 + }, + { + "epoch": 0.2573306695881936, + "grad_norm": 0.006003148253976413, + "learning_rate": 8.253490870032224e-06, + "loss": 0.0, + "step": 3993 + }, + { + "epoch": 0.25739511503512275, + "grad_norm": 0.0030958094222771513, + "learning_rate": 8.252774794128178e-06, + "loss": 0.0, + "step": 3994 + }, + { + "epoch": 0.25745956048205193, + "grad_norm": 0.005458472731249199, + "learning_rate": 8.252058718224132e-06, + "loss": 0.0, + "step": 3995 + }, + { + "epoch": 0.2575240059289811, + "grad_norm": 0.00035485698227203874, + "learning_rate": 8.251342642320087e-06, + "loss": 0.0, + "step": 3996 + }, + { + "epoch": 0.2575884513759103, + "grad_norm": 0.00017725530470848552, + "learning_rate": 8.25062656641604e-06, + "loss": 0.0, + "step": 3997 + }, + { + "epoch": 0.2576528968228395, + "grad_norm": 0.00317466520196752, + "learning_rate": 8.249910490511995e-06, + "loss": 0.0, + "step": 3998 + }, + { + "epoch": 0.25771734226976867, + "grad_norm": 0.012355914561483782, + "learning_rate": 8.24919441460795e-06, + "loss": 0.0001, + "step": 3999 + }, + { + "epoch": 0.2577817877166978, + "grad_norm": 0.3189945787922834, + "learning_rate": 8.248478338703904e-06, + "loss": 0.0054, + "step": 4000 + }, + { + "epoch": 0.257846233163627, + "grad_norm": 0.0002133263309856516, + "learning_rate": 8.247762262799858e-06, + "loss": 0.0, + "step": 4001 + }, + { + "epoch": 0.25791067861055617, + "grad_norm": 0.005477701385231475, + "learning_rate": 8.247046186895812e-06, + "loss": 0.0, + "step": 4002 + }, + { + "epoch": 0.25797512405748535, + "grad_norm": 0.0050049522296231825, + "learning_rate": 8.246330110991767e-06, + "loss": 0.0, + "step": 4003 + }, + { + "epoch": 0.25803956950441453, + "grad_norm": 0.12513390461850646, + "learning_rate": 8.24561403508772e-06, + "loss": 0.0008, + "step": 4004 + }, + { + "epoch": 0.25810401495134366, + "grad_norm": 0.387589101391816, + "learning_rate": 8.244897959183674e-06, + "loss": 0.0008, + "step": 4005 + }, + { + "epoch": 0.25816846039827285, + "grad_norm": 0.01585431965727312, + "learning_rate": 8.244181883279628e-06, + "loss": 0.0, + "step": 4006 + }, + { + "epoch": 0.25823290584520203, + "grad_norm": 0.00618341276123621, + "learning_rate": 8.243465807375582e-06, + "loss": 0.0, + "step": 4007 + }, + { + "epoch": 0.2582973512921312, + "grad_norm": 0.0026786699895131457, + "learning_rate": 8.242749731471536e-06, + "loss": 0.0, + "step": 4008 + }, + { + "epoch": 0.2583617967390604, + "grad_norm": 0.0036054754536920027, + "learning_rate": 8.24203365556749e-06, + "loss": 0.0, + "step": 4009 + }, + { + "epoch": 0.2584262421859896, + "grad_norm": 0.0007546240302236562, + "learning_rate": 8.241317579663445e-06, + "loss": 0.0, + "step": 4010 + }, + { + "epoch": 0.2584906876329187, + "grad_norm": 0.012423956570293417, + "learning_rate": 8.2406015037594e-06, + "loss": 0.0001, + "step": 4011 + }, + { + "epoch": 0.2585551330798479, + "grad_norm": 0.003081257517508017, + "learning_rate": 8.239885427855354e-06, + "loss": 0.0, + "step": 4012 + }, + { + "epoch": 0.2586195785267771, + "grad_norm": 0.3443340411812879, + "learning_rate": 8.239169351951308e-06, + "loss": 0.0029, + "step": 4013 + }, + { + "epoch": 0.25868402397370627, + "grad_norm": 0.025767518490395797, + "learning_rate": 8.23845327604726e-06, + "loss": 0.0002, + "step": 4014 + }, + { + "epoch": 0.25874846942063545, + "grad_norm": 0.048692089363689406, + "learning_rate": 8.237737200143215e-06, + "loss": 0.0001, + "step": 4015 + }, + { + "epoch": 0.2588129148675646, + "grad_norm": 0.9627859124434754, + "learning_rate": 8.237021124239169e-06, + "loss": 0.0037, + "step": 4016 + }, + { + "epoch": 0.25887736031449377, + "grad_norm": 0.05837690042372548, + "learning_rate": 8.236305048335123e-06, + "loss": 0.0002, + "step": 4017 + }, + { + "epoch": 0.25894180576142295, + "grad_norm": 0.004356229156364595, + "learning_rate": 8.23558897243108e-06, + "loss": 0.0, + "step": 4018 + }, + { + "epoch": 0.25900625120835213, + "grad_norm": 0.03680381490022786, + "learning_rate": 8.234872896527034e-06, + "loss": 0.0, + "step": 4019 + }, + { + "epoch": 0.2590706966552813, + "grad_norm": 0.05317580278091146, + "learning_rate": 8.234156820622986e-06, + "loss": 0.0017, + "step": 4020 + }, + { + "epoch": 0.2591351421022105, + "grad_norm": 0.0026499589169380762, + "learning_rate": 8.23344074471894e-06, + "loss": 0.0, + "step": 4021 + }, + { + "epoch": 0.25919958754913963, + "grad_norm": 0.06186301694838707, + "learning_rate": 8.232724668814895e-06, + "loss": 0.0007, + "step": 4022 + }, + { + "epoch": 0.2592640329960688, + "grad_norm": 0.008444060526545723, + "learning_rate": 8.23200859291085e-06, + "loss": 0.0, + "step": 4023 + }, + { + "epoch": 0.259328478442998, + "grad_norm": 0.0017911749821692064, + "learning_rate": 8.231292517006804e-06, + "loss": 0.0, + "step": 4024 + }, + { + "epoch": 0.2593929238899272, + "grad_norm": 2.548784534768542, + "learning_rate": 8.230576441102758e-06, + "loss": 0.0384, + "step": 4025 + }, + { + "epoch": 0.25945736933685637, + "grad_norm": 2.548784534768542, + "learning_rate": 8.230576441102758e-06, + "loss": 0.0226, + "step": 4026 + }, + { + "epoch": 0.2595218147837855, + "grad_norm": 0.005779961270392248, + "learning_rate": 8.229860365198712e-06, + "loss": 0.0, + "step": 4027 + }, + { + "epoch": 0.2595862602307147, + "grad_norm": 0.0009377977132422742, + "learning_rate": 8.229144289294666e-06, + "loss": 0.0, + "step": 4028 + }, + { + "epoch": 0.25965070567764387, + "grad_norm": 0.03774370585557605, + "learning_rate": 8.22842821339062e-06, + "loss": 0.0, + "step": 4029 + }, + { + "epoch": 0.25971515112457305, + "grad_norm": 0.005797201910791572, + "learning_rate": 8.227712137486575e-06, + "loss": 0.0, + "step": 4030 + }, + { + "epoch": 0.25977959657150224, + "grad_norm": 0.06964904980649746, + "learning_rate": 8.226996061582528e-06, + "loss": 0.0015, + "step": 4031 + }, + { + "epoch": 0.2598440420184314, + "grad_norm": 0.060964751645412936, + "learning_rate": 8.226279985678482e-06, + "loss": 0.0002, + "step": 4032 + }, + { + "epoch": 0.25990848746536055, + "grad_norm": 0.060964751645412936, + "learning_rate": 8.226279985678482e-06, + "loss": 0.0187, + "step": 4033 + }, + { + "epoch": 0.25997293291228973, + "grad_norm": 0.060525086543292334, + "learning_rate": 8.225563909774436e-06, + "loss": 0.0002, + "step": 4034 + }, + { + "epoch": 0.2600373783592189, + "grad_norm": 0.19072903311402004, + "learning_rate": 8.22484783387039e-06, + "loss": 0.0002, + "step": 4035 + }, + { + "epoch": 0.2601018238061481, + "grad_norm": 0.031776800989206204, + "learning_rate": 8.224131757966345e-06, + "loss": 0.0, + "step": 4036 + }, + { + "epoch": 0.2601662692530773, + "grad_norm": 0.08727196607202162, + "learning_rate": 8.223415682062299e-06, + "loss": 0.0002, + "step": 4037 + }, + { + "epoch": 0.26023071470000647, + "grad_norm": 0.08446679894310626, + "learning_rate": 8.222699606158253e-06, + "loss": 0.0002, + "step": 4038 + }, + { + "epoch": 0.2602951601469356, + "grad_norm": 0.002980232543945297, + "learning_rate": 8.221983530254208e-06, + "loss": 0.0, + "step": 4039 + }, + { + "epoch": 0.2603596055938648, + "grad_norm": 0.0011543615042102016, + "learning_rate": 8.221267454350162e-06, + "loss": 0.0, + "step": 4040 + }, + { + "epoch": 0.26042405104079397, + "grad_norm": 0.41192820033215677, + "learning_rate": 8.220551378446116e-06, + "loss": 0.0012, + "step": 4041 + }, + { + "epoch": 0.26048849648772315, + "grad_norm": 0.0010378573531816071, + "learning_rate": 8.219835302542069e-06, + "loss": 0.0, + "step": 4042 + }, + { + "epoch": 0.26055294193465234, + "grad_norm": 0.0006749139181435023, + "learning_rate": 8.219119226638023e-06, + "loss": 0.0, + "step": 4043 + }, + { + "epoch": 0.26061738738158147, + "grad_norm": 0.07496953795127487, + "learning_rate": 8.21840315073398e-06, + "loss": 0.0002, + "step": 4044 + }, + { + "epoch": 0.26068183282851065, + "grad_norm": 0.004533902303890554, + "learning_rate": 8.217687074829933e-06, + "loss": 0.0, + "step": 4045 + }, + { + "epoch": 0.26074627827543984, + "grad_norm": 0.0896993077211288, + "learning_rate": 8.216970998925888e-06, + "loss": 0.0001, + "step": 4046 + }, + { + "epoch": 0.260810723722369, + "grad_norm": 0.0005955184982546869, + "learning_rate": 8.216254923021842e-06, + "loss": 0.0, + "step": 4047 + }, + { + "epoch": 0.2608751691692982, + "grad_norm": 0.004027601831837156, + "learning_rate": 8.215538847117795e-06, + "loss": 0.0, + "step": 4048 + }, + { + "epoch": 0.2609396146162274, + "grad_norm": 0.0069240323019456535, + "learning_rate": 8.214822771213749e-06, + "loss": 0.0001, + "step": 4049 + }, + { + "epoch": 0.2610040600631565, + "grad_norm": 0.0008218871824397413, + "learning_rate": 8.214106695309703e-06, + "loss": 0.0, + "step": 4050 + }, + { + "epoch": 0.2610685055100857, + "grad_norm": 0.0008479160595231805, + "learning_rate": 8.213390619405658e-06, + "loss": 0.0, + "step": 4051 + }, + { + "epoch": 0.2611329509570149, + "grad_norm": 0.1893188846496353, + "learning_rate": 8.212674543501612e-06, + "loss": 0.0006, + "step": 4052 + }, + { + "epoch": 0.26119739640394407, + "grad_norm": 0.015341010774691261, + "learning_rate": 8.211958467597566e-06, + "loss": 0.0001, + "step": 4053 + }, + { + "epoch": 0.26126184185087326, + "grad_norm": 0.0028778119216130286, + "learning_rate": 8.21124239169352e-06, + "loss": 0.0, + "step": 4054 + }, + { + "epoch": 0.2613262872978024, + "grad_norm": 0.07925056622743644, + "learning_rate": 8.210526315789475e-06, + "loss": 0.0009, + "step": 4055 + }, + { + "epoch": 0.26139073274473157, + "grad_norm": 0.20684729806969399, + "learning_rate": 8.209810239885429e-06, + "loss": 0.0043, + "step": 4056 + }, + { + "epoch": 0.26145517819166075, + "grad_norm": 0.0006323088838524224, + "learning_rate": 8.209094163981383e-06, + "loss": 0.0, + "step": 4057 + }, + { + "epoch": 0.26151962363858994, + "grad_norm": 0.0011074759344691717, + "learning_rate": 8.208378088077336e-06, + "loss": 0.0, + "step": 4058 + }, + { + "epoch": 0.2615840690855191, + "grad_norm": 0.00026941518339601263, + "learning_rate": 8.20766201217329e-06, + "loss": 0.0, + "step": 4059 + }, + { + "epoch": 0.2616485145324483, + "grad_norm": 0.04375307634300663, + "learning_rate": 8.206945936269245e-06, + "loss": 0.0001, + "step": 4060 + }, + { + "epoch": 0.26171295997937744, + "grad_norm": 0.05213391882349801, + "learning_rate": 8.206229860365199e-06, + "loss": 0.0001, + "step": 4061 + }, + { + "epoch": 0.2617774054263066, + "grad_norm": 1.516026237311075, + "learning_rate": 8.205513784461153e-06, + "loss": 0.0053, + "step": 4062 + }, + { + "epoch": 0.2618418508732358, + "grad_norm": 0.27357712314436866, + "learning_rate": 8.204797708557107e-06, + "loss": 0.0021, + "step": 4063 + }, + { + "epoch": 0.261906296320165, + "grad_norm": 0.005267339834329356, + "learning_rate": 8.204081632653062e-06, + "loss": 0.0, + "step": 4064 + }, + { + "epoch": 0.2619707417670942, + "grad_norm": 0.006003939047317733, + "learning_rate": 8.203365556749016e-06, + "loss": 0.0, + "step": 4065 + }, + { + "epoch": 0.2620351872140233, + "grad_norm": 0.006873744433309013, + "learning_rate": 8.20264948084497e-06, + "loss": 0.0, + "step": 4066 + }, + { + "epoch": 0.2620996326609525, + "grad_norm": 0.0010702078088623004, + "learning_rate": 8.201933404940925e-06, + "loss": 0.0, + "step": 4067 + }, + { + "epoch": 0.26216407810788167, + "grad_norm": 0.00016045440845831096, + "learning_rate": 8.201217329036879e-06, + "loss": 0.0, + "step": 4068 + }, + { + "epoch": 0.26222852355481086, + "grad_norm": 0.33688526859804885, + "learning_rate": 8.200501253132833e-06, + "loss": 0.0033, + "step": 4069 + }, + { + "epoch": 0.26229296900174004, + "grad_norm": 0.005965846264202467, + "learning_rate": 8.199785177228788e-06, + "loss": 0.0, + "step": 4070 + }, + { + "epoch": 0.2623574144486692, + "grad_norm": 1.2753300557497391, + "learning_rate": 8.199069101324742e-06, + "loss": 0.007, + "step": 4071 + }, + { + "epoch": 0.26242185989559835, + "grad_norm": 0.42094141481840447, + "learning_rate": 8.198353025420696e-06, + "loss": 0.0034, + "step": 4072 + }, + { + "epoch": 0.26248630534252754, + "grad_norm": 0.0007287004405519097, + "learning_rate": 8.19763694951665e-06, + "loss": 0.0, + "step": 4073 + }, + { + "epoch": 0.2625507507894567, + "grad_norm": 0.00040297765390191563, + "learning_rate": 8.196920873612603e-06, + "loss": 0.0, + "step": 4074 + }, + { + "epoch": 0.2626151962363859, + "grad_norm": 0.013456809470448914, + "learning_rate": 8.196204797708557e-06, + "loss": 0.0, + "step": 4075 + }, + { + "epoch": 0.2626796416833151, + "grad_norm": 0.37379560179625704, + "learning_rate": 8.195488721804512e-06, + "loss": 0.0044, + "step": 4076 + }, + { + "epoch": 0.2627440871302443, + "grad_norm": 0.19677700298270354, + "learning_rate": 8.194772645900466e-06, + "loss": 0.0001, + "step": 4077 + }, + { + "epoch": 0.2628085325771734, + "grad_norm": 0.4003689829862226, + "learning_rate": 8.19405656999642e-06, + "loss": 0.0004, + "step": 4078 + }, + { + "epoch": 0.2628729780241026, + "grad_norm": 0.004796831837730866, + "learning_rate": 8.193340494092375e-06, + "loss": 0.0001, + "step": 4079 + }, + { + "epoch": 0.2629374234710318, + "grad_norm": 0.0019953814802931044, + "learning_rate": 8.192624418188329e-06, + "loss": 0.0, + "step": 4080 + }, + { + "epoch": 0.26300186891796096, + "grad_norm": 0.08354990881299791, + "learning_rate": 8.191908342284283e-06, + "loss": 0.0017, + "step": 4081 + }, + { + "epoch": 0.26306631436489014, + "grad_norm": 0.022514688404505278, + "learning_rate": 8.191192266380237e-06, + "loss": 0.0002, + "step": 4082 + }, + { + "epoch": 0.26313075981181927, + "grad_norm": 0.029840040645768735, + "learning_rate": 8.190476190476192e-06, + "loss": 0.0001, + "step": 4083 + }, + { + "epoch": 0.26319520525874845, + "grad_norm": 0.0012259527385874386, + "learning_rate": 8.189760114572144e-06, + "loss": 0.0, + "step": 4084 + }, + { + "epoch": 0.26325965070567764, + "grad_norm": 0.0016036067659748598, + "learning_rate": 8.189044038668099e-06, + "loss": 0.0, + "step": 4085 + }, + { + "epoch": 0.2633240961526068, + "grad_norm": 0.00031105862440330756, + "learning_rate": 8.188327962764053e-06, + "loss": 0.0, + "step": 4086 + }, + { + "epoch": 0.263388541599536, + "grad_norm": 0.01397653280989205, + "learning_rate": 8.187611886860007e-06, + "loss": 0.0001, + "step": 4087 + }, + { + "epoch": 0.2634529870464652, + "grad_norm": 0.001967928846776797, + "learning_rate": 8.186895810955962e-06, + "loss": 0.0, + "step": 4088 + }, + { + "epoch": 0.2635174324933943, + "grad_norm": 0.00274836594875593, + "learning_rate": 8.186179735051916e-06, + "loss": 0.0, + "step": 4089 + }, + { + "epoch": 0.2635818779403235, + "grad_norm": 0.0032691538261679083, + "learning_rate": 8.18546365914787e-06, + "loss": 0.0, + "step": 4090 + }, + { + "epoch": 0.2636463233872527, + "grad_norm": 0.14504496675654013, + "learning_rate": 8.184747583243824e-06, + "loss": 0.0013, + "step": 4091 + }, + { + "epoch": 0.2637107688341819, + "grad_norm": 0.5235929400677679, + "learning_rate": 8.184031507339779e-06, + "loss": 0.0018, + "step": 4092 + }, + { + "epoch": 0.26377521428111106, + "grad_norm": 0.0067012557335894415, + "learning_rate": 8.183315431435733e-06, + "loss": 0.0, + "step": 4093 + }, + { + "epoch": 0.2638396597280402, + "grad_norm": 0.6293055528436677, + "learning_rate": 8.182599355531687e-06, + "loss": 0.0015, + "step": 4094 + }, + { + "epoch": 0.2639041051749694, + "grad_norm": 0.01867268633251904, + "learning_rate": 8.181883279627642e-06, + "loss": 0.0001, + "step": 4095 + }, + { + "epoch": 0.26396855062189856, + "grad_norm": 0.24328595672003867, + "learning_rate": 8.181167203723596e-06, + "loss": 0.0001, + "step": 4096 + }, + { + "epoch": 0.26403299606882774, + "grad_norm": 0.0032203777060472484, + "learning_rate": 8.18045112781955e-06, + "loss": 0.0, + "step": 4097 + }, + { + "epoch": 0.2640974415157569, + "grad_norm": 0.06334367560512613, + "learning_rate": 8.179735051915505e-06, + "loss": 0.0001, + "step": 4098 + }, + { + "epoch": 0.2641618869626861, + "grad_norm": 0.0012013743522975488, + "learning_rate": 8.179018976011459e-06, + "loss": 0.0, + "step": 4099 + }, + { + "epoch": 0.26422633240961524, + "grad_norm": 0.0037318730977894373, + "learning_rate": 8.178302900107411e-06, + "loss": 0.0, + "step": 4100 + }, + { + "epoch": 0.2642907778565444, + "grad_norm": 0.00883380236499844, + "learning_rate": 8.177586824203366e-06, + "loss": 0.0001, + "step": 4101 + }, + { + "epoch": 0.2643552233034736, + "grad_norm": 0.0044728075765404236, + "learning_rate": 8.17687074829932e-06, + "loss": 0.0, + "step": 4102 + }, + { + "epoch": 0.2644196687504028, + "grad_norm": 0.0070594660207348395, + "learning_rate": 8.176154672395274e-06, + "loss": 0.0001, + "step": 4103 + }, + { + "epoch": 0.264484114197332, + "grad_norm": 0.10401460655221516, + "learning_rate": 8.175438596491229e-06, + "loss": 0.0001, + "step": 4104 + }, + { + "epoch": 0.2645485596442611, + "grad_norm": 0.03844934332167701, + "learning_rate": 8.174722520587183e-06, + "loss": 0.0002, + "step": 4105 + }, + { + "epoch": 0.2646130050911903, + "grad_norm": 1.0716477578622337, + "learning_rate": 8.174006444683137e-06, + "loss": 0.0032, + "step": 4106 + }, + { + "epoch": 0.2646774505381195, + "grad_norm": 1.3784877152724, + "learning_rate": 8.173290368779092e-06, + "loss": 0.0164, + "step": 4107 + }, + { + "epoch": 0.26474189598504866, + "grad_norm": 0.029587838605416625, + "learning_rate": 8.172574292875046e-06, + "loss": 0.0001, + "step": 4108 + }, + { + "epoch": 0.26480634143197784, + "grad_norm": 0.18620288726645495, + "learning_rate": 8.171858216970998e-06, + "loss": 0.0019, + "step": 4109 + }, + { + "epoch": 0.264870786878907, + "grad_norm": 0.0058924889771335845, + "learning_rate": 8.171142141066953e-06, + "loss": 0.0, + "step": 4110 + }, + { + "epoch": 0.26493523232583616, + "grad_norm": 0.025689142320420872, + "learning_rate": 8.170426065162907e-06, + "loss": 0.0001, + "step": 4111 + }, + { + "epoch": 0.26499967777276534, + "grad_norm": 0.007680422605328349, + "learning_rate": 8.169709989258861e-06, + "loss": 0.0, + "step": 4112 + }, + { + "epoch": 0.2650641232196945, + "grad_norm": 0.025682199960681367, + "learning_rate": 8.168993913354816e-06, + "loss": 0.0001, + "step": 4113 + }, + { + "epoch": 0.2651285686666237, + "grad_norm": 0.6048429639215468, + "learning_rate": 8.168277837450772e-06, + "loss": 0.0029, + "step": 4114 + }, + { + "epoch": 0.2651930141135529, + "grad_norm": 0.21238982942042506, + "learning_rate": 8.167561761546726e-06, + "loss": 0.0002, + "step": 4115 + }, + { + "epoch": 0.2652574595604821, + "grad_norm": 0.1002031654121083, + "learning_rate": 8.166845685642679e-06, + "loss": 0.0001, + "step": 4116 + }, + { + "epoch": 0.2653219050074112, + "grad_norm": 0.07676032076806907, + "learning_rate": 8.166129609738633e-06, + "loss": 0.0, + "step": 4117 + }, + { + "epoch": 0.2653863504543404, + "grad_norm": 0.0556255801510828, + "learning_rate": 8.165413533834587e-06, + "loss": 0.0001, + "step": 4118 + }, + { + "epoch": 0.2654507959012696, + "grad_norm": 0.38215803137244037, + "learning_rate": 8.164697457930541e-06, + "loss": 0.0012, + "step": 4119 + }, + { + "epoch": 0.26551524134819876, + "grad_norm": 0.367995408179826, + "learning_rate": 8.163981382026496e-06, + "loss": 0.0004, + "step": 4120 + }, + { + "epoch": 0.26557968679512794, + "grad_norm": 0.12432520116844026, + "learning_rate": 8.16326530612245e-06, + "loss": 0.0002, + "step": 4121 + }, + { + "epoch": 0.2656441322420571, + "grad_norm": 0.40048772983471204, + "learning_rate": 8.162549230218404e-06, + "loss": 0.0012, + "step": 4122 + }, + { + "epoch": 0.26570857768898626, + "grad_norm": 0.028347294384729757, + "learning_rate": 8.161833154314359e-06, + "loss": 0.0001, + "step": 4123 + }, + { + "epoch": 0.26577302313591544, + "grad_norm": 0.001553801958047865, + "learning_rate": 8.161117078410313e-06, + "loss": 0.0, + "step": 4124 + }, + { + "epoch": 0.2658374685828446, + "grad_norm": 0.0048637148912617, + "learning_rate": 8.160401002506266e-06, + "loss": 0.0, + "step": 4125 + }, + { + "epoch": 0.2659019140297738, + "grad_norm": 0.06001730600695284, + "learning_rate": 8.15968492660222e-06, + "loss": 0.0004, + "step": 4126 + }, + { + "epoch": 0.265966359476703, + "grad_norm": 0.002554106564452214, + "learning_rate": 8.158968850698174e-06, + "loss": 0.0, + "step": 4127 + }, + { + "epoch": 0.2660308049236321, + "grad_norm": 0.01876722425713162, + "learning_rate": 8.158252774794128e-06, + "loss": 0.0002, + "step": 4128 + }, + { + "epoch": 0.2660952503705613, + "grad_norm": 0.0034013080274845862, + "learning_rate": 8.157536698890083e-06, + "loss": 0.0, + "step": 4129 + }, + { + "epoch": 0.2661596958174905, + "grad_norm": 0.0013784421649856453, + "learning_rate": 8.156820622986037e-06, + "loss": 0.0, + "step": 4130 + }, + { + "epoch": 0.2662241412644197, + "grad_norm": 0.002031356068066092, + "learning_rate": 8.156104547081991e-06, + "loss": 0.0, + "step": 4131 + }, + { + "epoch": 0.26628858671134886, + "grad_norm": 0.008120560940019509, + "learning_rate": 8.155388471177946e-06, + "loss": 0.0, + "step": 4132 + }, + { + "epoch": 0.266353032158278, + "grad_norm": 0.09823169443657727, + "learning_rate": 8.1546723952739e-06, + "loss": 0.0001, + "step": 4133 + }, + { + "epoch": 0.2664174776052072, + "grad_norm": 0.016965823095737853, + "learning_rate": 8.153956319369854e-06, + "loss": 0.0001, + "step": 4134 + }, + { + "epoch": 0.26648192305213636, + "grad_norm": 0.0015814950437502196, + "learning_rate": 8.153240243465807e-06, + "loss": 0.0, + "step": 4135 + }, + { + "epoch": 0.26654636849906554, + "grad_norm": 0.05691408457785069, + "learning_rate": 8.152524167561761e-06, + "loss": 0.0001, + "step": 4136 + }, + { + "epoch": 0.26661081394599473, + "grad_norm": 0.03462659186650775, + "learning_rate": 8.151808091657717e-06, + "loss": 0.0001, + "step": 4137 + }, + { + "epoch": 0.2666752593929239, + "grad_norm": 0.014375004746504964, + "learning_rate": 8.151092015753671e-06, + "loss": 0.0001, + "step": 4138 + }, + { + "epoch": 0.26673970483985304, + "grad_norm": 0.0024106756135800498, + "learning_rate": 8.150375939849626e-06, + "loss": 0.0, + "step": 4139 + }, + { + "epoch": 0.2668041502867822, + "grad_norm": 0.007668585018300137, + "learning_rate": 8.14965986394558e-06, + "loss": 0.0, + "step": 4140 + }, + { + "epoch": 0.2668685957337114, + "grad_norm": 0.006620185098797049, + "learning_rate": 8.148943788041533e-06, + "loss": 0.0, + "step": 4141 + }, + { + "epoch": 0.2669330411806406, + "grad_norm": 0.001317973851116255, + "learning_rate": 8.148227712137487e-06, + "loss": 0.0, + "step": 4142 + }, + { + "epoch": 0.2669974866275698, + "grad_norm": 0.035794016515809664, + "learning_rate": 8.147511636233441e-06, + "loss": 0.0004, + "step": 4143 + }, + { + "epoch": 0.2670619320744989, + "grad_norm": 0.00035408094955500456, + "learning_rate": 8.146795560329396e-06, + "loss": 0.0, + "step": 4144 + }, + { + "epoch": 0.2671263775214281, + "grad_norm": 0.008474903101516811, + "learning_rate": 8.14607948442535e-06, + "loss": 0.0001, + "step": 4145 + }, + { + "epoch": 0.2671908229683573, + "grad_norm": 0.0012700962973577897, + "learning_rate": 8.145363408521304e-06, + "loss": 0.0, + "step": 4146 + }, + { + "epoch": 0.26725526841528646, + "grad_norm": 0.0002888536530388627, + "learning_rate": 8.144647332617258e-06, + "loss": 0.0, + "step": 4147 + }, + { + "epoch": 0.26731971386221565, + "grad_norm": 0.0006534653339654185, + "learning_rate": 8.143931256713213e-06, + "loss": 0.0, + "step": 4148 + }, + { + "epoch": 0.26738415930914483, + "grad_norm": 0.07163475431684561, + "learning_rate": 8.143215180809167e-06, + "loss": 0.0001, + "step": 4149 + }, + { + "epoch": 0.26744860475607396, + "grad_norm": 0.0025577957639277035, + "learning_rate": 8.142499104905121e-06, + "loss": 0.0, + "step": 4150 + }, + { + "epoch": 0.26751305020300314, + "grad_norm": 0.00018299976222950143, + "learning_rate": 8.141783029001074e-06, + "loss": 0.0, + "step": 4151 + }, + { + "epoch": 0.26757749564993233, + "grad_norm": 0.2443402427946589, + "learning_rate": 8.141066953097028e-06, + "loss": 0.0007, + "step": 4152 + }, + { + "epoch": 0.2676419410968615, + "grad_norm": 0.4529980777695485, + "learning_rate": 8.140350877192983e-06, + "loss": 0.002, + "step": 4153 + }, + { + "epoch": 0.2677063865437907, + "grad_norm": 0.023886939799844756, + "learning_rate": 8.139634801288937e-06, + "loss": 0.0003, + "step": 4154 + }, + { + "epoch": 0.2677708319907199, + "grad_norm": 0.0007809454694696253, + "learning_rate": 8.138918725384891e-06, + "loss": 0.0, + "step": 4155 + }, + { + "epoch": 0.267835277437649, + "grad_norm": 0.18457986917933467, + "learning_rate": 8.138202649480845e-06, + "loss": 0.0043, + "step": 4156 + }, + { + "epoch": 0.2678997228845782, + "grad_norm": 0.005621835272940629, + "learning_rate": 8.1374865735768e-06, + "loss": 0.0001, + "step": 4157 + }, + { + "epoch": 0.2679641683315074, + "grad_norm": 0.0006343240890120217, + "learning_rate": 8.136770497672754e-06, + "loss": 0.0, + "step": 4158 + }, + { + "epoch": 0.26802861377843656, + "grad_norm": 0.07103669554810184, + "learning_rate": 8.136054421768708e-06, + "loss": 0.0002, + "step": 4159 + }, + { + "epoch": 0.26809305922536575, + "grad_norm": 0.00017467493782411902, + "learning_rate": 8.135338345864663e-06, + "loss": 0.0, + "step": 4160 + }, + { + "epoch": 0.2681575046722949, + "grad_norm": 0.00046530142134858303, + "learning_rate": 8.134622269960617e-06, + "loss": 0.0, + "step": 4161 + }, + { + "epoch": 0.26822195011922406, + "grad_norm": 0.00028367250039852975, + "learning_rate": 8.133906194056571e-06, + "loss": 0.0, + "step": 4162 + }, + { + "epoch": 0.26828639556615325, + "grad_norm": 0.07753849113407583, + "learning_rate": 8.133190118152525e-06, + "loss": 0.0002, + "step": 4163 + }, + { + "epoch": 0.26835084101308243, + "grad_norm": 0.00028427710331693025, + "learning_rate": 8.13247404224848e-06, + "loss": 0.0, + "step": 4164 + }, + { + "epoch": 0.2684152864600116, + "grad_norm": 0.0039621051110842725, + "learning_rate": 8.131757966344434e-06, + "loss": 0.0, + "step": 4165 + }, + { + "epoch": 0.2684797319069408, + "grad_norm": 0.021191026836945282, + "learning_rate": 8.131041890440388e-06, + "loss": 0.0001, + "step": 4166 + }, + { + "epoch": 0.26854417735386993, + "grad_norm": 0.0008846425877893397, + "learning_rate": 8.130325814536341e-06, + "loss": 0.0, + "step": 4167 + }, + { + "epoch": 0.2686086228007991, + "grad_norm": 0.00047771099607565615, + "learning_rate": 8.129609738632295e-06, + "loss": 0.0, + "step": 4168 + }, + { + "epoch": 0.2686730682477283, + "grad_norm": 0.011498115816560874, + "learning_rate": 8.12889366272825e-06, + "loss": 0.0, + "step": 4169 + }, + { + "epoch": 0.2687375136946575, + "grad_norm": 0.00035771112664960457, + "learning_rate": 8.128177586824204e-06, + "loss": 0.0, + "step": 4170 + }, + { + "epoch": 0.26880195914158667, + "grad_norm": 0.0011935188824502712, + "learning_rate": 8.127461510920158e-06, + "loss": 0.0, + "step": 4171 + }, + { + "epoch": 0.2688664045885158, + "grad_norm": 0.0036977720277806395, + "learning_rate": 8.126745435016112e-06, + "loss": 0.0, + "step": 4172 + }, + { + "epoch": 0.268930850035445, + "grad_norm": 0.0014385408918984439, + "learning_rate": 8.126029359112067e-06, + "loss": 0.0, + "step": 4173 + }, + { + "epoch": 0.26899529548237416, + "grad_norm": 0.0024186235474048293, + "learning_rate": 8.125313283208021e-06, + "loss": 0.0, + "step": 4174 + }, + { + "epoch": 0.26905974092930335, + "grad_norm": 0.034542698594028746, + "learning_rate": 8.124597207303975e-06, + "loss": 0.0001, + "step": 4175 + }, + { + "epoch": 0.26912418637623253, + "grad_norm": 0.009862969678027288, + "learning_rate": 8.12388113139993e-06, + "loss": 0.0, + "step": 4176 + }, + { + "epoch": 0.2691886318231617, + "grad_norm": 0.22107264717946118, + "learning_rate": 8.123165055495882e-06, + "loss": 0.002, + "step": 4177 + }, + { + "epoch": 0.26925307727009085, + "grad_norm": 0.0001773784457137841, + "learning_rate": 8.122448979591837e-06, + "loss": 0.0, + "step": 4178 + }, + { + "epoch": 0.26931752271702003, + "grad_norm": 0.7825109224501692, + "learning_rate": 8.121732903687791e-06, + "loss": 0.0013, + "step": 4179 + }, + { + "epoch": 0.2693819681639492, + "grad_norm": 1.3505293144001949, + "learning_rate": 8.121016827783745e-06, + "loss": 0.0069, + "step": 4180 + }, + { + "epoch": 0.2694464136108784, + "grad_norm": 0.15621311825343512, + "learning_rate": 8.1203007518797e-06, + "loss": 0.0, + "step": 4181 + }, + { + "epoch": 0.2695108590578076, + "grad_norm": 0.007509552035810147, + "learning_rate": 8.119584675975654e-06, + "loss": 0.0, + "step": 4182 + }, + { + "epoch": 0.26957530450473677, + "grad_norm": 0.0005021826837052551, + "learning_rate": 8.118868600071608e-06, + "loss": 0.0, + "step": 4183 + }, + { + "epoch": 0.2696397499516659, + "grad_norm": 0.0031436756764531048, + "learning_rate": 8.118152524167562e-06, + "loss": 0.0, + "step": 4184 + }, + { + "epoch": 0.2697041953985951, + "grad_norm": 0.9572962491351598, + "learning_rate": 8.117436448263517e-06, + "loss": 0.0021, + "step": 4185 + }, + { + "epoch": 0.26976864084552427, + "grad_norm": 1.3736213361417184, + "learning_rate": 8.116720372359471e-06, + "loss": 0.0054, + "step": 4186 + }, + { + "epoch": 0.26983308629245345, + "grad_norm": 0.3207216905616851, + "learning_rate": 8.116004296455425e-06, + "loss": 0.0012, + "step": 4187 + }, + { + "epoch": 0.26989753173938263, + "grad_norm": 0.029056574864155083, + "learning_rate": 8.11528822055138e-06, + "loss": 0.0001, + "step": 4188 + }, + { + "epoch": 0.26996197718631176, + "grad_norm": 0.0008124575126002671, + "learning_rate": 8.114572144647334e-06, + "loss": 0.0, + "step": 4189 + }, + { + "epoch": 0.27002642263324095, + "grad_norm": 0.007161701968818947, + "learning_rate": 8.113856068743288e-06, + "loss": 0.0, + "step": 4190 + }, + { + "epoch": 0.27009086808017013, + "grad_norm": 0.002907445352707109, + "learning_rate": 8.113139992839242e-06, + "loss": 0.0, + "step": 4191 + }, + { + "epoch": 0.2701553135270993, + "grad_norm": 0.0014930615116478428, + "learning_rate": 8.112423916935197e-06, + "loss": 0.0, + "step": 4192 + }, + { + "epoch": 0.2702197589740285, + "grad_norm": 0.0064780529217881066, + "learning_rate": 8.11170784103115e-06, + "loss": 0.0, + "step": 4193 + }, + { + "epoch": 0.2702842044209577, + "grad_norm": 0.0006701570723918696, + "learning_rate": 8.110991765127104e-06, + "loss": 0.0, + "step": 4194 + }, + { + "epoch": 0.2703486498678868, + "grad_norm": 0.17730787477319992, + "learning_rate": 8.110275689223058e-06, + "loss": 0.0007, + "step": 4195 + }, + { + "epoch": 0.270413095314816, + "grad_norm": 0.4032809813305308, + "learning_rate": 8.109559613319012e-06, + "loss": 0.0016, + "step": 4196 + }, + { + "epoch": 0.2704775407617452, + "grad_norm": 0.00046503311607447816, + "learning_rate": 8.108843537414967e-06, + "loss": 0.0, + "step": 4197 + }, + { + "epoch": 0.27054198620867437, + "grad_norm": 0.0011172217789278758, + "learning_rate": 8.108127461510921e-06, + "loss": 0.0, + "step": 4198 + }, + { + "epoch": 0.27060643165560355, + "grad_norm": 0.0008143952554526554, + "learning_rate": 8.107411385606875e-06, + "loss": 0.0, + "step": 4199 + }, + { + "epoch": 0.2706708771025327, + "grad_norm": 0.008522984751307939, + "learning_rate": 8.10669530970283e-06, + "loss": 0.0001, + "step": 4200 + }, + { + "epoch": 0.27073532254946187, + "grad_norm": 0.0024840219283999553, + "learning_rate": 8.105979233798784e-06, + "loss": 0.0, + "step": 4201 + }, + { + "epoch": 0.27079976799639105, + "grad_norm": 0.00039293305919349785, + "learning_rate": 8.105263157894736e-06, + "loss": 0.0, + "step": 4202 + }, + { + "epoch": 0.27086421344332023, + "grad_norm": 0.005540428539705254, + "learning_rate": 8.10454708199069e-06, + "loss": 0.0001, + "step": 4203 + }, + { + "epoch": 0.2709286588902494, + "grad_norm": 0.011686594502197983, + "learning_rate": 8.103831006086645e-06, + "loss": 0.0, + "step": 4204 + }, + { + "epoch": 0.2709931043371786, + "grad_norm": 0.16621984375021687, + "learning_rate": 8.1031149301826e-06, + "loss": 0.0001, + "step": 4205 + }, + { + "epoch": 0.27105754978410773, + "grad_norm": 0.0004452170447672888, + "learning_rate": 8.102398854278554e-06, + "loss": 0.0, + "step": 4206 + }, + { + "epoch": 0.2711219952310369, + "grad_norm": 0.00228342254717163, + "learning_rate": 8.10168277837451e-06, + "loss": 0.0, + "step": 4207 + }, + { + "epoch": 0.2711864406779661, + "grad_norm": 0.00729092715793383, + "learning_rate": 8.100966702470464e-06, + "loss": 0.0, + "step": 4208 + }, + { + "epoch": 0.2712508861248953, + "grad_norm": 0.01637368052505697, + "learning_rate": 8.100250626566416e-06, + "loss": 0.0001, + "step": 4209 + }, + { + "epoch": 0.27131533157182447, + "grad_norm": 0.47464788147246495, + "learning_rate": 8.09953455066237e-06, + "loss": 0.0007, + "step": 4210 + }, + { + "epoch": 0.2713797770187536, + "grad_norm": 0.017767601083250248, + "learning_rate": 8.098818474758325e-06, + "loss": 0.0001, + "step": 4211 + }, + { + "epoch": 0.2714442224656828, + "grad_norm": 0.0002587533399039369, + "learning_rate": 8.09810239885428e-06, + "loss": 0.0, + "step": 4212 + }, + { + "epoch": 0.27150866791261197, + "grad_norm": 1.0716687262551394, + "learning_rate": 8.097386322950234e-06, + "loss": 0.0001, + "step": 4213 + }, + { + "epoch": 0.27157311335954115, + "grad_norm": 0.0024058953844227756, + "learning_rate": 8.096670247046188e-06, + "loss": 0.0, + "step": 4214 + }, + { + "epoch": 0.27163755880647034, + "grad_norm": 0.00714180532387043, + "learning_rate": 8.095954171142142e-06, + "loss": 0.0, + "step": 4215 + }, + { + "epoch": 0.2717020042533995, + "grad_norm": 0.2157260896229057, + "learning_rate": 8.095238095238097e-06, + "loss": 0.0004, + "step": 4216 + }, + { + "epoch": 0.27176644970032865, + "grad_norm": 0.015134509852193008, + "learning_rate": 8.09452201933405e-06, + "loss": 0.0, + "step": 4217 + }, + { + "epoch": 0.27183089514725783, + "grad_norm": 0.013603620153944285, + "learning_rate": 8.093805943430003e-06, + "loss": 0.0001, + "step": 4218 + }, + { + "epoch": 0.271895340594187, + "grad_norm": 0.014178400987857479, + "learning_rate": 8.093089867525958e-06, + "loss": 0.0001, + "step": 4219 + }, + { + "epoch": 0.2719597860411162, + "grad_norm": 0.00024165479162664222, + "learning_rate": 8.092373791621912e-06, + "loss": 0.0, + "step": 4220 + }, + { + "epoch": 0.2720242314880454, + "grad_norm": 0.0020571868036936888, + "learning_rate": 8.091657715717866e-06, + "loss": 0.0, + "step": 4221 + }, + { + "epoch": 0.27208867693497457, + "grad_norm": 0.005309349799260391, + "learning_rate": 8.09094163981382e-06, + "loss": 0.0, + "step": 4222 + }, + { + "epoch": 0.2721531223819037, + "grad_norm": 0.30123737973572057, + "learning_rate": 8.090225563909775e-06, + "loss": 0.005, + "step": 4223 + }, + { + "epoch": 0.2722175678288329, + "grad_norm": 0.0012302217094057092, + "learning_rate": 8.08950948800573e-06, + "loss": 0.0, + "step": 4224 + }, + { + "epoch": 0.27228201327576207, + "grad_norm": 0.0002683422688236845, + "learning_rate": 8.088793412101684e-06, + "loss": 0.0, + "step": 4225 + }, + { + "epoch": 0.27234645872269125, + "grad_norm": 0.0006782941113959453, + "learning_rate": 8.088077336197638e-06, + "loss": 0.0, + "step": 4226 + }, + { + "epoch": 0.27241090416962044, + "grad_norm": 0.010038923617897768, + "learning_rate": 8.087361260293592e-06, + "loss": 0.0001, + "step": 4227 + }, + { + "epoch": 0.27247534961654957, + "grad_norm": 0.003118605860282275, + "learning_rate": 8.086645184389545e-06, + "loss": 0.0, + "step": 4228 + }, + { + "epoch": 0.27253979506347875, + "grad_norm": 0.01232286451285321, + "learning_rate": 8.085929108485499e-06, + "loss": 0.0001, + "step": 4229 + }, + { + "epoch": 0.27260424051040794, + "grad_norm": 0.8026916418279838, + "learning_rate": 8.085213032581455e-06, + "loss": 0.005, + "step": 4230 + }, + { + "epoch": 0.2726686859573371, + "grad_norm": 0.25647062712724145, + "learning_rate": 8.08449695667741e-06, + "loss": 0.0021, + "step": 4231 + }, + { + "epoch": 0.2727331314042663, + "grad_norm": 0.0792547996356328, + "learning_rate": 8.083780880773364e-06, + "loss": 0.0001, + "step": 4232 + }, + { + "epoch": 0.2727975768511955, + "grad_norm": 0.07577762607384415, + "learning_rate": 8.083064804869318e-06, + "loss": 0.0007, + "step": 4233 + }, + { + "epoch": 0.2728620222981246, + "grad_norm": 0.2807311066234324, + "learning_rate": 8.08234872896527e-06, + "loss": 0.0008, + "step": 4234 + }, + { + "epoch": 0.2729264677450538, + "grad_norm": 0.0006046875437334746, + "learning_rate": 8.081632653061225e-06, + "loss": 0.0, + "step": 4235 + }, + { + "epoch": 0.272990913191983, + "grad_norm": 0.030921507784656604, + "learning_rate": 8.080916577157179e-06, + "loss": 0.0002, + "step": 4236 + }, + { + "epoch": 0.27305535863891217, + "grad_norm": 0.17561644139596558, + "learning_rate": 8.080200501253133e-06, + "loss": 0.0018, + "step": 4237 + }, + { + "epoch": 0.27311980408584136, + "grad_norm": 0.0016910747966463998, + "learning_rate": 8.079484425349088e-06, + "loss": 0.0, + "step": 4238 + }, + { + "epoch": 0.2731842495327705, + "grad_norm": 0.017976164972360383, + "learning_rate": 8.078768349445042e-06, + "loss": 0.0, + "step": 4239 + }, + { + "epoch": 0.27324869497969967, + "grad_norm": 0.0002662013235983584, + "learning_rate": 8.078052273540996e-06, + "loss": 0.0, + "step": 4240 + }, + { + "epoch": 0.27331314042662885, + "grad_norm": 0.004663524159279211, + "learning_rate": 8.07733619763695e-06, + "loss": 0.0, + "step": 4241 + }, + { + "epoch": 0.27337758587355804, + "grad_norm": 0.21042938315959228, + "learning_rate": 8.076620121732905e-06, + "loss": 0.0005, + "step": 4242 + }, + { + "epoch": 0.2734420313204872, + "grad_norm": 0.03295338194526715, + "learning_rate": 8.07590404582886e-06, + "loss": 0.0001, + "step": 4243 + }, + { + "epoch": 0.2735064767674164, + "grad_norm": 0.0071647418191086885, + "learning_rate": 8.075187969924812e-06, + "loss": 0.0, + "step": 4244 + }, + { + "epoch": 0.27357092221434554, + "grad_norm": 0.09229751786692166, + "learning_rate": 8.074471894020766e-06, + "loss": 0.0009, + "step": 4245 + }, + { + "epoch": 0.2736353676612747, + "grad_norm": 0.005337680304002737, + "learning_rate": 8.07375581811672e-06, + "loss": 0.0, + "step": 4246 + }, + { + "epoch": 0.2736998131082039, + "grad_norm": 0.0066058648404035405, + "learning_rate": 8.073039742212675e-06, + "loss": 0.0, + "step": 4247 + }, + { + "epoch": 0.2737642585551331, + "grad_norm": 0.018948461852127226, + "learning_rate": 8.072323666308629e-06, + "loss": 0.0, + "step": 4248 + }, + { + "epoch": 0.2738287040020623, + "grad_norm": 0.28756954658064515, + "learning_rate": 8.071607590404583e-06, + "loss": 0.0011, + "step": 4249 + }, + { + "epoch": 0.2738931494489914, + "grad_norm": 0.1112823266641712, + "learning_rate": 8.070891514500538e-06, + "loss": 0.0002, + "step": 4250 + }, + { + "epoch": 0.2739575948959206, + "grad_norm": 0.0033629401611851456, + "learning_rate": 8.070175438596492e-06, + "loss": 0.0, + "step": 4251 + }, + { + "epoch": 0.27402204034284977, + "grad_norm": 0.01336159347321719, + "learning_rate": 8.069459362692446e-06, + "loss": 0.0001, + "step": 4252 + }, + { + "epoch": 0.27408648578977896, + "grad_norm": 0.08284823562061203, + "learning_rate": 8.0687432867884e-06, + "loss": 0.0005, + "step": 4253 + }, + { + "epoch": 0.27415093123670814, + "grad_norm": 0.02694515727281307, + "learning_rate": 8.068027210884355e-06, + "loss": 0.0, + "step": 4254 + }, + { + "epoch": 0.2742153766836373, + "grad_norm": 0.012616425954789283, + "learning_rate": 8.067311134980309e-06, + "loss": 0.0001, + "step": 4255 + }, + { + "epoch": 0.27427982213056645, + "grad_norm": 0.0007318360328273589, + "learning_rate": 8.066595059076263e-06, + "loss": 0.0, + "step": 4256 + }, + { + "epoch": 0.27434426757749564, + "grad_norm": 1.0259591406667061, + "learning_rate": 8.065878983172218e-06, + "loss": 0.0031, + "step": 4257 + }, + { + "epoch": 0.2744087130244248, + "grad_norm": 0.006475353661629028, + "learning_rate": 8.065162907268172e-06, + "loss": 0.0, + "step": 4258 + }, + { + "epoch": 0.274473158471354, + "grad_norm": 0.44145669902026735, + "learning_rate": 8.064446831364126e-06, + "loss": 0.0013, + "step": 4259 + }, + { + "epoch": 0.2745376039182832, + "grad_norm": 0.0496036830400151, + "learning_rate": 8.063730755460079e-06, + "loss": 0.0004, + "step": 4260 + }, + { + "epoch": 0.2746020493652124, + "grad_norm": 0.08654972122820045, + "learning_rate": 8.063014679556033e-06, + "loss": 0.0005, + "step": 4261 + }, + { + "epoch": 0.2746664948121415, + "grad_norm": 0.0032045456562780393, + "learning_rate": 8.062298603651988e-06, + "loss": 0.0, + "step": 4262 + }, + { + "epoch": 0.2747309402590707, + "grad_norm": 0.027642436666024946, + "learning_rate": 8.061582527747942e-06, + "loss": 0.0, + "step": 4263 + }, + { + "epoch": 0.2747953857059999, + "grad_norm": 0.026773698904416023, + "learning_rate": 8.060866451843896e-06, + "loss": 0.0001, + "step": 4264 + }, + { + "epoch": 0.27485983115292906, + "grad_norm": 0.009627766904490808, + "learning_rate": 8.06015037593985e-06, + "loss": 0.0, + "step": 4265 + }, + { + "epoch": 0.27492427659985824, + "grad_norm": 0.0016107713773131466, + "learning_rate": 8.059434300035805e-06, + "loss": 0.0, + "step": 4266 + }, + { + "epoch": 0.27498872204678737, + "grad_norm": 0.025908643128907324, + "learning_rate": 8.058718224131759e-06, + "loss": 0.0001, + "step": 4267 + }, + { + "epoch": 0.27505316749371655, + "grad_norm": 0.006806098258419837, + "learning_rate": 8.058002148227713e-06, + "loss": 0.0, + "step": 4268 + }, + { + "epoch": 0.27511761294064574, + "grad_norm": 0.007748768085792652, + "learning_rate": 8.057286072323668e-06, + "loss": 0.0, + "step": 4269 + }, + { + "epoch": 0.2751820583875749, + "grad_norm": 0.25058081332503046, + "learning_rate": 8.05656999641962e-06, + "loss": 0.0007, + "step": 4270 + }, + { + "epoch": 0.2752465038345041, + "grad_norm": 0.024260379358677264, + "learning_rate": 8.055853920515575e-06, + "loss": 0.0003, + "step": 4271 + }, + { + "epoch": 0.2753109492814333, + "grad_norm": 0.007065816195131135, + "learning_rate": 8.055137844611529e-06, + "loss": 0.0, + "step": 4272 + }, + { + "epoch": 0.2753753947283624, + "grad_norm": 0.0032001312958800823, + "learning_rate": 8.054421768707483e-06, + "loss": 0.0, + "step": 4273 + }, + { + "epoch": 0.2754398401752916, + "grad_norm": 0.0041486393690888055, + "learning_rate": 8.053705692803437e-06, + "loss": 0.0, + "step": 4274 + }, + { + "epoch": 0.2755042856222208, + "grad_norm": 0.002187301162407734, + "learning_rate": 8.052989616899392e-06, + "loss": 0.0, + "step": 4275 + }, + { + "epoch": 0.27556873106915, + "grad_norm": 0.019823731982545546, + "learning_rate": 8.052273540995346e-06, + "loss": 0.0001, + "step": 4276 + }, + { + "epoch": 0.27563317651607916, + "grad_norm": 0.03341653553794979, + "learning_rate": 8.0515574650913e-06, + "loss": 0.0002, + "step": 4277 + }, + { + "epoch": 0.2756976219630083, + "grad_norm": 0.0022856206212265636, + "learning_rate": 8.050841389187255e-06, + "loss": 0.0, + "step": 4278 + }, + { + "epoch": 0.2757620674099375, + "grad_norm": 0.09553208442519484, + "learning_rate": 8.050125313283209e-06, + "loss": 0.0002, + "step": 4279 + }, + { + "epoch": 0.27582651285686666, + "grad_norm": 0.008370866194229725, + "learning_rate": 8.049409237379163e-06, + "loss": 0.0001, + "step": 4280 + }, + { + "epoch": 0.27589095830379584, + "grad_norm": 0.0013321094427172684, + "learning_rate": 8.048693161475117e-06, + "loss": 0.0, + "step": 4281 + }, + { + "epoch": 0.275955403750725, + "grad_norm": 0.07517994067131512, + "learning_rate": 8.047977085571072e-06, + "loss": 0.0001, + "step": 4282 + }, + { + "epoch": 0.2760198491976542, + "grad_norm": 0.0026881408795613724, + "learning_rate": 8.047261009667026e-06, + "loss": 0.0, + "step": 4283 + }, + { + "epoch": 0.27608429464458334, + "grad_norm": 0.5881114557634267, + "learning_rate": 8.04654493376298e-06, + "loss": 0.0026, + "step": 4284 + }, + { + "epoch": 0.2761487400915125, + "grad_norm": 0.0552722037753931, + "learning_rate": 8.045828857858935e-06, + "loss": 0.0005, + "step": 4285 + }, + { + "epoch": 0.2762131855384417, + "grad_norm": 0.0019059926219596923, + "learning_rate": 8.045112781954887e-06, + "loss": 0.0, + "step": 4286 + }, + { + "epoch": 0.2762776309853709, + "grad_norm": 0.004945866811174837, + "learning_rate": 8.044396706050842e-06, + "loss": 0.0, + "step": 4287 + }, + { + "epoch": 0.2763420764323001, + "grad_norm": 0.020964249731356712, + "learning_rate": 8.043680630146796e-06, + "loss": 0.0001, + "step": 4288 + }, + { + "epoch": 0.2764065218792292, + "grad_norm": 2.334616909744486, + "learning_rate": 8.04296455424275e-06, + "loss": 0.0045, + "step": 4289 + }, + { + "epoch": 0.2764709673261584, + "grad_norm": 0.001980315912694081, + "learning_rate": 8.042248478338704e-06, + "loss": 0.0, + "step": 4290 + }, + { + "epoch": 0.2765354127730876, + "grad_norm": 0.07433033072093934, + "learning_rate": 8.041532402434659e-06, + "loss": 0.0003, + "step": 4291 + }, + { + "epoch": 0.27659985822001676, + "grad_norm": 0.015762347565720446, + "learning_rate": 8.040816326530613e-06, + "loss": 0.0, + "step": 4292 + }, + { + "epoch": 0.27666430366694594, + "grad_norm": 9.599061102562011e-05, + "learning_rate": 8.040100250626567e-06, + "loss": 0.0, + "step": 4293 + }, + { + "epoch": 0.2767287491138751, + "grad_norm": 0.011378677333695802, + "learning_rate": 8.039384174722522e-06, + "loss": 0.0001, + "step": 4294 + }, + { + "epoch": 0.27679319456080426, + "grad_norm": 0.0012122533737657336, + "learning_rate": 8.038668098818474e-06, + "loss": 0.0, + "step": 4295 + }, + { + "epoch": 0.27685764000773344, + "grad_norm": 0.00712413864184761, + "learning_rate": 8.037952022914429e-06, + "loss": 0.0, + "step": 4296 + }, + { + "epoch": 0.2769220854546626, + "grad_norm": 0.0021291666023090256, + "learning_rate": 8.037235947010383e-06, + "loss": 0.0, + "step": 4297 + }, + { + "epoch": 0.2769865309015918, + "grad_norm": 0.006989701767573375, + "learning_rate": 8.036519871106337e-06, + "loss": 0.0, + "step": 4298 + }, + { + "epoch": 0.277050976348521, + "grad_norm": 0.0030630408264887, + "learning_rate": 8.035803795202291e-06, + "loss": 0.0, + "step": 4299 + }, + { + "epoch": 0.2771154217954502, + "grad_norm": 0.42474275334818506, + "learning_rate": 8.035087719298247e-06, + "loss": 0.0018, + "step": 4300 + }, + { + "epoch": 0.2771798672423793, + "grad_norm": 0.0020389694047471857, + "learning_rate": 8.034371643394202e-06, + "loss": 0.0, + "step": 4301 + }, + { + "epoch": 0.2772443126893085, + "grad_norm": 0.02280851230690095, + "learning_rate": 8.033655567490154e-06, + "loss": 0.0001, + "step": 4302 + }, + { + "epoch": 0.2773087581362377, + "grad_norm": 0.8225653820081554, + "learning_rate": 8.032939491586109e-06, + "loss": 0.0045, + "step": 4303 + }, + { + "epoch": 0.27737320358316686, + "grad_norm": 0.007784121116634503, + "learning_rate": 8.032223415682063e-06, + "loss": 0.0, + "step": 4304 + }, + { + "epoch": 0.27743764903009605, + "grad_norm": 0.09095866422393512, + "learning_rate": 8.031507339778017e-06, + "loss": 0.0009, + "step": 4305 + }, + { + "epoch": 0.2775020944770252, + "grad_norm": 0.24910336875888767, + "learning_rate": 8.030791263873972e-06, + "loss": 0.0003, + "step": 4306 + }, + { + "epoch": 0.27756653992395436, + "grad_norm": 0.973263011252569, + "learning_rate": 8.030075187969926e-06, + "loss": 0.0138, + "step": 4307 + }, + { + "epoch": 0.27763098537088354, + "grad_norm": 0.49882107567641787, + "learning_rate": 8.02935911206588e-06, + "loss": 0.0013, + "step": 4308 + }, + { + "epoch": 0.2776954308178127, + "grad_norm": 0.001363859972825628, + "learning_rate": 8.028643036161834e-06, + "loss": 0.0, + "step": 4309 + }, + { + "epoch": 0.2777598762647419, + "grad_norm": 0.021673290387127987, + "learning_rate": 8.027926960257789e-06, + "loss": 0.0, + "step": 4310 + }, + { + "epoch": 0.2778243217116711, + "grad_norm": 0.02073911943213021, + "learning_rate": 8.027210884353741e-06, + "loss": 0.0001, + "step": 4311 + }, + { + "epoch": 0.2778887671586002, + "grad_norm": 0.35953358592667545, + "learning_rate": 8.026494808449696e-06, + "loss": 0.0029, + "step": 4312 + }, + { + "epoch": 0.2779532126055294, + "grad_norm": 0.004120392236843992, + "learning_rate": 8.02577873254565e-06, + "loss": 0.0, + "step": 4313 + }, + { + "epoch": 0.2780176580524586, + "grad_norm": 0.0016659283892527326, + "learning_rate": 8.025062656641604e-06, + "loss": 0.0, + "step": 4314 + }, + { + "epoch": 0.2780821034993878, + "grad_norm": 0.000579688410185921, + "learning_rate": 8.024346580737559e-06, + "loss": 0.0, + "step": 4315 + }, + { + "epoch": 0.27814654894631696, + "grad_norm": 0.1932598493658158, + "learning_rate": 8.023630504833513e-06, + "loss": 0.0009, + "step": 4316 + }, + { + "epoch": 0.2782109943932461, + "grad_norm": 0.009979227631807589, + "learning_rate": 8.022914428929467e-06, + "loss": 0.0, + "step": 4317 + }, + { + "epoch": 0.2782754398401753, + "grad_norm": 0.5246637414184626, + "learning_rate": 8.022198353025421e-06, + "loss": 0.0009, + "step": 4318 + }, + { + "epoch": 0.27833988528710446, + "grad_norm": 0.0019687456576445756, + "learning_rate": 8.021482277121376e-06, + "loss": 0.0, + "step": 4319 + }, + { + "epoch": 0.27840433073403364, + "grad_norm": 0.0011185073687473165, + "learning_rate": 8.02076620121733e-06, + "loss": 0.0, + "step": 4320 + }, + { + "epoch": 0.27846877618096283, + "grad_norm": 0.04931498752101843, + "learning_rate": 8.020050125313283e-06, + "loss": 0.0002, + "step": 4321 + }, + { + "epoch": 0.278533221627892, + "grad_norm": 0.00027083873114944093, + "learning_rate": 8.019334049409237e-06, + "loss": 0.0, + "step": 4322 + }, + { + "epoch": 0.27859766707482114, + "grad_norm": 0.05839166362732932, + "learning_rate": 8.018617973505191e-06, + "loss": 0.0008, + "step": 4323 + }, + { + "epoch": 0.2786621125217503, + "grad_norm": 0.0001520224323067307, + "learning_rate": 8.017901897601147e-06, + "loss": 0.0, + "step": 4324 + }, + { + "epoch": 0.2787265579686795, + "grad_norm": 0.0007621996242916202, + "learning_rate": 8.017185821697102e-06, + "loss": 0.0, + "step": 4325 + }, + { + "epoch": 0.2787910034156087, + "grad_norm": 0.00018054994650765078, + "learning_rate": 8.016469745793056e-06, + "loss": 0.0, + "step": 4326 + }, + { + "epoch": 0.2788554488625379, + "grad_norm": 0.004102618013888196, + "learning_rate": 8.015753669889008e-06, + "loss": 0.0, + "step": 4327 + }, + { + "epoch": 0.278919894309467, + "grad_norm": 0.0015162126017946738, + "learning_rate": 8.015037593984963e-06, + "loss": 0.0, + "step": 4328 + }, + { + "epoch": 0.2789843397563962, + "grad_norm": 0.002592029774412762, + "learning_rate": 8.014321518080917e-06, + "loss": 0.0, + "step": 4329 + }, + { + "epoch": 0.2790487852033254, + "grad_norm": 0.0014765765187576673, + "learning_rate": 8.013605442176871e-06, + "loss": 0.0, + "step": 4330 + }, + { + "epoch": 0.27911323065025456, + "grad_norm": 0.024509027084424147, + "learning_rate": 8.012889366272826e-06, + "loss": 0.0, + "step": 4331 + }, + { + "epoch": 0.27917767609718375, + "grad_norm": 0.0017077207017750734, + "learning_rate": 8.01217329036878e-06, + "loss": 0.0, + "step": 4332 + }, + { + "epoch": 0.27924212154411293, + "grad_norm": 0.0022571001131268175, + "learning_rate": 8.011457214464734e-06, + "loss": 0.0, + "step": 4333 + }, + { + "epoch": 0.27930656699104206, + "grad_norm": 0.0006027117464969319, + "learning_rate": 8.010741138560689e-06, + "loss": 0.0, + "step": 4334 + }, + { + "epoch": 0.27937101243797124, + "grad_norm": 0.00018672067099711776, + "learning_rate": 8.010025062656643e-06, + "loss": 0.0, + "step": 4335 + }, + { + "epoch": 0.27943545788490043, + "grad_norm": 0.0009781388881216495, + "learning_rate": 8.009308986752597e-06, + "loss": 0.0, + "step": 4336 + }, + { + "epoch": 0.2794999033318296, + "grad_norm": 0.004771590672136837, + "learning_rate": 8.00859291084855e-06, + "loss": 0.0, + "step": 4337 + }, + { + "epoch": 0.2795643487787588, + "grad_norm": 0.14926098480487418, + "learning_rate": 8.007876834944504e-06, + "loss": 0.0005, + "step": 4338 + }, + { + "epoch": 0.279628794225688, + "grad_norm": 0.486617649409908, + "learning_rate": 8.007160759040458e-06, + "loss": 0.0008, + "step": 4339 + }, + { + "epoch": 0.2796932396726171, + "grad_norm": 0.009931994546620311, + "learning_rate": 8.006444683136413e-06, + "loss": 0.0001, + "step": 4340 + }, + { + "epoch": 0.2797576851195463, + "grad_norm": 0.00038062467703721604, + "learning_rate": 8.005728607232367e-06, + "loss": 0.0, + "step": 4341 + }, + { + "epoch": 0.2798221305664755, + "grad_norm": 0.0003990964290874825, + "learning_rate": 8.005012531328321e-06, + "loss": 0.0, + "step": 4342 + }, + { + "epoch": 0.27988657601340466, + "grad_norm": 0.008947741416100167, + "learning_rate": 8.004296455424276e-06, + "loss": 0.0001, + "step": 4343 + }, + { + "epoch": 0.27995102146033385, + "grad_norm": 0.007823324713702041, + "learning_rate": 8.00358037952023e-06, + "loss": 0.0, + "step": 4344 + }, + { + "epoch": 0.280015466907263, + "grad_norm": 0.040479600314342654, + "learning_rate": 8.002864303616184e-06, + "loss": 0.0001, + "step": 4345 + }, + { + "epoch": 0.28007991235419216, + "grad_norm": 0.0055250204423881845, + "learning_rate": 8.002148227712138e-06, + "loss": 0.0, + "step": 4346 + }, + { + "epoch": 0.28014435780112135, + "grad_norm": 0.010728338781064616, + "learning_rate": 8.001432151808093e-06, + "loss": 0.0001, + "step": 4347 + }, + { + "epoch": 0.28020880324805053, + "grad_norm": 0.00040887627223743986, + "learning_rate": 8.000716075904047e-06, + "loss": 0.0, + "step": 4348 + }, + { + "epoch": 0.2802732486949797, + "grad_norm": 0.13571115180144624, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0004, + "step": 4349 + }, + { + "epoch": 0.2803376941419089, + "grad_norm": 0.5591548156003548, + "learning_rate": 7.999283924095956e-06, + "loss": 0.004, + "step": 4350 + }, + { + "epoch": 0.28040213958883803, + "grad_norm": 2.2364727922981085, + "learning_rate": 7.99856784819191e-06, + "loss": 0.0049, + "step": 4351 + }, + { + "epoch": 0.2804665850357672, + "grad_norm": 0.015054656240965792, + "learning_rate": 7.997851772287864e-06, + "loss": 0.0, + "step": 4352 + }, + { + "epoch": 0.2805310304826964, + "grad_norm": 0.25799236381304563, + "learning_rate": 7.997135696383817e-06, + "loss": 0.0008, + "step": 4353 + }, + { + "epoch": 0.2805954759296256, + "grad_norm": 0.047828245320771644, + "learning_rate": 7.996419620479771e-06, + "loss": 0.0001, + "step": 4354 + }, + { + "epoch": 0.28065992137655477, + "grad_norm": 0.5221935978641291, + "learning_rate": 7.995703544575725e-06, + "loss": 0.0039, + "step": 4355 + }, + { + "epoch": 0.2807243668234839, + "grad_norm": 0.011907581028104474, + "learning_rate": 7.99498746867168e-06, + "loss": 0.0001, + "step": 4356 + }, + { + "epoch": 0.2807888122704131, + "grad_norm": 0.0031201953035860396, + "learning_rate": 7.994271392767634e-06, + "loss": 0.0, + "step": 4357 + }, + { + "epoch": 0.28085325771734226, + "grad_norm": 1.5818540874230766, + "learning_rate": 7.993555316863588e-06, + "loss": 0.0024, + "step": 4358 + }, + { + "epoch": 0.28091770316427145, + "grad_norm": 0.01792528071689751, + "learning_rate": 7.992839240959543e-06, + "loss": 0.0, + "step": 4359 + }, + { + "epoch": 0.28098214861120063, + "grad_norm": 0.03182447723050989, + "learning_rate": 7.992123165055497e-06, + "loss": 0.0003, + "step": 4360 + }, + { + "epoch": 0.2810465940581298, + "grad_norm": 0.0016257270435471858, + "learning_rate": 7.991407089151451e-06, + "loss": 0.0, + "step": 4361 + }, + { + "epoch": 0.28111103950505895, + "grad_norm": 0.4480030194664001, + "learning_rate": 7.990691013247406e-06, + "loss": 0.0019, + "step": 4362 + }, + { + "epoch": 0.28117548495198813, + "grad_norm": 0.051033991221149974, + "learning_rate": 7.989974937343358e-06, + "loss": 0.0001, + "step": 4363 + }, + { + "epoch": 0.2812399303989173, + "grad_norm": 0.2753491742477933, + "learning_rate": 7.989258861439312e-06, + "loss": 0.0011, + "step": 4364 + }, + { + "epoch": 0.2813043758458465, + "grad_norm": 0.009632521139026014, + "learning_rate": 7.988542785535267e-06, + "loss": 0.0001, + "step": 4365 + }, + { + "epoch": 0.2813688212927757, + "grad_norm": 0.39363273433598595, + "learning_rate": 7.987826709631221e-06, + "loss": 0.0048, + "step": 4366 + }, + { + "epoch": 0.2814332667397048, + "grad_norm": 0.004705003054986105, + "learning_rate": 7.987110633727175e-06, + "loss": 0.0, + "step": 4367 + }, + { + "epoch": 0.281497712186634, + "grad_norm": 0.002300404489696207, + "learning_rate": 7.98639455782313e-06, + "loss": 0.0, + "step": 4368 + }, + { + "epoch": 0.2815621576335632, + "grad_norm": 0.11177615728153535, + "learning_rate": 7.985678481919084e-06, + "loss": 0.0002, + "step": 4369 + }, + { + "epoch": 0.28162660308049237, + "grad_norm": 0.004602240365965075, + "learning_rate": 7.984962406015038e-06, + "loss": 0.0, + "step": 4370 + }, + { + "epoch": 0.28169104852742155, + "grad_norm": 0.05880574974903835, + "learning_rate": 7.984246330110993e-06, + "loss": 0.0017, + "step": 4371 + }, + { + "epoch": 0.28175549397435073, + "grad_norm": 0.008953983911220216, + "learning_rate": 7.983530254206947e-06, + "loss": 0.0001, + "step": 4372 + }, + { + "epoch": 0.28181993942127986, + "grad_norm": 0.007779673626552251, + "learning_rate": 7.982814178302901e-06, + "loss": 0.0001, + "step": 4373 + }, + { + "epoch": 0.28188438486820905, + "grad_norm": 0.0016397288000667067, + "learning_rate": 7.982098102398855e-06, + "loss": 0.0, + "step": 4374 + }, + { + "epoch": 0.28194883031513823, + "grad_norm": 0.07536983363889126, + "learning_rate": 7.98138202649481e-06, + "loss": 0.0005, + "step": 4375 + }, + { + "epoch": 0.2820132757620674, + "grad_norm": 0.0662605672125797, + "learning_rate": 7.980665950590764e-06, + "loss": 0.0002, + "step": 4376 + }, + { + "epoch": 0.2820777212089966, + "grad_norm": 0.001330320124680005, + "learning_rate": 7.979949874686718e-06, + "loss": 0.0, + "step": 4377 + }, + { + "epoch": 0.2821421666559258, + "grad_norm": 0.00196304783207142, + "learning_rate": 7.979233798782673e-06, + "loss": 0.0, + "step": 4378 + }, + { + "epoch": 0.2822066121028549, + "grad_norm": 0.037367839809333316, + "learning_rate": 7.978517722878625e-06, + "loss": 0.0001, + "step": 4379 + }, + { + "epoch": 0.2822710575497841, + "grad_norm": 0.006716713789545665, + "learning_rate": 7.97780164697458e-06, + "loss": 0.0, + "step": 4380 + }, + { + "epoch": 0.2823355029967133, + "grad_norm": 0.6094877187697938, + "learning_rate": 7.977085571070534e-06, + "loss": 0.0014, + "step": 4381 + }, + { + "epoch": 0.28239994844364247, + "grad_norm": 0.06727481563265361, + "learning_rate": 7.976369495166488e-06, + "loss": 0.0001, + "step": 4382 + }, + { + "epoch": 0.28246439389057165, + "grad_norm": 0.18548593022155066, + "learning_rate": 7.975653419262442e-06, + "loss": 0.0005, + "step": 4383 + }, + { + "epoch": 0.2825288393375008, + "grad_norm": 0.3365746156429773, + "learning_rate": 7.974937343358397e-06, + "loss": 0.0024, + "step": 4384 + }, + { + "epoch": 0.28259328478442997, + "grad_norm": 0.00029314361765416907, + "learning_rate": 7.974221267454351e-06, + "loss": 0.0, + "step": 4385 + }, + { + "epoch": 0.28265773023135915, + "grad_norm": 0.16602354030813277, + "learning_rate": 7.973505191550305e-06, + "loss": 0.0003, + "step": 4386 + }, + { + "epoch": 0.28272217567828833, + "grad_norm": 0.0002632260427845974, + "learning_rate": 7.97278911564626e-06, + "loss": 0.0, + "step": 4387 + }, + { + "epoch": 0.2827866211252175, + "grad_norm": 0.005457814706330409, + "learning_rate": 7.972073039742212e-06, + "loss": 0.0, + "step": 4388 + }, + { + "epoch": 0.2828510665721467, + "grad_norm": 0.3241865073108496, + "learning_rate": 7.971356963838167e-06, + "loss": 0.0074, + "step": 4389 + }, + { + "epoch": 0.28291551201907583, + "grad_norm": 0.03676520602108709, + "learning_rate": 7.97064088793412e-06, + "loss": 0.0001, + "step": 4390 + }, + { + "epoch": 0.282979957466005, + "grad_norm": 0.5804222704426779, + "learning_rate": 7.969924812030075e-06, + "loss": 0.0032, + "step": 4391 + }, + { + "epoch": 0.2830444029129342, + "grad_norm": 0.0040055974290435425, + "learning_rate": 7.96920873612603e-06, + "loss": 0.0, + "step": 4392 + }, + { + "epoch": 0.2831088483598634, + "grad_norm": 0.005350671320858573, + "learning_rate": 7.968492660221984e-06, + "loss": 0.0, + "step": 4393 + }, + { + "epoch": 0.28317329380679257, + "grad_norm": 0.004508154975757049, + "learning_rate": 7.96777658431794e-06, + "loss": 0.0, + "step": 4394 + }, + { + "epoch": 0.2832377392537217, + "grad_norm": 0.04889548900087098, + "learning_rate": 7.967060508413892e-06, + "loss": 0.0001, + "step": 4395 + }, + { + "epoch": 0.2833021847006509, + "grad_norm": 0.0443628869935588, + "learning_rate": 7.966344432509847e-06, + "loss": 0.0002, + "step": 4396 + }, + { + "epoch": 0.28336663014758007, + "grad_norm": 0.08155526279809674, + "learning_rate": 7.965628356605801e-06, + "loss": 0.0003, + "step": 4397 + }, + { + "epoch": 0.28343107559450925, + "grad_norm": 0.18657846288264684, + "learning_rate": 7.964912280701755e-06, + "loss": 0.0022, + "step": 4398 + }, + { + "epoch": 0.28349552104143844, + "grad_norm": 0.02187256732999685, + "learning_rate": 7.96419620479771e-06, + "loss": 0.0001, + "step": 4399 + }, + { + "epoch": 0.2835599664883676, + "grad_norm": 0.012205506467956272, + "learning_rate": 7.963480128893664e-06, + "loss": 0.0, + "step": 4400 + }, + { + "epoch": 0.28362441193529675, + "grad_norm": 0.037582196116246906, + "learning_rate": 7.962764052989618e-06, + "loss": 0.0, + "step": 4401 + }, + { + "epoch": 0.28368885738222593, + "grad_norm": 0.06266569361901952, + "learning_rate": 7.962047977085572e-06, + "loss": 0.0002, + "step": 4402 + }, + { + "epoch": 0.2837533028291551, + "grad_norm": 0.0008525386340146979, + "learning_rate": 7.961331901181527e-06, + "loss": 0.0, + "step": 4403 + }, + { + "epoch": 0.2838177482760843, + "grad_norm": 0.005498334319090381, + "learning_rate": 7.96061582527748e-06, + "loss": 0.0, + "step": 4404 + }, + { + "epoch": 0.2838821937230135, + "grad_norm": 0.010821442272882176, + "learning_rate": 7.959899749373434e-06, + "loss": 0.0, + "step": 4405 + }, + { + "epoch": 0.2839466391699426, + "grad_norm": 0.24158699435794354, + "learning_rate": 7.959183673469388e-06, + "loss": 0.002, + "step": 4406 + }, + { + "epoch": 0.2840110846168718, + "grad_norm": 0.3462137556730702, + "learning_rate": 7.958467597565342e-06, + "loss": 0.0012, + "step": 4407 + }, + { + "epoch": 0.284075530063801, + "grad_norm": 0.2733609092257243, + "learning_rate": 7.957751521661296e-06, + "loss": 0.0023, + "step": 4408 + }, + { + "epoch": 0.28413997551073017, + "grad_norm": 0.16384960734985596, + "learning_rate": 7.95703544575725e-06, + "loss": 0.0003, + "step": 4409 + }, + { + "epoch": 0.28420442095765935, + "grad_norm": 0.38844305999655787, + "learning_rate": 7.956319369853205e-06, + "loss": 0.0028, + "step": 4410 + }, + { + "epoch": 0.28426886640458854, + "grad_norm": 0.004082978805036018, + "learning_rate": 7.95560329394916e-06, + "loss": 0.0, + "step": 4411 + }, + { + "epoch": 0.28433331185151767, + "grad_norm": 0.004148427370944359, + "learning_rate": 7.954887218045114e-06, + "loss": 0.0, + "step": 4412 + }, + { + "epoch": 0.28439775729844685, + "grad_norm": 0.021951817193776223, + "learning_rate": 7.954171142141068e-06, + "loss": 0.0, + "step": 4413 + }, + { + "epoch": 0.28446220274537604, + "grad_norm": 0.006462803659022571, + "learning_rate": 7.95345506623702e-06, + "loss": 0.0, + "step": 4414 + }, + { + "epoch": 0.2845266481923052, + "grad_norm": 0.13909423337469742, + "learning_rate": 7.952738990332975e-06, + "loss": 0.0008, + "step": 4415 + }, + { + "epoch": 0.2845910936392344, + "grad_norm": 0.11019298248775587, + "learning_rate": 7.95202291442893e-06, + "loss": 0.002, + "step": 4416 + }, + { + "epoch": 0.2846555390861636, + "grad_norm": 0.015269646596399714, + "learning_rate": 7.951306838524885e-06, + "loss": 0.0, + "step": 4417 + }, + { + "epoch": 0.2847199845330927, + "grad_norm": 0.021452810796437523, + "learning_rate": 7.95059076262084e-06, + "loss": 0.0001, + "step": 4418 + }, + { + "epoch": 0.2847844299800219, + "grad_norm": 0.517738806776239, + "learning_rate": 7.949874686716794e-06, + "loss": 0.0012, + "step": 4419 + }, + { + "epoch": 0.2848488754269511, + "grad_norm": 0.03322008169773669, + "learning_rate": 7.949158610812746e-06, + "loss": 0.0003, + "step": 4420 + }, + { + "epoch": 0.28491332087388027, + "grad_norm": 0.009138953843116304, + "learning_rate": 7.9484425349087e-06, + "loss": 0.0, + "step": 4421 + }, + { + "epoch": 0.28497776632080946, + "grad_norm": 0.47363823956398626, + "learning_rate": 7.947726459004655e-06, + "loss": 0.0039, + "step": 4422 + }, + { + "epoch": 0.2850422117677386, + "grad_norm": 0.0020104430634621095, + "learning_rate": 7.94701038310061e-06, + "loss": 0.0, + "step": 4423 + }, + { + "epoch": 0.28510665721466777, + "grad_norm": 0.053079706230507256, + "learning_rate": 7.946294307196564e-06, + "loss": 0.0002, + "step": 4424 + }, + { + "epoch": 0.28517110266159695, + "grad_norm": 0.02456466714186916, + "learning_rate": 7.945578231292518e-06, + "loss": 0.0001, + "step": 4425 + }, + { + "epoch": 0.28523554810852614, + "grad_norm": 0.0008166402254194522, + "learning_rate": 7.944862155388472e-06, + "loss": 0.0, + "step": 4426 + }, + { + "epoch": 0.2852999935554553, + "grad_norm": 0.006199516264046085, + "learning_rate": 7.944146079484426e-06, + "loss": 0.0, + "step": 4427 + }, + { + "epoch": 0.2853644390023845, + "grad_norm": 0.0008081691424822122, + "learning_rate": 7.94343000358038e-06, + "loss": 0.0, + "step": 4428 + }, + { + "epoch": 0.28542888444931364, + "grad_norm": 0.0029845301345706203, + "learning_rate": 7.942713927676335e-06, + "loss": 0.0, + "step": 4429 + }, + { + "epoch": 0.2854933298962428, + "grad_norm": 0.011622539974228662, + "learning_rate": 7.941997851772288e-06, + "loss": 0.0, + "step": 4430 + }, + { + "epoch": 0.285557775343172, + "grad_norm": 0.00507248422770294, + "learning_rate": 7.941281775868242e-06, + "loss": 0.0, + "step": 4431 + }, + { + "epoch": 0.2856222207901012, + "grad_norm": 0.004660809038102969, + "learning_rate": 7.940565699964196e-06, + "loss": 0.0, + "step": 4432 + }, + { + "epoch": 0.2856866662370304, + "grad_norm": 0.00253746912322247, + "learning_rate": 7.93984962406015e-06, + "loss": 0.0, + "step": 4433 + }, + { + "epoch": 0.2857511116839595, + "grad_norm": 0.061854504871958836, + "learning_rate": 7.939133548156105e-06, + "loss": 0.002, + "step": 4434 + }, + { + "epoch": 0.2858155571308887, + "grad_norm": 0.008229746313761522, + "learning_rate": 7.93841747225206e-06, + "loss": 0.0, + "step": 4435 + }, + { + "epoch": 0.28588000257781787, + "grad_norm": 0.003418590816872071, + "learning_rate": 7.937701396348013e-06, + "loss": 0.0, + "step": 4436 + }, + { + "epoch": 0.28594444802474706, + "grad_norm": 0.4305533788055168, + "learning_rate": 7.936985320443968e-06, + "loss": 0.0026, + "step": 4437 + }, + { + "epoch": 0.28600889347167624, + "grad_norm": 0.006124099355040623, + "learning_rate": 7.936269244539922e-06, + "loss": 0.0, + "step": 4438 + }, + { + "epoch": 0.2860733389186054, + "grad_norm": 0.0031271294943862695, + "learning_rate": 7.935553168635876e-06, + "loss": 0.0, + "step": 4439 + }, + { + "epoch": 0.28613778436553455, + "grad_norm": 0.01045592306874938, + "learning_rate": 7.934837092731829e-06, + "loss": 0.0001, + "step": 4440 + }, + { + "epoch": 0.28620222981246374, + "grad_norm": 0.01239300328247604, + "learning_rate": 7.934121016827785e-06, + "loss": 0.0, + "step": 4441 + }, + { + "epoch": 0.2862666752593929, + "grad_norm": 0.1479763660766948, + "learning_rate": 7.93340494092374e-06, + "loss": 0.0003, + "step": 4442 + }, + { + "epoch": 0.2863311207063221, + "grad_norm": 0.002779436143763441, + "learning_rate": 7.932688865019694e-06, + "loss": 0.0, + "step": 4443 + }, + { + "epoch": 0.2863955661532513, + "grad_norm": 0.009939447193650896, + "learning_rate": 7.931972789115648e-06, + "loss": 0.0, + "step": 4444 + }, + { + "epoch": 0.2864600116001804, + "grad_norm": 0.03607640825679185, + "learning_rate": 7.931256713211602e-06, + "loss": 0.0001, + "step": 4445 + }, + { + "epoch": 0.2865244570471096, + "grad_norm": 0.030201390696854517, + "learning_rate": 7.930540637307555e-06, + "loss": 0.0, + "step": 4446 + }, + { + "epoch": 0.2865889024940388, + "grad_norm": 0.07210643736495957, + "learning_rate": 7.929824561403509e-06, + "loss": 0.0001, + "step": 4447 + }, + { + "epoch": 0.286653347940968, + "grad_norm": 0.27512171988414574, + "learning_rate": 7.929108485499463e-06, + "loss": 0.0023, + "step": 4448 + }, + { + "epoch": 0.28671779338789716, + "grad_norm": 0.007823387955755273, + "learning_rate": 7.928392409595418e-06, + "loss": 0.0, + "step": 4449 + }, + { + "epoch": 0.28678223883482634, + "grad_norm": 0.14412453491842261, + "learning_rate": 7.927676333691372e-06, + "loss": 0.0003, + "step": 4450 + }, + { + "epoch": 0.28684668428175547, + "grad_norm": 0.00010022605205979735, + "learning_rate": 7.926960257787326e-06, + "loss": 0.0, + "step": 4451 + }, + { + "epoch": 0.28691112972868466, + "grad_norm": 0.00852258052113132, + "learning_rate": 7.92624418188328e-06, + "loss": 0.0, + "step": 4452 + }, + { + "epoch": 0.28697557517561384, + "grad_norm": 0.005646042880733156, + "learning_rate": 7.925528105979235e-06, + "loss": 0.0, + "step": 4453 + }, + { + "epoch": 0.287040020622543, + "grad_norm": 0.0008066836868218597, + "learning_rate": 7.924812030075189e-06, + "loss": 0.0, + "step": 4454 + }, + { + "epoch": 0.2871044660694722, + "grad_norm": 0.001507509646450024, + "learning_rate": 7.924095954171143e-06, + "loss": 0.0, + "step": 4455 + }, + { + "epoch": 0.2871689115164014, + "grad_norm": 0.002629905680913306, + "learning_rate": 7.923379878267096e-06, + "loss": 0.0, + "step": 4456 + }, + { + "epoch": 0.2872333569633305, + "grad_norm": 0.0059772441425043265, + "learning_rate": 7.92266380236305e-06, + "loss": 0.0, + "step": 4457 + }, + { + "epoch": 0.2872978024102597, + "grad_norm": 0.0352865641772406, + "learning_rate": 7.921947726459005e-06, + "loss": 0.0001, + "step": 4458 + }, + { + "epoch": 0.2873622478571889, + "grad_norm": 0.0002449629222558241, + "learning_rate": 7.921231650554959e-06, + "loss": 0.0, + "step": 4459 + }, + { + "epoch": 0.2874266933041181, + "grad_norm": 0.00020933698575602438, + "learning_rate": 7.920515574650913e-06, + "loss": 0.0, + "step": 4460 + }, + { + "epoch": 0.28749113875104726, + "grad_norm": 0.0031949037566283696, + "learning_rate": 7.919799498746868e-06, + "loss": 0.0, + "step": 4461 + }, + { + "epoch": 0.2875555841979764, + "grad_norm": 0.0065847528646073294, + "learning_rate": 7.919083422842822e-06, + "loss": 0.0, + "step": 4462 + }, + { + "epoch": 0.2876200296449056, + "grad_norm": 0.007622588936978267, + "learning_rate": 7.918367346938776e-06, + "loss": 0.0001, + "step": 4463 + }, + { + "epoch": 0.28768447509183476, + "grad_norm": 0.0017648578089107282, + "learning_rate": 7.91765127103473e-06, + "loss": 0.0, + "step": 4464 + }, + { + "epoch": 0.28774892053876394, + "grad_norm": 0.02611680379581983, + "learning_rate": 7.916935195130685e-06, + "loss": 0.0001, + "step": 4465 + }, + { + "epoch": 0.2878133659856931, + "grad_norm": 0.0010693996340695561, + "learning_rate": 7.916219119226639e-06, + "loss": 0.0, + "step": 4466 + }, + { + "epoch": 0.2878778114326223, + "grad_norm": 0.05102518528801154, + "learning_rate": 7.915503043322593e-06, + "loss": 0.0004, + "step": 4467 + }, + { + "epoch": 0.28794225687955144, + "grad_norm": 0.03612863993394471, + "learning_rate": 7.914786967418548e-06, + "loss": 0.0001, + "step": 4468 + }, + { + "epoch": 0.2880067023264806, + "grad_norm": 0.017686609282882126, + "learning_rate": 7.914070891514502e-06, + "loss": 0.0001, + "step": 4469 + }, + { + "epoch": 0.2880711477734098, + "grad_norm": 0.0008417679838710003, + "learning_rate": 7.913354815610456e-06, + "loss": 0.0, + "step": 4470 + }, + { + "epoch": 0.288135593220339, + "grad_norm": 0.00041560685689716803, + "learning_rate": 7.91263873970641e-06, + "loss": 0.0, + "step": 4471 + }, + { + "epoch": 0.2882000386672682, + "grad_norm": 0.18941918295187352, + "learning_rate": 7.911922663802363e-06, + "loss": 0.0004, + "step": 4472 + }, + { + "epoch": 0.2882644841141973, + "grad_norm": 0.21404756925539772, + "learning_rate": 7.911206587898317e-06, + "loss": 0.0006, + "step": 4473 + }, + { + "epoch": 0.2883289295611265, + "grad_norm": 0.013452269074163891, + "learning_rate": 7.910490511994272e-06, + "loss": 0.0, + "step": 4474 + }, + { + "epoch": 0.2883933750080557, + "grad_norm": 0.09559103668822853, + "learning_rate": 7.909774436090226e-06, + "loss": 0.0004, + "step": 4475 + }, + { + "epoch": 0.28845782045498486, + "grad_norm": 0.025086608219520978, + "learning_rate": 7.90905836018618e-06, + "loss": 0.0001, + "step": 4476 + }, + { + "epoch": 0.28852226590191404, + "grad_norm": 0.006432836326093911, + "learning_rate": 7.908342284282135e-06, + "loss": 0.0, + "step": 4477 + }, + { + "epoch": 0.28858671134884323, + "grad_norm": 0.2498886709881648, + "learning_rate": 7.907626208378089e-06, + "loss": 0.0004, + "step": 4478 + }, + { + "epoch": 0.28865115679577236, + "grad_norm": 0.0026041772410369305, + "learning_rate": 7.906910132474043e-06, + "loss": 0.0, + "step": 4479 + }, + { + "epoch": 0.28871560224270154, + "grad_norm": 0.009591682875869855, + "learning_rate": 7.906194056569998e-06, + "loss": 0.0, + "step": 4480 + }, + { + "epoch": 0.2887800476896307, + "grad_norm": 0.00032052997232582793, + "learning_rate": 7.90547798066595e-06, + "loss": 0.0, + "step": 4481 + }, + { + "epoch": 0.2888444931365599, + "grad_norm": 0.00873159164063012, + "learning_rate": 7.904761904761904e-06, + "loss": 0.0, + "step": 4482 + }, + { + "epoch": 0.2889089385834891, + "grad_norm": 0.35534598777334087, + "learning_rate": 7.904045828857859e-06, + "loss": 0.0054, + "step": 4483 + }, + { + "epoch": 0.2889733840304182, + "grad_norm": 0.002808476140444069, + "learning_rate": 7.903329752953813e-06, + "loss": 0.0, + "step": 4484 + }, + { + "epoch": 0.2890378294773474, + "grad_norm": 0.007032176873919268, + "learning_rate": 7.902613677049767e-06, + "loss": 0.0, + "step": 4485 + }, + { + "epoch": 0.2891022749242766, + "grad_norm": 0.0008951899821003473, + "learning_rate": 7.901897601145722e-06, + "loss": 0.0, + "step": 4486 + }, + { + "epoch": 0.2891667203712058, + "grad_norm": 0.07280670081014678, + "learning_rate": 7.901181525241678e-06, + "loss": 0.0017, + "step": 4487 + }, + { + "epoch": 0.28923116581813496, + "grad_norm": 0.31948080480773533, + "learning_rate": 7.90046544933763e-06, + "loss": 0.0009, + "step": 4488 + }, + { + "epoch": 0.28929561126506415, + "grad_norm": 0.0005501602348737609, + "learning_rate": 7.899749373433585e-06, + "loss": 0.0, + "step": 4489 + }, + { + "epoch": 0.2893600567119933, + "grad_norm": 0.018795969206918124, + "learning_rate": 7.899033297529539e-06, + "loss": 0.0, + "step": 4490 + }, + { + "epoch": 0.28942450215892246, + "grad_norm": 0.0006946576011490777, + "learning_rate": 7.898317221625493e-06, + "loss": 0.0, + "step": 4491 + }, + { + "epoch": 0.28948894760585164, + "grad_norm": 0.00017523844631172394, + "learning_rate": 7.897601145721447e-06, + "loss": 0.0, + "step": 4492 + }, + { + "epoch": 0.2895533930527808, + "grad_norm": 0.00040692935676826147, + "learning_rate": 7.896885069817402e-06, + "loss": 0.0, + "step": 4493 + }, + { + "epoch": 0.28961783849971, + "grad_norm": 0.002279674125137819, + "learning_rate": 7.896168993913356e-06, + "loss": 0.0, + "step": 4494 + }, + { + "epoch": 0.2896822839466392, + "grad_norm": 0.2672872842209903, + "learning_rate": 7.89545291800931e-06, + "loss": 0.0027, + "step": 4495 + }, + { + "epoch": 0.2897467293935683, + "grad_norm": 0.006540003647810904, + "learning_rate": 7.894736842105265e-06, + "loss": 0.0, + "step": 4496 + }, + { + "epoch": 0.2898111748404975, + "grad_norm": 0.015930352995606616, + "learning_rate": 7.894020766201217e-06, + "loss": 0.0, + "step": 4497 + }, + { + "epoch": 0.2898756202874267, + "grad_norm": 0.004882923326128659, + "learning_rate": 7.893304690297172e-06, + "loss": 0.0, + "step": 4498 + }, + { + "epoch": 0.2899400657343559, + "grad_norm": 0.31889832402219837, + "learning_rate": 7.892588614393126e-06, + "loss": 0.0005, + "step": 4499 + }, + { + "epoch": 0.29000451118128506, + "grad_norm": 0.0013620853215409057, + "learning_rate": 7.89187253848908e-06, + "loss": 0.0, + "step": 4500 + }, + { + "epoch": 0.2900689566282142, + "grad_norm": 0.015338372594140916, + "learning_rate": 7.891156462585034e-06, + "loss": 0.0, + "step": 4501 + }, + { + "epoch": 0.2901334020751434, + "grad_norm": 0.05778266089969748, + "learning_rate": 7.890440386680989e-06, + "loss": 0.0001, + "step": 4502 + }, + { + "epoch": 0.29019784752207256, + "grad_norm": 0.00270977776113452, + "learning_rate": 7.889724310776943e-06, + "loss": 0.0, + "step": 4503 + }, + { + "epoch": 0.29026229296900175, + "grad_norm": 0.0037655202102685075, + "learning_rate": 7.889008234872897e-06, + "loss": 0.0, + "step": 4504 + }, + { + "epoch": 0.29032673841593093, + "grad_norm": 0.015389182378204158, + "learning_rate": 7.888292158968852e-06, + "loss": 0.0001, + "step": 4505 + }, + { + "epoch": 0.2903911838628601, + "grad_norm": 0.015059754222407136, + "learning_rate": 7.887576083064806e-06, + "loss": 0.0, + "step": 4506 + }, + { + "epoch": 0.29045562930978924, + "grad_norm": 0.03073885991750051, + "learning_rate": 7.886860007160759e-06, + "loss": 0.0, + "step": 4507 + }, + { + "epoch": 0.2905200747567184, + "grad_norm": 0.015867032314513176, + "learning_rate": 7.886143931256713e-06, + "loss": 0.0001, + "step": 4508 + }, + { + "epoch": 0.2905845202036476, + "grad_norm": 0.00045300063330874063, + "learning_rate": 7.885427855352667e-06, + "loss": 0.0, + "step": 4509 + }, + { + "epoch": 0.2906489656505768, + "grad_norm": 0.0004942003374035947, + "learning_rate": 7.884711779448621e-06, + "loss": 0.0, + "step": 4510 + }, + { + "epoch": 0.290713411097506, + "grad_norm": 0.0012337128738821176, + "learning_rate": 7.883995703544577e-06, + "loss": 0.0, + "step": 4511 + }, + { + "epoch": 0.2907778565444351, + "grad_norm": 0.0008876628842310143, + "learning_rate": 7.883279627640532e-06, + "loss": 0.0, + "step": 4512 + }, + { + "epoch": 0.2908423019913643, + "grad_norm": 0.007350333417431606, + "learning_rate": 7.882563551736484e-06, + "loss": 0.0, + "step": 4513 + }, + { + "epoch": 0.2909067474382935, + "grad_norm": 0.0002106284948672585, + "learning_rate": 7.881847475832439e-06, + "loss": 0.0, + "step": 4514 + }, + { + "epoch": 0.29097119288522266, + "grad_norm": 0.00035708859633882503, + "learning_rate": 7.881131399928393e-06, + "loss": 0.0, + "step": 4515 + }, + { + "epoch": 0.29103563833215185, + "grad_norm": 0.029065303728135556, + "learning_rate": 7.880415324024347e-06, + "loss": 0.0, + "step": 4516 + }, + { + "epoch": 0.29110008377908103, + "grad_norm": 0.2127117840682844, + "learning_rate": 7.879699248120301e-06, + "loss": 0.001, + "step": 4517 + }, + { + "epoch": 0.29116452922601016, + "grad_norm": 0.0006066203387944084, + "learning_rate": 7.878983172216256e-06, + "loss": 0.0, + "step": 4518 + }, + { + "epoch": 0.29122897467293934, + "grad_norm": 0.018112009074947217, + "learning_rate": 7.87826709631221e-06, + "loss": 0.0, + "step": 4519 + }, + { + "epoch": 0.29129342011986853, + "grad_norm": 0.002583304637280309, + "learning_rate": 7.877551020408164e-06, + "loss": 0.0, + "step": 4520 + }, + { + "epoch": 0.2913578655667977, + "grad_norm": 0.266955702883381, + "learning_rate": 7.876834944504119e-06, + "loss": 0.0019, + "step": 4521 + }, + { + "epoch": 0.2914223110137269, + "grad_norm": 0.018432121396502184, + "learning_rate": 7.876118868600073e-06, + "loss": 0.0, + "step": 4522 + }, + { + "epoch": 0.2914867564606561, + "grad_norm": 0.03903561799608369, + "learning_rate": 7.875402792696026e-06, + "loss": 0.0003, + "step": 4523 + }, + { + "epoch": 0.2915512019075852, + "grad_norm": 0.012237118359079405, + "learning_rate": 7.87468671679198e-06, + "loss": 0.0, + "step": 4524 + }, + { + "epoch": 0.2916156473545144, + "grad_norm": 0.0033078222190907323, + "learning_rate": 7.873970640887934e-06, + "loss": 0.0, + "step": 4525 + }, + { + "epoch": 0.2916800928014436, + "grad_norm": 0.003472865748525989, + "learning_rate": 7.873254564983888e-06, + "loss": 0.0, + "step": 4526 + }, + { + "epoch": 0.29174453824837276, + "grad_norm": 0.0013939818695185597, + "learning_rate": 7.872538489079843e-06, + "loss": 0.0, + "step": 4527 + }, + { + "epoch": 0.29180898369530195, + "grad_norm": 0.00018821632106576328, + "learning_rate": 7.871822413175797e-06, + "loss": 0.0, + "step": 4528 + }, + { + "epoch": 0.2918734291422311, + "grad_norm": 0.00306179393129678, + "learning_rate": 7.871106337271751e-06, + "loss": 0.0, + "step": 4529 + }, + { + "epoch": 0.29193787458916026, + "grad_norm": 0.0007617039287525343, + "learning_rate": 7.870390261367706e-06, + "loss": 0.0, + "step": 4530 + }, + { + "epoch": 0.29200232003608945, + "grad_norm": 1.2425142734099195, + "learning_rate": 7.86967418546366e-06, + "loss": 0.0022, + "step": 4531 + }, + { + "epoch": 0.29206676548301863, + "grad_norm": 0.0022768657242729392, + "learning_rate": 7.868958109559614e-06, + "loss": 0.0, + "step": 4532 + }, + { + "epoch": 0.2921312109299478, + "grad_norm": 0.00020864275355323165, + "learning_rate": 7.868242033655567e-06, + "loss": 0.0, + "step": 4533 + }, + { + "epoch": 0.292195656376877, + "grad_norm": 0.000213358795277892, + "learning_rate": 7.867525957751523e-06, + "loss": 0.0, + "step": 4534 + }, + { + "epoch": 0.29226010182380613, + "grad_norm": 0.00015255038864135203, + "learning_rate": 7.866809881847477e-06, + "loss": 0.0, + "step": 4535 + }, + { + "epoch": 0.2923245472707353, + "grad_norm": 0.00015455752183585395, + "learning_rate": 7.866093805943431e-06, + "loss": 0.0, + "step": 4536 + }, + { + "epoch": 0.2923889927176645, + "grad_norm": 0.12145031071190686, + "learning_rate": 7.865377730039386e-06, + "loss": 0.0001, + "step": 4537 + }, + { + "epoch": 0.2924534381645937, + "grad_norm": 0.031089749014432462, + "learning_rate": 7.86466165413534e-06, + "loss": 0.0002, + "step": 4538 + }, + { + "epoch": 0.29251788361152287, + "grad_norm": 0.3512590264114246, + "learning_rate": 7.863945578231293e-06, + "loss": 0.0025, + "step": 4539 + }, + { + "epoch": 0.292582329058452, + "grad_norm": 0.02734669707799407, + "learning_rate": 7.863229502327247e-06, + "loss": 0.0001, + "step": 4540 + }, + { + "epoch": 0.2926467745053812, + "grad_norm": 0.0010073198792487593, + "learning_rate": 7.862513426423201e-06, + "loss": 0.0, + "step": 4541 + }, + { + "epoch": 0.29271121995231036, + "grad_norm": 0.00019868880896533006, + "learning_rate": 7.861797350519156e-06, + "loss": 0.0, + "step": 4542 + }, + { + "epoch": 0.29277566539923955, + "grad_norm": 0.0006258420716790086, + "learning_rate": 7.86108127461511e-06, + "loss": 0.0, + "step": 4543 + }, + { + "epoch": 0.29284011084616873, + "grad_norm": 0.0005874389946818668, + "learning_rate": 7.860365198711064e-06, + "loss": 0.0, + "step": 4544 + }, + { + "epoch": 0.2929045562930979, + "grad_norm": 0.003848245562625219, + "learning_rate": 7.859649122807018e-06, + "loss": 0.0, + "step": 4545 + }, + { + "epoch": 0.29296900174002705, + "grad_norm": 0.0007591827858018054, + "learning_rate": 7.858933046902973e-06, + "loss": 0.0, + "step": 4546 + }, + { + "epoch": 0.29303344718695623, + "grad_norm": 0.0027661824546528937, + "learning_rate": 7.858216970998927e-06, + "loss": 0.0, + "step": 4547 + }, + { + "epoch": 0.2930978926338854, + "grad_norm": 0.004813073623290894, + "learning_rate": 7.857500895094881e-06, + "loss": 0.0, + "step": 4548 + }, + { + "epoch": 0.2931623380808146, + "grad_norm": 0.7728116364248644, + "learning_rate": 7.856784819190834e-06, + "loss": 0.0034, + "step": 4549 + }, + { + "epoch": 0.2932267835277438, + "grad_norm": 0.014514382210677952, + "learning_rate": 7.856068743286788e-06, + "loss": 0.0001, + "step": 4550 + }, + { + "epoch": 0.2932912289746729, + "grad_norm": 0.18422850996311158, + "learning_rate": 7.855352667382743e-06, + "loss": 0.0017, + "step": 4551 + }, + { + "epoch": 0.2933556744216021, + "grad_norm": 0.004560710887887605, + "learning_rate": 7.854636591478697e-06, + "loss": 0.0, + "step": 4552 + }, + { + "epoch": 0.2934201198685313, + "grad_norm": 0.03367021315660142, + "learning_rate": 7.853920515574651e-06, + "loss": 0.0001, + "step": 4553 + }, + { + "epoch": 0.29348456531546047, + "grad_norm": 0.0755510030149236, + "learning_rate": 7.853204439670605e-06, + "loss": 0.0008, + "step": 4554 + }, + { + "epoch": 0.29354901076238965, + "grad_norm": 0.27525725415499364, + "learning_rate": 7.85248836376656e-06, + "loss": 0.0008, + "step": 4555 + }, + { + "epoch": 0.29361345620931883, + "grad_norm": 0.019564531699342042, + "learning_rate": 7.851772287862514e-06, + "loss": 0.0, + "step": 4556 + }, + { + "epoch": 0.29367790165624796, + "grad_norm": 0.008114938435770352, + "learning_rate": 7.851056211958468e-06, + "loss": 0.0, + "step": 4557 + }, + { + "epoch": 0.29374234710317715, + "grad_norm": 0.3528800646587041, + "learning_rate": 7.850340136054423e-06, + "loss": 0.0002, + "step": 4558 + }, + { + "epoch": 0.29380679255010633, + "grad_norm": 0.0006609426373109618, + "learning_rate": 7.849624060150377e-06, + "loss": 0.0, + "step": 4559 + }, + { + "epoch": 0.2938712379970355, + "grad_norm": 0.18983152295207498, + "learning_rate": 7.848907984246331e-06, + "loss": 0.0019, + "step": 4560 + }, + { + "epoch": 0.2939356834439647, + "grad_norm": 0.051502671807416966, + "learning_rate": 7.848191908342286e-06, + "loss": 0.0001, + "step": 4561 + }, + { + "epoch": 0.2940001288908939, + "grad_norm": 0.004743945694445782, + "learning_rate": 7.84747583243824e-06, + "loss": 0.0, + "step": 4562 + }, + { + "epoch": 0.294064574337823, + "grad_norm": 0.3051454696860592, + "learning_rate": 7.846759756534194e-06, + "loss": 0.0007, + "step": 4563 + }, + { + "epoch": 0.2941290197847522, + "grad_norm": 0.0005357593062468779, + "learning_rate": 7.846043680630148e-06, + "loss": 0.0, + "step": 4564 + }, + { + "epoch": 0.2941934652316814, + "grad_norm": 0.0004975399317757935, + "learning_rate": 7.845327604726101e-06, + "loss": 0.0, + "step": 4565 + }, + { + "epoch": 0.29425791067861057, + "grad_norm": 0.002784029827135192, + "learning_rate": 7.844611528822055e-06, + "loss": 0.0, + "step": 4566 + }, + { + "epoch": 0.29432235612553975, + "grad_norm": 0.00492558169817448, + "learning_rate": 7.84389545291801e-06, + "loss": 0.0, + "step": 4567 + }, + { + "epoch": 0.2943868015724689, + "grad_norm": 0.0001998417877650629, + "learning_rate": 7.843179377013964e-06, + "loss": 0.0, + "step": 4568 + }, + { + "epoch": 0.29445124701939807, + "grad_norm": 0.01305385781332726, + "learning_rate": 7.842463301109918e-06, + "loss": 0.0001, + "step": 4569 + }, + { + "epoch": 0.29451569246632725, + "grad_norm": 0.010528942460724023, + "learning_rate": 7.841747225205873e-06, + "loss": 0.0001, + "step": 4570 + }, + { + "epoch": 0.29458013791325643, + "grad_norm": 0.00020059010498940666, + "learning_rate": 7.841031149301827e-06, + "loss": 0.0, + "step": 4571 + }, + { + "epoch": 0.2946445833601856, + "grad_norm": 0.0032149860570045243, + "learning_rate": 7.840315073397781e-06, + "loss": 0.0, + "step": 4572 + }, + { + "epoch": 0.2947090288071148, + "grad_norm": 0.71958861976571, + "learning_rate": 7.839598997493735e-06, + "loss": 0.0035, + "step": 4573 + }, + { + "epoch": 0.29477347425404393, + "grad_norm": 3.0767340446139673, + "learning_rate": 7.838882921589688e-06, + "loss": 0.025, + "step": 4574 + }, + { + "epoch": 0.2948379197009731, + "grad_norm": 0.001674350790710976, + "learning_rate": 7.838166845685642e-06, + "loss": 0.0, + "step": 4575 + }, + { + "epoch": 0.2949023651479023, + "grad_norm": 0.012657985235324564, + "learning_rate": 7.837450769781597e-06, + "loss": 0.0001, + "step": 4576 + }, + { + "epoch": 0.2949668105948315, + "grad_norm": 0.005560305992674056, + "learning_rate": 7.836734693877551e-06, + "loss": 0.0, + "step": 4577 + }, + { + "epoch": 0.29503125604176067, + "grad_norm": 0.039299774754964066, + "learning_rate": 7.836018617973505e-06, + "loss": 0.0001, + "step": 4578 + }, + { + "epoch": 0.2950957014886898, + "grad_norm": 0.00030549551142350253, + "learning_rate": 7.83530254206946e-06, + "loss": 0.0, + "step": 4579 + }, + { + "epoch": 0.295160146935619, + "grad_norm": 0.004564936822918925, + "learning_rate": 7.834586466165414e-06, + "loss": 0.0, + "step": 4580 + }, + { + "epoch": 0.29522459238254817, + "grad_norm": 0.001231723360405645, + "learning_rate": 7.833870390261368e-06, + "loss": 0.0, + "step": 4581 + }, + { + "epoch": 0.29528903782947735, + "grad_norm": 0.01125057962057763, + "learning_rate": 7.833154314357322e-06, + "loss": 0.0, + "step": 4582 + }, + { + "epoch": 0.29535348327640654, + "grad_norm": 0.025788512570669428, + "learning_rate": 7.832438238453277e-06, + "loss": 0.0, + "step": 4583 + }, + { + "epoch": 0.2954179287233357, + "grad_norm": 0.03530924012953212, + "learning_rate": 7.831722162549231e-06, + "loss": 0.0, + "step": 4584 + }, + { + "epoch": 0.29548237417026485, + "grad_norm": 0.03815530609659034, + "learning_rate": 7.831006086645185e-06, + "loss": 0.0005, + "step": 4585 + }, + { + "epoch": 0.29554681961719403, + "grad_norm": 0.7514677785081176, + "learning_rate": 7.83029001074114e-06, + "loss": 0.0039, + "step": 4586 + }, + { + "epoch": 0.2956112650641232, + "grad_norm": 0.009560576602998675, + "learning_rate": 7.829573934837094e-06, + "loss": 0.0001, + "step": 4587 + }, + { + "epoch": 0.2956757105110524, + "grad_norm": 0.032842624976011536, + "learning_rate": 7.828857858933048e-06, + "loss": 0.0, + "step": 4588 + }, + { + "epoch": 0.2957401559579816, + "grad_norm": 0.031006444152063366, + "learning_rate": 7.828141783029003e-06, + "loss": 0.0017, + "step": 4589 + }, + { + "epoch": 0.2958046014049107, + "grad_norm": 0.24915392579693496, + "learning_rate": 7.827425707124955e-06, + "loss": 0.0007, + "step": 4590 + }, + { + "epoch": 0.2958690468518399, + "grad_norm": 0.03304664469135916, + "learning_rate": 7.82670963122091e-06, + "loss": 0.0, + "step": 4591 + }, + { + "epoch": 0.2959334922987691, + "grad_norm": 0.05655894885454582, + "learning_rate": 7.825993555316864e-06, + "loss": 0.0001, + "step": 4592 + }, + { + "epoch": 0.29599793774569827, + "grad_norm": 0.38388087603718396, + "learning_rate": 7.825277479412818e-06, + "loss": 0.0011, + "step": 4593 + }, + { + "epoch": 0.29606238319262745, + "grad_norm": 0.566841116400412, + "learning_rate": 7.824561403508772e-06, + "loss": 0.0035, + "step": 4594 + }, + { + "epoch": 0.29612682863955664, + "grad_norm": 0.011698133493121187, + "learning_rate": 7.823845327604727e-06, + "loss": 0.0001, + "step": 4595 + }, + { + "epoch": 0.29619127408648577, + "grad_norm": 0.027770648157643973, + "learning_rate": 7.823129251700681e-06, + "loss": 0.0, + "step": 4596 + }, + { + "epoch": 0.29625571953341495, + "grad_norm": 2.278856981939525, + "learning_rate": 7.822413175796635e-06, + "loss": 0.0001, + "step": 4597 + }, + { + "epoch": 0.29632016498034414, + "grad_norm": 0.0020159815640930436, + "learning_rate": 7.82169709989259e-06, + "loss": 0.0, + "step": 4598 + }, + { + "epoch": 0.2963846104272733, + "grad_norm": 0.14718794658617437, + "learning_rate": 7.820981023988544e-06, + "loss": 0.0008, + "step": 4599 + }, + { + "epoch": 0.2964490558742025, + "grad_norm": 0.0021516636260371985, + "learning_rate": 7.820264948084496e-06, + "loss": 0.0, + "step": 4600 + }, + { + "epoch": 0.2965135013211317, + "grad_norm": 0.010642395207132137, + "learning_rate": 7.81954887218045e-06, + "loss": 0.0, + "step": 4601 + }, + { + "epoch": 0.2965779467680608, + "grad_norm": 0.04448229876604851, + "learning_rate": 7.818832796276405e-06, + "loss": 0.0, + "step": 4602 + }, + { + "epoch": 0.29664239221499, + "grad_norm": 0.0033751772275503853, + "learning_rate": 7.81811672037236e-06, + "loss": 0.0, + "step": 4603 + }, + { + "epoch": 0.2967068376619192, + "grad_norm": 0.0037728265986638255, + "learning_rate": 7.817400644468315e-06, + "loss": 0.0, + "step": 4604 + }, + { + "epoch": 0.29677128310884837, + "grad_norm": 0.0027119067769277696, + "learning_rate": 7.81668456856427e-06, + "loss": 0.0, + "step": 4605 + }, + { + "epoch": 0.29683572855577756, + "grad_norm": 0.06407639553398353, + "learning_rate": 7.815968492660222e-06, + "loss": 0.0002, + "step": 4606 + }, + { + "epoch": 0.2969001740027067, + "grad_norm": 0.03252517104288065, + "learning_rate": 7.815252416756177e-06, + "loss": 0.0001, + "step": 4607 + }, + { + "epoch": 0.29696461944963587, + "grad_norm": 0.04116721890821116, + "learning_rate": 7.81453634085213e-06, + "loss": 0.0003, + "step": 4608 + }, + { + "epoch": 0.29702906489656505, + "grad_norm": 0.0015415324517583455, + "learning_rate": 7.813820264948085e-06, + "loss": 0.0, + "step": 4609 + }, + { + "epoch": 0.29709351034349424, + "grad_norm": 0.08608630962120033, + "learning_rate": 7.81310418904404e-06, + "loss": 0.0002, + "step": 4610 + }, + { + "epoch": 0.2971579557904234, + "grad_norm": 0.00012577019306394426, + "learning_rate": 7.812388113139994e-06, + "loss": 0.0, + "step": 4611 + }, + { + "epoch": 0.2972224012373526, + "grad_norm": 0.006028051862400764, + "learning_rate": 7.811672037235948e-06, + "loss": 0.0001, + "step": 4612 + }, + { + "epoch": 0.29728684668428174, + "grad_norm": 0.10264788711136673, + "learning_rate": 7.810955961331902e-06, + "loss": 0.0004, + "step": 4613 + }, + { + "epoch": 0.2973512921312109, + "grad_norm": 0.037061054366988276, + "learning_rate": 7.810239885427857e-06, + "loss": 0.0, + "step": 4614 + }, + { + "epoch": 0.2974157375781401, + "grad_norm": 0.0002439607188696404, + "learning_rate": 7.809523809523811e-06, + "loss": 0.0, + "step": 4615 + }, + { + "epoch": 0.2974801830250693, + "grad_norm": 0.014972837410916137, + "learning_rate": 7.808807733619764e-06, + "loss": 0.0, + "step": 4616 + }, + { + "epoch": 0.2975446284719985, + "grad_norm": 0.0008584027435497128, + "learning_rate": 7.808091657715718e-06, + "loss": 0.0, + "step": 4617 + }, + { + "epoch": 0.2976090739189276, + "grad_norm": 0.0006271920321097305, + "learning_rate": 7.807375581811672e-06, + "loss": 0.0, + "step": 4618 + }, + { + "epoch": 0.2976735193658568, + "grad_norm": 0.005431255360080073, + "learning_rate": 7.806659505907626e-06, + "loss": 0.0, + "step": 4619 + }, + { + "epoch": 0.29773796481278597, + "grad_norm": 0.0010751240437544664, + "learning_rate": 7.80594343000358e-06, + "loss": 0.0, + "step": 4620 + }, + { + "epoch": 0.29780241025971516, + "grad_norm": 0.3879795137802696, + "learning_rate": 7.805227354099535e-06, + "loss": 0.0039, + "step": 4621 + }, + { + "epoch": 0.29786685570664434, + "grad_norm": 0.0020645484028067747, + "learning_rate": 7.80451127819549e-06, + "loss": 0.0, + "step": 4622 + }, + { + "epoch": 0.2979313011535735, + "grad_norm": 0.0010723496273858072, + "learning_rate": 7.803795202291444e-06, + "loss": 0.0, + "step": 4623 + }, + { + "epoch": 0.29799574660050265, + "grad_norm": 0.0027411407806028714, + "learning_rate": 7.803079126387398e-06, + "loss": 0.0, + "step": 4624 + }, + { + "epoch": 0.29806019204743184, + "grad_norm": 0.00014802318816350044, + "learning_rate": 7.802363050483352e-06, + "loss": 0.0, + "step": 4625 + }, + { + "epoch": 0.298124637494361, + "grad_norm": 0.031220465912154896, + "learning_rate": 7.801646974579305e-06, + "loss": 0.0002, + "step": 4626 + }, + { + "epoch": 0.2981890829412902, + "grad_norm": 0.002613055270924557, + "learning_rate": 7.80093089867526e-06, + "loss": 0.0, + "step": 4627 + }, + { + "epoch": 0.2982535283882194, + "grad_norm": 0.002614199435021259, + "learning_rate": 7.800214822771215e-06, + "loss": 0.0, + "step": 4628 + }, + { + "epoch": 0.2983179738351485, + "grad_norm": 0.0026466108391232086, + "learning_rate": 7.79949874686717e-06, + "loss": 0.0, + "step": 4629 + }, + { + "epoch": 0.2983824192820777, + "grad_norm": 0.0011872838771710653, + "learning_rate": 7.798782670963124e-06, + "loss": 0.0, + "step": 4630 + }, + { + "epoch": 0.2984468647290069, + "grad_norm": 0.46395645085834897, + "learning_rate": 7.798066595059078e-06, + "loss": 0.0014, + "step": 4631 + }, + { + "epoch": 0.2985113101759361, + "grad_norm": 0.0013353272721039187, + "learning_rate": 7.79735051915503e-06, + "loss": 0.0, + "step": 4632 + }, + { + "epoch": 0.29857575562286526, + "grad_norm": 0.0017705315561493894, + "learning_rate": 7.796634443250985e-06, + "loss": 0.0, + "step": 4633 + }, + { + "epoch": 0.29864020106979444, + "grad_norm": 0.007422942160128908, + "learning_rate": 7.79591836734694e-06, + "loss": 0.0001, + "step": 4634 + }, + { + "epoch": 0.29870464651672357, + "grad_norm": 0.0017911138589993113, + "learning_rate": 7.795202291442893e-06, + "loss": 0.0, + "step": 4635 + }, + { + "epoch": 0.29876909196365276, + "grad_norm": 0.023585737352795046, + "learning_rate": 7.794486215538848e-06, + "loss": 0.0002, + "step": 4636 + }, + { + "epoch": 0.29883353741058194, + "grad_norm": 0.0016310814255533401, + "learning_rate": 7.793770139634802e-06, + "loss": 0.0, + "step": 4637 + }, + { + "epoch": 0.2988979828575111, + "grad_norm": 0.10732582740168191, + "learning_rate": 7.793054063730756e-06, + "loss": 0.0011, + "step": 4638 + }, + { + "epoch": 0.2989624283044403, + "grad_norm": 0.15895521683864702, + "learning_rate": 7.79233798782671e-06, + "loss": 0.0006, + "step": 4639 + }, + { + "epoch": 0.2990268737513695, + "grad_norm": 0.00880201531000441, + "learning_rate": 7.791621911922665e-06, + "loss": 0.0, + "step": 4640 + }, + { + "epoch": 0.2990913191982986, + "grad_norm": 0.003968743597232801, + "learning_rate": 7.79090583601862e-06, + "loss": 0.0, + "step": 4641 + }, + { + "epoch": 0.2991557646452278, + "grad_norm": 0.001546892372756122, + "learning_rate": 7.790189760114572e-06, + "loss": 0.0, + "step": 4642 + }, + { + "epoch": 0.299220210092157, + "grad_norm": 0.003255836015276672, + "learning_rate": 7.789473684210526e-06, + "loss": 0.0, + "step": 4643 + }, + { + "epoch": 0.2992846555390862, + "grad_norm": 0.004197248189523486, + "learning_rate": 7.78875760830648e-06, + "loss": 0.0, + "step": 4644 + }, + { + "epoch": 0.29934910098601536, + "grad_norm": 0.0008416042110688216, + "learning_rate": 7.788041532402435e-06, + "loss": 0.0, + "step": 4645 + }, + { + "epoch": 0.2994135464329445, + "grad_norm": 0.0043030888462125645, + "learning_rate": 7.787325456498389e-06, + "loss": 0.0, + "step": 4646 + }, + { + "epoch": 0.2994779918798737, + "grad_norm": 0.033809603114627454, + "learning_rate": 7.786609380594343e-06, + "loss": 0.0002, + "step": 4647 + }, + { + "epoch": 0.29954243732680286, + "grad_norm": 0.0003606526883201009, + "learning_rate": 7.785893304690298e-06, + "loss": 0.0, + "step": 4648 + }, + { + "epoch": 0.29960688277373204, + "grad_norm": 0.014667166220949858, + "learning_rate": 7.785177228786252e-06, + "loss": 0.0, + "step": 4649 + }, + { + "epoch": 0.2996713282206612, + "grad_norm": 0.0005881952328653598, + "learning_rate": 7.784461152882206e-06, + "loss": 0.0, + "step": 4650 + }, + { + "epoch": 0.2997357736675904, + "grad_norm": 0.0008412704886720019, + "learning_rate": 7.78374507697816e-06, + "loss": 0.0, + "step": 4651 + }, + { + "epoch": 0.29980021911451954, + "grad_norm": 0.04959488604565864, + "learning_rate": 7.783029001074115e-06, + "loss": 0.0002, + "step": 4652 + }, + { + "epoch": 0.2998646645614487, + "grad_norm": 0.002363653848485576, + "learning_rate": 7.78231292517007e-06, + "loss": 0.0, + "step": 4653 + }, + { + "epoch": 0.2999291100083779, + "grad_norm": 0.00021418820248923426, + "learning_rate": 7.781596849266023e-06, + "loss": 0.0, + "step": 4654 + }, + { + "epoch": 0.2999935554553071, + "grad_norm": 0.0019327509427194745, + "learning_rate": 7.780880773361978e-06, + "loss": 0.0, + "step": 4655 + }, + { + "epoch": 0.3000580009022363, + "grad_norm": 0.004825567350965219, + "learning_rate": 7.780164697457932e-06, + "loss": 0.0, + "step": 4656 + }, + { + "epoch": 0.3001224463491654, + "grad_norm": 0.02280787046614858, + "learning_rate": 7.779448621553886e-06, + "loss": 0.0001, + "step": 4657 + }, + { + "epoch": 0.3001868917960946, + "grad_norm": 0.000839246267523498, + "learning_rate": 7.778732545649839e-06, + "loss": 0.0, + "step": 4658 + }, + { + "epoch": 0.3002513372430238, + "grad_norm": 7.830966965893415e-05, + "learning_rate": 7.778016469745793e-06, + "loss": 0.0, + "step": 4659 + }, + { + "epoch": 0.30031578268995296, + "grad_norm": 0.27347772847300145, + "learning_rate": 7.777300393841748e-06, + "loss": 0.002, + "step": 4660 + }, + { + "epoch": 0.30038022813688214, + "grad_norm": 0.004087371353731553, + "learning_rate": 7.776584317937702e-06, + "loss": 0.0, + "step": 4661 + }, + { + "epoch": 0.30044467358381133, + "grad_norm": 0.0006904881544805334, + "learning_rate": 7.775868242033656e-06, + "loss": 0.0, + "step": 4662 + }, + { + "epoch": 0.30050911903074046, + "grad_norm": 0.005781394579225731, + "learning_rate": 7.77515216612961e-06, + "loss": 0.0001, + "step": 4663 + }, + { + "epoch": 0.30057356447766964, + "grad_norm": 0.10011603023948211, + "learning_rate": 7.774436090225565e-06, + "loss": 0.0017, + "step": 4664 + }, + { + "epoch": 0.3006380099245988, + "grad_norm": 0.0003700707387533223, + "learning_rate": 7.773720014321519e-06, + "loss": 0.0, + "step": 4665 + }, + { + "epoch": 0.300702455371528, + "grad_norm": 0.0036266337136206684, + "learning_rate": 7.773003938417473e-06, + "loss": 0.0, + "step": 4666 + }, + { + "epoch": 0.3007669008184572, + "grad_norm": 0.006053707020341861, + "learning_rate": 7.772287862513428e-06, + "loss": 0.0, + "step": 4667 + }, + { + "epoch": 0.3008313462653863, + "grad_norm": 0.002798110998151991, + "learning_rate": 7.77157178660938e-06, + "loss": 0.0, + "step": 4668 + }, + { + "epoch": 0.3008957917123155, + "grad_norm": 0.002980137785368464, + "learning_rate": 7.770855710705335e-06, + "loss": 0.0, + "step": 4669 + }, + { + "epoch": 0.3009602371592447, + "grad_norm": 0.002200906356600856, + "learning_rate": 7.770139634801289e-06, + "loss": 0.0, + "step": 4670 + }, + { + "epoch": 0.3010246826061739, + "grad_norm": 0.3781393095729475, + "learning_rate": 7.769423558897243e-06, + "loss": 0.0006, + "step": 4671 + }, + { + "epoch": 0.30108912805310306, + "grad_norm": 0.012304835706529734, + "learning_rate": 7.768707482993197e-06, + "loss": 0.0, + "step": 4672 + }, + { + "epoch": 0.30115357350003225, + "grad_norm": 0.030847193216389755, + "learning_rate": 7.767991407089152e-06, + "loss": 0.0, + "step": 4673 + }, + { + "epoch": 0.3012180189469614, + "grad_norm": 0.0013446218988361794, + "learning_rate": 7.767275331185106e-06, + "loss": 0.0, + "step": 4674 + }, + { + "epoch": 0.30128246439389056, + "grad_norm": 0.3333385313145346, + "learning_rate": 7.76655925528106e-06, + "loss": 0.0004, + "step": 4675 + }, + { + "epoch": 0.30134690984081974, + "grad_norm": 0.05260483629009791, + "learning_rate": 7.765843179377015e-06, + "loss": 0.0001, + "step": 4676 + }, + { + "epoch": 0.3014113552877489, + "grad_norm": 0.012214662431553574, + "learning_rate": 7.765127103472969e-06, + "loss": 0.0, + "step": 4677 + }, + { + "epoch": 0.3014758007346781, + "grad_norm": 0.00023572483317842684, + "learning_rate": 7.764411027568923e-06, + "loss": 0.0, + "step": 4678 + }, + { + "epoch": 0.3015402461816073, + "grad_norm": 0.15849859996043447, + "learning_rate": 7.763694951664878e-06, + "loss": 0.0002, + "step": 4679 + }, + { + "epoch": 0.3016046916285364, + "grad_norm": 0.028688948102707078, + "learning_rate": 7.762978875760832e-06, + "loss": 0.0001, + "step": 4680 + }, + { + "epoch": 0.3016691370754656, + "grad_norm": 0.2802043258044797, + "learning_rate": 7.762262799856786e-06, + "loss": 0.0006, + "step": 4681 + }, + { + "epoch": 0.3017335825223948, + "grad_norm": 0.01589553270615149, + "learning_rate": 7.76154672395274e-06, + "loss": 0.0002, + "step": 4682 + }, + { + "epoch": 0.301798027969324, + "grad_norm": 0.005081411054152497, + "learning_rate": 7.760830648048695e-06, + "loss": 0.0, + "step": 4683 + }, + { + "epoch": 0.30186247341625316, + "grad_norm": 0.05606253532474472, + "learning_rate": 7.760114572144647e-06, + "loss": 0.0001, + "step": 4684 + }, + { + "epoch": 0.3019269188631823, + "grad_norm": 0.01129783697631848, + "learning_rate": 7.759398496240602e-06, + "loss": 0.0001, + "step": 4685 + }, + { + "epoch": 0.3019913643101115, + "grad_norm": 0.0004596890283011299, + "learning_rate": 7.758682420336556e-06, + "loss": 0.0, + "step": 4686 + }, + { + "epoch": 0.30205580975704066, + "grad_norm": 0.000857765967735999, + "learning_rate": 7.75796634443251e-06, + "loss": 0.0, + "step": 4687 + }, + { + "epoch": 0.30212025520396985, + "grad_norm": 0.0026360514695217855, + "learning_rate": 7.757250268528465e-06, + "loss": 0.0, + "step": 4688 + }, + { + "epoch": 0.30218470065089903, + "grad_norm": 0.019969502601820874, + "learning_rate": 7.756534192624419e-06, + "loss": 0.0, + "step": 4689 + }, + { + "epoch": 0.3022491460978282, + "grad_norm": 0.009782519107099928, + "learning_rate": 7.755818116720373e-06, + "loss": 0.0, + "step": 4690 + }, + { + "epoch": 0.30231359154475734, + "grad_norm": 0.001139921838035331, + "learning_rate": 7.755102040816327e-06, + "loss": 0.0, + "step": 4691 + }, + { + "epoch": 0.3023780369916865, + "grad_norm": 0.010498980062060898, + "learning_rate": 7.754385964912282e-06, + "loss": 0.0, + "step": 4692 + }, + { + "epoch": 0.3024424824386157, + "grad_norm": 0.209275470748402, + "learning_rate": 7.753669889008234e-06, + "loss": 0.0008, + "step": 4693 + }, + { + "epoch": 0.3025069278855449, + "grad_norm": 0.006151414616092774, + "learning_rate": 7.752953813104189e-06, + "loss": 0.0, + "step": 4694 + }, + { + "epoch": 0.3025713733324741, + "grad_norm": 0.2086179229931362, + "learning_rate": 7.752237737200143e-06, + "loss": 0.0015, + "step": 4695 + }, + { + "epoch": 0.3026358187794032, + "grad_norm": 0.00013166228230322507, + "learning_rate": 7.751521661296097e-06, + "loss": 0.0, + "step": 4696 + }, + { + "epoch": 0.3027002642263324, + "grad_norm": 9.777865384865902e-05, + "learning_rate": 7.750805585392053e-06, + "loss": 0.0, + "step": 4697 + }, + { + "epoch": 0.3027647096732616, + "grad_norm": 0.734374634762937, + "learning_rate": 7.750089509488008e-06, + "loss": 0.0025, + "step": 4698 + }, + { + "epoch": 0.30282915512019076, + "grad_norm": 0.006581345635300139, + "learning_rate": 7.749373433583962e-06, + "loss": 0.0, + "step": 4699 + }, + { + "epoch": 0.30289360056711995, + "grad_norm": 0.1299164592212838, + "learning_rate": 7.748657357679914e-06, + "loss": 0.0009, + "step": 4700 + }, + { + "epoch": 0.30295804601404913, + "grad_norm": 0.005917647420111509, + "learning_rate": 7.747941281775869e-06, + "loss": 0.0, + "step": 4701 + }, + { + "epoch": 0.30302249146097826, + "grad_norm": 0.46501049288836965, + "learning_rate": 7.747225205871823e-06, + "loss": 0.0088, + "step": 4702 + }, + { + "epoch": 0.30308693690790744, + "grad_norm": 0.0004601057773333485, + "learning_rate": 7.746509129967777e-06, + "loss": 0.0, + "step": 4703 + }, + { + "epoch": 0.30315138235483663, + "grad_norm": 0.16790902384417963, + "learning_rate": 7.745793054063732e-06, + "loss": 0.0002, + "step": 4704 + }, + { + "epoch": 0.3032158278017658, + "grad_norm": 0.009253831686704374, + "learning_rate": 7.745076978159686e-06, + "loss": 0.0, + "step": 4705 + }, + { + "epoch": 0.303280273248695, + "grad_norm": 0.05335917375409336, + "learning_rate": 7.74436090225564e-06, + "loss": 0.0002, + "step": 4706 + }, + { + "epoch": 0.3033447186956241, + "grad_norm": 0.0001441868512219672, + "learning_rate": 7.743644826351595e-06, + "loss": 0.0, + "step": 4707 + }, + { + "epoch": 0.3034091641425533, + "grad_norm": 0.0002741329134754317, + "learning_rate": 7.742928750447549e-06, + "loss": 0.0, + "step": 4708 + }, + { + "epoch": 0.3034736095894825, + "grad_norm": 0.01443467281263996, + "learning_rate": 7.742212674543501e-06, + "loss": 0.0001, + "step": 4709 + }, + { + "epoch": 0.3035380550364117, + "grad_norm": 0.015183455470397374, + "learning_rate": 7.741496598639456e-06, + "loss": 0.0001, + "step": 4710 + }, + { + "epoch": 0.30360250048334086, + "grad_norm": 0.023355433462827355, + "learning_rate": 7.74078052273541e-06, + "loss": 0.0002, + "step": 4711 + }, + { + "epoch": 0.30366694593027005, + "grad_norm": 0.004497382272455204, + "learning_rate": 7.740064446831364e-06, + "loss": 0.0, + "step": 4712 + }, + { + "epoch": 0.3037313913771992, + "grad_norm": 0.03580885399675683, + "learning_rate": 7.739348370927319e-06, + "loss": 0.0004, + "step": 4713 + }, + { + "epoch": 0.30379583682412836, + "grad_norm": 0.00021593148826366693, + "learning_rate": 7.738632295023273e-06, + "loss": 0.0, + "step": 4714 + }, + { + "epoch": 0.30386028227105755, + "grad_norm": 0.0006532279015124764, + "learning_rate": 7.737916219119227e-06, + "loss": 0.0, + "step": 4715 + }, + { + "epoch": 0.30392472771798673, + "grad_norm": 0.0016168136163807064, + "learning_rate": 7.737200143215182e-06, + "loss": 0.0, + "step": 4716 + }, + { + "epoch": 0.3039891731649159, + "grad_norm": 0.00016003900954974685, + "learning_rate": 7.736484067311136e-06, + "loss": 0.0, + "step": 4717 + }, + { + "epoch": 0.3040536186118451, + "grad_norm": 0.04574878624670092, + "learning_rate": 7.73576799140709e-06, + "loss": 0.0001, + "step": 4718 + }, + { + "epoch": 0.30411806405877423, + "grad_norm": 0.09921795946091631, + "learning_rate": 7.735051915503043e-06, + "loss": 0.0009, + "step": 4719 + }, + { + "epoch": 0.3041825095057034, + "grad_norm": 0.0009618883446051705, + "learning_rate": 7.734335839598997e-06, + "loss": 0.0, + "step": 4720 + }, + { + "epoch": 0.3042469549526326, + "grad_norm": 0.00015935827088833944, + "learning_rate": 7.733619763694953e-06, + "loss": 0.0, + "step": 4721 + }, + { + "epoch": 0.3043114003995618, + "grad_norm": 0.004121915434638065, + "learning_rate": 7.732903687790907e-06, + "loss": 0.0, + "step": 4722 + }, + { + "epoch": 0.30437584584649097, + "grad_norm": 1.251110299054438, + "learning_rate": 7.732187611886862e-06, + "loss": 0.0038, + "step": 4723 + }, + { + "epoch": 0.3044402912934201, + "grad_norm": 0.0019873520206396024, + "learning_rate": 7.731471535982816e-06, + "loss": 0.0, + "step": 4724 + }, + { + "epoch": 0.3045047367403493, + "grad_norm": 0.00038713677005660534, + "learning_rate": 7.730755460078769e-06, + "loss": 0.0, + "step": 4725 + }, + { + "epoch": 0.30456918218727846, + "grad_norm": 0.007009648691403379, + "learning_rate": 7.730039384174723e-06, + "loss": 0.0001, + "step": 4726 + }, + { + "epoch": 0.30463362763420765, + "grad_norm": 0.27969282933703976, + "learning_rate": 7.729323308270677e-06, + "loss": 0.0006, + "step": 4727 + }, + { + "epoch": 0.30469807308113683, + "grad_norm": 0.0012857714808325838, + "learning_rate": 7.728607232366631e-06, + "loss": 0.0, + "step": 4728 + }, + { + "epoch": 0.304762518528066, + "grad_norm": 0.5750549352359965, + "learning_rate": 7.727891156462586e-06, + "loss": 0.0035, + "step": 4729 + }, + { + "epoch": 0.30482696397499515, + "grad_norm": 0.2587270122489265, + "learning_rate": 7.72717508055854e-06, + "loss": 0.0004, + "step": 4730 + }, + { + "epoch": 0.30489140942192433, + "grad_norm": 0.002896226310871362, + "learning_rate": 7.726459004654494e-06, + "loss": 0.0, + "step": 4731 + }, + { + "epoch": 0.3049558548688535, + "grad_norm": 0.00044256071047343225, + "learning_rate": 7.725742928750449e-06, + "loss": 0.0, + "step": 4732 + }, + { + "epoch": 0.3050203003157827, + "grad_norm": 0.003308588148680433, + "learning_rate": 7.725026852846403e-06, + "loss": 0.0, + "step": 4733 + }, + { + "epoch": 0.3050847457627119, + "grad_norm": 0.001083691728375002, + "learning_rate": 7.724310776942357e-06, + "loss": 0.0, + "step": 4734 + }, + { + "epoch": 0.305149191209641, + "grad_norm": 0.0007827155588311144, + "learning_rate": 7.72359470103831e-06, + "loss": 0.0, + "step": 4735 + }, + { + "epoch": 0.3052136366565702, + "grad_norm": 0.036329813310400326, + "learning_rate": 7.722878625134264e-06, + "loss": 0.0004, + "step": 4736 + }, + { + "epoch": 0.3052780821034994, + "grad_norm": 0.0210308823587032, + "learning_rate": 7.722162549230218e-06, + "loss": 0.0, + "step": 4737 + }, + { + "epoch": 0.30534252755042857, + "grad_norm": 0.16520561188861155, + "learning_rate": 7.721446473326173e-06, + "loss": 0.0023, + "step": 4738 + }, + { + "epoch": 0.30540697299735775, + "grad_norm": 0.005275712354562943, + "learning_rate": 7.720730397422127e-06, + "loss": 0.0, + "step": 4739 + }, + { + "epoch": 0.30547141844428694, + "grad_norm": 0.900496966853056, + "learning_rate": 7.720014321518081e-06, + "loss": 0.0048, + "step": 4740 + }, + { + "epoch": 0.30553586389121606, + "grad_norm": 0.008231191698063514, + "learning_rate": 7.719298245614036e-06, + "loss": 0.0, + "step": 4741 + }, + { + "epoch": 0.30560030933814525, + "grad_norm": 0.3738767171786509, + "learning_rate": 7.71858216970999e-06, + "loss": 0.0023, + "step": 4742 + }, + { + "epoch": 0.30566475478507443, + "grad_norm": 0.3131810158747463, + "learning_rate": 7.717866093805944e-06, + "loss": 0.001, + "step": 4743 + }, + { + "epoch": 0.3057292002320036, + "grad_norm": 0.20129595328139735, + "learning_rate": 7.717150017901898e-06, + "loss": 0.0007, + "step": 4744 + }, + { + "epoch": 0.3057936456789328, + "grad_norm": 0.021132452629210614, + "learning_rate": 7.716433941997853e-06, + "loss": 0.0001, + "step": 4745 + }, + { + "epoch": 0.30585809112586193, + "grad_norm": 0.037783536362330825, + "learning_rate": 7.715717866093807e-06, + "loss": 0.0001, + "step": 4746 + }, + { + "epoch": 0.3059225365727911, + "grad_norm": 0.45276547013303264, + "learning_rate": 7.715001790189761e-06, + "loss": 0.0015, + "step": 4747 + }, + { + "epoch": 0.3059869820197203, + "grad_norm": 0.49549389414773587, + "learning_rate": 7.714285714285716e-06, + "loss": 0.0024, + "step": 4748 + }, + { + "epoch": 0.3060514274666495, + "grad_norm": 0.00019081945517868202, + "learning_rate": 7.71356963838167e-06, + "loss": 0.0, + "step": 4749 + }, + { + "epoch": 0.30611587291357867, + "grad_norm": 0.0014320023238470958, + "learning_rate": 7.712853562477624e-06, + "loss": 0.0, + "step": 4750 + }, + { + "epoch": 0.30618031836050785, + "grad_norm": 0.06193467670974739, + "learning_rate": 7.712137486573577e-06, + "loss": 0.0001, + "step": 4751 + }, + { + "epoch": 0.306244763807437, + "grad_norm": 0.024614945322434304, + "learning_rate": 7.711421410669531e-06, + "loss": 0.0, + "step": 4752 + }, + { + "epoch": 0.30630920925436617, + "grad_norm": 0.0035196920149812207, + "learning_rate": 7.710705334765485e-06, + "loss": 0.0, + "step": 4753 + }, + { + "epoch": 0.30637365470129535, + "grad_norm": 0.13125440414169431, + "learning_rate": 7.70998925886144e-06, + "loss": 0.0002, + "step": 4754 + }, + { + "epoch": 0.30643810014822453, + "grad_norm": 0.004681982640320887, + "learning_rate": 7.709273182957394e-06, + "loss": 0.0, + "step": 4755 + }, + { + "epoch": 0.3065025455951537, + "grad_norm": 0.006949093560674066, + "learning_rate": 7.708557107053348e-06, + "loss": 0.0, + "step": 4756 + }, + { + "epoch": 0.3065669910420829, + "grad_norm": 0.0036677102878780133, + "learning_rate": 7.707841031149303e-06, + "loss": 0.0, + "step": 4757 + }, + { + "epoch": 0.30663143648901203, + "grad_norm": 0.12488582731538928, + "learning_rate": 7.707124955245257e-06, + "loss": 0.0002, + "step": 4758 + }, + { + "epoch": 0.3066958819359412, + "grad_norm": 0.10194604235196442, + "learning_rate": 7.706408879341211e-06, + "loss": 0.002, + "step": 4759 + }, + { + "epoch": 0.3067603273828704, + "grad_norm": 0.029717985220950458, + "learning_rate": 7.705692803437166e-06, + "loss": 0.0003, + "step": 4760 + }, + { + "epoch": 0.3068247728297996, + "grad_norm": 0.29767039057667016, + "learning_rate": 7.704976727533118e-06, + "loss": 0.0043, + "step": 4761 + }, + { + "epoch": 0.30688921827672877, + "grad_norm": 0.009892853815882239, + "learning_rate": 7.704260651629072e-06, + "loss": 0.0001, + "step": 4762 + }, + { + "epoch": 0.3069536637236579, + "grad_norm": 0.9851313213488757, + "learning_rate": 7.703544575725027e-06, + "loss": 0.0037, + "step": 4763 + }, + { + "epoch": 0.3070181091705871, + "grad_norm": 0.17237959830448984, + "learning_rate": 7.702828499820981e-06, + "loss": 0.0004, + "step": 4764 + }, + { + "epoch": 0.30708255461751627, + "grad_norm": 0.20161969607969146, + "learning_rate": 7.702112423916935e-06, + "loss": 0.0007, + "step": 4765 + }, + { + "epoch": 0.30714700006444545, + "grad_norm": 0.009081867704277675, + "learning_rate": 7.70139634801289e-06, + "loss": 0.0, + "step": 4766 + }, + { + "epoch": 0.30721144551137464, + "grad_norm": 0.00021591472203924571, + "learning_rate": 7.700680272108844e-06, + "loss": 0.0, + "step": 4767 + }, + { + "epoch": 0.3072758909583038, + "grad_norm": 0.1620586212402397, + "learning_rate": 7.699964196204798e-06, + "loss": 0.0014, + "step": 4768 + }, + { + "epoch": 0.30734033640523295, + "grad_norm": 0.0013959799328598175, + "learning_rate": 7.699248120300753e-06, + "loss": 0.0, + "step": 4769 + }, + { + "epoch": 0.30740478185216213, + "grad_norm": 0.0035084092927934003, + "learning_rate": 7.698532044396707e-06, + "loss": 0.0, + "step": 4770 + }, + { + "epoch": 0.3074692272990913, + "grad_norm": 0.0012145909662963545, + "learning_rate": 7.697815968492661e-06, + "loss": 0.0, + "step": 4771 + }, + { + "epoch": 0.3075336727460205, + "grad_norm": 0.0020774691234807267, + "learning_rate": 7.697099892588615e-06, + "loss": 0.0, + "step": 4772 + }, + { + "epoch": 0.3075981181929497, + "grad_norm": 0.02511536306626666, + "learning_rate": 7.69638381668457e-06, + "loss": 0.0001, + "step": 4773 + }, + { + "epoch": 0.3076625636398788, + "grad_norm": 0.3414683043347056, + "learning_rate": 7.695667740780524e-06, + "loss": 0.001, + "step": 4774 + }, + { + "epoch": 0.307727009086808, + "grad_norm": 0.018721636445177253, + "learning_rate": 7.694951664876478e-06, + "loss": 0.0, + "step": 4775 + }, + { + "epoch": 0.3077914545337372, + "grad_norm": 0.004516405023989719, + "learning_rate": 7.694235588972433e-06, + "loss": 0.0, + "step": 4776 + }, + { + "epoch": 0.30785589998066637, + "grad_norm": 0.001999490388142513, + "learning_rate": 7.693519513068385e-06, + "loss": 0.0, + "step": 4777 + }, + { + "epoch": 0.30792034542759555, + "grad_norm": 0.009126433007855877, + "learning_rate": 7.69280343716434e-06, + "loss": 0.0, + "step": 4778 + }, + { + "epoch": 0.30798479087452474, + "grad_norm": 0.1669292889199868, + "learning_rate": 7.692087361260294e-06, + "loss": 0.0003, + "step": 4779 + }, + { + "epoch": 0.30804923632145387, + "grad_norm": 0.1951716678231349, + "learning_rate": 7.691371285356248e-06, + "loss": 0.0021, + "step": 4780 + }, + { + "epoch": 0.30811368176838305, + "grad_norm": 0.03909516756695523, + "learning_rate": 7.690655209452202e-06, + "loss": 0.0001, + "step": 4781 + }, + { + "epoch": 0.30817812721531224, + "grad_norm": 0.03378777956318579, + "learning_rate": 7.689939133548157e-06, + "loss": 0.0001, + "step": 4782 + }, + { + "epoch": 0.3082425726622414, + "grad_norm": 0.019446393540969095, + "learning_rate": 7.689223057644111e-06, + "loss": 0.0001, + "step": 4783 + }, + { + "epoch": 0.3083070181091706, + "grad_norm": 0.00664535455497235, + "learning_rate": 7.688506981740065e-06, + "loss": 0.0, + "step": 4784 + }, + { + "epoch": 0.30837146355609973, + "grad_norm": 0.03097241392532757, + "learning_rate": 7.68779090583602e-06, + "loss": 0.0, + "step": 4785 + }, + { + "epoch": 0.3084359090030289, + "grad_norm": 0.00806853471227027, + "learning_rate": 7.687074829931972e-06, + "loss": 0.0001, + "step": 4786 + }, + { + "epoch": 0.3085003544499581, + "grad_norm": 0.018839860233995302, + "learning_rate": 7.686358754027927e-06, + "loss": 0.0001, + "step": 4787 + }, + { + "epoch": 0.3085647998968873, + "grad_norm": 0.04220925687343892, + "learning_rate": 7.685642678123881e-06, + "loss": 0.0001, + "step": 4788 + }, + { + "epoch": 0.30862924534381647, + "grad_norm": 0.0034860404645811554, + "learning_rate": 7.684926602219835e-06, + "loss": 0.0, + "step": 4789 + }, + { + "epoch": 0.30869369079074566, + "grad_norm": 0.0022177384041537087, + "learning_rate": 7.68421052631579e-06, + "loss": 0.0, + "step": 4790 + }, + { + "epoch": 0.3087581362376748, + "grad_norm": 0.005044939837968819, + "learning_rate": 7.683494450411745e-06, + "loss": 0.0, + "step": 4791 + }, + { + "epoch": 0.30882258168460397, + "grad_norm": 0.0005836039764449223, + "learning_rate": 7.6827783745077e-06, + "loss": 0.0, + "step": 4792 + }, + { + "epoch": 0.30888702713153315, + "grad_norm": 0.0037036582704997596, + "learning_rate": 7.682062298603652e-06, + "loss": 0.0, + "step": 4793 + }, + { + "epoch": 0.30895147257846234, + "grad_norm": 0.006685573662889619, + "learning_rate": 7.681346222699607e-06, + "loss": 0.0, + "step": 4794 + }, + { + "epoch": 0.3090159180253915, + "grad_norm": 0.0004702842280755588, + "learning_rate": 7.680630146795561e-06, + "loss": 0.0, + "step": 4795 + }, + { + "epoch": 0.3090803634723207, + "grad_norm": 0.041626408650233614, + "learning_rate": 7.679914070891515e-06, + "loss": 0.0002, + "step": 4796 + }, + { + "epoch": 0.30914480891924984, + "grad_norm": 0.0007088733267549225, + "learning_rate": 7.67919799498747e-06, + "loss": 0.0, + "step": 4797 + }, + { + "epoch": 0.309209254366179, + "grad_norm": 0.01583183010603208, + "learning_rate": 7.678481919083424e-06, + "loss": 0.0, + "step": 4798 + }, + { + "epoch": 0.3092736998131082, + "grad_norm": 0.005335127912471382, + "learning_rate": 7.677765843179378e-06, + "loss": 0.0, + "step": 4799 + }, + { + "epoch": 0.3093381452600374, + "grad_norm": 0.0010392895308987736, + "learning_rate": 7.677049767275332e-06, + "loss": 0.0, + "step": 4800 + }, + { + "epoch": 0.3094025907069666, + "grad_norm": 0.04592088900990831, + "learning_rate": 7.676333691371287e-06, + "loss": 0.0001, + "step": 4801 + }, + { + "epoch": 0.3094670361538957, + "grad_norm": 0.05353814637908716, + "learning_rate": 7.67561761546724e-06, + "loss": 0.0001, + "step": 4802 + }, + { + "epoch": 0.3095314816008249, + "grad_norm": 0.004138582846488795, + "learning_rate": 7.674901539563194e-06, + "loss": 0.0, + "step": 4803 + }, + { + "epoch": 0.30959592704775407, + "grad_norm": 0.02532980585580661, + "learning_rate": 7.674185463659148e-06, + "loss": 0.0002, + "step": 4804 + }, + { + "epoch": 0.30966037249468326, + "grad_norm": 0.22175176851799505, + "learning_rate": 7.673469387755102e-06, + "loss": 0.0006, + "step": 4805 + }, + { + "epoch": 0.30972481794161244, + "grad_norm": 0.02077356363290854, + "learning_rate": 7.672753311851057e-06, + "loss": 0.0001, + "step": 4806 + }, + { + "epoch": 0.3097892633885416, + "grad_norm": 0.0009383453196531269, + "learning_rate": 7.67203723594701e-06, + "loss": 0.0, + "step": 4807 + }, + { + "epoch": 0.30985370883547075, + "grad_norm": 0.0021745621033941653, + "learning_rate": 7.671321160042965e-06, + "loss": 0.0, + "step": 4808 + }, + { + "epoch": 0.30991815428239994, + "grad_norm": 0.012943322912088268, + "learning_rate": 7.67060508413892e-06, + "loss": 0.0001, + "step": 4809 + }, + { + "epoch": 0.3099825997293291, + "grad_norm": 1.1511934908108268, + "learning_rate": 7.669889008234874e-06, + "loss": 0.0127, + "step": 4810 + }, + { + "epoch": 0.3100470451762583, + "grad_norm": 0.0009821788142993016, + "learning_rate": 7.669172932330828e-06, + "loss": 0.0, + "step": 4811 + }, + { + "epoch": 0.3101114906231875, + "grad_norm": 0.0007450914756910727, + "learning_rate": 7.66845685642678e-06, + "loss": 0.0, + "step": 4812 + }, + { + "epoch": 0.3101759360701166, + "grad_norm": 0.0015614974705150063, + "learning_rate": 7.667740780522735e-06, + "loss": 0.0, + "step": 4813 + }, + { + "epoch": 0.3102403815170458, + "grad_norm": 0.007181676464111988, + "learning_rate": 7.667024704618691e-06, + "loss": 0.0, + "step": 4814 + }, + { + "epoch": 0.310304826963975, + "grad_norm": 0.019878176986525983, + "learning_rate": 7.666308628714645e-06, + "loss": 0.0, + "step": 4815 + }, + { + "epoch": 0.3103692724109042, + "grad_norm": 0.0002867674669890647, + "learning_rate": 7.6655925528106e-06, + "loss": 0.0, + "step": 4816 + }, + { + "epoch": 0.31043371785783336, + "grad_norm": 1.71531500456341, + "learning_rate": 7.664876476906554e-06, + "loss": 0.0089, + "step": 4817 + }, + { + "epoch": 0.31049816330476254, + "grad_norm": 0.020545370898791864, + "learning_rate": 7.664160401002506e-06, + "loss": 0.0, + "step": 4818 + }, + { + "epoch": 0.31056260875169167, + "grad_norm": 0.0020818963706780835, + "learning_rate": 7.66344432509846e-06, + "loss": 0.0, + "step": 4819 + }, + { + "epoch": 0.31062705419862086, + "grad_norm": 0.0007152623732576892, + "learning_rate": 7.662728249194415e-06, + "loss": 0.0, + "step": 4820 + }, + { + "epoch": 0.31069149964555004, + "grad_norm": 0.0009722607342199007, + "learning_rate": 7.66201217329037e-06, + "loss": 0.0, + "step": 4821 + }, + { + "epoch": 0.3107559450924792, + "grad_norm": 0.023470547345033253, + "learning_rate": 7.661296097386324e-06, + "loss": 0.0, + "step": 4822 + }, + { + "epoch": 0.3108203905394084, + "grad_norm": 0.023717350679689185, + "learning_rate": 7.660580021482278e-06, + "loss": 0.0001, + "step": 4823 + }, + { + "epoch": 0.31088483598633754, + "grad_norm": 0.04864605815166363, + "learning_rate": 7.659863945578232e-06, + "loss": 0.0001, + "step": 4824 + }, + { + "epoch": 0.3109492814332667, + "grad_norm": 0.34392935235481353, + "learning_rate": 7.659147869674187e-06, + "loss": 0.0009, + "step": 4825 + }, + { + "epoch": 0.3110137268801959, + "grad_norm": 0.0010634566579679346, + "learning_rate": 7.65843179377014e-06, + "loss": 0.0, + "step": 4826 + }, + { + "epoch": 0.3110781723271251, + "grad_norm": 0.011746606897081312, + "learning_rate": 7.657715717866095e-06, + "loss": 0.0, + "step": 4827 + }, + { + "epoch": 0.3111426177740543, + "grad_norm": 0.03706126639581151, + "learning_rate": 7.656999641962048e-06, + "loss": 0.0015, + "step": 4828 + }, + { + "epoch": 0.31120706322098346, + "grad_norm": 0.2990590368965242, + "learning_rate": 7.656283566058002e-06, + "loss": 0.0015, + "step": 4829 + }, + { + "epoch": 0.3112715086679126, + "grad_norm": 0.0010528169780914909, + "learning_rate": 7.655567490153956e-06, + "loss": 0.0, + "step": 4830 + }, + { + "epoch": 0.3113359541148418, + "grad_norm": 0.042425665637751686, + "learning_rate": 7.65485141424991e-06, + "loss": 0.0, + "step": 4831 + }, + { + "epoch": 0.31140039956177096, + "grad_norm": 0.001670182439381502, + "learning_rate": 7.654135338345865e-06, + "loss": 0.0, + "step": 4832 + }, + { + "epoch": 0.31146484500870014, + "grad_norm": 0.0009654571293139372, + "learning_rate": 7.65341926244182e-06, + "loss": 0.0, + "step": 4833 + }, + { + "epoch": 0.3115292904556293, + "grad_norm": 0.011802421818662695, + "learning_rate": 7.652703186537774e-06, + "loss": 0.0, + "step": 4834 + }, + { + "epoch": 0.3115937359025585, + "grad_norm": 0.25027796074816694, + "learning_rate": 7.651987110633728e-06, + "loss": 0.0001, + "step": 4835 + }, + { + "epoch": 0.31165818134948764, + "grad_norm": 0.009832647673374325, + "learning_rate": 7.651271034729682e-06, + "loss": 0.0, + "step": 4836 + }, + { + "epoch": 0.3117226267964168, + "grad_norm": 0.29034152459702184, + "learning_rate": 7.650554958825636e-06, + "loss": 0.0042, + "step": 4837 + }, + { + "epoch": 0.311787072243346, + "grad_norm": 0.0031092830812778157, + "learning_rate": 7.64983888292159e-06, + "loss": 0.0, + "step": 4838 + }, + { + "epoch": 0.3118515176902752, + "grad_norm": 0.04667664571851142, + "learning_rate": 7.649122807017545e-06, + "loss": 0.0003, + "step": 4839 + }, + { + "epoch": 0.3119159631372044, + "grad_norm": 0.007606067281312005, + "learning_rate": 7.6484067311135e-06, + "loss": 0.0, + "step": 4840 + }, + { + "epoch": 0.3119804085841335, + "grad_norm": 0.02780274263883628, + "learning_rate": 7.647690655209454e-06, + "loss": 0.0, + "step": 4841 + }, + { + "epoch": 0.3120448540310627, + "grad_norm": 0.023759968008487852, + "learning_rate": 7.646974579305408e-06, + "loss": 0.0001, + "step": 4842 + }, + { + "epoch": 0.3121092994779919, + "grad_norm": 0.0008431761617935588, + "learning_rate": 7.646258503401362e-06, + "loss": 0.0, + "step": 4843 + }, + { + "epoch": 0.31217374492492106, + "grad_norm": 0.015393384023410159, + "learning_rate": 7.645542427497315e-06, + "loss": 0.0, + "step": 4844 + }, + { + "epoch": 0.31223819037185024, + "grad_norm": 0.004651234971706581, + "learning_rate": 7.644826351593269e-06, + "loss": 0.0, + "step": 4845 + }, + { + "epoch": 0.31230263581877943, + "grad_norm": 0.3541639505543996, + "learning_rate": 7.644110275689223e-06, + "loss": 0.0008, + "step": 4846 + }, + { + "epoch": 0.31236708126570856, + "grad_norm": 0.5029302800739834, + "learning_rate": 7.643394199785178e-06, + "loss": 0.0007, + "step": 4847 + }, + { + "epoch": 0.31243152671263774, + "grad_norm": 1.1434380840791063, + "learning_rate": 7.642678123881132e-06, + "loss": 0.0061, + "step": 4848 + }, + { + "epoch": 0.3124959721595669, + "grad_norm": 0.09346831740869416, + "learning_rate": 7.641962047977086e-06, + "loss": 0.0004, + "step": 4849 + }, + { + "epoch": 0.3125604176064961, + "grad_norm": 0.05341842719165527, + "learning_rate": 7.64124597207304e-06, + "loss": 0.0001, + "step": 4850 + }, + { + "epoch": 0.3126248630534253, + "grad_norm": 0.00365746558821454, + "learning_rate": 7.640529896168995e-06, + "loss": 0.0, + "step": 4851 + }, + { + "epoch": 0.3126893085003544, + "grad_norm": 0.004174417788371213, + "learning_rate": 7.63981382026495e-06, + "loss": 0.0, + "step": 4852 + }, + { + "epoch": 0.3127537539472836, + "grad_norm": 0.12209112752832342, + "learning_rate": 7.639097744360904e-06, + "loss": 0.0006, + "step": 4853 + }, + { + "epoch": 0.3128181993942128, + "grad_norm": 4.058324927935555, + "learning_rate": 7.638381668456856e-06, + "loss": 0.0265, + "step": 4854 + }, + { + "epoch": 0.312882644841142, + "grad_norm": 0.006323243901996307, + "learning_rate": 7.63766559255281e-06, + "loss": 0.0, + "step": 4855 + }, + { + "epoch": 0.31294709028807116, + "grad_norm": 0.08450654898943195, + "learning_rate": 7.636949516648765e-06, + "loss": 0.0003, + "step": 4856 + }, + { + "epoch": 0.31301153573500035, + "grad_norm": 0.036397043543195534, + "learning_rate": 7.636233440744719e-06, + "loss": 0.0, + "step": 4857 + }, + { + "epoch": 0.3130759811819295, + "grad_norm": 0.1896315550434969, + "learning_rate": 7.635517364840673e-06, + "loss": 0.0002, + "step": 4858 + }, + { + "epoch": 0.31314042662885866, + "grad_norm": 0.09269444465632841, + "learning_rate": 7.634801288936628e-06, + "loss": 0.0002, + "step": 4859 + }, + { + "epoch": 0.31320487207578784, + "grad_norm": 1.4154748765446705, + "learning_rate": 7.634085213032582e-06, + "loss": 0.0064, + "step": 4860 + }, + { + "epoch": 0.31326931752271703, + "grad_norm": 0.1601978399193165, + "learning_rate": 7.633369137128536e-06, + "loss": 0.0002, + "step": 4861 + }, + { + "epoch": 0.3133337629696462, + "grad_norm": 0.001664992046050716, + "learning_rate": 7.63265306122449e-06, + "loss": 0.0, + "step": 4862 + }, + { + "epoch": 0.31339820841657534, + "grad_norm": 0.005741379817492573, + "learning_rate": 7.631936985320445e-06, + "loss": 0.0, + "step": 4863 + }, + { + "epoch": 0.3134626538635045, + "grad_norm": 0.3956785422217856, + "learning_rate": 7.631220909416399e-06, + "loss": 0.0017, + "step": 4864 + }, + { + "epoch": 0.3135270993104337, + "grad_norm": 0.02680946006956498, + "learning_rate": 7.630504833512353e-06, + "loss": 0.0, + "step": 4865 + }, + { + "epoch": 0.3135915447573629, + "grad_norm": 0.4638488605292008, + "learning_rate": 7.629788757608308e-06, + "loss": 0.0007, + "step": 4866 + }, + { + "epoch": 0.3136559902042921, + "grad_norm": 0.14234355024120665, + "learning_rate": 7.629072681704261e-06, + "loss": 0.0013, + "step": 4867 + }, + { + "epoch": 0.31372043565122126, + "grad_norm": 0.07415628314107933, + "learning_rate": 7.6283566058002154e-06, + "loss": 0.0001, + "step": 4868 + }, + { + "epoch": 0.3137848810981504, + "grad_norm": 0.06906294810141103, + "learning_rate": 7.62764052989617e-06, + "loss": 0.0001, + "step": 4869 + }, + { + "epoch": 0.3138493265450796, + "grad_norm": 0.13154624617698507, + "learning_rate": 7.626924453992124e-06, + "loss": 0.0018, + "step": 4870 + }, + { + "epoch": 0.31391377199200876, + "grad_norm": 1.0724474365407197, + "learning_rate": 7.626208378088078e-06, + "loss": 0.0054, + "step": 4871 + }, + { + "epoch": 0.31397821743893795, + "grad_norm": 0.3422724004482794, + "learning_rate": 7.625492302184032e-06, + "loss": 0.0024, + "step": 4872 + }, + { + "epoch": 0.31404266288586713, + "grad_norm": 0.0016748577841006922, + "learning_rate": 7.624776226279986e-06, + "loss": 0.0, + "step": 4873 + }, + { + "epoch": 0.3141071083327963, + "grad_norm": 0.041638429798124134, + "learning_rate": 7.62406015037594e-06, + "loss": 0.0001, + "step": 4874 + }, + { + "epoch": 0.31417155377972544, + "grad_norm": 0.003857485529783318, + "learning_rate": 7.623344074471895e-06, + "loss": 0.0, + "step": 4875 + }, + { + "epoch": 0.3142359992266546, + "grad_norm": 0.03168840340461246, + "learning_rate": 7.622627998567849e-06, + "loss": 0.0, + "step": 4876 + }, + { + "epoch": 0.3143004446735838, + "grad_norm": 0.006814045902594915, + "learning_rate": 7.6219119226638024e-06, + "loss": 0.0, + "step": 4877 + }, + { + "epoch": 0.314364890120513, + "grad_norm": 0.18182873983099754, + "learning_rate": 7.621195846759757e-06, + "loss": 0.0003, + "step": 4878 + }, + { + "epoch": 0.3144293355674422, + "grad_norm": 0.00901951630925349, + "learning_rate": 7.620479770855711e-06, + "loss": 0.0, + "step": 4879 + }, + { + "epoch": 0.3144937810143713, + "grad_norm": 1.9066806994546444, + "learning_rate": 7.619763694951665e-06, + "loss": 0.0055, + "step": 4880 + }, + { + "epoch": 0.3145582264613005, + "grad_norm": 0.04556863316205216, + "learning_rate": 7.61904761904762e-06, + "loss": 0.0002, + "step": 4881 + }, + { + "epoch": 0.3146226719082297, + "grad_norm": 0.0012398452923164066, + "learning_rate": 7.618331543143573e-06, + "loss": 0.0, + "step": 4882 + }, + { + "epoch": 0.31468711735515886, + "grad_norm": 0.04519157202135707, + "learning_rate": 7.617615467239527e-06, + "loss": 0.0001, + "step": 4883 + }, + { + "epoch": 0.31475156280208805, + "grad_norm": 0.02342436591337108, + "learning_rate": 7.6168993913354825e-06, + "loss": 0.0017, + "step": 4884 + }, + { + "epoch": 0.31481600824901723, + "grad_norm": 0.018536176514005024, + "learning_rate": 7.616183315431437e-06, + "loss": 0.0002, + "step": 4885 + }, + { + "epoch": 0.31488045369594636, + "grad_norm": 0.172805904026286, + "learning_rate": 7.615467239527391e-06, + "loss": 0.0019, + "step": 4886 + }, + { + "epoch": 0.31494489914287555, + "grad_norm": 0.05709872304307744, + "learning_rate": 7.614751163623345e-06, + "loss": 0.0002, + "step": 4887 + }, + { + "epoch": 0.31500934458980473, + "grad_norm": 0.04901532527782089, + "learning_rate": 7.614035087719299e-06, + "loss": 0.0, + "step": 4888 + }, + { + "epoch": 0.3150737900367339, + "grad_norm": 0.045398359407119136, + "learning_rate": 7.613319011815253e-06, + "loss": 0.0001, + "step": 4889 + }, + { + "epoch": 0.3151382354836631, + "grad_norm": 0.01797741643383171, + "learning_rate": 7.6126029359112075e-06, + "loss": 0.0, + "step": 4890 + }, + { + "epoch": 0.3152026809305922, + "grad_norm": 0.003458750480485351, + "learning_rate": 7.611886860007162e-06, + "loss": 0.0, + "step": 4891 + }, + { + "epoch": 0.3152671263775214, + "grad_norm": 0.041268147514423606, + "learning_rate": 7.611170784103116e-06, + "loss": 0.0003, + "step": 4892 + }, + { + "epoch": 0.3153315718244506, + "grad_norm": 0.10172780046470717, + "learning_rate": 7.6104547081990695e-06, + "loss": 0.0001, + "step": 4893 + }, + { + "epoch": 0.3153960172713798, + "grad_norm": 0.0037263810735243897, + "learning_rate": 7.609738632295024e-06, + "loss": 0.0, + "step": 4894 + }, + { + "epoch": 0.31546046271830896, + "grad_norm": 0.0032725613126103575, + "learning_rate": 7.609022556390978e-06, + "loss": 0.0, + "step": 4895 + }, + { + "epoch": 0.31552490816523815, + "grad_norm": 0.09012161182014738, + "learning_rate": 7.608306480486932e-06, + "loss": 0.0008, + "step": 4896 + }, + { + "epoch": 0.3155893536121673, + "grad_norm": 0.09166959806201995, + "learning_rate": 7.607590404582887e-06, + "loss": 0.0017, + "step": 4897 + }, + { + "epoch": 0.31565379905909646, + "grad_norm": 0.011971259717290881, + "learning_rate": 7.60687432867884e-06, + "loss": 0.0, + "step": 4898 + }, + { + "epoch": 0.31571824450602565, + "grad_norm": 0.07688257728135989, + "learning_rate": 7.6061582527747945e-06, + "loss": 0.0001, + "step": 4899 + }, + { + "epoch": 0.31578268995295483, + "grad_norm": 0.000963422072308008, + "learning_rate": 7.605442176870749e-06, + "loss": 0.0, + "step": 4900 + }, + { + "epoch": 0.315847135399884, + "grad_norm": 0.04923102333675415, + "learning_rate": 7.604726100966703e-06, + "loss": 0.0001, + "step": 4901 + }, + { + "epoch": 0.3159115808468132, + "grad_norm": 0.017858915009544632, + "learning_rate": 7.604010025062657e-06, + "loss": 0.0, + "step": 4902 + }, + { + "epoch": 0.31597602629374233, + "grad_norm": 0.0074076590984602534, + "learning_rate": 7.603293949158611e-06, + "loss": 0.0, + "step": 4903 + }, + { + "epoch": 0.3160404717406715, + "grad_norm": 0.004138050993269019, + "learning_rate": 7.602577873254565e-06, + "loss": 0.0, + "step": 4904 + }, + { + "epoch": 0.3161049171876007, + "grad_norm": 0.007657744900865728, + "learning_rate": 7.601861797350519e-06, + "loss": 0.0001, + "step": 4905 + }, + { + "epoch": 0.3161693626345299, + "grad_norm": 0.015958886199231824, + "learning_rate": 7.601145721446474e-06, + "loss": 0.0001, + "step": 4906 + }, + { + "epoch": 0.31623380808145907, + "grad_norm": 0.11232840246170804, + "learning_rate": 7.600429645542427e-06, + "loss": 0.0003, + "step": 4907 + }, + { + "epoch": 0.3162982535283882, + "grad_norm": 0.0019847038300378613, + "learning_rate": 7.599713569638383e-06, + "loss": 0.0, + "step": 4908 + }, + { + "epoch": 0.3163626989753174, + "grad_norm": 0.015164377914120451, + "learning_rate": 7.598997493734337e-06, + "loss": 0.0, + "step": 4909 + }, + { + "epoch": 0.31642714442224656, + "grad_norm": 0.0005348704955189589, + "learning_rate": 7.598281417830291e-06, + "loss": 0.0, + "step": 4910 + }, + { + "epoch": 0.31649158986917575, + "grad_norm": 0.4584278384947784, + "learning_rate": 7.597565341926245e-06, + "loss": 0.0147, + "step": 4911 + }, + { + "epoch": 0.31655603531610493, + "grad_norm": 0.01919581211447712, + "learning_rate": 7.5968492660221995e-06, + "loss": 0.0, + "step": 4912 + }, + { + "epoch": 0.3166204807630341, + "grad_norm": 0.0255280418873319, + "learning_rate": 7.596133190118154e-06, + "loss": 0.0001, + "step": 4913 + }, + { + "epoch": 0.31668492620996325, + "grad_norm": 0.022365740209846784, + "learning_rate": 7.595417114214107e-06, + "loss": 0.0, + "step": 4914 + }, + { + "epoch": 0.31674937165689243, + "grad_norm": 0.0017737070032865822, + "learning_rate": 7.5947010383100616e-06, + "loss": 0.0, + "step": 4915 + }, + { + "epoch": 0.3168138171038216, + "grad_norm": 0.000988195283464662, + "learning_rate": 7.593984962406016e-06, + "loss": 0.0, + "step": 4916 + }, + { + "epoch": 0.3168782625507508, + "grad_norm": 0.11666120524551912, + "learning_rate": 7.59326888650197e-06, + "loss": 0.0002, + "step": 4917 + }, + { + "epoch": 0.31694270799768, + "grad_norm": 0.02640212358308717, + "learning_rate": 7.5925528105979244e-06, + "loss": 0.0001, + "step": 4918 + }, + { + "epoch": 0.3170071534446091, + "grad_norm": 0.0005211830684490821, + "learning_rate": 7.591836734693878e-06, + "loss": 0.0, + "step": 4919 + }, + { + "epoch": 0.3170715988915383, + "grad_norm": 0.007234303323305554, + "learning_rate": 7.591120658789832e-06, + "loss": 0.0, + "step": 4920 + }, + { + "epoch": 0.3171360443384675, + "grad_norm": 0.07723608651426664, + "learning_rate": 7.5904045828857865e-06, + "loss": 0.0002, + "step": 4921 + }, + { + "epoch": 0.31720048978539667, + "grad_norm": 0.0030668760381343594, + "learning_rate": 7.589688506981741e-06, + "loss": 0.0, + "step": 4922 + }, + { + "epoch": 0.31726493523232585, + "grad_norm": 0.08688448284729575, + "learning_rate": 7.588972431077694e-06, + "loss": 0.0002, + "step": 4923 + }, + { + "epoch": 0.31732938067925504, + "grad_norm": 0.06115100927977217, + "learning_rate": 7.5882563551736485e-06, + "loss": 0.0005, + "step": 4924 + }, + { + "epoch": 0.31739382612618416, + "grad_norm": 0.004299995558535509, + "learning_rate": 7.587540279269603e-06, + "loss": 0.0, + "step": 4925 + }, + { + "epoch": 0.31745827157311335, + "grad_norm": 0.010539844593121827, + "learning_rate": 7.586824203365557e-06, + "loss": 0.0, + "step": 4926 + }, + { + "epoch": 0.31752271702004253, + "grad_norm": 0.0030391662830343918, + "learning_rate": 7.5861081274615114e-06, + "loss": 0.0, + "step": 4927 + }, + { + "epoch": 0.3175871624669717, + "grad_norm": 0.03277884812642534, + "learning_rate": 7.585392051557465e-06, + "loss": 0.0001, + "step": 4928 + }, + { + "epoch": 0.3176516079139009, + "grad_norm": 0.03166498928316155, + "learning_rate": 7.584675975653419e-06, + "loss": 0.0001, + "step": 4929 + }, + { + "epoch": 0.31771605336083003, + "grad_norm": 0.005663952936868069, + "learning_rate": 7.5839598997493735e-06, + "loss": 0.0, + "step": 4930 + }, + { + "epoch": 0.3177804988077592, + "grad_norm": 0.016582148083116855, + "learning_rate": 7.583243823845329e-06, + "loss": 0.0, + "step": 4931 + }, + { + "epoch": 0.3178449442546884, + "grad_norm": 0.20244802858863004, + "learning_rate": 7.582527747941283e-06, + "loss": 0.0004, + "step": 4932 + }, + { + "epoch": 0.3179093897016176, + "grad_norm": 0.001806635185564558, + "learning_rate": 7.581811672037237e-06, + "loss": 0.0, + "step": 4933 + }, + { + "epoch": 0.31797383514854677, + "grad_norm": 1.1378611169584365, + "learning_rate": 7.5810955961331915e-06, + "loss": 0.0016, + "step": 4934 + }, + { + "epoch": 0.31803828059547595, + "grad_norm": 0.10410084035311608, + "learning_rate": 7.580379520229145e-06, + "loss": 0.0002, + "step": 4935 + }, + { + "epoch": 0.3181027260424051, + "grad_norm": 0.0009001930670387458, + "learning_rate": 7.579663444325099e-06, + "loss": 0.0, + "step": 4936 + }, + { + "epoch": 0.31816717148933427, + "grad_norm": 0.00022304137717573243, + "learning_rate": 7.578947368421054e-06, + "loss": 0.0, + "step": 4937 + }, + { + "epoch": 0.31823161693626345, + "grad_norm": 0.021512349972738248, + "learning_rate": 7.578231292517008e-06, + "loss": 0.0, + "step": 4938 + }, + { + "epoch": 0.31829606238319264, + "grad_norm": 0.10591603729833651, + "learning_rate": 7.577515216612961e-06, + "loss": 0.0002, + "step": 4939 + }, + { + "epoch": 0.3183605078301218, + "grad_norm": 0.013542071059994441, + "learning_rate": 7.576799140708916e-06, + "loss": 0.0001, + "step": 4940 + }, + { + "epoch": 0.318424953277051, + "grad_norm": 0.00536616140223203, + "learning_rate": 7.57608306480487e-06, + "loss": 0.0, + "step": 4941 + }, + { + "epoch": 0.31848939872398013, + "grad_norm": 0.01473934950136891, + "learning_rate": 7.575366988900824e-06, + "loss": 0.0001, + "step": 4942 + }, + { + "epoch": 0.3185538441709093, + "grad_norm": 0.02923624998055832, + "learning_rate": 7.5746509129967785e-06, + "loss": 0.0001, + "step": 4943 + }, + { + "epoch": 0.3186182896178385, + "grad_norm": 0.00951459181256414, + "learning_rate": 7.573934837092732e-06, + "loss": 0.0001, + "step": 4944 + }, + { + "epoch": 0.3186827350647677, + "grad_norm": 0.002520293197353339, + "learning_rate": 7.573218761188686e-06, + "loss": 0.0, + "step": 4945 + }, + { + "epoch": 0.31874718051169687, + "grad_norm": 0.0006642405582552634, + "learning_rate": 7.5725026852846406e-06, + "loss": 0.0, + "step": 4946 + }, + { + "epoch": 0.318811625958626, + "grad_norm": 0.0023286591485846588, + "learning_rate": 7.571786609380595e-06, + "loss": 0.0, + "step": 4947 + }, + { + "epoch": 0.3188760714055552, + "grad_norm": 0.010581924490633903, + "learning_rate": 7.571070533476549e-06, + "loss": 0.0001, + "step": 4948 + }, + { + "epoch": 0.31894051685248437, + "grad_norm": 0.5570829541365085, + "learning_rate": 7.570354457572503e-06, + "loss": 0.0034, + "step": 4949 + }, + { + "epoch": 0.31900496229941355, + "grad_norm": 0.01401707071364209, + "learning_rate": 7.569638381668457e-06, + "loss": 0.0, + "step": 4950 + }, + { + "epoch": 0.31906940774634274, + "grad_norm": 0.0011558668939177993, + "learning_rate": 7.568922305764411e-06, + "loss": 0.0, + "step": 4951 + }, + { + "epoch": 0.3191338531932719, + "grad_norm": 0.2016895448269546, + "learning_rate": 7.5682062298603655e-06, + "loss": 0.0011, + "step": 4952 + }, + { + "epoch": 0.31919829864020105, + "grad_norm": 1.4378902071553488, + "learning_rate": 7.56749015395632e-06, + "loss": 0.0022, + "step": 4953 + }, + { + "epoch": 0.31926274408713023, + "grad_norm": 0.002368113513227609, + "learning_rate": 7.566774078052275e-06, + "loss": 0.0, + "step": 4954 + }, + { + "epoch": 0.3193271895340594, + "grad_norm": 0.0015807385536410412, + "learning_rate": 7.566058002148228e-06, + "loss": 0.0, + "step": 4955 + }, + { + "epoch": 0.3193916349809886, + "grad_norm": 0.023490105392632896, + "learning_rate": 7.565341926244183e-06, + "loss": 0.0002, + "step": 4956 + }, + { + "epoch": 0.3194560804279178, + "grad_norm": 0.003028390952271756, + "learning_rate": 7.564625850340137e-06, + "loss": 0.0, + "step": 4957 + }, + { + "epoch": 0.3195205258748469, + "grad_norm": 0.02639448995249658, + "learning_rate": 7.563909774436091e-06, + "loss": 0.0001, + "step": 4958 + }, + { + "epoch": 0.3195849713217761, + "grad_norm": 0.0002196733853071634, + "learning_rate": 7.563193698532046e-06, + "loss": 0.0, + "step": 4959 + }, + { + "epoch": 0.3196494167687053, + "grad_norm": 0.00525486369614464, + "learning_rate": 7.562477622627999e-06, + "loss": 0.0, + "step": 4960 + }, + { + "epoch": 0.31971386221563447, + "grad_norm": 6.02780353327736, + "learning_rate": 7.561761546723953e-06, + "loss": 0.0112, + "step": 4961 + }, + { + "epoch": 0.31977830766256365, + "grad_norm": 0.027587832602718406, + "learning_rate": 7.561045470819908e-06, + "loss": 0.0002, + "step": 4962 + }, + { + "epoch": 0.31984275310949284, + "grad_norm": 0.0010843918612778584, + "learning_rate": 7.560329394915862e-06, + "loss": 0.0, + "step": 4963 + }, + { + "epoch": 0.31990719855642197, + "grad_norm": 0.019770665104542517, + "learning_rate": 7.559613319011816e-06, + "loss": 0.0001, + "step": 4964 + }, + { + "epoch": 0.31997164400335115, + "grad_norm": 0.0004921501486983964, + "learning_rate": 7.55889724310777e-06, + "loss": 0.0, + "step": 4965 + }, + { + "epoch": 0.32003608945028034, + "grad_norm": 0.06938545379726922, + "learning_rate": 7.558181167203724e-06, + "loss": 0.0001, + "step": 4966 + }, + { + "epoch": 0.3201005348972095, + "grad_norm": 0.004667437215269022, + "learning_rate": 7.557465091299678e-06, + "loss": 0.0, + "step": 4967 + }, + { + "epoch": 0.3201649803441387, + "grad_norm": 0.014288407805833092, + "learning_rate": 7.556749015395633e-06, + "loss": 0.0001, + "step": 4968 + }, + { + "epoch": 0.32022942579106783, + "grad_norm": 0.008945516329945936, + "learning_rate": 7.556032939491587e-06, + "loss": 0.0001, + "step": 4969 + }, + { + "epoch": 0.320293871237997, + "grad_norm": 0.00023568888504286596, + "learning_rate": 7.55531686358754e-06, + "loss": 0.0, + "step": 4970 + }, + { + "epoch": 0.3203583166849262, + "grad_norm": 0.01571903394353528, + "learning_rate": 7.554600787683495e-06, + "loss": 0.0001, + "step": 4971 + }, + { + "epoch": 0.3204227621318554, + "grad_norm": 0.00460776685039601, + "learning_rate": 7.553884711779449e-06, + "loss": 0.0, + "step": 4972 + }, + { + "epoch": 0.32048720757878457, + "grad_norm": 0.04010418068407689, + "learning_rate": 7.553168635875403e-06, + "loss": 0.0001, + "step": 4973 + }, + { + "epoch": 0.32055165302571376, + "grad_norm": 0.00035000815771177833, + "learning_rate": 7.5524525599713576e-06, + "loss": 0.0, + "step": 4974 + }, + { + "epoch": 0.3206160984726429, + "grad_norm": 0.0024140885061416342, + "learning_rate": 7.551736484067311e-06, + "loss": 0.0, + "step": 4975 + }, + { + "epoch": 0.32068054391957207, + "grad_norm": 0.007612368659569762, + "learning_rate": 7.551020408163265e-06, + "loss": 0.0, + "step": 4976 + }, + { + "epoch": 0.32074498936650125, + "grad_norm": 0.001607101033468048, + "learning_rate": 7.55030433225922e-06, + "loss": 0.0, + "step": 4977 + }, + { + "epoch": 0.32080943481343044, + "grad_norm": 0.0698845675905335, + "learning_rate": 7.549588256355175e-06, + "loss": 0.0001, + "step": 4978 + }, + { + "epoch": 0.3208738802603596, + "grad_norm": 0.00011869231645262834, + "learning_rate": 7.548872180451129e-06, + "loss": 0.0, + "step": 4979 + }, + { + "epoch": 0.3209383257072888, + "grad_norm": 0.002316536849549659, + "learning_rate": 7.548156104547083e-06, + "loss": 0.0, + "step": 4980 + }, + { + "epoch": 0.32100277115421794, + "grad_norm": 0.2341897152622086, + "learning_rate": 7.547440028643037e-06, + "loss": 0.0015, + "step": 4981 + }, + { + "epoch": 0.3210672166011471, + "grad_norm": 0.28748342580289754, + "learning_rate": 7.546723952738991e-06, + "loss": 0.0002, + "step": 4982 + }, + { + "epoch": 0.3211316620480763, + "grad_norm": 0.0005328684311772538, + "learning_rate": 7.546007876834945e-06, + "loss": 0.0, + "step": 4983 + }, + { + "epoch": 0.3211961074950055, + "grad_norm": 0.02484436391275856, + "learning_rate": 7.5452918009309e-06, + "loss": 0.0001, + "step": 4984 + }, + { + "epoch": 0.3212605529419347, + "grad_norm": 0.008556152524764708, + "learning_rate": 7.544575725026854e-06, + "loss": 0.0001, + "step": 4985 + }, + { + "epoch": 0.3213249983888638, + "grad_norm": 0.038374263358831415, + "learning_rate": 7.5438596491228074e-06, + "loss": 0.0003, + "step": 4986 + }, + { + "epoch": 0.321389443835793, + "grad_norm": 0.0023323979630988576, + "learning_rate": 7.543143573218762e-06, + "loss": 0.0, + "step": 4987 + }, + { + "epoch": 0.32145388928272217, + "grad_norm": 0.0018241033879360137, + "learning_rate": 7.542427497314716e-06, + "loss": 0.0, + "step": 4988 + }, + { + "epoch": 0.32151833472965136, + "grad_norm": 0.014841046251769881, + "learning_rate": 7.54171142141067e-06, + "loss": 0.0001, + "step": 4989 + }, + { + "epoch": 0.32158278017658054, + "grad_norm": 0.00044722802438275366, + "learning_rate": 7.540995345506625e-06, + "loss": 0.0, + "step": 4990 + }, + { + "epoch": 0.3216472256235097, + "grad_norm": 0.012483224214803168, + "learning_rate": 7.540279269602578e-06, + "loss": 0.0, + "step": 4991 + }, + { + "epoch": 0.32171167107043885, + "grad_norm": 0.0073138178050027175, + "learning_rate": 7.539563193698532e-06, + "loss": 0.0, + "step": 4992 + }, + { + "epoch": 0.32177611651736804, + "grad_norm": 0.1553142062233551, + "learning_rate": 7.538847117794487e-06, + "loss": 0.0002, + "step": 4993 + }, + { + "epoch": 0.3218405619642972, + "grad_norm": 0.0007977184599504469, + "learning_rate": 7.538131041890441e-06, + "loss": 0.0, + "step": 4994 + }, + { + "epoch": 0.3219050074112264, + "grad_norm": 0.0020614767277034192, + "learning_rate": 7.537414965986395e-06, + "loss": 0.0, + "step": 4995 + }, + { + "epoch": 0.3219694528581556, + "grad_norm": 0.00481232716002087, + "learning_rate": 7.536698890082349e-06, + "loss": 0.0, + "step": 4996 + }, + { + "epoch": 0.3220338983050847, + "grad_norm": 0.004769241063000902, + "learning_rate": 7.535982814178303e-06, + "loss": 0.0001, + "step": 4997 + }, + { + "epoch": 0.3220983437520139, + "grad_norm": 0.000924509781990204, + "learning_rate": 7.535266738274257e-06, + "loss": 0.0, + "step": 4998 + }, + { + "epoch": 0.3221627891989431, + "grad_norm": 0.002373089335570934, + "learning_rate": 7.534550662370212e-06, + "loss": 0.0, + "step": 4999 + }, + { + "epoch": 0.3222272346458723, + "grad_norm": 2.5394377915254367, + "learning_rate": 7.533834586466165e-06, + "loss": 0.0116, + "step": 5000 + }, + { + "epoch": 0.32229168009280146, + "grad_norm": 2.836771188312746, + "learning_rate": 7.533118510562121e-06, + "loss": 0.0571, + "step": 5001 + }, + { + "epoch": 0.32235612553973064, + "grad_norm": 0.0867760639624333, + "learning_rate": 7.5324024346580745e-06, + "loss": 0.0004, + "step": 5002 + }, + { + "epoch": 0.32242057098665977, + "grad_norm": 0.009055324801599299, + "learning_rate": 7.531686358754029e-06, + "loss": 0.0001, + "step": 5003 + }, + { + "epoch": 0.32248501643358896, + "grad_norm": 0.0007850045675465759, + "learning_rate": 7.530970282849983e-06, + "loss": 0.0, + "step": 5004 + }, + { + "epoch": 0.32254946188051814, + "grad_norm": 0.0054402043559637694, + "learning_rate": 7.530254206945937e-06, + "loss": 0.0001, + "step": 5005 + }, + { + "epoch": 0.3226139073274473, + "grad_norm": 0.0003870119441539209, + "learning_rate": 7.529538131041892e-06, + "loss": 0.0, + "step": 5006 + }, + { + "epoch": 0.3226783527743765, + "grad_norm": 0.002394564450146463, + "learning_rate": 7.528822055137845e-06, + "loss": 0.0, + "step": 5007 + }, + { + "epoch": 0.32274279822130564, + "grad_norm": 0.0009554164590431097, + "learning_rate": 7.5281059792337995e-06, + "loss": 0.0, + "step": 5008 + }, + { + "epoch": 0.3228072436682348, + "grad_norm": 0.018775061036906286, + "learning_rate": 7.527389903329754e-06, + "loss": 0.0001, + "step": 5009 + }, + { + "epoch": 0.322871689115164, + "grad_norm": 0.0004067308282074071, + "learning_rate": 7.526673827425708e-06, + "loss": 0.0, + "step": 5010 + }, + { + "epoch": 0.3229361345620932, + "grad_norm": 0.0022833677798131395, + "learning_rate": 7.525957751521662e-06, + "loss": 0.0, + "step": 5011 + }, + { + "epoch": 0.3230005800090224, + "grad_norm": 0.0034419373129303036, + "learning_rate": 7.525241675617616e-06, + "loss": 0.0, + "step": 5012 + }, + { + "epoch": 0.32306502545595156, + "grad_norm": 0.0013269891500375182, + "learning_rate": 7.52452559971357e-06, + "loss": 0.0, + "step": 5013 + }, + { + "epoch": 0.3231294709028807, + "grad_norm": 0.005102314456417867, + "learning_rate": 7.523809523809524e-06, + "loss": 0.0, + "step": 5014 + }, + { + "epoch": 0.3231939163498099, + "grad_norm": 0.0011584206982300578, + "learning_rate": 7.523093447905479e-06, + "loss": 0.0, + "step": 5015 + }, + { + "epoch": 0.32325836179673906, + "grad_norm": 0.004096714247036613, + "learning_rate": 7.522377372001432e-06, + "loss": 0.0, + "step": 5016 + }, + { + "epoch": 0.32332280724366824, + "grad_norm": 0.02179641642182101, + "learning_rate": 7.5216612960973865e-06, + "loss": 0.0001, + "step": 5017 + }, + { + "epoch": 0.3233872526905974, + "grad_norm": 0.0008365472971686693, + "learning_rate": 7.520945220193341e-06, + "loss": 0.0, + "step": 5018 + }, + { + "epoch": 0.3234516981375266, + "grad_norm": 0.06371238259548284, + "learning_rate": 7.520229144289295e-06, + "loss": 0.0001, + "step": 5019 + }, + { + "epoch": 0.32351614358445574, + "grad_norm": 0.009985623889348058, + "learning_rate": 7.519513068385249e-06, + "loss": 0.0, + "step": 5020 + }, + { + "epoch": 0.3235805890313849, + "grad_norm": 0.007412286860036669, + "learning_rate": 7.518796992481203e-06, + "loss": 0.0001, + "step": 5021 + }, + { + "epoch": 0.3236450344783141, + "grad_norm": 1.3540490710721447, + "learning_rate": 7.518080916577157e-06, + "loss": 0.0054, + "step": 5022 + }, + { + "epoch": 0.3237094799252433, + "grad_norm": 0.000858304434348953, + "learning_rate": 7.517364840673111e-06, + "loss": 0.0, + "step": 5023 + }, + { + "epoch": 0.3237739253721725, + "grad_norm": 0.0041181096965707915, + "learning_rate": 7.5166487647690666e-06, + "loss": 0.0, + "step": 5024 + }, + { + "epoch": 0.3238383708191016, + "grad_norm": 0.0003157757437743913, + "learning_rate": 7.515932688865021e-06, + "loss": 0.0, + "step": 5025 + }, + { + "epoch": 0.3239028162660308, + "grad_norm": 0.06434296094244776, + "learning_rate": 7.515216612960975e-06, + "loss": 0.0003, + "step": 5026 + }, + { + "epoch": 0.32396726171296, + "grad_norm": 0.3501482917633669, + "learning_rate": 7.5145005370569295e-06, + "loss": 0.0012, + "step": 5027 + }, + { + "epoch": 0.32403170715988916, + "grad_norm": 0.003286186092908012, + "learning_rate": 7.513784461152883e-06, + "loss": 0.0, + "step": 5028 + }, + { + "epoch": 0.32409615260681834, + "grad_norm": 0.006841491514237018, + "learning_rate": 7.513068385248837e-06, + "loss": 0.0, + "step": 5029 + }, + { + "epoch": 0.32416059805374753, + "grad_norm": 0.0026199554590787223, + "learning_rate": 7.5123523093447915e-06, + "loss": 0.0, + "step": 5030 + }, + { + "epoch": 0.32422504350067666, + "grad_norm": 0.002010553894782923, + "learning_rate": 7.511636233440746e-06, + "loss": 0.0, + "step": 5031 + }, + { + "epoch": 0.32428948894760584, + "grad_norm": 0.01111582230251598, + "learning_rate": 7.510920157536699e-06, + "loss": 0.0, + "step": 5032 + }, + { + "epoch": 0.324353934394535, + "grad_norm": 0.012234168882052655, + "learning_rate": 7.5102040816326536e-06, + "loss": 0.0, + "step": 5033 + }, + { + "epoch": 0.3244183798414642, + "grad_norm": 0.24919705042346788, + "learning_rate": 7.509488005728608e-06, + "loss": 0.0018, + "step": 5034 + }, + { + "epoch": 0.3244828252883934, + "grad_norm": 0.0015482715286001473, + "learning_rate": 7.508771929824562e-06, + "loss": 0.0, + "step": 5035 + }, + { + "epoch": 0.3245472707353225, + "grad_norm": 0.004696018029830818, + "learning_rate": 7.5080558539205164e-06, + "loss": 0.0, + "step": 5036 + }, + { + "epoch": 0.3246117161822517, + "grad_norm": 0.014119543213684357, + "learning_rate": 7.50733977801647e-06, + "loss": 0.0, + "step": 5037 + }, + { + "epoch": 0.3246761616291809, + "grad_norm": 0.3867381890545684, + "learning_rate": 7.506623702112424e-06, + "loss": 0.0025, + "step": 5038 + }, + { + "epoch": 0.3247406070761101, + "grad_norm": 0.00042987711144992877, + "learning_rate": 7.5059076262083785e-06, + "loss": 0.0, + "step": 5039 + }, + { + "epoch": 0.32480505252303926, + "grad_norm": 0.2663883013006666, + "learning_rate": 7.505191550304333e-06, + "loss": 0.0002, + "step": 5040 + }, + { + "epoch": 0.32486949796996845, + "grad_norm": 0.0004538031013607848, + "learning_rate": 7.504475474400287e-06, + "loss": 0.0, + "step": 5041 + }, + { + "epoch": 0.3249339434168976, + "grad_norm": 0.001688240594975126, + "learning_rate": 7.5037593984962405e-06, + "loss": 0.0, + "step": 5042 + }, + { + "epoch": 0.32499838886382676, + "grad_norm": 0.0006231698876898269, + "learning_rate": 7.503043322592195e-06, + "loss": 0.0, + "step": 5043 + }, + { + "epoch": 0.32506283431075594, + "grad_norm": 0.001326850866979831, + "learning_rate": 7.502327246688149e-06, + "loss": 0.0, + "step": 5044 + }, + { + "epoch": 0.32512727975768513, + "grad_norm": 0.09123642767439222, + "learning_rate": 7.5016111707841034e-06, + "loss": 0.0003, + "step": 5045 + }, + { + "epoch": 0.3251917252046143, + "grad_norm": 0.00031140489873245536, + "learning_rate": 7.500895094880058e-06, + "loss": 0.0, + "step": 5046 + }, + { + "epoch": 0.32525617065154344, + "grad_norm": 0.010423918369235648, + "learning_rate": 7.500179018976011e-06, + "loss": 0.0001, + "step": 5047 + }, + { + "epoch": 0.3253206160984726, + "grad_norm": 0.027732214205477675, + "learning_rate": 7.499462943071967e-06, + "loss": 0.0001, + "step": 5048 + }, + { + "epoch": 0.3253850615454018, + "grad_norm": 0.0008578557245992876, + "learning_rate": 7.498746867167921e-06, + "loss": 0.0, + "step": 5049 + }, + { + "epoch": 0.325449506992331, + "grad_norm": 0.015105272515006055, + "learning_rate": 7.498030791263875e-06, + "loss": 0.0001, + "step": 5050 + }, + { + "epoch": 0.3255139524392602, + "grad_norm": 0.3200695232774652, + "learning_rate": 7.497314715359829e-06, + "loss": 0.0018, + "step": 5051 + }, + { + "epoch": 0.32557839788618936, + "grad_norm": 0.06262675857097338, + "learning_rate": 7.4965986394557835e-06, + "loss": 0.0001, + "step": 5052 + }, + { + "epoch": 0.3256428433331185, + "grad_norm": 0.008279700894868208, + "learning_rate": 7.495882563551737e-06, + "loss": 0.0, + "step": 5053 + }, + { + "epoch": 0.3257072887800477, + "grad_norm": 0.01809988351456632, + "learning_rate": 7.495166487647691e-06, + "loss": 0.0001, + "step": 5054 + }, + { + "epoch": 0.32577173422697686, + "grad_norm": 0.00032587838609959163, + "learning_rate": 7.494450411743646e-06, + "loss": 0.0, + "step": 5055 + }, + { + "epoch": 0.32583617967390605, + "grad_norm": 0.16255369537207687, + "learning_rate": 7.4937343358396e-06, + "loss": 0.0021, + "step": 5056 + }, + { + "epoch": 0.32590062512083523, + "grad_norm": 0.008922854562315093, + "learning_rate": 7.493018259935554e-06, + "loss": 0.0, + "step": 5057 + }, + { + "epoch": 0.3259650705677644, + "grad_norm": 0.002154537714572031, + "learning_rate": 7.492302184031508e-06, + "loss": 0.0, + "step": 5058 + }, + { + "epoch": 0.32602951601469354, + "grad_norm": 0.0004839898917475037, + "learning_rate": 7.491586108127462e-06, + "loss": 0.0, + "step": 5059 + }, + { + "epoch": 0.32609396146162273, + "grad_norm": 0.0004548610049905596, + "learning_rate": 7.490870032223416e-06, + "loss": 0.0, + "step": 5060 + }, + { + "epoch": 0.3261584069085519, + "grad_norm": 0.44944039557347387, + "learning_rate": 7.4901539563193705e-06, + "loss": 0.0026, + "step": 5061 + }, + { + "epoch": 0.3262228523554811, + "grad_norm": 0.00016835646080685742, + "learning_rate": 7.489437880415325e-06, + "loss": 0.0, + "step": 5062 + }, + { + "epoch": 0.3262872978024103, + "grad_norm": 0.29209044726483824, + "learning_rate": 7.488721804511278e-06, + "loss": 0.0009, + "step": 5063 + }, + { + "epoch": 0.3263517432493394, + "grad_norm": 0.0001249842603296567, + "learning_rate": 7.4880057286072326e-06, + "loss": 0.0, + "step": 5064 + }, + { + "epoch": 0.3264161886962686, + "grad_norm": 0.011162206390915428, + "learning_rate": 7.487289652703187e-06, + "loss": 0.0, + "step": 5065 + }, + { + "epoch": 0.3264806341431978, + "grad_norm": 0.005577277546888436, + "learning_rate": 7.486573576799141e-06, + "loss": 0.0001, + "step": 5066 + }, + { + "epoch": 0.32654507959012696, + "grad_norm": 0.008560459027039776, + "learning_rate": 7.4858575008950955e-06, + "loss": 0.0, + "step": 5067 + }, + { + "epoch": 0.32660952503705615, + "grad_norm": 0.22203547121366796, + "learning_rate": 7.485141424991049e-06, + "loss": 0.0013, + "step": 5068 + }, + { + "epoch": 0.32667397048398533, + "grad_norm": 0.1355473815182943, + "learning_rate": 7.484425349087003e-06, + "loss": 0.0019, + "step": 5069 + }, + { + "epoch": 0.32673841593091446, + "grad_norm": 0.0003878992249302429, + "learning_rate": 7.4837092731829575e-06, + "loss": 0.0, + "step": 5070 + }, + { + "epoch": 0.32680286137784365, + "grad_norm": 0.0011205206516367939, + "learning_rate": 7.482993197278913e-06, + "loss": 0.0, + "step": 5071 + }, + { + "epoch": 0.32686730682477283, + "grad_norm": 0.00033044966655028444, + "learning_rate": 7.482277121374867e-06, + "loss": 0.0, + "step": 5072 + }, + { + "epoch": 0.326931752271702, + "grad_norm": 0.02445801950067175, + "learning_rate": 7.481561045470821e-06, + "loss": 0.0001, + "step": 5073 + }, + { + "epoch": 0.3269961977186312, + "grad_norm": 0.012846676525802831, + "learning_rate": 7.480844969566775e-06, + "loss": 0.0, + "step": 5074 + }, + { + "epoch": 0.3270606431655603, + "grad_norm": 0.006191962266799229, + "learning_rate": 7.480128893662729e-06, + "loss": 0.0, + "step": 5075 + }, + { + "epoch": 0.3271250886124895, + "grad_norm": 0.0037964312015699485, + "learning_rate": 7.479412817758683e-06, + "loss": 0.0, + "step": 5076 + }, + { + "epoch": 0.3271895340594187, + "grad_norm": 0.0003184726203052082, + "learning_rate": 7.478696741854638e-06, + "loss": 0.0, + "step": 5077 + }, + { + "epoch": 0.3272539795063479, + "grad_norm": 0.47123552026941196, + "learning_rate": 7.477980665950592e-06, + "loss": 0.004, + "step": 5078 + }, + { + "epoch": 0.32731842495327707, + "grad_norm": 0.35992211698106236, + "learning_rate": 7.477264590046545e-06, + "loss": 0.0007, + "step": 5079 + }, + { + "epoch": 0.32738287040020625, + "grad_norm": 0.18303458304005787, + "learning_rate": 7.4765485141425e-06, + "loss": 0.0002, + "step": 5080 + }, + { + "epoch": 0.3274473158471354, + "grad_norm": 0.005515852076379496, + "learning_rate": 7.475832438238454e-06, + "loss": 0.0, + "step": 5081 + }, + { + "epoch": 0.32751176129406456, + "grad_norm": 0.2568020526304833, + "learning_rate": 7.475116362334408e-06, + "loss": 0.0008, + "step": 5082 + }, + { + "epoch": 0.32757620674099375, + "grad_norm": 0.003086437234108956, + "learning_rate": 7.4744002864303626e-06, + "loss": 0.0, + "step": 5083 + }, + { + "epoch": 0.32764065218792293, + "grad_norm": 1.3171834943355232, + "learning_rate": 7.473684210526316e-06, + "loss": 0.0013, + "step": 5084 + }, + { + "epoch": 0.3277050976348521, + "grad_norm": 0.033313737024242854, + "learning_rate": 7.47296813462227e-06, + "loss": 0.0003, + "step": 5085 + }, + { + "epoch": 0.32776954308178124, + "grad_norm": 0.7567478842426889, + "learning_rate": 7.472252058718225e-06, + "loss": 0.0061, + "step": 5086 + }, + { + "epoch": 0.32783398852871043, + "grad_norm": 0.0049614953742675985, + "learning_rate": 7.471535982814179e-06, + "loss": 0.0, + "step": 5087 + }, + { + "epoch": 0.3278984339756396, + "grad_norm": 0.025865455721768113, + "learning_rate": 7.470819906910133e-06, + "loss": 0.0001, + "step": 5088 + }, + { + "epoch": 0.3279628794225688, + "grad_norm": 0.06708022955203878, + "learning_rate": 7.470103831006087e-06, + "loss": 0.0005, + "step": 5089 + }, + { + "epoch": 0.328027324869498, + "grad_norm": 0.11167039041291603, + "learning_rate": 7.469387755102041e-06, + "loss": 0.001, + "step": 5090 + }, + { + "epoch": 0.32809177031642717, + "grad_norm": 0.07170793525024977, + "learning_rate": 7.468671679197995e-06, + "loss": 0.0001, + "step": 5091 + }, + { + "epoch": 0.3281562157633563, + "grad_norm": 0.7082797245233898, + "learning_rate": 7.4679556032939496e-06, + "loss": 0.0016, + "step": 5092 + }, + { + "epoch": 0.3282206612102855, + "grad_norm": 0.006456634595156348, + "learning_rate": 7.467239527389904e-06, + "loss": 0.0, + "step": 5093 + }, + { + "epoch": 0.32828510665721466, + "grad_norm": 0.01645892325423116, + "learning_rate": 7.466523451485859e-06, + "loss": 0.0003, + "step": 5094 + }, + { + "epoch": 0.32834955210414385, + "grad_norm": 0.7233672896851489, + "learning_rate": 7.4658073755818124e-06, + "loss": 0.0031, + "step": 5095 + }, + { + "epoch": 0.32841399755107303, + "grad_norm": 0.1790257063440346, + "learning_rate": 7.465091299677767e-06, + "loss": 0.0004, + "step": 5096 + }, + { + "epoch": 0.3284784429980022, + "grad_norm": 0.004959311847834531, + "learning_rate": 7.464375223773721e-06, + "loss": 0.0, + "step": 5097 + }, + { + "epoch": 0.32854288844493135, + "grad_norm": 0.33592448098438615, + "learning_rate": 7.463659147869675e-06, + "loss": 0.0036, + "step": 5098 + }, + { + "epoch": 0.32860733389186053, + "grad_norm": 0.006044847333106452, + "learning_rate": 7.46294307196563e-06, + "loss": 0.0, + "step": 5099 + }, + { + "epoch": 0.3286717793387897, + "grad_norm": 0.015346268232809247, + "learning_rate": 7.462226996061583e-06, + "loss": 0.0, + "step": 5100 + }, + { + "epoch": 0.3287362247857189, + "grad_norm": 0.03213329618545794, + "learning_rate": 7.461510920157537e-06, + "loss": 0.0, + "step": 5101 + }, + { + "epoch": 0.3288006702326481, + "grad_norm": 0.0006729679357930884, + "learning_rate": 7.460794844253492e-06, + "loss": 0.0, + "step": 5102 + }, + { + "epoch": 0.3288651156795772, + "grad_norm": 0.3873201952901252, + "learning_rate": 7.460078768349446e-06, + "loss": 0.0025, + "step": 5103 + }, + { + "epoch": 0.3289295611265064, + "grad_norm": 0.0012658050799610712, + "learning_rate": 7.4593626924454e-06, + "loss": 0.0, + "step": 5104 + }, + { + "epoch": 0.3289940065734356, + "grad_norm": 0.1685233110152655, + "learning_rate": 7.458646616541354e-06, + "loss": 0.0004, + "step": 5105 + }, + { + "epoch": 0.32905845202036477, + "grad_norm": 0.003145091526784752, + "learning_rate": 7.457930540637308e-06, + "loss": 0.0, + "step": 5106 + }, + { + "epoch": 0.32912289746729395, + "grad_norm": 0.006677561097412213, + "learning_rate": 7.457214464733262e-06, + "loss": 0.0, + "step": 5107 + }, + { + "epoch": 0.32918734291422314, + "grad_norm": 0.008704954859180948, + "learning_rate": 7.456498388829217e-06, + "loss": 0.0, + "step": 5108 + }, + { + "epoch": 0.32925178836115226, + "grad_norm": 0.020684504992077225, + "learning_rate": 7.455782312925171e-06, + "loss": 0.0, + "step": 5109 + }, + { + "epoch": 0.32931623380808145, + "grad_norm": 0.017915273181646683, + "learning_rate": 7.455066237021124e-06, + "loss": 0.0001, + "step": 5110 + }, + { + "epoch": 0.32938067925501063, + "grad_norm": 0.0006413246172499555, + "learning_rate": 7.454350161117079e-06, + "loss": 0.0, + "step": 5111 + }, + { + "epoch": 0.3294451247019398, + "grad_norm": 0.016389241157989606, + "learning_rate": 7.453634085213033e-06, + "loss": 0.0, + "step": 5112 + }, + { + "epoch": 0.329509570148869, + "grad_norm": 0.04613150357756326, + "learning_rate": 7.452918009308987e-06, + "loss": 0.0002, + "step": 5113 + }, + { + "epoch": 0.32957401559579813, + "grad_norm": 0.006047588655278598, + "learning_rate": 7.452201933404941e-06, + "loss": 0.0, + "step": 5114 + }, + { + "epoch": 0.3296384610427273, + "grad_norm": 0.11379492971456501, + "learning_rate": 7.451485857500895e-06, + "loss": 0.0002, + "step": 5115 + }, + { + "epoch": 0.3297029064896565, + "grad_norm": 0.18467343189665436, + "learning_rate": 7.450769781596849e-06, + "loss": 0.0017, + "step": 5116 + }, + { + "epoch": 0.3297673519365857, + "grad_norm": 0.00666060152594704, + "learning_rate": 7.450053705692804e-06, + "loss": 0.0, + "step": 5117 + }, + { + "epoch": 0.32983179738351487, + "grad_norm": 0.15716715098903433, + "learning_rate": 7.449337629788759e-06, + "loss": 0.0002, + "step": 5118 + }, + { + "epoch": 0.32989624283044405, + "grad_norm": 0.03158307087649183, + "learning_rate": 7.448621553884713e-06, + "loss": 0.0001, + "step": 5119 + }, + { + "epoch": 0.3299606882773732, + "grad_norm": 6.076223041511504, + "learning_rate": 7.447905477980667e-06, + "loss": 0.0233, + "step": 5120 + }, + { + "epoch": 0.33002513372430237, + "grad_norm": 0.04275050059957507, + "learning_rate": 7.447189402076621e-06, + "loss": 0.0001, + "step": 5121 + }, + { + "epoch": 0.33008957917123155, + "grad_norm": 0.017601381917265038, + "learning_rate": 7.446473326172575e-06, + "loss": 0.0001, + "step": 5122 + }, + { + "epoch": 0.33015402461816074, + "grad_norm": 0.0008449752226208954, + "learning_rate": 7.445757250268529e-06, + "loss": 0.0, + "step": 5123 + }, + { + "epoch": 0.3302184700650899, + "grad_norm": 0.1010258416027035, + "learning_rate": 7.445041174364484e-06, + "loss": 0.0014, + "step": 5124 + }, + { + "epoch": 0.33028291551201905, + "grad_norm": 0.0027437871382046137, + "learning_rate": 7.444325098460438e-06, + "loss": 0.0, + "step": 5125 + }, + { + "epoch": 0.33034736095894823, + "grad_norm": 0.7007602556671023, + "learning_rate": 7.4436090225563915e-06, + "loss": 0.0044, + "step": 5126 + }, + { + "epoch": 0.3304118064058774, + "grad_norm": 0.06670512931405594, + "learning_rate": 7.442892946652346e-06, + "loss": 0.0002, + "step": 5127 + }, + { + "epoch": 0.3304762518528066, + "grad_norm": 0.00035463289021841213, + "learning_rate": 7.4421768707483e-06, + "loss": 0.0, + "step": 5128 + }, + { + "epoch": 0.3305406972997358, + "grad_norm": 0.011838379957474788, + "learning_rate": 7.441460794844254e-06, + "loss": 0.0, + "step": 5129 + }, + { + "epoch": 0.33060514274666497, + "grad_norm": 0.10076242075093453, + "learning_rate": 7.440744718940208e-06, + "loss": 0.0001, + "step": 5130 + }, + { + "epoch": 0.3306695881935941, + "grad_norm": 0.03547042955610553, + "learning_rate": 7.440028643036162e-06, + "loss": 0.0, + "step": 5131 + }, + { + "epoch": 0.3307340336405233, + "grad_norm": 0.02167577114795024, + "learning_rate": 7.439312567132116e-06, + "loss": 0.0, + "step": 5132 + }, + { + "epoch": 0.33079847908745247, + "grad_norm": 0.17646025275384114, + "learning_rate": 7.438596491228071e-06, + "loss": 0.0005, + "step": 5133 + }, + { + "epoch": 0.33086292453438165, + "grad_norm": 0.2529165492824762, + "learning_rate": 7.437880415324025e-06, + "loss": 0.0008, + "step": 5134 + }, + { + "epoch": 0.33092736998131084, + "grad_norm": 0.020512357560091968, + "learning_rate": 7.4371643394199785e-06, + "loss": 0.0001, + "step": 5135 + }, + { + "epoch": 0.33099181542824, + "grad_norm": 0.48623961290478873, + "learning_rate": 7.436448263515933e-06, + "loss": 0.0024, + "step": 5136 + }, + { + "epoch": 0.33105626087516915, + "grad_norm": 0.011717598828756112, + "learning_rate": 7.435732187611887e-06, + "loss": 0.0, + "step": 5137 + }, + { + "epoch": 0.33112070632209833, + "grad_norm": 0.10853024365035156, + "learning_rate": 7.435016111707841e-06, + "loss": 0.0016, + "step": 5138 + }, + { + "epoch": 0.3311851517690275, + "grad_norm": 0.011491480938313548, + "learning_rate": 7.434300035803796e-06, + "loss": 0.0001, + "step": 5139 + }, + { + "epoch": 0.3312495972159567, + "grad_norm": 0.018680412427338224, + "learning_rate": 7.433583959899749e-06, + "loss": 0.0001, + "step": 5140 + }, + { + "epoch": 0.3313140426628859, + "grad_norm": 0.0033161842348454814, + "learning_rate": 7.432867883995705e-06, + "loss": 0.0, + "step": 5141 + }, + { + "epoch": 0.331378488109815, + "grad_norm": 0.00034673562412390456, + "learning_rate": 7.4321518080916586e-06, + "loss": 0.0, + "step": 5142 + }, + { + "epoch": 0.3314429335567442, + "grad_norm": 0.0029976854056438, + "learning_rate": 7.431435732187613e-06, + "loss": 0.0, + "step": 5143 + }, + { + "epoch": 0.3315073790036734, + "grad_norm": 0.0018374341236920763, + "learning_rate": 7.430719656283567e-06, + "loss": 0.0, + "step": 5144 + }, + { + "epoch": 0.33157182445060257, + "grad_norm": 0.003731978135310744, + "learning_rate": 7.4300035803795215e-06, + "loss": 0.0, + "step": 5145 + }, + { + "epoch": 0.33163626989753175, + "grad_norm": 0.01021389306752017, + "learning_rate": 7.429287504475475e-06, + "loss": 0.0001, + "step": 5146 + }, + { + "epoch": 0.33170071534446094, + "grad_norm": 0.002822017216866906, + "learning_rate": 7.428571428571429e-06, + "loss": 0.0, + "step": 5147 + }, + { + "epoch": 0.33176516079139007, + "grad_norm": 0.0014409824839609782, + "learning_rate": 7.4278553526673835e-06, + "loss": 0.0, + "step": 5148 + }, + { + "epoch": 0.33182960623831925, + "grad_norm": 0.03521087826211135, + "learning_rate": 7.427139276763338e-06, + "loss": 0.0, + "step": 5149 + }, + { + "epoch": 0.33189405168524844, + "grad_norm": 0.0005318137644412104, + "learning_rate": 7.426423200859292e-06, + "loss": 0.0, + "step": 5150 + }, + { + "epoch": 0.3319584971321776, + "grad_norm": 0.001292276645488789, + "learning_rate": 7.4257071249552456e-06, + "loss": 0.0, + "step": 5151 + }, + { + "epoch": 0.3320229425791068, + "grad_norm": 0.1877081827917423, + "learning_rate": 7.4249910490512e-06, + "loss": 0.0018, + "step": 5152 + }, + { + "epoch": 0.33208738802603593, + "grad_norm": 0.0013139168940337887, + "learning_rate": 7.424274973147154e-06, + "loss": 0.0, + "step": 5153 + }, + { + "epoch": 0.3321518334729651, + "grad_norm": 0.0013585570143822931, + "learning_rate": 7.4235588972431084e-06, + "loss": 0.0, + "step": 5154 + }, + { + "epoch": 0.3322162789198943, + "grad_norm": 0.01602709453277668, + "learning_rate": 7.422842821339063e-06, + "loss": 0.0, + "step": 5155 + }, + { + "epoch": 0.3322807243668235, + "grad_norm": 0.17357639773672925, + "learning_rate": 7.422126745435016e-06, + "loss": 0.0008, + "step": 5156 + }, + { + "epoch": 0.33234516981375267, + "grad_norm": 0.527027119841736, + "learning_rate": 7.4214106695309705e-06, + "loss": 0.0079, + "step": 5157 + }, + { + "epoch": 0.33240961526068186, + "grad_norm": 0.1942045831769039, + "learning_rate": 7.420694593626925e-06, + "loss": 0.0004, + "step": 5158 + }, + { + "epoch": 0.332474060707611, + "grad_norm": 0.001252549471896302, + "learning_rate": 7.419978517722879e-06, + "loss": 0.0, + "step": 5159 + }, + { + "epoch": 0.33253850615454017, + "grad_norm": 0.004385940819667968, + "learning_rate": 7.419262441818833e-06, + "loss": 0.0, + "step": 5160 + }, + { + "epoch": 0.33260295160146935, + "grad_norm": 0.0018455546544822695, + "learning_rate": 7.418546365914787e-06, + "loss": 0.0, + "step": 5161 + }, + { + "epoch": 0.33266739704839854, + "grad_norm": 0.0013265551464794725, + "learning_rate": 7.417830290010741e-06, + "loss": 0.0, + "step": 5162 + }, + { + "epoch": 0.3327318424953277, + "grad_norm": 0.0005334555103447912, + "learning_rate": 7.4171142141066954e-06, + "loss": 0.0, + "step": 5163 + }, + { + "epoch": 0.33279628794225685, + "grad_norm": 0.0003283532295627354, + "learning_rate": 7.41639813820265e-06, + "loss": 0.0, + "step": 5164 + }, + { + "epoch": 0.33286073338918604, + "grad_norm": 0.05352444132349498, + "learning_rate": 7.415682062298605e-06, + "loss": 0.0001, + "step": 5165 + }, + { + "epoch": 0.3329251788361152, + "grad_norm": 0.008953477244246817, + "learning_rate": 7.414965986394559e-06, + "loss": 0.0, + "step": 5166 + }, + { + "epoch": 0.3329896242830444, + "grad_norm": 0.14708287994672403, + "learning_rate": 7.414249910490513e-06, + "loss": 0.0003, + "step": 5167 + }, + { + "epoch": 0.3330540697299736, + "grad_norm": 0.0017898316957946575, + "learning_rate": 7.413533834586467e-06, + "loss": 0.0, + "step": 5168 + }, + { + "epoch": 0.3331185151769028, + "grad_norm": 0.019036091577501586, + "learning_rate": 7.412817758682421e-06, + "loss": 0.0001, + "step": 5169 + }, + { + "epoch": 0.3331829606238319, + "grad_norm": 0.0005750557774482397, + "learning_rate": 7.4121016827783755e-06, + "loss": 0.0, + "step": 5170 + }, + { + "epoch": 0.3332474060707611, + "grad_norm": 0.00792877003601551, + "learning_rate": 7.41138560687433e-06, + "loss": 0.0, + "step": 5171 + }, + { + "epoch": 0.33331185151769027, + "grad_norm": 0.46352720952438115, + "learning_rate": 7.410669530970283e-06, + "loss": 0.0031, + "step": 5172 + }, + { + "epoch": 0.33337629696461946, + "grad_norm": 0.04526218280174097, + "learning_rate": 7.409953455066238e-06, + "loss": 0.0001, + "step": 5173 + }, + { + "epoch": 0.33344074241154864, + "grad_norm": 0.00010384908638211477, + "learning_rate": 7.409237379162192e-06, + "loss": 0.0, + "step": 5174 + }, + { + "epoch": 0.3335051878584778, + "grad_norm": 0.0001965139918647116, + "learning_rate": 7.408521303258146e-06, + "loss": 0.0, + "step": 5175 + }, + { + "epoch": 0.33356963330540695, + "grad_norm": 0.002679323084956987, + "learning_rate": 7.4078052273541005e-06, + "loss": 0.0, + "step": 5176 + }, + { + "epoch": 0.33363407875233614, + "grad_norm": 0.0040892885241793665, + "learning_rate": 7.407089151450054e-06, + "loss": 0.0, + "step": 5177 + }, + { + "epoch": 0.3336985241992653, + "grad_norm": 0.004283341959280961, + "learning_rate": 7.406373075546008e-06, + "loss": 0.0, + "step": 5178 + }, + { + "epoch": 0.3337629696461945, + "grad_norm": 0.010563679890308148, + "learning_rate": 7.4056569996419625e-06, + "loss": 0.0001, + "step": 5179 + }, + { + "epoch": 0.3338274150931237, + "grad_norm": 0.014411587506700527, + "learning_rate": 7.404940923737917e-06, + "loss": 0.0, + "step": 5180 + }, + { + "epoch": 0.3338918605400528, + "grad_norm": 0.002117332996749874, + "learning_rate": 7.404224847833871e-06, + "loss": 0.0, + "step": 5181 + }, + { + "epoch": 0.333956305986982, + "grad_norm": 0.0051531902656213316, + "learning_rate": 7.4035087719298246e-06, + "loss": 0.0, + "step": 5182 + }, + { + "epoch": 0.3340207514339112, + "grad_norm": 0.008657921238361855, + "learning_rate": 7.402792696025779e-06, + "loss": 0.0, + "step": 5183 + }, + { + "epoch": 0.3340851968808404, + "grad_norm": 0.08266300786287209, + "learning_rate": 7.402076620121733e-06, + "loss": 0.0001, + "step": 5184 + }, + { + "epoch": 0.33414964232776956, + "grad_norm": 0.053964652856025946, + "learning_rate": 7.4013605442176875e-06, + "loss": 0.0001, + "step": 5185 + }, + { + "epoch": 0.33421408777469874, + "grad_norm": 0.00027337132137712447, + "learning_rate": 7.400644468313642e-06, + "loss": 0.0, + "step": 5186 + }, + { + "epoch": 0.33427853322162787, + "grad_norm": 0.017739007588697363, + "learning_rate": 7.399928392409595e-06, + "loss": 0.0001, + "step": 5187 + }, + { + "epoch": 0.33434297866855706, + "grad_norm": 0.10516118308706995, + "learning_rate": 7.39921231650555e-06, + "loss": 0.0001, + "step": 5188 + }, + { + "epoch": 0.33440742411548624, + "grad_norm": 0.0012310758511546937, + "learning_rate": 7.398496240601505e-06, + "loss": 0.0, + "step": 5189 + }, + { + "epoch": 0.3344718695624154, + "grad_norm": 0.0024450255738462916, + "learning_rate": 7.397780164697459e-06, + "loss": 0.0, + "step": 5190 + }, + { + "epoch": 0.3345363150093446, + "grad_norm": 0.008509604235843085, + "learning_rate": 7.397064088793413e-06, + "loss": 0.0001, + "step": 5191 + }, + { + "epoch": 0.33460076045627374, + "grad_norm": 0.016462490353666547, + "learning_rate": 7.3963480128893676e-06, + "loss": 0.0002, + "step": 5192 + }, + { + "epoch": 0.3346652059032029, + "grad_norm": 0.001184509139402617, + "learning_rate": 7.395631936985321e-06, + "loss": 0.0, + "step": 5193 + }, + { + "epoch": 0.3347296513501321, + "grad_norm": 0.0011018125580738482, + "learning_rate": 7.394915861081275e-06, + "loss": 0.0, + "step": 5194 + }, + { + "epoch": 0.3347940967970613, + "grad_norm": 0.001489303290882407, + "learning_rate": 7.39419978517723e-06, + "loss": 0.0, + "step": 5195 + }, + { + "epoch": 0.3348585422439905, + "grad_norm": 0.0018044500319175656, + "learning_rate": 7.393483709273184e-06, + "loss": 0.0, + "step": 5196 + }, + { + "epoch": 0.33492298769091966, + "grad_norm": 0.17798532857531452, + "learning_rate": 7.392767633369138e-06, + "loss": 0.0001, + "step": 5197 + }, + { + "epoch": 0.3349874331378488, + "grad_norm": 0.023186446715734367, + "learning_rate": 7.392051557465092e-06, + "loss": 0.0, + "step": 5198 + }, + { + "epoch": 0.335051878584778, + "grad_norm": 0.14059826808740464, + "learning_rate": 7.391335481561046e-06, + "loss": 0.0005, + "step": 5199 + }, + { + "epoch": 0.33511632403170716, + "grad_norm": 0.003308997020900199, + "learning_rate": 7.390619405657e-06, + "loss": 0.0, + "step": 5200 + }, + { + "epoch": 0.33518076947863634, + "grad_norm": 0.031233637035030917, + "learning_rate": 7.3899033297529546e-06, + "loss": 0.0001, + "step": 5201 + }, + { + "epoch": 0.3352452149255655, + "grad_norm": 0.0012639814431313896, + "learning_rate": 7.389187253848909e-06, + "loss": 0.0, + "step": 5202 + }, + { + "epoch": 0.33530966037249466, + "grad_norm": 0.005084546256411885, + "learning_rate": 7.388471177944862e-06, + "loss": 0.0, + "step": 5203 + }, + { + "epoch": 0.33537410581942384, + "grad_norm": 0.39486029094773817, + "learning_rate": 7.387755102040817e-06, + "loss": 0.0007, + "step": 5204 + }, + { + "epoch": 0.335438551266353, + "grad_norm": 0.035950594309817285, + "learning_rate": 7.387039026136771e-06, + "loss": 0.0004, + "step": 5205 + }, + { + "epoch": 0.3355029967132822, + "grad_norm": 0.022437522697819116, + "learning_rate": 7.386322950232725e-06, + "loss": 0.0002, + "step": 5206 + }, + { + "epoch": 0.3355674421602114, + "grad_norm": 0.002273460860477751, + "learning_rate": 7.385606874328679e-06, + "loss": 0.0, + "step": 5207 + }, + { + "epoch": 0.3356318876071406, + "grad_norm": 0.019155860026953915, + "learning_rate": 7.384890798424633e-06, + "loss": 0.0, + "step": 5208 + }, + { + "epoch": 0.3356963330540697, + "grad_norm": 0.008933259082885026, + "learning_rate": 7.384174722520587e-06, + "loss": 0.0, + "step": 5209 + }, + { + "epoch": 0.3357607785009989, + "grad_norm": 0.0018883702706931679, + "learning_rate": 7.3834586466165416e-06, + "loss": 0.0, + "step": 5210 + }, + { + "epoch": 0.3358252239479281, + "grad_norm": 0.041224874042787886, + "learning_rate": 7.382742570712497e-06, + "loss": 0.0002, + "step": 5211 + }, + { + "epoch": 0.33588966939485726, + "grad_norm": 0.005298047347779996, + "learning_rate": 7.382026494808451e-06, + "loss": 0.0, + "step": 5212 + }, + { + "epoch": 0.33595411484178644, + "grad_norm": 0.012037538191049219, + "learning_rate": 7.381310418904405e-06, + "loss": 0.0, + "step": 5213 + }, + { + "epoch": 0.33601856028871563, + "grad_norm": 0.018119530926582156, + "learning_rate": 7.380594343000359e-06, + "loss": 0.0, + "step": 5214 + }, + { + "epoch": 0.33608300573564476, + "grad_norm": 0.0065361923963192506, + "learning_rate": 7.379878267096313e-06, + "loss": 0.0001, + "step": 5215 + }, + { + "epoch": 0.33614745118257394, + "grad_norm": 0.03795719088023908, + "learning_rate": 7.379162191192267e-06, + "loss": 0.0002, + "step": 5216 + }, + { + "epoch": 0.3362118966295031, + "grad_norm": 0.0003327354220714893, + "learning_rate": 7.378446115288222e-06, + "loss": 0.0, + "step": 5217 + }, + { + "epoch": 0.3362763420764323, + "grad_norm": 0.002622280967886348, + "learning_rate": 7.377730039384176e-06, + "loss": 0.0, + "step": 5218 + }, + { + "epoch": 0.3363407875233615, + "grad_norm": 0.003362164567001042, + "learning_rate": 7.377013963480129e-06, + "loss": 0.0, + "step": 5219 + }, + { + "epoch": 0.3364052329702906, + "grad_norm": 0.002488965637931309, + "learning_rate": 7.376297887576084e-06, + "loss": 0.0, + "step": 5220 + }, + { + "epoch": 0.3364696784172198, + "grad_norm": 0.021178266136436676, + "learning_rate": 7.375581811672038e-06, + "loss": 0.0017, + "step": 5221 + }, + { + "epoch": 0.336534123864149, + "grad_norm": 5.556497552128451e-05, + "learning_rate": 7.374865735767992e-06, + "loss": 0.0, + "step": 5222 + }, + { + "epoch": 0.3365985693110782, + "grad_norm": 1.27603100623005, + "learning_rate": 7.374149659863946e-06, + "loss": 0.0302, + "step": 5223 + }, + { + "epoch": 0.33666301475800736, + "grad_norm": 0.10044723039261633, + "learning_rate": 7.3734335839599e-06, + "loss": 0.0005, + "step": 5224 + }, + { + "epoch": 0.33672746020493655, + "grad_norm": 0.002654275139626637, + "learning_rate": 7.372717508055854e-06, + "loss": 0.0, + "step": 5225 + }, + { + "epoch": 0.3367919056518657, + "grad_norm": 0.15670975755353522, + "learning_rate": 7.372001432151809e-06, + "loss": 0.0001, + "step": 5226 + }, + { + "epoch": 0.33685635109879486, + "grad_norm": 0.006504992868248114, + "learning_rate": 7.371285356247763e-06, + "loss": 0.0, + "step": 5227 + }, + { + "epoch": 0.33692079654572404, + "grad_norm": 0.0029773298244788306, + "learning_rate": 7.370569280343716e-06, + "loss": 0.0, + "step": 5228 + }, + { + "epoch": 0.33698524199265323, + "grad_norm": 0.001564768362004572, + "learning_rate": 7.369853204439671e-06, + "loss": 0.0, + "step": 5229 + }, + { + "epoch": 0.3370496874395824, + "grad_norm": 0.46053742972565676, + "learning_rate": 7.369137128535625e-06, + "loss": 0.0004, + "step": 5230 + }, + { + "epoch": 0.33711413288651154, + "grad_norm": 0.03325454502955247, + "learning_rate": 7.368421052631579e-06, + "loss": 0.0001, + "step": 5231 + }, + { + "epoch": 0.3371785783334407, + "grad_norm": 0.017572879836545905, + "learning_rate": 7.367704976727534e-06, + "loss": 0.0, + "step": 5232 + }, + { + "epoch": 0.3372430237803699, + "grad_norm": 0.008048314701695543, + "learning_rate": 7.366988900823487e-06, + "loss": 0.0, + "step": 5233 + }, + { + "epoch": 0.3373074692272991, + "grad_norm": 0.0466043386887468, + "learning_rate": 7.366272824919441e-06, + "loss": 0.0001, + "step": 5234 + }, + { + "epoch": 0.3373719146742283, + "grad_norm": 0.17321825195418455, + "learning_rate": 7.3655567490153965e-06, + "loss": 0.0002, + "step": 5235 + }, + { + "epoch": 0.33743636012115746, + "grad_norm": 0.014061451726326977, + "learning_rate": 7.364840673111351e-06, + "loss": 0.0001, + "step": 5236 + }, + { + "epoch": 0.3375008055680866, + "grad_norm": 0.0063745520315426, + "learning_rate": 7.364124597207305e-06, + "loss": 0.0, + "step": 5237 + }, + { + "epoch": 0.3375652510150158, + "grad_norm": 0.09786029955600638, + "learning_rate": 7.363408521303259e-06, + "loss": 0.0007, + "step": 5238 + }, + { + "epoch": 0.33762969646194496, + "grad_norm": 0.0045003900085370694, + "learning_rate": 7.362692445399213e-06, + "loss": 0.0, + "step": 5239 + }, + { + "epoch": 0.33769414190887415, + "grad_norm": 0.07432003388342096, + "learning_rate": 7.361976369495167e-06, + "loss": 0.0001, + "step": 5240 + }, + { + "epoch": 0.33775858735580333, + "grad_norm": 0.013922346916827281, + "learning_rate": 7.361260293591121e-06, + "loss": 0.0, + "step": 5241 + }, + { + "epoch": 0.3378230328027325, + "grad_norm": 0.0038764664985424606, + "learning_rate": 7.360544217687076e-06, + "loss": 0.0, + "step": 5242 + }, + { + "epoch": 0.33788747824966164, + "grad_norm": 0.004857801852726184, + "learning_rate": 7.35982814178303e-06, + "loss": 0.0, + "step": 5243 + }, + { + "epoch": 0.33795192369659083, + "grad_norm": 0.2933163297644183, + "learning_rate": 7.3591120658789835e-06, + "loss": 0.0012, + "step": 5244 + }, + { + "epoch": 0.33801636914352, + "grad_norm": 0.013560541083518562, + "learning_rate": 7.358395989974938e-06, + "loss": 0.0, + "step": 5245 + }, + { + "epoch": 0.3380808145904492, + "grad_norm": 0.0003962089272216273, + "learning_rate": 7.357679914070892e-06, + "loss": 0.0, + "step": 5246 + }, + { + "epoch": 0.3381452600373784, + "grad_norm": 0.009339175938270656, + "learning_rate": 7.356963838166846e-06, + "loss": 0.0, + "step": 5247 + }, + { + "epoch": 0.3382097054843075, + "grad_norm": 0.0007190247019661537, + "learning_rate": 7.356247762262801e-06, + "loss": 0.0, + "step": 5248 + }, + { + "epoch": 0.3382741509312367, + "grad_norm": 0.0059187646550370775, + "learning_rate": 7.355531686358754e-06, + "loss": 0.0, + "step": 5249 + }, + { + "epoch": 0.3383385963781659, + "grad_norm": 0.4797958686513202, + "learning_rate": 7.354815610454708e-06, + "loss": 0.0016, + "step": 5250 + }, + { + "epoch": 0.33840304182509506, + "grad_norm": 0.20533320593971888, + "learning_rate": 7.354099534550663e-06, + "loss": 0.0026, + "step": 5251 + }, + { + "epoch": 0.33846748727202425, + "grad_norm": 0.0036076111581535755, + "learning_rate": 7.353383458646617e-06, + "loss": 0.0, + "step": 5252 + }, + { + "epoch": 0.33853193271895343, + "grad_norm": 0.004331472841271792, + "learning_rate": 7.352667382742571e-06, + "loss": 0.0, + "step": 5253 + }, + { + "epoch": 0.33859637816588256, + "grad_norm": 0.08988593500177013, + "learning_rate": 7.351951306838525e-06, + "loss": 0.0002, + "step": 5254 + }, + { + "epoch": 0.33866082361281175, + "grad_norm": 0.004191599223747066, + "learning_rate": 7.351235230934479e-06, + "loss": 0.0, + "step": 5255 + }, + { + "epoch": 0.33872526905974093, + "grad_norm": 0.36925171890969277, + "learning_rate": 7.350519155030433e-06, + "loss": 0.0003, + "step": 5256 + }, + { + "epoch": 0.3387897145066701, + "grad_norm": 0.04986970061517144, + "learning_rate": 7.349803079126388e-06, + "loss": 0.0001, + "step": 5257 + }, + { + "epoch": 0.3388541599535993, + "grad_norm": 0.010657370180991166, + "learning_rate": 7.349087003222343e-06, + "loss": 0.0, + "step": 5258 + }, + { + "epoch": 0.3389186054005284, + "grad_norm": 0.5126343899815423, + "learning_rate": 7.348370927318297e-06, + "loss": 0.0013, + "step": 5259 + }, + { + "epoch": 0.3389830508474576, + "grad_norm": 0.05698766120418015, + "learning_rate": 7.3476548514142506e-06, + "loss": 0.0001, + "step": 5260 + }, + { + "epoch": 0.3390474962943868, + "grad_norm": 0.004494436469781267, + "learning_rate": 7.346938775510205e-06, + "loss": 0.0, + "step": 5261 + }, + { + "epoch": 0.339111941741316, + "grad_norm": 0.0020461885320504514, + "learning_rate": 7.346222699606159e-06, + "loss": 0.0, + "step": 5262 + }, + { + "epoch": 0.33917638718824517, + "grad_norm": 0.02113158221440442, + "learning_rate": 7.3455066237021135e-06, + "loss": 0.0002, + "step": 5263 + }, + { + "epoch": 0.33924083263517435, + "grad_norm": 0.11814162932010543, + "learning_rate": 7.344790547798068e-06, + "loss": 0.0005, + "step": 5264 + }, + { + "epoch": 0.3393052780821035, + "grad_norm": 0.011550950562999067, + "learning_rate": 7.344074471894021e-06, + "loss": 0.0, + "step": 5265 + }, + { + "epoch": 0.33936972352903266, + "grad_norm": 0.02042146368376844, + "learning_rate": 7.3433583959899755e-06, + "loss": 0.0002, + "step": 5266 + }, + { + "epoch": 0.33943416897596185, + "grad_norm": 0.005358844574434535, + "learning_rate": 7.34264232008593e-06, + "loss": 0.0, + "step": 5267 + }, + { + "epoch": 0.33949861442289103, + "grad_norm": 0.003798950955890751, + "learning_rate": 7.341926244181884e-06, + "loss": 0.0, + "step": 5268 + }, + { + "epoch": 0.3395630598698202, + "grad_norm": 0.008024759503129957, + "learning_rate": 7.341210168277838e-06, + "loss": 0.0, + "step": 5269 + }, + { + "epoch": 0.33962750531674935, + "grad_norm": 0.008326903955132609, + "learning_rate": 7.340494092373792e-06, + "loss": 0.0, + "step": 5270 + }, + { + "epoch": 0.33969195076367853, + "grad_norm": 0.11832471047203198, + "learning_rate": 7.339778016469746e-06, + "loss": 0.0019, + "step": 5271 + }, + { + "epoch": 0.3397563962106077, + "grad_norm": 0.0015766559886541335, + "learning_rate": 7.3390619405657004e-06, + "loss": 0.0, + "step": 5272 + }, + { + "epoch": 0.3398208416575369, + "grad_norm": 0.07010359991070295, + "learning_rate": 7.338345864661655e-06, + "loss": 0.0001, + "step": 5273 + }, + { + "epoch": 0.3398852871044661, + "grad_norm": 0.0008208800151459135, + "learning_rate": 7.337629788757609e-06, + "loss": 0.0, + "step": 5274 + }, + { + "epoch": 0.33994973255139527, + "grad_norm": 0.0007744022262015843, + "learning_rate": 7.3369137128535625e-06, + "loss": 0.0, + "step": 5275 + }, + { + "epoch": 0.3400141779983244, + "grad_norm": 0.19669594039359198, + "learning_rate": 7.336197636949517e-06, + "loss": 0.0024, + "step": 5276 + }, + { + "epoch": 0.3400786234452536, + "grad_norm": 0.001586631846665345, + "learning_rate": 7.335481561045471e-06, + "loss": 0.0, + "step": 5277 + }, + { + "epoch": 0.34014306889218276, + "grad_norm": 0.0016974822848838252, + "learning_rate": 7.334765485141425e-06, + "loss": 0.0, + "step": 5278 + }, + { + "epoch": 0.34020751433911195, + "grad_norm": 0.010299373551429326, + "learning_rate": 7.33404940923738e-06, + "loss": 0.0, + "step": 5279 + }, + { + "epoch": 0.34027195978604113, + "grad_norm": 0.021553520264792618, + "learning_rate": 7.333333333333333e-06, + "loss": 0.0, + "step": 5280 + }, + { + "epoch": 0.3403364052329703, + "grad_norm": 0.0010162259051668598, + "learning_rate": 7.332617257429288e-06, + "loss": 0.0, + "step": 5281 + }, + { + "epoch": 0.34040085067989945, + "grad_norm": 0.19239838048229532, + "learning_rate": 7.331901181525243e-06, + "loss": 0.0016, + "step": 5282 + }, + { + "epoch": 0.34046529612682863, + "grad_norm": 0.0019388955843136493, + "learning_rate": 7.331185105621197e-06, + "loss": 0.0, + "step": 5283 + }, + { + "epoch": 0.3405297415737578, + "grad_norm": 0.10166173451835614, + "learning_rate": 7.330469029717151e-06, + "loss": 0.0002, + "step": 5284 + }, + { + "epoch": 0.340594187020687, + "grad_norm": 0.2888227447924931, + "learning_rate": 7.3297529538131055e-06, + "loss": 0.0002, + "step": 5285 + }, + { + "epoch": 0.3406586324676162, + "grad_norm": 0.10953043213619218, + "learning_rate": 7.329036877909059e-06, + "loss": 0.0018, + "step": 5286 + }, + { + "epoch": 0.3407230779145453, + "grad_norm": 0.08343678035229286, + "learning_rate": 7.328320802005013e-06, + "loss": 0.0003, + "step": 5287 + }, + { + "epoch": 0.3407875233614745, + "grad_norm": 0.008743033563584184, + "learning_rate": 7.3276047261009675e-06, + "loss": 0.0, + "step": 5288 + }, + { + "epoch": 0.3408519688084037, + "grad_norm": 0.015663007613879978, + "learning_rate": 7.326888650196922e-06, + "loss": 0.0016, + "step": 5289 + }, + { + "epoch": 0.34091641425533287, + "grad_norm": 0.16035313832000075, + "learning_rate": 7.326172574292876e-06, + "loss": 0.0004, + "step": 5290 + }, + { + "epoch": 0.34098085970226205, + "grad_norm": 0.02837167493002224, + "learning_rate": 7.32545649838883e-06, + "loss": 0.0, + "step": 5291 + }, + { + "epoch": 0.34104530514919124, + "grad_norm": 0.05193844351158227, + "learning_rate": 7.324740422484784e-06, + "loss": 0.0, + "step": 5292 + }, + { + "epoch": 0.34110975059612036, + "grad_norm": 0.002242715562046738, + "learning_rate": 7.324024346580738e-06, + "loss": 0.0, + "step": 5293 + }, + { + "epoch": 0.34117419604304955, + "grad_norm": 0.11049409118395545, + "learning_rate": 7.3233082706766925e-06, + "loss": 0.0005, + "step": 5294 + }, + { + "epoch": 0.34123864148997873, + "grad_norm": 0.0005390615075129139, + "learning_rate": 7.322592194772647e-06, + "loss": 0.0, + "step": 5295 + }, + { + "epoch": 0.3413030869369079, + "grad_norm": 0.0013760686804202237, + "learning_rate": 7.3218761188686e-06, + "loss": 0.0, + "step": 5296 + }, + { + "epoch": 0.3413675323838371, + "grad_norm": 0.09472930550290676, + "learning_rate": 7.3211600429645545e-06, + "loss": 0.0011, + "step": 5297 + }, + { + "epoch": 0.34143197783076623, + "grad_norm": 0.2669654151871651, + "learning_rate": 7.320443967060509e-06, + "loss": 0.0006, + "step": 5298 + }, + { + "epoch": 0.3414964232776954, + "grad_norm": 0.005662463238322093, + "learning_rate": 7.319727891156463e-06, + "loss": 0.0, + "step": 5299 + }, + { + "epoch": 0.3415608687246246, + "grad_norm": 1.0199606272634265, + "learning_rate": 7.3190118152524166e-06, + "loss": 0.001, + "step": 5300 + }, + { + "epoch": 0.3416253141715538, + "grad_norm": 0.0006392407831279938, + "learning_rate": 7.318295739348371e-06, + "loss": 0.0, + "step": 5301 + }, + { + "epoch": 0.34168975961848297, + "grad_norm": 0.0009464675577917425, + "learning_rate": 7.317579663444325e-06, + "loss": 0.0, + "step": 5302 + }, + { + "epoch": 0.34175420506541215, + "grad_norm": 0.33982247253106124, + "learning_rate": 7.3168635875402795e-06, + "loss": 0.0008, + "step": 5303 + }, + { + "epoch": 0.3418186505123413, + "grad_norm": 0.0005554538138186463, + "learning_rate": 7.316147511636234e-06, + "loss": 0.0, + "step": 5304 + }, + { + "epoch": 0.34188309595927047, + "grad_norm": 0.011539964270771048, + "learning_rate": 7.315431435732189e-06, + "loss": 0.0, + "step": 5305 + }, + { + "epoch": 0.34194754140619965, + "grad_norm": 0.004195442787445282, + "learning_rate": 7.314715359828143e-06, + "loss": 0.0, + "step": 5306 + }, + { + "epoch": 0.34201198685312884, + "grad_norm": 0.007462997480980597, + "learning_rate": 7.313999283924097e-06, + "loss": 0.0, + "step": 5307 + }, + { + "epoch": 0.342076432300058, + "grad_norm": 0.00024238609706196129, + "learning_rate": 7.313283208020051e-06, + "loss": 0.0, + "step": 5308 + }, + { + "epoch": 0.34214087774698715, + "grad_norm": 0.0022106063684528892, + "learning_rate": 7.312567132116005e-06, + "loss": 0.0, + "step": 5309 + }, + { + "epoch": 0.34220532319391633, + "grad_norm": 0.01913270911313999, + "learning_rate": 7.3118510562119596e-06, + "loss": 0.0001, + "step": 5310 + }, + { + "epoch": 0.3422697686408455, + "grad_norm": 0.002142273071297971, + "learning_rate": 7.311134980307914e-06, + "loss": 0.0, + "step": 5311 + }, + { + "epoch": 0.3423342140877747, + "grad_norm": 0.0065981141478347735, + "learning_rate": 7.310418904403867e-06, + "loss": 0.0, + "step": 5312 + }, + { + "epoch": 0.3423986595347039, + "grad_norm": 0.004802954010512389, + "learning_rate": 7.309702828499822e-06, + "loss": 0.0, + "step": 5313 + }, + { + "epoch": 0.34246310498163307, + "grad_norm": 0.00110266347810728, + "learning_rate": 7.308986752595776e-06, + "loss": 0.0, + "step": 5314 + }, + { + "epoch": 0.3425275504285622, + "grad_norm": 0.0005134383818776354, + "learning_rate": 7.30827067669173e-06, + "loss": 0.0, + "step": 5315 + }, + { + "epoch": 0.3425919958754914, + "grad_norm": 0.538310230953227, + "learning_rate": 7.307554600787684e-06, + "loss": 0.001, + "step": 5316 + }, + { + "epoch": 0.34265644132242057, + "grad_norm": 0.00018143450225810087, + "learning_rate": 7.306838524883638e-06, + "loss": 0.0, + "step": 5317 + }, + { + "epoch": 0.34272088676934975, + "grad_norm": 0.40119536868709277, + "learning_rate": 7.306122448979592e-06, + "loss": 0.0025, + "step": 5318 + }, + { + "epoch": 0.34278533221627894, + "grad_norm": 0.024010840365756462, + "learning_rate": 7.3054063730755466e-06, + "loss": 0.0, + "step": 5319 + }, + { + "epoch": 0.3428497776632081, + "grad_norm": 0.0008069614846977794, + "learning_rate": 7.304690297171501e-06, + "loss": 0.0, + "step": 5320 + }, + { + "epoch": 0.34291422311013725, + "grad_norm": 0.028020230829516272, + "learning_rate": 7.303974221267454e-06, + "loss": 0.0, + "step": 5321 + }, + { + "epoch": 0.34297866855706644, + "grad_norm": 0.00010121037613932627, + "learning_rate": 7.303258145363409e-06, + "loss": 0.0, + "step": 5322 + }, + { + "epoch": 0.3430431140039956, + "grad_norm": 0.10079432169841467, + "learning_rate": 7.302542069459363e-06, + "loss": 0.0003, + "step": 5323 + }, + { + "epoch": 0.3431075594509248, + "grad_norm": 0.03269509886827942, + "learning_rate": 7.301825993555317e-06, + "loss": 0.0002, + "step": 5324 + }, + { + "epoch": 0.343172004897854, + "grad_norm": 0.0022781756443171625, + "learning_rate": 7.3011099176512715e-06, + "loss": 0.0, + "step": 5325 + }, + { + "epoch": 0.3432364503447831, + "grad_norm": 0.0014035525781737842, + "learning_rate": 7.300393841747225e-06, + "loss": 0.0, + "step": 5326 + }, + { + "epoch": 0.3433008957917123, + "grad_norm": 0.0006713917775058356, + "learning_rate": 7.299677765843179e-06, + "loss": 0.0, + "step": 5327 + }, + { + "epoch": 0.3433653412386415, + "grad_norm": 0.01338775709200437, + "learning_rate": 7.298961689939134e-06, + "loss": 0.0, + "step": 5328 + }, + { + "epoch": 0.34342978668557067, + "grad_norm": 0.0006338464533540836, + "learning_rate": 7.298245614035089e-06, + "loss": 0.0, + "step": 5329 + }, + { + "epoch": 0.34349423213249985, + "grad_norm": 0.0021448701466866347, + "learning_rate": 7.297529538131043e-06, + "loss": 0.0, + "step": 5330 + }, + { + "epoch": 0.34355867757942904, + "grad_norm": 0.012620152073128372, + "learning_rate": 7.296813462226997e-06, + "loss": 0.0, + "step": 5331 + }, + { + "epoch": 0.34362312302635817, + "grad_norm": 0.000746423385854066, + "learning_rate": 7.296097386322952e-06, + "loss": 0.0, + "step": 5332 + }, + { + "epoch": 0.34368756847328735, + "grad_norm": 9.873649287161303e-05, + "learning_rate": 7.295381310418905e-06, + "loss": 0.0, + "step": 5333 + }, + { + "epoch": 0.34375201392021654, + "grad_norm": 0.00031365937247385795, + "learning_rate": 7.294665234514859e-06, + "loss": 0.0, + "step": 5334 + }, + { + "epoch": 0.3438164593671457, + "grad_norm": 0.00528864045369544, + "learning_rate": 7.293949158610814e-06, + "loss": 0.0, + "step": 5335 + }, + { + "epoch": 0.3438809048140749, + "grad_norm": 0.0005738690377691539, + "learning_rate": 7.293233082706768e-06, + "loss": 0.0, + "step": 5336 + }, + { + "epoch": 0.34394535026100403, + "grad_norm": 0.0009102301472844842, + "learning_rate": 7.292517006802721e-06, + "loss": 0.0, + "step": 5337 + }, + { + "epoch": 0.3440097957079332, + "grad_norm": 0.00034346797846901265, + "learning_rate": 7.291800930898676e-06, + "loss": 0.0, + "step": 5338 + }, + { + "epoch": 0.3440742411548624, + "grad_norm": 0.049793358991671866, + "learning_rate": 7.29108485499463e-06, + "loss": 0.0004, + "step": 5339 + }, + { + "epoch": 0.3441386866017916, + "grad_norm": 0.014751395360373839, + "learning_rate": 7.290368779090584e-06, + "loss": 0.0, + "step": 5340 + }, + { + "epoch": 0.3442031320487208, + "grad_norm": 0.005169372080883912, + "learning_rate": 7.289652703186539e-06, + "loss": 0.0, + "step": 5341 + }, + { + "epoch": 0.34426757749564996, + "grad_norm": 2.6960228307514163e-05, + "learning_rate": 7.288936627282492e-06, + "loss": 0.0, + "step": 5342 + }, + { + "epoch": 0.3443320229425791, + "grad_norm": 0.050153460105654975, + "learning_rate": 7.288220551378446e-06, + "loss": 0.0001, + "step": 5343 + }, + { + "epoch": 0.34439646838950827, + "grad_norm": 0.00024310397212061517, + "learning_rate": 7.287504475474401e-06, + "loss": 0.0, + "step": 5344 + }, + { + "epoch": 0.34446091383643745, + "grad_norm": 0.05808561250266076, + "learning_rate": 7.286788399570355e-06, + "loss": 0.0006, + "step": 5345 + }, + { + "epoch": 0.34452535928336664, + "grad_norm": 0.00013547144495678156, + "learning_rate": 7.286072323666309e-06, + "loss": 0.0, + "step": 5346 + }, + { + "epoch": 0.3445898047302958, + "grad_norm": 0.3373302818885634, + "learning_rate": 7.285356247762263e-06, + "loss": 0.0025, + "step": 5347 + }, + { + "epoch": 0.34465425017722495, + "grad_norm": 0.016717329194680847, + "learning_rate": 7.284640171858217e-06, + "loss": 0.0001, + "step": 5348 + }, + { + "epoch": 0.34471869562415414, + "grad_norm": 0.00030821020629320307, + "learning_rate": 7.283924095954171e-06, + "loss": 0.0, + "step": 5349 + }, + { + "epoch": 0.3447831410710833, + "grad_norm": 0.0033671255527705796, + "learning_rate": 7.283208020050126e-06, + "loss": 0.0, + "step": 5350 + }, + { + "epoch": 0.3448475865180125, + "grad_norm": 0.005871330298462043, + "learning_rate": 7.282491944146081e-06, + "loss": 0.0, + "step": 5351 + }, + { + "epoch": 0.3449120319649417, + "grad_norm": 0.0003919803199134911, + "learning_rate": 7.281775868242035e-06, + "loss": 0.0, + "step": 5352 + }, + { + "epoch": 0.3449764774118709, + "grad_norm": 0.0004434323271049176, + "learning_rate": 7.2810597923379885e-06, + "loss": 0.0, + "step": 5353 + }, + { + "epoch": 0.3450409228588, + "grad_norm": 0.17988686282623115, + "learning_rate": 7.280343716433943e-06, + "loss": 0.0011, + "step": 5354 + }, + { + "epoch": 0.3451053683057292, + "grad_norm": 5.375923874340952e-05, + "learning_rate": 7.279627640529897e-06, + "loss": 0.0, + "step": 5355 + }, + { + "epoch": 0.34516981375265837, + "grad_norm": 0.0009465090573032732, + "learning_rate": 7.278911564625851e-06, + "loss": 0.0, + "step": 5356 + }, + { + "epoch": 0.34523425919958756, + "grad_norm": 0.0020489958458895278, + "learning_rate": 7.278195488721806e-06, + "loss": 0.0, + "step": 5357 + }, + { + "epoch": 0.34529870464651674, + "grad_norm": 0.00010094518209825967, + "learning_rate": 7.277479412817759e-06, + "loss": 0.0, + "step": 5358 + }, + { + "epoch": 0.3453631500934459, + "grad_norm": 0.0006345648995169979, + "learning_rate": 7.276763336913713e-06, + "loss": 0.0, + "step": 5359 + }, + { + "epoch": 0.34542759554037505, + "grad_norm": 0.41101601724437337, + "learning_rate": 7.276047261009668e-06, + "loss": 0.0011, + "step": 5360 + }, + { + "epoch": 0.34549204098730424, + "grad_norm": 0.01636080831375019, + "learning_rate": 7.275331185105622e-06, + "loss": 0.0001, + "step": 5361 + }, + { + "epoch": 0.3455564864342334, + "grad_norm": 0.030371541605649418, + "learning_rate": 7.274615109201576e-06, + "loss": 0.0001, + "step": 5362 + }, + { + "epoch": 0.3456209318811626, + "grad_norm": 0.0009595921015281959, + "learning_rate": 7.27389903329753e-06, + "loss": 0.0, + "step": 5363 + }, + { + "epoch": 0.3456853773280918, + "grad_norm": 0.0028069042707871083, + "learning_rate": 7.273182957393484e-06, + "loss": 0.0, + "step": 5364 + }, + { + "epoch": 0.3457498227750209, + "grad_norm": 0.006725710750406669, + "learning_rate": 7.272466881489438e-06, + "loss": 0.0, + "step": 5365 + }, + { + "epoch": 0.3458142682219501, + "grad_norm": 0.02202792303496806, + "learning_rate": 7.271750805585393e-06, + "loss": 0.0002, + "step": 5366 + }, + { + "epoch": 0.3458787136688793, + "grad_norm": 0.0033364088445729323, + "learning_rate": 7.271034729681347e-06, + "loss": 0.0, + "step": 5367 + }, + { + "epoch": 0.3459431591158085, + "grad_norm": 0.4054470012322448, + "learning_rate": 7.2703186537773e-06, + "loss": 0.0021, + "step": 5368 + }, + { + "epoch": 0.34600760456273766, + "grad_norm": 0.006543067397167515, + "learning_rate": 7.269602577873255e-06, + "loss": 0.0, + "step": 5369 + }, + { + "epoch": 0.34607205000966684, + "grad_norm": 4.882931587784024e-05, + "learning_rate": 7.268886501969209e-06, + "loss": 0.0, + "step": 5370 + }, + { + "epoch": 0.34613649545659597, + "grad_norm": 0.2890103138391657, + "learning_rate": 7.268170426065163e-06, + "loss": 0.0013, + "step": 5371 + }, + { + "epoch": 0.34620094090352516, + "grad_norm": 0.02928876168441755, + "learning_rate": 7.267454350161118e-06, + "loss": 0.0002, + "step": 5372 + }, + { + "epoch": 0.34626538635045434, + "grad_norm": 0.03327625299827368, + "learning_rate": 7.266738274257071e-06, + "loss": 0.0, + "step": 5373 + }, + { + "epoch": 0.3463298317973835, + "grad_norm": 0.004407488781814231, + "learning_rate": 7.266022198353025e-06, + "loss": 0.0, + "step": 5374 + }, + { + "epoch": 0.3463942772443127, + "grad_norm": 0.00046352221804896843, + "learning_rate": 7.2653061224489805e-06, + "loss": 0.0, + "step": 5375 + }, + { + "epoch": 0.34645872269124184, + "grad_norm": 1.0442598045825846, + "learning_rate": 7.264590046544935e-06, + "loss": 0.0033, + "step": 5376 + }, + { + "epoch": 0.346523168138171, + "grad_norm": 0.0087028528833674, + "learning_rate": 7.263873970640889e-06, + "loss": 0.0001, + "step": 5377 + }, + { + "epoch": 0.3465876135851002, + "grad_norm": 0.04466082821105845, + "learning_rate": 7.263157894736843e-06, + "loss": 0.0, + "step": 5378 + }, + { + "epoch": 0.3466520590320294, + "grad_norm": 0.00022414270993919194, + "learning_rate": 7.262441818832797e-06, + "loss": 0.0, + "step": 5379 + }, + { + "epoch": 0.3467165044789586, + "grad_norm": 0.13606521537237062, + "learning_rate": 7.261725742928751e-06, + "loss": 0.0017, + "step": 5380 + }, + { + "epoch": 0.34678094992588776, + "grad_norm": 0.0032358758394306617, + "learning_rate": 7.2610096670247055e-06, + "loss": 0.0, + "step": 5381 + }, + { + "epoch": 0.3468453953728169, + "grad_norm": 0.10357858884203731, + "learning_rate": 7.26029359112066e-06, + "loss": 0.0012, + "step": 5382 + }, + { + "epoch": 0.3469098408197461, + "grad_norm": 9.994017630786523e-05, + "learning_rate": 7.259577515216614e-06, + "loss": 0.0, + "step": 5383 + }, + { + "epoch": 0.34697428626667526, + "grad_norm": 9.690410155464549e-05, + "learning_rate": 7.2588614393125675e-06, + "loss": 0.0, + "step": 5384 + }, + { + "epoch": 0.34703873171360444, + "grad_norm": 0.005215186637607239, + "learning_rate": 7.258145363408522e-06, + "loss": 0.0, + "step": 5385 + }, + { + "epoch": 0.3471031771605336, + "grad_norm": 0.0011850453431943883, + "learning_rate": 7.257429287504476e-06, + "loss": 0.0, + "step": 5386 + }, + { + "epoch": 0.34716762260746276, + "grad_norm": 0.0797241166546595, + "learning_rate": 7.25671321160043e-06, + "loss": 0.0005, + "step": 5387 + }, + { + "epoch": 0.34723206805439194, + "grad_norm": 0.14444908177693655, + "learning_rate": 7.255997135696385e-06, + "loss": 0.0022, + "step": 5388 + }, + { + "epoch": 0.3472965135013211, + "grad_norm": 0.0015494501170997498, + "learning_rate": 7.255281059792338e-06, + "loss": 0.0, + "step": 5389 + }, + { + "epoch": 0.3473609589482503, + "grad_norm": 0.0723502269609664, + "learning_rate": 7.2545649838882924e-06, + "loss": 0.0002, + "step": 5390 + }, + { + "epoch": 0.3474254043951795, + "grad_norm": 0.13437147135869884, + "learning_rate": 7.253848907984247e-06, + "loss": 0.002, + "step": 5391 + }, + { + "epoch": 0.3474898498421087, + "grad_norm": 0.07793027930166455, + "learning_rate": 7.253132832080201e-06, + "loss": 0.0005, + "step": 5392 + }, + { + "epoch": 0.3475542952890378, + "grad_norm": 0.0005459188861817741, + "learning_rate": 7.252416756176155e-06, + "loss": 0.0, + "step": 5393 + }, + { + "epoch": 0.347618740735967, + "grad_norm": 0.0005229998610681955, + "learning_rate": 7.251700680272109e-06, + "loss": 0.0, + "step": 5394 + }, + { + "epoch": 0.3476831861828962, + "grad_norm": 0.014832489254280722, + "learning_rate": 7.250984604368063e-06, + "loss": 0.0, + "step": 5395 + }, + { + "epoch": 0.34774763162982536, + "grad_norm": 0.001830075605712059, + "learning_rate": 7.250268528464017e-06, + "loss": 0.0, + "step": 5396 + }, + { + "epoch": 0.34781207707675454, + "grad_norm": 0.00035845492018812364, + "learning_rate": 7.249552452559972e-06, + "loss": 0.0015, + "step": 5397 + }, + { + "epoch": 0.34787652252368373, + "grad_norm": 0.22111401246031417, + "learning_rate": 7.248836376655927e-06, + "loss": 0.0018, + "step": 5398 + }, + { + "epoch": 0.34794096797061286, + "grad_norm": 0.34508336003421725, + "learning_rate": 7.248120300751881e-06, + "loss": 0.0013, + "step": 5399 + }, + { + "epoch": 0.34800541341754204, + "grad_norm": 0.11998791833124678, + "learning_rate": 7.247404224847835e-06, + "loss": 0.0003, + "step": 5400 + }, + { + "epoch": 0.3480698588644712, + "grad_norm": 0.0010438379812748842, + "learning_rate": 7.246688148943789e-06, + "loss": 0.0, + "step": 5401 + }, + { + "epoch": 0.3481343043114004, + "grad_norm": 0.0009734335987623431, + "learning_rate": 7.245972073039743e-06, + "loss": 0.0, + "step": 5402 + }, + { + "epoch": 0.3481987497583296, + "grad_norm": 0.0018563996895338923, + "learning_rate": 7.2452559971356975e-06, + "loss": 0.0, + "step": 5403 + }, + { + "epoch": 0.3482631952052587, + "grad_norm": 0.20984205384467244, + "learning_rate": 7.244539921231652e-06, + "loss": 0.0004, + "step": 5404 + }, + { + "epoch": 0.3483276406521879, + "grad_norm": 0.015448803826021713, + "learning_rate": 7.243823845327605e-06, + "loss": 0.0002, + "step": 5405 + }, + { + "epoch": 0.3483920860991171, + "grad_norm": 0.03937960007927294, + "learning_rate": 7.2431077694235595e-06, + "loss": 0.0, + "step": 5406 + }, + { + "epoch": 0.3484565315460463, + "grad_norm": 0.022501578310819947, + "learning_rate": 7.242391693519514e-06, + "loss": 0.0, + "step": 5407 + }, + { + "epoch": 0.34852097699297546, + "grad_norm": 0.03170367334764925, + "learning_rate": 7.241675617615468e-06, + "loss": 0.0016, + "step": 5408 + }, + { + "epoch": 0.34858542243990465, + "grad_norm": 0.0038783959984967516, + "learning_rate": 7.2409595417114224e-06, + "loss": 0.0, + "step": 5409 + }, + { + "epoch": 0.3486498678868338, + "grad_norm": 0.24674016584155095, + "learning_rate": 7.240243465807376e-06, + "loss": 0.0024, + "step": 5410 + }, + { + "epoch": 0.34871431333376296, + "grad_norm": 0.000528044999434226, + "learning_rate": 7.23952738990333e-06, + "loss": 0.0, + "step": 5411 + }, + { + "epoch": 0.34877875878069214, + "grad_norm": 0.04335431754627749, + "learning_rate": 7.2388113139992845e-06, + "loss": 0.0001, + "step": 5412 + }, + { + "epoch": 0.34884320422762133, + "grad_norm": 9.593325914361795e-05, + "learning_rate": 7.238095238095239e-06, + "loss": 0.0, + "step": 5413 + }, + { + "epoch": 0.3489076496745505, + "grad_norm": 0.0014304134812169755, + "learning_rate": 7.237379162191192e-06, + "loss": 0.0, + "step": 5414 + }, + { + "epoch": 0.34897209512147964, + "grad_norm": 0.004716281375558154, + "learning_rate": 7.2366630862871465e-06, + "loss": 0.0, + "step": 5415 + }, + { + "epoch": 0.3490365405684088, + "grad_norm": 0.0077143613239694445, + "learning_rate": 7.235947010383101e-06, + "loss": 0.0, + "step": 5416 + }, + { + "epoch": 0.349100986015338, + "grad_norm": 0.019198324219276017, + "learning_rate": 7.235230934479055e-06, + "loss": 0.0002, + "step": 5417 + }, + { + "epoch": 0.3491654314622672, + "grad_norm": 0.001355964329896379, + "learning_rate": 7.234514858575009e-06, + "loss": 0.0, + "step": 5418 + }, + { + "epoch": 0.3492298769091964, + "grad_norm": 0.022225915169465784, + "learning_rate": 7.233798782670963e-06, + "loss": 0.0, + "step": 5419 + }, + { + "epoch": 0.34929432235612556, + "grad_norm": 0.017610877868679407, + "learning_rate": 7.233082706766917e-06, + "loss": 0.0, + "step": 5420 + }, + { + "epoch": 0.3493587678030547, + "grad_norm": 0.007438692190637162, + "learning_rate": 7.232366630862872e-06, + "loss": 0.0, + "step": 5421 + }, + { + "epoch": 0.3494232132499839, + "grad_norm": 0.007550065798805919, + "learning_rate": 7.231650554958827e-06, + "loss": 0.0001, + "step": 5422 + }, + { + "epoch": 0.34948765869691306, + "grad_norm": 1.1858373848494488, + "learning_rate": 7.230934479054781e-06, + "loss": 0.0052, + "step": 5423 + }, + { + "epoch": 0.34955210414384225, + "grad_norm": 0.13834342205455954, + "learning_rate": 7.230218403150735e-06, + "loss": 0.0013, + "step": 5424 + }, + { + "epoch": 0.34961654959077143, + "grad_norm": 0.0019500085784708264, + "learning_rate": 7.2295023272466895e-06, + "loss": 0.0, + "step": 5425 + }, + { + "epoch": 0.34968099503770056, + "grad_norm": 0.274820237577197, + "learning_rate": 7.228786251342643e-06, + "loss": 0.0014, + "step": 5426 + }, + { + "epoch": 0.34974544048462974, + "grad_norm": 0.16902668597399362, + "learning_rate": 7.228070175438597e-06, + "loss": 0.0004, + "step": 5427 + }, + { + "epoch": 0.34980988593155893, + "grad_norm": 0.0031173492434176304, + "learning_rate": 7.2273540995345516e-06, + "loss": 0.0, + "step": 5428 + }, + { + "epoch": 0.3498743313784881, + "grad_norm": 0.0015306366566559873, + "learning_rate": 7.226638023630506e-06, + "loss": 0.0, + "step": 5429 + }, + { + "epoch": 0.3499387768254173, + "grad_norm": 0.0010650450143334291, + "learning_rate": 7.225921947726459e-06, + "loss": 0.0, + "step": 5430 + }, + { + "epoch": 0.3500032222723465, + "grad_norm": 0.00024179158076437042, + "learning_rate": 7.225205871822414e-06, + "loss": 0.0, + "step": 5431 + }, + { + "epoch": 0.3500676677192756, + "grad_norm": 0.007258163297915242, + "learning_rate": 7.224489795918368e-06, + "loss": 0.0, + "step": 5432 + }, + { + "epoch": 0.3501321131662048, + "grad_norm": 2.513392435115923, + "learning_rate": 7.223773720014322e-06, + "loss": 0.0187, + "step": 5433 + }, + { + "epoch": 0.350196558613134, + "grad_norm": 0.0008973330386759609, + "learning_rate": 7.2230576441102765e-06, + "loss": 0.0, + "step": 5434 + }, + { + "epoch": 0.35026100406006316, + "grad_norm": 0.011253913607002416, + "learning_rate": 7.22234156820623e-06, + "loss": 0.0, + "step": 5435 + }, + { + "epoch": 0.35032544950699235, + "grad_norm": 0.6856501140185183, + "learning_rate": 7.221625492302184e-06, + "loss": 0.0033, + "step": 5436 + }, + { + "epoch": 0.35038989495392153, + "grad_norm": 0.002744451414998837, + "learning_rate": 7.2209094163981386e-06, + "loss": 0.0, + "step": 5437 + }, + { + "epoch": 0.35045434040085066, + "grad_norm": 0.1043653877480623, + "learning_rate": 7.220193340494093e-06, + "loss": 0.0001, + "step": 5438 + }, + { + "epoch": 0.35051878584777985, + "grad_norm": 0.04470527560901967, + "learning_rate": 7.219477264590047e-06, + "loss": 0.0001, + "step": 5439 + }, + { + "epoch": 0.35058323129470903, + "grad_norm": 0.0028691901618445375, + "learning_rate": 7.218761188686001e-06, + "loss": 0.0, + "step": 5440 + }, + { + "epoch": 0.3506476767416382, + "grad_norm": 0.014048890434080821, + "learning_rate": 7.218045112781955e-06, + "loss": 0.0001, + "step": 5441 + }, + { + "epoch": 0.3507121221885674, + "grad_norm": 0.0001558104690521515, + "learning_rate": 7.217329036877909e-06, + "loss": 0.0, + "step": 5442 + }, + { + "epoch": 0.35077656763549653, + "grad_norm": 0.0006746233065462414, + "learning_rate": 7.2166129609738635e-06, + "loss": 0.0, + "step": 5443 + }, + { + "epoch": 0.3508410130824257, + "grad_norm": 0.0024750607397397654, + "learning_rate": 7.215896885069818e-06, + "loss": 0.0, + "step": 5444 + }, + { + "epoch": 0.3509054585293549, + "grad_norm": 0.006352597500439202, + "learning_rate": 7.215180809165773e-06, + "loss": 0.0, + "step": 5445 + }, + { + "epoch": 0.3509699039762841, + "grad_norm": 0.1022585984155468, + "learning_rate": 7.214464733261726e-06, + "loss": 0.0001, + "step": 5446 + }, + { + "epoch": 0.35103434942321327, + "grad_norm": 0.003986765644523545, + "learning_rate": 7.213748657357681e-06, + "loss": 0.0, + "step": 5447 + }, + { + "epoch": 0.35109879487014245, + "grad_norm": 0.35015860097590396, + "learning_rate": 7.213032581453635e-06, + "loss": 0.0015, + "step": 5448 + }, + { + "epoch": 0.3511632403170716, + "grad_norm": 0.004855895917974423, + "learning_rate": 7.212316505549589e-06, + "loss": 0.0, + "step": 5449 + }, + { + "epoch": 0.35122768576400076, + "grad_norm": 0.00476196278000269, + "learning_rate": 7.211600429645544e-06, + "loss": 0.0, + "step": 5450 + }, + { + "epoch": 0.35129213121092995, + "grad_norm": 4.2020598196443586e-05, + "learning_rate": 7.210884353741497e-06, + "loss": 0.0, + "step": 5451 + }, + { + "epoch": 0.35135657665785913, + "grad_norm": 0.000898003348724344, + "learning_rate": 7.210168277837451e-06, + "loss": 0.0, + "step": 5452 + }, + { + "epoch": 0.3514210221047883, + "grad_norm": 0.00018579366299213354, + "learning_rate": 7.209452201933406e-06, + "loss": 0.0, + "step": 5453 + }, + { + "epoch": 0.35148546755171745, + "grad_norm": 0.158084677871953, + "learning_rate": 7.20873612602936e-06, + "loss": 0.001, + "step": 5454 + }, + { + "epoch": 0.35154991299864663, + "grad_norm": 0.028924409174075277, + "learning_rate": 7.208020050125314e-06, + "loss": 0.0001, + "step": 5455 + }, + { + "epoch": 0.3516143584455758, + "grad_norm": 0.10685927277426384, + "learning_rate": 7.207303974221268e-06, + "loss": 0.0016, + "step": 5456 + }, + { + "epoch": 0.351678803892505, + "grad_norm": 0.003219382873925816, + "learning_rate": 7.206587898317222e-06, + "loss": 0.0, + "step": 5457 + }, + { + "epoch": 0.3517432493394342, + "grad_norm": 0.023248814491553707, + "learning_rate": 7.205871822413176e-06, + "loss": 0.0002, + "step": 5458 + }, + { + "epoch": 0.35180769478636337, + "grad_norm": 0.008260090754341405, + "learning_rate": 7.205155746509131e-06, + "loss": 0.0, + "step": 5459 + }, + { + "epoch": 0.3518721402332925, + "grad_norm": 0.02113740518555449, + "learning_rate": 7.204439670605085e-06, + "loss": 0.0002, + "step": 5460 + }, + { + "epoch": 0.3519365856802217, + "grad_norm": 0.00722376485948921, + "learning_rate": 7.203723594701038e-06, + "loss": 0.0001, + "step": 5461 + }, + { + "epoch": 0.35200103112715087, + "grad_norm": 0.0833738754293488, + "learning_rate": 7.203007518796993e-06, + "loss": 0.0001, + "step": 5462 + }, + { + "epoch": 0.35206547657408005, + "grad_norm": 0.00026359184926858333, + "learning_rate": 7.202291442892947e-06, + "loss": 0.0, + "step": 5463 + }, + { + "epoch": 0.35212992202100923, + "grad_norm": 0.14296674674876744, + "learning_rate": 7.201575366988901e-06, + "loss": 0.0005, + "step": 5464 + }, + { + "epoch": 0.35219436746793836, + "grad_norm": 0.0016080032851724442, + "learning_rate": 7.2008592910848555e-06, + "loss": 0.0, + "step": 5465 + }, + { + "epoch": 0.35225881291486755, + "grad_norm": 0.0009047653986284783, + "learning_rate": 7.200143215180809e-06, + "loss": 0.0, + "step": 5466 + }, + { + "epoch": 0.35232325836179673, + "grad_norm": 0.0308639411701821, + "learning_rate": 7.199427139276763e-06, + "loss": 0.0001, + "step": 5467 + }, + { + "epoch": 0.3523877038087259, + "grad_norm": 0.4904011446517464, + "learning_rate": 7.1987110633727184e-06, + "loss": 0.0024, + "step": 5468 + }, + { + "epoch": 0.3524521492556551, + "grad_norm": 0.04524271869956901, + "learning_rate": 7.197994987468673e-06, + "loss": 0.0001, + "step": 5469 + }, + { + "epoch": 0.3525165947025843, + "grad_norm": 0.04035843557537505, + "learning_rate": 7.197278911564627e-06, + "loss": 0.0002, + "step": 5470 + }, + { + "epoch": 0.3525810401495134, + "grad_norm": 0.006825177577924673, + "learning_rate": 7.196562835660581e-06, + "loss": 0.0, + "step": 5471 + }, + { + "epoch": 0.3526454855964426, + "grad_norm": 0.13986985453114945, + "learning_rate": 7.195846759756535e-06, + "loss": 0.0006, + "step": 5472 + }, + { + "epoch": 0.3527099310433718, + "grad_norm": 0.0017970094026375993, + "learning_rate": 7.195130683852489e-06, + "loss": 0.0, + "step": 5473 + }, + { + "epoch": 0.35277437649030097, + "grad_norm": 0.006094273782439035, + "learning_rate": 7.194414607948443e-06, + "loss": 0.0, + "step": 5474 + }, + { + "epoch": 0.35283882193723015, + "grad_norm": 0.008172755866807201, + "learning_rate": 7.193698532044398e-06, + "loss": 0.0001, + "step": 5475 + }, + { + "epoch": 0.35290326738415934, + "grad_norm": 5.158375250050825, + "learning_rate": 7.192982456140352e-06, + "loss": 0.0934, + "step": 5476 + }, + { + "epoch": 0.35296771283108846, + "grad_norm": 0.22788083518064708, + "learning_rate": 7.192266380236305e-06, + "loss": 0.0008, + "step": 5477 + }, + { + "epoch": 0.35303215827801765, + "grad_norm": 1.3529695452401322, + "learning_rate": 7.19155030433226e-06, + "loss": 0.0108, + "step": 5478 + }, + { + "epoch": 0.35309660372494683, + "grad_norm": 0.001636092079940293, + "learning_rate": 7.190834228428214e-06, + "loss": 0.0, + "step": 5479 + }, + { + "epoch": 0.353161049171876, + "grad_norm": 1.787735879849132e-05, + "learning_rate": 7.190118152524168e-06, + "loss": 0.0, + "step": 5480 + }, + { + "epoch": 0.3532254946188052, + "grad_norm": 0.004427899683739982, + "learning_rate": 7.189402076620123e-06, + "loss": 0.0, + "step": 5481 + }, + { + "epoch": 0.35328994006573433, + "grad_norm": 0.0019082844941634223, + "learning_rate": 7.188686000716076e-06, + "loss": 0.0, + "step": 5482 + }, + { + "epoch": 0.3533543855126635, + "grad_norm": 0.0046977578647996515, + "learning_rate": 7.18796992481203e-06, + "loss": 0.0, + "step": 5483 + }, + { + "epoch": 0.3534188309595927, + "grad_norm": 0.040097359922366724, + "learning_rate": 7.187253848907985e-06, + "loss": 0.0002, + "step": 5484 + }, + { + "epoch": 0.3534832764065219, + "grad_norm": 0.001663645825594052, + "learning_rate": 7.186537773003939e-06, + "loss": 0.0, + "step": 5485 + }, + { + "epoch": 0.35354772185345107, + "grad_norm": 0.07505223049971804, + "learning_rate": 7.185821697099893e-06, + "loss": 0.0007, + "step": 5486 + }, + { + "epoch": 0.35361216730038025, + "grad_norm": 3.3424625948722473, + "learning_rate": 7.185105621195847e-06, + "loss": 0.0171, + "step": 5487 + }, + { + "epoch": 0.3536766127473094, + "grad_norm": 0.002825290381998295, + "learning_rate": 7.184389545291801e-06, + "loss": 0.0, + "step": 5488 + }, + { + "epoch": 0.35374105819423857, + "grad_norm": 0.009157336320531529, + "learning_rate": 7.183673469387755e-06, + "loss": 0.0, + "step": 5489 + }, + { + "epoch": 0.35380550364116775, + "grad_norm": 0.002773670880667257, + "learning_rate": 7.18295739348371e-06, + "loss": 0.0, + "step": 5490 + }, + { + "epoch": 0.35386994908809694, + "grad_norm": 0.08855313046890825, + "learning_rate": 7.182241317579665e-06, + "loss": 0.0003, + "step": 5491 + }, + { + "epoch": 0.3539343945350261, + "grad_norm": 0.004166906161995427, + "learning_rate": 7.181525241675619e-06, + "loss": 0.0, + "step": 5492 + }, + { + "epoch": 0.35399883998195525, + "grad_norm": 0.005168623229808937, + "learning_rate": 7.1808091657715725e-06, + "loss": 0.0, + "step": 5493 + }, + { + "epoch": 0.35406328542888443, + "grad_norm": 0.003997300840861494, + "learning_rate": 7.180093089867527e-06, + "loss": 0.0, + "step": 5494 + }, + { + "epoch": 0.3541277308758136, + "grad_norm": 0.009798578823934631, + "learning_rate": 7.179377013963481e-06, + "loss": 0.0, + "step": 5495 + }, + { + "epoch": 0.3541921763227428, + "grad_norm": 0.003605997833364615, + "learning_rate": 7.178660938059435e-06, + "loss": 0.0, + "step": 5496 + }, + { + "epoch": 0.354256621769672, + "grad_norm": 0.39026054546592165, + "learning_rate": 7.17794486215539e-06, + "loss": 0.0028, + "step": 5497 + }, + { + "epoch": 0.35432106721660117, + "grad_norm": 0.22522868316952996, + "learning_rate": 7.177228786251343e-06, + "loss": 0.0006, + "step": 5498 + }, + { + "epoch": 0.3543855126635303, + "grad_norm": 0.0007982156110889913, + "learning_rate": 7.1765127103472975e-06, + "loss": 0.0, + "step": 5499 + }, + { + "epoch": 0.3544499581104595, + "grad_norm": 0.26714035756006893, + "learning_rate": 7.175796634443252e-06, + "loss": 0.002, + "step": 5500 + }, + { + "epoch": 0.35451440355738867, + "grad_norm": 0.0017835782053076952, + "learning_rate": 7.175080558539206e-06, + "loss": 0.0, + "step": 5501 + }, + { + "epoch": 0.35457884900431785, + "grad_norm": 0.001526153733418559, + "learning_rate": 7.17436448263516e-06, + "loss": 0.0, + "step": 5502 + }, + { + "epoch": 0.35464329445124704, + "grad_norm": 0.006026490988079468, + "learning_rate": 7.173648406731114e-06, + "loss": 0.0, + "step": 5503 + }, + { + "epoch": 0.35470773989817617, + "grad_norm": 0.017582708992891122, + "learning_rate": 7.172932330827068e-06, + "loss": 0.0015, + "step": 5504 + }, + { + "epoch": 0.35477218534510535, + "grad_norm": 0.30581867098200244, + "learning_rate": 7.172216254923022e-06, + "loss": 0.0019, + "step": 5505 + }, + { + "epoch": 0.35483663079203454, + "grad_norm": 0.0036598724000132385, + "learning_rate": 7.171500179018977e-06, + "loss": 0.0, + "step": 5506 + }, + { + "epoch": 0.3549010762389637, + "grad_norm": 0.0026242730294129815, + "learning_rate": 7.17078410311493e-06, + "loss": 0.0, + "step": 5507 + }, + { + "epoch": 0.3549655216858929, + "grad_norm": 0.0011075331112727902, + "learning_rate": 7.1700680272108844e-06, + "loss": 0.0, + "step": 5508 + }, + { + "epoch": 0.3550299671328221, + "grad_norm": 0.19952681974901088, + "learning_rate": 7.169351951306839e-06, + "loss": 0.0004, + "step": 5509 + }, + { + "epoch": 0.3550944125797512, + "grad_norm": 0.000878946801695103, + "learning_rate": 7.168635875402793e-06, + "loss": 0.0, + "step": 5510 + }, + { + "epoch": 0.3551588580266804, + "grad_norm": 0.24027537949617142, + "learning_rate": 7.167919799498747e-06, + "loss": 0.0018, + "step": 5511 + }, + { + "epoch": 0.3552233034736096, + "grad_norm": 0.015573056221064105, + "learning_rate": 7.167203723594701e-06, + "loss": 0.0, + "step": 5512 + }, + { + "epoch": 0.35528774892053877, + "grad_norm": 0.012980932203767375, + "learning_rate": 7.166487647690655e-06, + "loss": 0.0, + "step": 5513 + }, + { + "epoch": 0.35535219436746796, + "grad_norm": 0.02937732815710757, + "learning_rate": 7.165771571786609e-06, + "loss": 0.0001, + "step": 5514 + }, + { + "epoch": 0.35541663981439714, + "grad_norm": 0.003133471086012606, + "learning_rate": 7.1650554958825645e-06, + "loss": 0.0, + "step": 5515 + }, + { + "epoch": 0.35548108526132627, + "grad_norm": 0.0001353438010940419, + "learning_rate": 7.164339419978519e-06, + "loss": 0.0, + "step": 5516 + }, + { + "epoch": 0.35554553070825545, + "grad_norm": 0.0015784288790910163, + "learning_rate": 7.163623344074473e-06, + "loss": 0.0, + "step": 5517 + }, + { + "epoch": 0.35560997615518464, + "grad_norm": 0.25518015621623313, + "learning_rate": 7.1629072681704274e-06, + "loss": 0.0006, + "step": 5518 + }, + { + "epoch": 0.3556744216021138, + "grad_norm": 0.05870437907137682, + "learning_rate": 7.162191192266381e-06, + "loss": 0.0014, + "step": 5519 + }, + { + "epoch": 0.355738867049043, + "grad_norm": 0.008105418444959963, + "learning_rate": 7.161475116362335e-06, + "loss": 0.0, + "step": 5520 + }, + { + "epoch": 0.35580331249597213, + "grad_norm": 0.07082251511150572, + "learning_rate": 7.1607590404582895e-06, + "loss": 0.002, + "step": 5521 + }, + { + "epoch": 0.3558677579429013, + "grad_norm": 0.025880416565134647, + "learning_rate": 7.160042964554244e-06, + "loss": 0.0, + "step": 5522 + }, + { + "epoch": 0.3559322033898305, + "grad_norm": 0.0074415682950392205, + "learning_rate": 7.159326888650197e-06, + "loss": 0.0, + "step": 5523 + }, + { + "epoch": 0.3559966488367597, + "grad_norm": 0.010568843378593547, + "learning_rate": 7.1586108127461515e-06, + "loss": 0.0, + "step": 5524 + }, + { + "epoch": 0.3560610942836889, + "grad_norm": 0.02883981875404712, + "learning_rate": 7.157894736842106e-06, + "loss": 0.0001, + "step": 5525 + }, + { + "epoch": 0.35612553973061806, + "grad_norm": 0.1443228572447026, + "learning_rate": 7.15717866093806e-06, + "loss": 0.0004, + "step": 5526 + }, + { + "epoch": 0.3561899851775472, + "grad_norm": 0.1543692648358986, + "learning_rate": 7.1564625850340144e-06, + "loss": 0.0007, + "step": 5527 + }, + { + "epoch": 0.35625443062447637, + "grad_norm": 0.006400996936471855, + "learning_rate": 7.155746509129968e-06, + "loss": 0.0, + "step": 5528 + }, + { + "epoch": 0.35631887607140555, + "grad_norm": 0.023857215512726463, + "learning_rate": 7.155030433225922e-06, + "loss": 0.0, + "step": 5529 + }, + { + "epoch": 0.35638332151833474, + "grad_norm": 0.0010024296612209454, + "learning_rate": 7.1543143573218765e-06, + "loss": 0.0, + "step": 5530 + }, + { + "epoch": 0.3564477669652639, + "grad_norm": 0.46602568507627373, + "learning_rate": 7.153598281417831e-06, + "loss": 0.0007, + "step": 5531 + }, + { + "epoch": 0.35651221241219305, + "grad_norm": 0.007157312719751308, + "learning_rate": 7.152882205513785e-06, + "loss": 0.0, + "step": 5532 + }, + { + "epoch": 0.35657665785912224, + "grad_norm": 0.0022695185644662445, + "learning_rate": 7.1521661296097385e-06, + "loss": 0.0, + "step": 5533 + }, + { + "epoch": 0.3566411033060514, + "grad_norm": 0.0007219926736973189, + "learning_rate": 7.151450053705693e-06, + "loss": 0.0, + "step": 5534 + }, + { + "epoch": 0.3567055487529806, + "grad_norm": 0.007866555084452822, + "learning_rate": 7.150733977801647e-06, + "loss": 0.0, + "step": 5535 + }, + { + "epoch": 0.3567699941999098, + "grad_norm": 0.001497095775340077, + "learning_rate": 7.150017901897601e-06, + "loss": 0.0, + "step": 5536 + }, + { + "epoch": 0.356834439646839, + "grad_norm": 0.0021093867092792707, + "learning_rate": 7.149301825993556e-06, + "loss": 0.0, + "step": 5537 + }, + { + "epoch": 0.3568988850937681, + "grad_norm": 0.0030429109849377583, + "learning_rate": 7.148585750089511e-06, + "loss": 0.0, + "step": 5538 + }, + { + "epoch": 0.3569633305406973, + "grad_norm": 0.005652474576479686, + "learning_rate": 7.147869674185464e-06, + "loss": 0.0, + "step": 5539 + }, + { + "epoch": 0.3570277759876265, + "grad_norm": 0.0002939338741031065, + "learning_rate": 7.147153598281419e-06, + "loss": 0.0, + "step": 5540 + }, + { + "epoch": 0.35709222143455566, + "grad_norm": 0.0005272829418229353, + "learning_rate": 7.146437522377373e-06, + "loss": 0.0, + "step": 5541 + }, + { + "epoch": 0.35715666688148484, + "grad_norm": 0.0014691027056069703, + "learning_rate": 7.145721446473327e-06, + "loss": 0.0, + "step": 5542 + }, + { + "epoch": 0.35722111232841397, + "grad_norm": 0.0019714445798182743, + "learning_rate": 7.1450053705692815e-06, + "loss": 0.0, + "step": 5543 + }, + { + "epoch": 0.35728555777534315, + "grad_norm": 0.0017453958343376377, + "learning_rate": 7.144289294665235e-06, + "loss": 0.0, + "step": 5544 + }, + { + "epoch": 0.35735000322227234, + "grad_norm": 0.009047168410867279, + "learning_rate": 7.143573218761189e-06, + "loss": 0.0, + "step": 5545 + }, + { + "epoch": 0.3574144486692015, + "grad_norm": 0.2842775943064396, + "learning_rate": 7.1428571428571436e-06, + "loss": 0.0018, + "step": 5546 + }, + { + "epoch": 0.3574788941161307, + "grad_norm": 0.001421204330789419, + "learning_rate": 7.142141066953098e-06, + "loss": 0.0, + "step": 5547 + }, + { + "epoch": 0.3575433395630599, + "grad_norm": 0.003525207027181184, + "learning_rate": 7.141424991049052e-06, + "loss": 0.0, + "step": 5548 + }, + { + "epoch": 0.357607785009989, + "grad_norm": 0.05103926720907329, + "learning_rate": 7.140708915145006e-06, + "loss": 0.0001, + "step": 5549 + }, + { + "epoch": 0.3576722304569182, + "grad_norm": 0.5437392661251964, + "learning_rate": 7.13999283924096e-06, + "loss": 0.001, + "step": 5550 + }, + { + "epoch": 0.3577366759038474, + "grad_norm": 0.0900597256928726, + "learning_rate": 7.139276763336914e-06, + "loss": 0.0006, + "step": 5551 + }, + { + "epoch": 0.3578011213507766, + "grad_norm": 0.0013100415151973432, + "learning_rate": 7.1385606874328685e-06, + "loss": 0.0, + "step": 5552 + }, + { + "epoch": 0.35786556679770576, + "grad_norm": 0.287950904363612, + "learning_rate": 7.137844611528823e-06, + "loss": 0.0014, + "step": 5553 + }, + { + "epoch": 0.35793001224463494, + "grad_norm": 0.0008622037652708815, + "learning_rate": 7.137128535624776e-06, + "loss": 0.0, + "step": 5554 + }, + { + "epoch": 0.35799445769156407, + "grad_norm": 0.0015019054203790178, + "learning_rate": 7.1364124597207306e-06, + "loss": 0.0, + "step": 5555 + }, + { + "epoch": 0.35805890313849326, + "grad_norm": 0.020955137988755305, + "learning_rate": 7.135696383816685e-06, + "loss": 0.0001, + "step": 5556 + }, + { + "epoch": 0.35812334858542244, + "grad_norm": 0.3577950619108931, + "learning_rate": 7.134980307912639e-06, + "loss": 0.0015, + "step": 5557 + }, + { + "epoch": 0.3581877940323516, + "grad_norm": 0.0007089497421827163, + "learning_rate": 7.1342642320085935e-06, + "loss": 0.0, + "step": 5558 + }, + { + "epoch": 0.3582522394792808, + "grad_norm": 0.0246501357034617, + "learning_rate": 7.133548156104547e-06, + "loss": 0.0001, + "step": 5559 + }, + { + "epoch": 0.35831668492620994, + "grad_norm": 0.5051342395314098, + "learning_rate": 7.132832080200501e-06, + "loss": 0.0037, + "step": 5560 + }, + { + "epoch": 0.3583811303731391, + "grad_norm": 0.02137042368144486, + "learning_rate": 7.1321160042964555e-06, + "loss": 0.0002, + "step": 5561 + }, + { + "epoch": 0.3584455758200683, + "grad_norm": 0.0010559639345811118, + "learning_rate": 7.131399928392411e-06, + "loss": 0.0, + "step": 5562 + }, + { + "epoch": 0.3585100212669975, + "grad_norm": 0.559211362846977, + "learning_rate": 7.130683852488365e-06, + "loss": 0.0012, + "step": 5563 + }, + { + "epoch": 0.3585744667139267, + "grad_norm": 0.017202826393729526, + "learning_rate": 7.129967776584319e-06, + "loss": 0.0, + "step": 5564 + }, + { + "epoch": 0.35863891216085586, + "grad_norm": 0.005808692085628072, + "learning_rate": 7.129251700680273e-06, + "loss": 0.0, + "step": 5565 + }, + { + "epoch": 0.358703357607785, + "grad_norm": 0.06375497502894896, + "learning_rate": 7.128535624776227e-06, + "loss": 0.0002, + "step": 5566 + }, + { + "epoch": 0.3587678030547142, + "grad_norm": 0.0058093430870949725, + "learning_rate": 7.127819548872181e-06, + "loss": 0.0, + "step": 5567 + }, + { + "epoch": 0.35883224850164336, + "grad_norm": 0.032179049666154896, + "learning_rate": 7.127103472968136e-06, + "loss": 0.0, + "step": 5568 + }, + { + "epoch": 0.35889669394857254, + "grad_norm": 0.03255481688625018, + "learning_rate": 7.12638739706409e-06, + "loss": 0.0, + "step": 5569 + }, + { + "epoch": 0.3589611393955017, + "grad_norm": 0.03756726107666502, + "learning_rate": 7.125671321160043e-06, + "loss": 0.0002, + "step": 5570 + }, + { + "epoch": 0.35902558484243086, + "grad_norm": 0.0024971589116067573, + "learning_rate": 7.124955245255998e-06, + "loss": 0.0, + "step": 5571 + }, + { + "epoch": 0.35909003028936004, + "grad_norm": 0.00042623072682908026, + "learning_rate": 7.124239169351952e-06, + "loss": 0.0, + "step": 5572 + }, + { + "epoch": 0.3591544757362892, + "grad_norm": 0.0017245227030109347, + "learning_rate": 7.123523093447906e-06, + "loss": 0.0, + "step": 5573 + }, + { + "epoch": 0.3592189211832184, + "grad_norm": 3.358811867999132, + "learning_rate": 7.1228070175438605e-06, + "loss": 0.007, + "step": 5574 + }, + { + "epoch": 0.3592833666301476, + "grad_norm": 0.005075338771603977, + "learning_rate": 7.122090941639814e-06, + "loss": 0.0, + "step": 5575 + }, + { + "epoch": 0.3593478120770768, + "grad_norm": 0.0017143627481109437, + "learning_rate": 7.121374865735768e-06, + "loss": 0.0, + "step": 5576 + }, + { + "epoch": 0.3594122575240059, + "grad_norm": 0.0020707422092871275, + "learning_rate": 7.120658789831723e-06, + "loss": 0.0, + "step": 5577 + }, + { + "epoch": 0.3594767029709351, + "grad_norm": 0.0007588354010330688, + "learning_rate": 7.119942713927677e-06, + "loss": 0.0, + "step": 5578 + }, + { + "epoch": 0.3595411484178643, + "grad_norm": 0.08036000371090321, + "learning_rate": 7.119226638023631e-06, + "loss": 0.0004, + "step": 5579 + }, + { + "epoch": 0.35960559386479346, + "grad_norm": 0.00012982236447759736, + "learning_rate": 7.118510562119585e-06, + "loss": 0.0, + "step": 5580 + }, + { + "epoch": 0.35967003931172264, + "grad_norm": 0.029504824766621456, + "learning_rate": 7.117794486215539e-06, + "loss": 0.0002, + "step": 5581 + }, + { + "epoch": 0.3597344847586518, + "grad_norm": 0.0033183237279156897, + "learning_rate": 7.117078410311493e-06, + "loss": 0.0, + "step": 5582 + }, + { + "epoch": 0.35979893020558096, + "grad_norm": 0.004723393791790135, + "learning_rate": 7.1163623344074475e-06, + "loss": 0.0, + "step": 5583 + }, + { + "epoch": 0.35986337565251014, + "grad_norm": 0.08625253997566465, + "learning_rate": 7.115646258503401e-06, + "loss": 0.0003, + "step": 5584 + }, + { + "epoch": 0.3599278210994393, + "grad_norm": 0.009677660664644353, + "learning_rate": 7.114930182599357e-06, + "loss": 0.0, + "step": 5585 + }, + { + "epoch": 0.3599922665463685, + "grad_norm": 0.00018866588452496163, + "learning_rate": 7.1142141066953104e-06, + "loss": 0.0, + "step": 5586 + }, + { + "epoch": 0.3600567119932977, + "grad_norm": 0.0016751659241547368, + "learning_rate": 7.113498030791265e-06, + "loss": 0.0, + "step": 5587 + }, + { + "epoch": 0.3601211574402268, + "grad_norm": 0.0017755997623683422, + "learning_rate": 7.112781954887219e-06, + "loss": 0.0, + "step": 5588 + }, + { + "epoch": 0.360185602887156, + "grad_norm": 0.013514959361658361, + "learning_rate": 7.112065878983173e-06, + "loss": 0.0, + "step": 5589 + }, + { + "epoch": 0.3602500483340852, + "grad_norm": 0.00037803318271111, + "learning_rate": 7.111349803079128e-06, + "loss": 0.0, + "step": 5590 + }, + { + "epoch": 0.3603144937810144, + "grad_norm": 0.050001389420844165, + "learning_rate": 7.110633727175081e-06, + "loss": 0.0002, + "step": 5591 + }, + { + "epoch": 0.36037893922794356, + "grad_norm": 0.0014661494256198607, + "learning_rate": 7.109917651271035e-06, + "loss": 0.0, + "step": 5592 + }, + { + "epoch": 0.36044338467487275, + "grad_norm": 0.0172112343024897, + "learning_rate": 7.10920157536699e-06, + "loss": 0.0, + "step": 5593 + }, + { + "epoch": 0.3605078301218019, + "grad_norm": 0.003739827721603054, + "learning_rate": 7.108485499462944e-06, + "loss": 0.0, + "step": 5594 + }, + { + "epoch": 0.36057227556873106, + "grad_norm": 0.0035318203953639943, + "learning_rate": 7.107769423558898e-06, + "loss": 0.0, + "step": 5595 + }, + { + "epoch": 0.36063672101566024, + "grad_norm": 0.024730250178133278, + "learning_rate": 7.107053347654852e-06, + "loss": 0.0, + "step": 5596 + }, + { + "epoch": 0.36070116646258943, + "grad_norm": 0.00018136025456127962, + "learning_rate": 7.106337271750806e-06, + "loss": 0.0, + "step": 5597 + }, + { + "epoch": 0.3607656119095186, + "grad_norm": 0.006373948769455688, + "learning_rate": 7.10562119584676e-06, + "loss": 0.0015, + "step": 5598 + }, + { + "epoch": 0.36083005735644774, + "grad_norm": 0.0007691793894941653, + "learning_rate": 7.104905119942715e-06, + "loss": 0.0, + "step": 5599 + }, + { + "epoch": 0.3608945028033769, + "grad_norm": 0.009427548311023269, + "learning_rate": 7.104189044038668e-06, + "loss": 0.0, + "step": 5600 + }, + { + "epoch": 0.3609589482503061, + "grad_norm": 0.14387279738820716, + "learning_rate": 7.103472968134622e-06, + "loss": 0.0001, + "step": 5601 + }, + { + "epoch": 0.3610233936972353, + "grad_norm": 0.060341764475685185, + "learning_rate": 7.102756892230577e-06, + "loss": 0.0016, + "step": 5602 + }, + { + "epoch": 0.3610878391441645, + "grad_norm": 0.00019170525434062738, + "learning_rate": 7.102040816326531e-06, + "loss": 0.0, + "step": 5603 + }, + { + "epoch": 0.36115228459109366, + "grad_norm": 0.018135760065123022, + "learning_rate": 7.101324740422485e-06, + "loss": 0.0, + "step": 5604 + }, + { + "epoch": 0.3612167300380228, + "grad_norm": 0.07888118928994316, + "learning_rate": 7.100608664518439e-06, + "loss": 0.0001, + "step": 5605 + }, + { + "epoch": 0.361281175484952, + "grad_norm": 0.0009429972317331661, + "learning_rate": 7.099892588614393e-06, + "loss": 0.0, + "step": 5606 + }, + { + "epoch": 0.36134562093188116, + "grad_norm": 0.0005612899489036782, + "learning_rate": 7.099176512710347e-06, + "loss": 0.0, + "step": 5607 + }, + { + "epoch": 0.36141006637881035, + "grad_norm": 0.0023027160039246065, + "learning_rate": 7.0984604368063025e-06, + "loss": 0.0, + "step": 5608 + }, + { + "epoch": 0.36147451182573953, + "grad_norm": 0.0004911979683064441, + "learning_rate": 7.097744360902257e-06, + "loss": 0.0, + "step": 5609 + }, + { + "epoch": 0.36153895727266866, + "grad_norm": 0.15138024304600262, + "learning_rate": 7.097028284998211e-06, + "loss": 0.0017, + "step": 5610 + }, + { + "epoch": 0.36160340271959784, + "grad_norm": 0.007569795518407454, + "learning_rate": 7.096312209094165e-06, + "loss": 0.0, + "step": 5611 + }, + { + "epoch": 0.36166784816652703, + "grad_norm": 0.009712033820648556, + "learning_rate": 7.095596133190119e-06, + "loss": 0.0, + "step": 5612 + }, + { + "epoch": 0.3617322936134562, + "grad_norm": 0.12356386491916947, + "learning_rate": 7.094880057286073e-06, + "loss": 0.0003, + "step": 5613 + }, + { + "epoch": 0.3617967390603854, + "grad_norm": 0.0013265063818277874, + "learning_rate": 7.094163981382027e-06, + "loss": 0.0, + "step": 5614 + }, + { + "epoch": 0.3618611845073146, + "grad_norm": 0.0016747608507945689, + "learning_rate": 7.093447905477982e-06, + "loss": 0.0, + "step": 5615 + }, + { + "epoch": 0.3619256299542437, + "grad_norm": 0.0008821640464383144, + "learning_rate": 7.092731829573936e-06, + "loss": 0.0, + "step": 5616 + }, + { + "epoch": 0.3619900754011729, + "grad_norm": 0.00030392545722019647, + "learning_rate": 7.0920157536698895e-06, + "loss": 0.0, + "step": 5617 + }, + { + "epoch": 0.3620545208481021, + "grad_norm": 0.0054529868195787165, + "learning_rate": 7.091299677765844e-06, + "loss": 0.0, + "step": 5618 + }, + { + "epoch": 0.36211896629503126, + "grad_norm": 0.13914422730415102, + "learning_rate": 7.090583601861798e-06, + "loss": 0.0001, + "step": 5619 + }, + { + "epoch": 0.36218341174196045, + "grad_norm": 0.02705458926689199, + "learning_rate": 7.089867525957752e-06, + "loss": 0.0001, + "step": 5620 + }, + { + "epoch": 0.36224785718888963, + "grad_norm": 0.13120434966247765, + "learning_rate": 7.089151450053706e-06, + "loss": 0.0008, + "step": 5621 + }, + { + "epoch": 0.36231230263581876, + "grad_norm": 0.12314011810163554, + "learning_rate": 7.08843537414966e-06, + "loss": 0.0015, + "step": 5622 + }, + { + "epoch": 0.36237674808274795, + "grad_norm": 0.01434271131636332, + "learning_rate": 7.087719298245614e-06, + "loss": 0.0001, + "step": 5623 + }, + { + "epoch": 0.36244119352967713, + "grad_norm": 0.021528547431360342, + "learning_rate": 7.087003222341569e-06, + "loss": 0.0, + "step": 5624 + }, + { + "epoch": 0.3625056389766063, + "grad_norm": 0.01073329697163935, + "learning_rate": 7.086287146437523e-06, + "loss": 0.0, + "step": 5625 + }, + { + "epoch": 0.3625700844235355, + "grad_norm": 1.960513490832337, + "learning_rate": 7.0855710705334764e-06, + "loss": 0.0038, + "step": 5626 + }, + { + "epoch": 0.36263452987046463, + "grad_norm": 0.025460006296816034, + "learning_rate": 7.084854994629431e-06, + "loss": 0.0, + "step": 5627 + }, + { + "epoch": 0.3626989753173938, + "grad_norm": 0.20646837479210997, + "learning_rate": 7.084138918725385e-06, + "loss": 0.0004, + "step": 5628 + }, + { + "epoch": 0.362763420764323, + "grad_norm": 0.25142192289102916, + "learning_rate": 7.083422842821339e-06, + "loss": 0.001, + "step": 5629 + }, + { + "epoch": 0.3628278662112522, + "grad_norm": 0.006383531148001112, + "learning_rate": 7.082706766917294e-06, + "loss": 0.0, + "step": 5630 + }, + { + "epoch": 0.36289231165818137, + "grad_norm": 0.0029308486291541765, + "learning_rate": 7.081990691013247e-06, + "loss": 0.0, + "step": 5631 + }, + { + "epoch": 0.36295675710511055, + "grad_norm": 0.0007120470554510387, + "learning_rate": 7.081274615109203e-06, + "loss": 0.0, + "step": 5632 + }, + { + "epoch": 0.3630212025520397, + "grad_norm": 0.0012225325945689409, + "learning_rate": 7.0805585392051565e-06, + "loss": 0.0, + "step": 5633 + }, + { + "epoch": 0.36308564799896886, + "grad_norm": 0.019594588990991643, + "learning_rate": 7.079842463301111e-06, + "loss": 0.0001, + "step": 5634 + }, + { + "epoch": 0.36315009344589805, + "grad_norm": 0.003277557610881603, + "learning_rate": 7.079126387397065e-06, + "loss": 0.0, + "step": 5635 + }, + { + "epoch": 0.36321453889282723, + "grad_norm": 0.04047106517839488, + "learning_rate": 7.0784103114930194e-06, + "loss": 0.0001, + "step": 5636 + }, + { + "epoch": 0.3632789843397564, + "grad_norm": 0.006228676430910245, + "learning_rate": 7.077694235588973e-06, + "loss": 0.0, + "step": 5637 + }, + { + "epoch": 0.36334342978668555, + "grad_norm": 0.004968203124256135, + "learning_rate": 7.076978159684927e-06, + "loss": 0.0, + "step": 5638 + }, + { + "epoch": 0.36340787523361473, + "grad_norm": 0.004763822483671288, + "learning_rate": 7.0762620837808815e-06, + "loss": 0.0, + "step": 5639 + }, + { + "epoch": 0.3634723206805439, + "grad_norm": 0.47286776074125114, + "learning_rate": 7.075546007876836e-06, + "loss": 0.0032, + "step": 5640 + }, + { + "epoch": 0.3635367661274731, + "grad_norm": 0.21851157069067817, + "learning_rate": 7.07482993197279e-06, + "loss": 0.0024, + "step": 5641 + }, + { + "epoch": 0.3636012115744023, + "grad_norm": 0.000831134689962334, + "learning_rate": 7.0741138560687435e-06, + "loss": 0.0, + "step": 5642 + }, + { + "epoch": 0.36366565702133147, + "grad_norm": 0.0012023844883557063, + "learning_rate": 7.073397780164698e-06, + "loss": 0.0, + "step": 5643 + }, + { + "epoch": 0.3637301024682606, + "grad_norm": 0.00047426837176558134, + "learning_rate": 7.072681704260652e-06, + "loss": 0.0, + "step": 5644 + }, + { + "epoch": 0.3637945479151898, + "grad_norm": 0.0028780370890813933, + "learning_rate": 7.0719656283566064e-06, + "loss": 0.0, + "step": 5645 + }, + { + "epoch": 0.36385899336211897, + "grad_norm": 0.16501946869057837, + "learning_rate": 7.071249552452561e-06, + "loss": 0.0012, + "step": 5646 + }, + { + "epoch": 0.36392343880904815, + "grad_norm": 0.26383039160046534, + "learning_rate": 7.070533476548514e-06, + "loss": 0.0006, + "step": 5647 + }, + { + "epoch": 0.36398788425597733, + "grad_norm": 0.7559218269984767, + "learning_rate": 7.0698174006444685e-06, + "loss": 0.0082, + "step": 5648 + }, + { + "epoch": 0.36405232970290646, + "grad_norm": 0.11576493211318237, + "learning_rate": 7.069101324740423e-06, + "loss": 0.001, + "step": 5649 + }, + { + "epoch": 0.36411677514983565, + "grad_norm": 0.3949228552741045, + "learning_rate": 7.068385248836377e-06, + "loss": 0.0015, + "step": 5650 + }, + { + "epoch": 0.36418122059676483, + "grad_norm": 0.0030827061663709814, + "learning_rate": 7.067669172932331e-06, + "loss": 0.0, + "step": 5651 + }, + { + "epoch": 0.364245666043694, + "grad_norm": 0.05707504512960632, + "learning_rate": 7.066953097028285e-06, + "loss": 0.0044, + "step": 5652 + }, + { + "epoch": 0.3643101114906232, + "grad_norm": 0.0029535415213127816, + "learning_rate": 7.066237021124239e-06, + "loss": 0.0, + "step": 5653 + }, + { + "epoch": 0.3643745569375524, + "grad_norm": 0.6487911363102642, + "learning_rate": 7.065520945220193e-06, + "loss": 0.0012, + "step": 5654 + }, + { + "epoch": 0.3644390023844815, + "grad_norm": 8.800743879255332e-05, + "learning_rate": 7.0648048693161486e-06, + "loss": 0.0, + "step": 5655 + }, + { + "epoch": 0.3645034478314107, + "grad_norm": 0.003355957561265595, + "learning_rate": 7.064088793412103e-06, + "loss": 0.0, + "step": 5656 + }, + { + "epoch": 0.3645678932783399, + "grad_norm": 0.007201152011268038, + "learning_rate": 7.063372717508057e-06, + "loss": 0.0, + "step": 5657 + }, + { + "epoch": 0.36463233872526907, + "grad_norm": 0.021779140689082172, + "learning_rate": 7.062656641604011e-06, + "loss": 0.0001, + "step": 5658 + }, + { + "epoch": 0.36469678417219825, + "grad_norm": 0.1626429279984157, + "learning_rate": 7.061940565699965e-06, + "loss": 0.0009, + "step": 5659 + }, + { + "epoch": 0.36476122961912744, + "grad_norm": 0.8840851851432028, + "learning_rate": 7.061224489795919e-06, + "loss": 0.0044, + "step": 5660 + }, + { + "epoch": 0.36482567506605657, + "grad_norm": 0.3878680980398993, + "learning_rate": 7.0605084138918735e-06, + "loss": 0.0022, + "step": 5661 + }, + { + "epoch": 0.36489012051298575, + "grad_norm": 0.010117391982138556, + "learning_rate": 7.059792337987828e-06, + "loss": 0.0, + "step": 5662 + }, + { + "epoch": 0.36495456595991493, + "grad_norm": 0.05438971549517224, + "learning_rate": 7.059076262083781e-06, + "loss": 0.0002, + "step": 5663 + }, + { + "epoch": 0.3650190114068441, + "grad_norm": 0.0023272584511669445, + "learning_rate": 7.0583601861797356e-06, + "loss": 0.0, + "step": 5664 + }, + { + "epoch": 0.3650834568537733, + "grad_norm": 0.008117755104337068, + "learning_rate": 7.05764411027569e-06, + "loss": 0.0, + "step": 5665 + }, + { + "epoch": 0.36514790230070243, + "grad_norm": 0.1603883944921648, + "learning_rate": 7.056928034371644e-06, + "loss": 0.0011, + "step": 5666 + }, + { + "epoch": 0.3652123477476316, + "grad_norm": 0.1780197137734566, + "learning_rate": 7.0562119584675985e-06, + "loss": 0.0018, + "step": 5667 + }, + { + "epoch": 0.3652767931945608, + "grad_norm": 0.05630731138250431, + "learning_rate": 7.055495882563552e-06, + "loss": 0.0001, + "step": 5668 + }, + { + "epoch": 0.36534123864149, + "grad_norm": 0.004059539192963445, + "learning_rate": 7.054779806659506e-06, + "loss": 0.0, + "step": 5669 + }, + { + "epoch": 0.36540568408841917, + "grad_norm": 0.2651977187882061, + "learning_rate": 7.0540637307554605e-06, + "loss": 0.0011, + "step": 5670 + }, + { + "epoch": 0.36547012953534835, + "grad_norm": 0.18956324729432547, + "learning_rate": 7.053347654851415e-06, + "loss": 0.0001, + "step": 5671 + }, + { + "epoch": 0.3655345749822775, + "grad_norm": 0.06572701027378827, + "learning_rate": 7.052631578947369e-06, + "loss": 0.0001, + "step": 5672 + }, + { + "epoch": 0.36559902042920667, + "grad_norm": 0.0016957948449696752, + "learning_rate": 7.0519155030433226e-06, + "loss": 0.0, + "step": 5673 + }, + { + "epoch": 0.36566346587613585, + "grad_norm": 0.017716966280276046, + "learning_rate": 7.051199427139277e-06, + "loss": 0.0001, + "step": 5674 + }, + { + "epoch": 0.36572791132306504, + "grad_norm": 0.001855080590519422, + "learning_rate": 7.050483351235231e-06, + "loss": 0.0, + "step": 5675 + }, + { + "epoch": 0.3657923567699942, + "grad_norm": 0.0014049639301356737, + "learning_rate": 7.0497672753311855e-06, + "loss": 0.0, + "step": 5676 + }, + { + "epoch": 0.36585680221692335, + "grad_norm": 0.006962279213334821, + "learning_rate": 7.04905119942714e-06, + "loss": 0.0, + "step": 5677 + }, + { + "epoch": 0.36592124766385253, + "grad_norm": 0.020090188529929526, + "learning_rate": 7.048335123523095e-06, + "loss": 0.0, + "step": 5678 + }, + { + "epoch": 0.3659856931107817, + "grad_norm": 0.10503809921713848, + "learning_rate": 7.047619047619048e-06, + "loss": 0.0002, + "step": 5679 + }, + { + "epoch": 0.3660501385577109, + "grad_norm": 0.004166492745336915, + "learning_rate": 7.046902971715003e-06, + "loss": 0.0, + "step": 5680 + }, + { + "epoch": 0.3661145840046401, + "grad_norm": 0.0009582915293647691, + "learning_rate": 7.046186895810957e-06, + "loss": 0.0, + "step": 5681 + }, + { + "epoch": 0.36617902945156927, + "grad_norm": 0.037545327208160215, + "learning_rate": 7.045470819906911e-06, + "loss": 0.0001, + "step": 5682 + }, + { + "epoch": 0.3662434748984984, + "grad_norm": 0.10824613353346786, + "learning_rate": 7.0447547440028655e-06, + "loss": 0.0003, + "step": 5683 + }, + { + "epoch": 0.3663079203454276, + "grad_norm": 0.0014164057504567702, + "learning_rate": 7.044038668098819e-06, + "loss": 0.0, + "step": 5684 + }, + { + "epoch": 0.36637236579235677, + "grad_norm": 0.729636427012959, + "learning_rate": 7.043322592194773e-06, + "loss": 0.0045, + "step": 5685 + }, + { + "epoch": 0.36643681123928595, + "grad_norm": 0.05235399263431841, + "learning_rate": 7.042606516290728e-06, + "loss": 0.0001, + "step": 5686 + }, + { + "epoch": 0.36650125668621514, + "grad_norm": 0.0010319380225699353, + "learning_rate": 7.041890440386682e-06, + "loss": 0.0, + "step": 5687 + }, + { + "epoch": 0.36656570213314427, + "grad_norm": 0.0012176516510191088, + "learning_rate": 7.041174364482636e-06, + "loss": 0.0, + "step": 5688 + }, + { + "epoch": 0.36663014758007345, + "grad_norm": 0.0004752984702248449, + "learning_rate": 7.04045828857859e-06, + "loss": 0.0, + "step": 5689 + }, + { + "epoch": 0.36669459302700264, + "grad_norm": 0.019731819460332183, + "learning_rate": 7.039742212674544e-06, + "loss": 0.0001, + "step": 5690 + }, + { + "epoch": 0.3667590384739318, + "grad_norm": 0.0057392792271502915, + "learning_rate": 7.039026136770498e-06, + "loss": 0.0, + "step": 5691 + }, + { + "epoch": 0.366823483920861, + "grad_norm": 0.46144410757981064, + "learning_rate": 7.0383100608664525e-06, + "loss": 0.0028, + "step": 5692 + }, + { + "epoch": 0.3668879293677902, + "grad_norm": 0.0238733995461195, + "learning_rate": 7.037593984962407e-06, + "loss": 0.0001, + "step": 5693 + }, + { + "epoch": 0.3669523748147193, + "grad_norm": 0.30712166491100057, + "learning_rate": 7.03687790905836e-06, + "loss": 0.0024, + "step": 5694 + }, + { + "epoch": 0.3670168202616485, + "grad_norm": 0.00197018473504978, + "learning_rate": 7.036161833154315e-06, + "loss": 0.0, + "step": 5695 + }, + { + "epoch": 0.3670812657085777, + "grad_norm": 0.007235413846213685, + "learning_rate": 7.035445757250269e-06, + "loss": 0.0, + "step": 5696 + }, + { + "epoch": 0.36714571115550687, + "grad_norm": 0.0008705440038263109, + "learning_rate": 7.034729681346223e-06, + "loss": 0.0, + "step": 5697 + }, + { + "epoch": 0.36721015660243606, + "grad_norm": 0.016319481775685544, + "learning_rate": 7.034013605442177e-06, + "loss": 0.0, + "step": 5698 + }, + { + "epoch": 0.36727460204936524, + "grad_norm": 0.060190379923427566, + "learning_rate": 7.033297529538131e-06, + "loss": 0.0001, + "step": 5699 + }, + { + "epoch": 0.36733904749629437, + "grad_norm": 0.07271395616914396, + "learning_rate": 7.032581453634085e-06, + "loss": 0.0001, + "step": 5700 + }, + { + "epoch": 0.36740349294322355, + "grad_norm": 0.004665248624292797, + "learning_rate": 7.0318653777300395e-06, + "loss": 0.0, + "step": 5701 + }, + { + "epoch": 0.36746793839015274, + "grad_norm": 0.08727091492317983, + "learning_rate": 7.031149301825995e-06, + "loss": 0.0001, + "step": 5702 + }, + { + "epoch": 0.3675323838370819, + "grad_norm": 0.00788484775339447, + "learning_rate": 7.030433225921949e-06, + "loss": 0.0, + "step": 5703 + }, + { + "epoch": 0.3675968292840111, + "grad_norm": 0.035432436020510494, + "learning_rate": 7.029717150017903e-06, + "loss": 0.0001, + "step": 5704 + }, + { + "epoch": 0.36766127473094024, + "grad_norm": 0.031635243714684846, + "learning_rate": 7.029001074113857e-06, + "loss": 0.0001, + "step": 5705 + }, + { + "epoch": 0.3677257201778694, + "grad_norm": 0.4187037698648444, + "learning_rate": 7.028284998209811e-06, + "loss": 0.0015, + "step": 5706 + }, + { + "epoch": 0.3677901656247986, + "grad_norm": 0.001485610405980241, + "learning_rate": 7.027568922305765e-06, + "loss": 0.0, + "step": 5707 + }, + { + "epoch": 0.3678546110717278, + "grad_norm": 0.0032538214693292274, + "learning_rate": 7.02685284640172e-06, + "loss": 0.0, + "step": 5708 + }, + { + "epoch": 0.367919056518657, + "grad_norm": 0.4946226701409237, + "learning_rate": 7.026136770497674e-06, + "loss": 0.0016, + "step": 5709 + }, + { + "epoch": 0.36798350196558616, + "grad_norm": 0.02520561557442119, + "learning_rate": 7.025420694593627e-06, + "loss": 0.0001, + "step": 5710 + }, + { + "epoch": 0.3680479474125153, + "grad_norm": 0.010406505327414488, + "learning_rate": 7.024704618689582e-06, + "loss": 0.0, + "step": 5711 + }, + { + "epoch": 0.36811239285944447, + "grad_norm": 0.009454967464173939, + "learning_rate": 7.023988542785536e-06, + "loss": 0.0, + "step": 5712 + }, + { + "epoch": 0.36817683830637365, + "grad_norm": 0.5461984263936956, + "learning_rate": 7.02327246688149e-06, + "loss": 0.0025, + "step": 5713 + }, + { + "epoch": 0.36824128375330284, + "grad_norm": 0.011822903242047569, + "learning_rate": 7.022556390977444e-06, + "loss": 0.0001, + "step": 5714 + }, + { + "epoch": 0.368305729200232, + "grad_norm": 0.002773322625933541, + "learning_rate": 7.021840315073398e-06, + "loss": 0.0, + "step": 5715 + }, + { + "epoch": 0.36837017464716115, + "grad_norm": 0.14855857353534296, + "learning_rate": 7.021124239169352e-06, + "loss": 0.0002, + "step": 5716 + }, + { + "epoch": 0.36843462009409034, + "grad_norm": 0.027646845724586845, + "learning_rate": 7.020408163265307e-06, + "loss": 0.0, + "step": 5717 + }, + { + "epoch": 0.3684990655410195, + "grad_norm": 0.0027564969182885253, + "learning_rate": 7.019692087361261e-06, + "loss": 0.0, + "step": 5718 + }, + { + "epoch": 0.3685635109879487, + "grad_norm": 0.0005528286557136186, + "learning_rate": 7.018976011457214e-06, + "loss": 0.0, + "step": 5719 + }, + { + "epoch": 0.3686279564348779, + "grad_norm": 1.5152654319623102, + "learning_rate": 7.018259935553169e-06, + "loss": 0.0032, + "step": 5720 + }, + { + "epoch": 0.3686924018818071, + "grad_norm": 0.0014066507855464159, + "learning_rate": 7.017543859649123e-06, + "loss": 0.0, + "step": 5721 + }, + { + "epoch": 0.3687568473287362, + "grad_norm": 0.0002940719089644006, + "learning_rate": 7.016827783745077e-06, + "loss": 0.0, + "step": 5722 + }, + { + "epoch": 0.3688212927756654, + "grad_norm": 0.7755664462750681, + "learning_rate": 7.0161117078410316e-06, + "loss": 0.0042, + "step": 5723 + }, + { + "epoch": 0.3688857382225946, + "grad_norm": 0.011148354046125965, + "learning_rate": 7.015395631936985e-06, + "loss": 0.0, + "step": 5724 + }, + { + "epoch": 0.36895018366952376, + "grad_norm": 0.037286561287033865, + "learning_rate": 7.014679556032941e-06, + "loss": 0.0001, + "step": 5725 + }, + { + "epoch": 0.36901462911645294, + "grad_norm": 0.28917756239200254, + "learning_rate": 7.0139634801288945e-06, + "loss": 0.001, + "step": 5726 + }, + { + "epoch": 0.36907907456338207, + "grad_norm": 0.02074656065833119, + "learning_rate": 7.013247404224849e-06, + "loss": 0.0001, + "step": 5727 + }, + { + "epoch": 0.36914352001031125, + "grad_norm": 0.009874683740494761, + "learning_rate": 7.012531328320803e-06, + "loss": 0.0, + "step": 5728 + }, + { + "epoch": 0.36920796545724044, + "grad_norm": 0.0010247783570523034, + "learning_rate": 7.011815252416757e-06, + "loss": 0.0, + "step": 5729 + }, + { + "epoch": 0.3692724109041696, + "grad_norm": 0.12852172866055342, + "learning_rate": 7.011099176512711e-06, + "loss": 0.0017, + "step": 5730 + }, + { + "epoch": 0.3693368563510988, + "grad_norm": 0.022266211156909587, + "learning_rate": 7.010383100608665e-06, + "loss": 0.0, + "step": 5731 + }, + { + "epoch": 0.369401301798028, + "grad_norm": 0.03223963132931994, + "learning_rate": 7.009667024704619e-06, + "loss": 0.0, + "step": 5732 + }, + { + "epoch": 0.3694657472449571, + "grad_norm": 0.01174380559418009, + "learning_rate": 7.008950948800574e-06, + "loss": 0.0, + "step": 5733 + }, + { + "epoch": 0.3695301926918863, + "grad_norm": 0.0016373883989292677, + "learning_rate": 7.008234872896528e-06, + "loss": 0.0, + "step": 5734 + }, + { + "epoch": 0.3695946381388155, + "grad_norm": 0.015438489387722118, + "learning_rate": 7.0075187969924815e-06, + "loss": 0.0, + "step": 5735 + }, + { + "epoch": 0.3696590835857447, + "grad_norm": 0.004045239143780003, + "learning_rate": 7.006802721088436e-06, + "loss": 0.0, + "step": 5736 + }, + { + "epoch": 0.36972352903267386, + "grad_norm": 0.014195178455213005, + "learning_rate": 7.00608664518439e-06, + "loss": 0.0, + "step": 5737 + }, + { + "epoch": 0.36978797447960304, + "grad_norm": 0.024569513616097965, + "learning_rate": 7.005370569280344e-06, + "loss": 0.0, + "step": 5738 + }, + { + "epoch": 0.36985241992653217, + "grad_norm": 0.020241459484012674, + "learning_rate": 7.004654493376299e-06, + "loss": 0.0, + "step": 5739 + }, + { + "epoch": 0.36991686537346136, + "grad_norm": 0.004866278303404176, + "learning_rate": 7.003938417472252e-06, + "loss": 0.0, + "step": 5740 + }, + { + "epoch": 0.36998131082039054, + "grad_norm": 0.010689225892144915, + "learning_rate": 7.003222341568206e-06, + "loss": 0.0001, + "step": 5741 + }, + { + "epoch": 0.3700457562673197, + "grad_norm": 0.03240860651724199, + "learning_rate": 7.002506265664161e-06, + "loss": 0.0, + "step": 5742 + }, + { + "epoch": 0.3701102017142489, + "grad_norm": 0.6012695391805059, + "learning_rate": 7.001790189760115e-06, + "loss": 0.0037, + "step": 5743 + }, + { + "epoch": 0.37017464716117804, + "grad_norm": 0.04119383182365942, + "learning_rate": 7.001074113856069e-06, + "loss": 0.0001, + "step": 5744 + }, + { + "epoch": 0.3702390926081072, + "grad_norm": 0.008040074272905727, + "learning_rate": 7.000358037952023e-06, + "loss": 0.0, + "step": 5745 + }, + { + "epoch": 0.3703035380550364, + "grad_norm": 0.14133781336635506, + "learning_rate": 6.999641962047977e-06, + "loss": 0.0022, + "step": 5746 + }, + { + "epoch": 0.3703679835019656, + "grad_norm": 0.024683611120630507, + "learning_rate": 6.998925886143931e-06, + "loss": 0.0, + "step": 5747 + }, + { + "epoch": 0.3704324289488948, + "grad_norm": 0.08903047211636257, + "learning_rate": 6.9982098102398865e-06, + "loss": 0.0003, + "step": 5748 + }, + { + "epoch": 0.37049687439582396, + "grad_norm": 0.45973190145044607, + "learning_rate": 6.997493734335841e-06, + "loss": 0.0009, + "step": 5749 + }, + { + "epoch": 0.3705613198427531, + "grad_norm": 0.06435428612509528, + "learning_rate": 6.996777658431795e-06, + "loss": 0.0001, + "step": 5750 + }, + { + "epoch": 0.3706257652896823, + "grad_norm": 0.018558351255393086, + "learning_rate": 6.9960615825277485e-06, + "loss": 0.0, + "step": 5751 + }, + { + "epoch": 0.37069021073661146, + "grad_norm": 0.008864585347216576, + "learning_rate": 6.995345506623703e-06, + "loss": 0.0, + "step": 5752 + }, + { + "epoch": 0.37075465618354064, + "grad_norm": 0.009975094178598903, + "learning_rate": 6.994629430719657e-06, + "loss": 0.0, + "step": 5753 + }, + { + "epoch": 0.3708191016304698, + "grad_norm": 0.016556182540155888, + "learning_rate": 6.9939133548156114e-06, + "loss": 0.0001, + "step": 5754 + }, + { + "epoch": 0.37088354707739896, + "grad_norm": 0.13527741899174603, + "learning_rate": 6.993197278911566e-06, + "loss": 0.0019, + "step": 5755 + }, + { + "epoch": 0.37094799252432814, + "grad_norm": 0.06295406905782694, + "learning_rate": 6.992481203007519e-06, + "loss": 0.0001, + "step": 5756 + }, + { + "epoch": 0.3710124379712573, + "grad_norm": 0.0010064822738222764, + "learning_rate": 6.9917651271034735e-06, + "loss": 0.0, + "step": 5757 + }, + { + "epoch": 0.3710768834181865, + "grad_norm": 0.024290977389738094, + "learning_rate": 6.991049051199428e-06, + "loss": 0.0015, + "step": 5758 + }, + { + "epoch": 0.3711413288651157, + "grad_norm": 0.010974935412825312, + "learning_rate": 6.990332975295382e-06, + "loss": 0.0, + "step": 5759 + }, + { + "epoch": 0.3712057743120449, + "grad_norm": 0.10455954243655033, + "learning_rate": 6.989616899391336e-06, + "loss": 0.0003, + "step": 5760 + }, + { + "epoch": 0.371270219758974, + "grad_norm": 0.12336619134229843, + "learning_rate": 6.98890082348729e-06, + "loss": 0.0002, + "step": 5761 + }, + { + "epoch": 0.3713346652059032, + "grad_norm": 0.21171697777713713, + "learning_rate": 6.988184747583244e-06, + "loss": 0.0009, + "step": 5762 + }, + { + "epoch": 0.3713991106528324, + "grad_norm": 0.19279942206943126, + "learning_rate": 6.9874686716791984e-06, + "loss": 0.0026, + "step": 5763 + }, + { + "epoch": 0.37146355609976156, + "grad_norm": 0.01216910669966571, + "learning_rate": 6.986752595775153e-06, + "loss": 0.0001, + "step": 5764 + }, + { + "epoch": 0.37152800154669074, + "grad_norm": 0.00037731451971985587, + "learning_rate": 6.986036519871107e-06, + "loss": 0.0, + "step": 5765 + }, + { + "epoch": 0.3715924469936199, + "grad_norm": 0.00018807497398390133, + "learning_rate": 6.9853204439670605e-06, + "loss": 0.0, + "step": 5766 + }, + { + "epoch": 0.37165689244054906, + "grad_norm": 0.0026170447983087115, + "learning_rate": 6.984604368063015e-06, + "loss": 0.0, + "step": 5767 + }, + { + "epoch": 0.37172133788747824, + "grad_norm": 0.017747659184786897, + "learning_rate": 6.983888292158969e-06, + "loss": 0.0002, + "step": 5768 + }, + { + "epoch": 0.3717857833344074, + "grad_norm": 0.3520613627754859, + "learning_rate": 6.983172216254923e-06, + "loss": 0.0017, + "step": 5769 + }, + { + "epoch": 0.3718502287813366, + "grad_norm": 0.00016192167565096303, + "learning_rate": 6.982456140350878e-06, + "loss": 0.0, + "step": 5770 + }, + { + "epoch": 0.3719146742282658, + "grad_norm": 0.0008348356854251009, + "learning_rate": 6.981740064446831e-06, + "loss": 0.0, + "step": 5771 + }, + { + "epoch": 0.3719791196751949, + "grad_norm": 0.005497509674599073, + "learning_rate": 6.981023988542786e-06, + "loss": 0.0001, + "step": 5772 + }, + { + "epoch": 0.3720435651221241, + "grad_norm": 0.0037413784761350334, + "learning_rate": 6.9803079126387406e-06, + "loss": 0.0, + "step": 5773 + }, + { + "epoch": 0.3721080105690533, + "grad_norm": 0.0026230430236184187, + "learning_rate": 6.979591836734695e-06, + "loss": 0.0, + "step": 5774 + }, + { + "epoch": 0.3721724560159825, + "grad_norm": 0.41783501811893103, + "learning_rate": 6.978875760830649e-06, + "loss": 0.0034, + "step": 5775 + }, + { + "epoch": 0.37223690146291166, + "grad_norm": 0.024433689572735673, + "learning_rate": 6.9781596849266035e-06, + "loss": 0.0, + "step": 5776 + }, + { + "epoch": 0.37230134690984085, + "grad_norm": 0.20971572218454174, + "learning_rate": 6.977443609022557e-06, + "loss": 0.0025, + "step": 5777 + }, + { + "epoch": 0.37236579235677, + "grad_norm": 0.006778586627347576, + "learning_rate": 6.976727533118511e-06, + "loss": 0.0, + "step": 5778 + }, + { + "epoch": 0.37243023780369916, + "grad_norm": 0.004765972105939649, + "learning_rate": 6.9760114572144655e-06, + "loss": 0.0, + "step": 5779 + }, + { + "epoch": 0.37249468325062834, + "grad_norm": 0.00936975226703011, + "learning_rate": 6.97529538131042e-06, + "loss": 0.0, + "step": 5780 + }, + { + "epoch": 0.37255912869755753, + "grad_norm": 0.03314593066305474, + "learning_rate": 6.974579305406374e-06, + "loss": 0.0002, + "step": 5781 + }, + { + "epoch": 0.3726235741444867, + "grad_norm": 0.10569911541069012, + "learning_rate": 6.9738632295023276e-06, + "loss": 0.0001, + "step": 5782 + }, + { + "epoch": 0.37268801959141584, + "grad_norm": 0.005564481327632969, + "learning_rate": 6.973147153598282e-06, + "loss": 0.0, + "step": 5783 + }, + { + "epoch": 0.372752465038345, + "grad_norm": 0.0011321496959383888, + "learning_rate": 6.972431077694236e-06, + "loss": 0.0, + "step": 5784 + }, + { + "epoch": 0.3728169104852742, + "grad_norm": 0.004791011659612873, + "learning_rate": 6.9717150017901905e-06, + "loss": 0.0, + "step": 5785 + }, + { + "epoch": 0.3728813559322034, + "grad_norm": 0.005532310001008372, + "learning_rate": 6.970998925886145e-06, + "loss": 0.0001, + "step": 5786 + }, + { + "epoch": 0.3729458013791326, + "grad_norm": 0.7089383028892337, + "learning_rate": 6.970282849982098e-06, + "loss": 0.0035, + "step": 5787 + }, + { + "epoch": 0.37301024682606176, + "grad_norm": 0.10018688504803142, + "learning_rate": 6.9695667740780525e-06, + "loss": 0.0001, + "step": 5788 + }, + { + "epoch": 0.3730746922729909, + "grad_norm": 0.0001188095476045239, + "learning_rate": 6.968850698174007e-06, + "loss": 0.0, + "step": 5789 + }, + { + "epoch": 0.3731391377199201, + "grad_norm": 0.31996598923516045, + "learning_rate": 6.968134622269961e-06, + "loss": 0.0045, + "step": 5790 + }, + { + "epoch": 0.37320358316684926, + "grad_norm": 0.00032090989078132145, + "learning_rate": 6.9674185463659146e-06, + "loss": 0.0, + "step": 5791 + }, + { + "epoch": 0.37326802861377845, + "grad_norm": 0.0027813093535380045, + "learning_rate": 6.966702470461869e-06, + "loss": 0.0, + "step": 5792 + }, + { + "epoch": 0.37333247406070763, + "grad_norm": 0.0006011110428928335, + "learning_rate": 6.965986394557823e-06, + "loss": 0.0, + "step": 5793 + }, + { + "epoch": 0.37339691950763676, + "grad_norm": 0.006022162366228975, + "learning_rate": 6.9652703186537775e-06, + "loss": 0.0, + "step": 5794 + }, + { + "epoch": 0.37346136495456594, + "grad_norm": 0.0009564569230516909, + "learning_rate": 6.964554242749733e-06, + "loss": 0.0, + "step": 5795 + }, + { + "epoch": 0.37352581040149513, + "grad_norm": 0.03388242371408433, + "learning_rate": 6.963838166845687e-06, + "loss": 0.0, + "step": 5796 + }, + { + "epoch": 0.3735902558484243, + "grad_norm": 0.00852415166096573, + "learning_rate": 6.963122090941641e-06, + "loss": 0.0, + "step": 5797 + }, + { + "epoch": 0.3736547012953535, + "grad_norm": 0.014353758440833062, + "learning_rate": 6.962406015037595e-06, + "loss": 0.0001, + "step": 5798 + }, + { + "epoch": 0.3737191467422827, + "grad_norm": 0.001276369092390287, + "learning_rate": 6.961689939133549e-06, + "loss": 0.0, + "step": 5799 + }, + { + "epoch": 0.3737835921892118, + "grad_norm": 0.0008902047543594542, + "learning_rate": 6.960973863229503e-06, + "loss": 0.0, + "step": 5800 + }, + { + "epoch": 0.373848037636141, + "grad_norm": 0.004717272669452164, + "learning_rate": 6.9602577873254575e-06, + "loss": 0.0, + "step": 5801 + }, + { + "epoch": 0.3739124830830702, + "grad_norm": 0.014702761769368668, + "learning_rate": 6.959541711421412e-06, + "loss": 0.0001, + "step": 5802 + }, + { + "epoch": 0.37397692852999936, + "grad_norm": 0.03831489377639327, + "learning_rate": 6.958825635517365e-06, + "loss": 0.0001, + "step": 5803 + }, + { + "epoch": 0.37404137397692855, + "grad_norm": 0.5141810225466743, + "learning_rate": 6.95810955961332e-06, + "loss": 0.0018, + "step": 5804 + }, + { + "epoch": 0.3741058194238577, + "grad_norm": 0.0005838929170589029, + "learning_rate": 6.957393483709274e-06, + "loss": 0.0, + "step": 5805 + }, + { + "epoch": 0.37417026487078686, + "grad_norm": 0.0002700920403723447, + "learning_rate": 6.956677407805228e-06, + "loss": 0.0, + "step": 5806 + }, + { + "epoch": 0.37423471031771605, + "grad_norm": 0.06860687546291591, + "learning_rate": 6.955961331901182e-06, + "loss": 0.0017, + "step": 5807 + }, + { + "epoch": 0.37429915576464523, + "grad_norm": 0.004176513204807584, + "learning_rate": 6.955245255997136e-06, + "loss": 0.0, + "step": 5808 + }, + { + "epoch": 0.3743636012115744, + "grad_norm": 0.005652395412120658, + "learning_rate": 6.95452918009309e-06, + "loss": 0.0, + "step": 5809 + }, + { + "epoch": 0.3744280466585036, + "grad_norm": 0.019276447759645723, + "learning_rate": 6.9538131041890445e-06, + "loss": 0.0, + "step": 5810 + }, + { + "epoch": 0.37449249210543273, + "grad_norm": 0.004779306271954092, + "learning_rate": 6.953097028284999e-06, + "loss": 0.0, + "step": 5811 + }, + { + "epoch": 0.3745569375523619, + "grad_norm": 0.05639422399771823, + "learning_rate": 6.952380952380952e-06, + "loss": 0.0003, + "step": 5812 + }, + { + "epoch": 0.3746213829992911, + "grad_norm": 0.013047953267711436, + "learning_rate": 6.951664876476907e-06, + "loss": 0.0, + "step": 5813 + }, + { + "epoch": 0.3746858284462203, + "grad_norm": 0.0800606691129166, + "learning_rate": 6.950948800572861e-06, + "loss": 0.0, + "step": 5814 + }, + { + "epoch": 0.37475027389314947, + "grad_norm": 0.12582311229794294, + "learning_rate": 6.950232724668815e-06, + "loss": 0.0017, + "step": 5815 + }, + { + "epoch": 0.37481471934007865, + "grad_norm": 0.019311332882545055, + "learning_rate": 6.9495166487647695e-06, + "loss": 0.0001, + "step": 5816 + }, + { + "epoch": 0.3748791647870078, + "grad_norm": 0.2597594869935363, + "learning_rate": 6.948800572860723e-06, + "loss": 0.0006, + "step": 5817 + }, + { + "epoch": 0.37494361023393696, + "grad_norm": 0.013510250554619812, + "learning_rate": 6.948084496956679e-06, + "loss": 0.0, + "step": 5818 + }, + { + "epoch": 0.37500805568086615, + "grad_norm": 0.004594365982372799, + "learning_rate": 6.947368421052632e-06, + "loss": 0.0, + "step": 5819 + }, + { + "epoch": 0.37507250112779533, + "grad_norm": 0.001138969696631606, + "learning_rate": 6.946652345148587e-06, + "loss": 0.0, + "step": 5820 + }, + { + "epoch": 0.3751369465747245, + "grad_norm": 0.0006642511054819653, + "learning_rate": 6.945936269244541e-06, + "loss": 0.0, + "step": 5821 + }, + { + "epoch": 0.37520139202165365, + "grad_norm": 0.044176775604888044, + "learning_rate": 6.945220193340495e-06, + "loss": 0.0001, + "step": 5822 + }, + { + "epoch": 0.37526583746858283, + "grad_norm": 0.0004816241632529937, + "learning_rate": 6.944504117436449e-06, + "loss": 0.0, + "step": 5823 + }, + { + "epoch": 0.375330282915512, + "grad_norm": 0.0020537732340232817, + "learning_rate": 6.943788041532403e-06, + "loss": 0.0, + "step": 5824 + }, + { + "epoch": 0.3753947283624412, + "grad_norm": 0.3551568350092731, + "learning_rate": 6.943071965628357e-06, + "loss": 0.0015, + "step": 5825 + }, + { + "epoch": 0.3754591738093704, + "grad_norm": 0.0009804151741977017, + "learning_rate": 6.942355889724312e-06, + "loss": 0.0, + "step": 5826 + }, + { + "epoch": 0.37552361925629957, + "grad_norm": 1.3854608552621122, + "learning_rate": 6.941639813820266e-06, + "loss": 0.0075, + "step": 5827 + }, + { + "epoch": 0.3755880647032287, + "grad_norm": 0.000444632576506098, + "learning_rate": 6.940923737916219e-06, + "loss": 0.0, + "step": 5828 + }, + { + "epoch": 0.3756525101501579, + "grad_norm": 0.24999937415044649, + "learning_rate": 6.940207662012174e-06, + "loss": 0.001, + "step": 5829 + }, + { + "epoch": 0.37571695559708707, + "grad_norm": 0.001790503591942046, + "learning_rate": 6.939491586108128e-06, + "loss": 0.0, + "step": 5830 + }, + { + "epoch": 0.37578140104401625, + "grad_norm": 0.00021567154767956597, + "learning_rate": 6.938775510204082e-06, + "loss": 0.0, + "step": 5831 + }, + { + "epoch": 0.37584584649094543, + "grad_norm": 0.001552857672059101, + "learning_rate": 6.9380594343000366e-06, + "loss": 0.0, + "step": 5832 + }, + { + "epoch": 0.37591029193787456, + "grad_norm": 0.0009687577369576802, + "learning_rate": 6.93734335839599e-06, + "loss": 0.0, + "step": 5833 + }, + { + "epoch": 0.37597473738480375, + "grad_norm": 0.22006186439099512, + "learning_rate": 6.936627282491944e-06, + "loss": 0.0018, + "step": 5834 + }, + { + "epoch": 0.37603918283173293, + "grad_norm": 0.0022716589278178995, + "learning_rate": 6.935911206587899e-06, + "loss": 0.0, + "step": 5835 + }, + { + "epoch": 0.3761036282786621, + "grad_norm": 0.07283411149368349, + "learning_rate": 6.935195130683853e-06, + "loss": 0.0003, + "step": 5836 + }, + { + "epoch": 0.3761680737255913, + "grad_norm": 0.0513859471633266, + "learning_rate": 6.934479054779807e-06, + "loss": 0.0002, + "step": 5837 + }, + { + "epoch": 0.3762325191725205, + "grad_norm": 0.216646623524965, + "learning_rate": 6.933762978875761e-06, + "loss": 0.0014, + "step": 5838 + }, + { + "epoch": 0.3762969646194496, + "grad_norm": 0.003388630037906614, + "learning_rate": 6.933046902971715e-06, + "loss": 0.0, + "step": 5839 + }, + { + "epoch": 0.3763614100663788, + "grad_norm": 0.028671905161233677, + "learning_rate": 6.932330827067669e-06, + "loss": 0.0, + "step": 5840 + }, + { + "epoch": 0.376425855513308, + "grad_norm": 0.024152302581481176, + "learning_rate": 6.9316147511636236e-06, + "loss": 0.0, + "step": 5841 + }, + { + "epoch": 0.37649030096023717, + "grad_norm": 0.0009817171541373576, + "learning_rate": 6.930898675259579e-06, + "loss": 0.0, + "step": 5842 + }, + { + "epoch": 0.37655474640716635, + "grad_norm": 0.0013014101235195464, + "learning_rate": 6.930182599355533e-06, + "loss": 0.0, + "step": 5843 + }, + { + "epoch": 0.3766191918540955, + "grad_norm": 0.0028327639840403755, + "learning_rate": 6.9294665234514865e-06, + "loss": 0.0, + "step": 5844 + }, + { + "epoch": 0.37668363730102467, + "grad_norm": 0.0032929799671172245, + "learning_rate": 6.928750447547441e-06, + "loss": 0.0, + "step": 5845 + }, + { + "epoch": 0.37674808274795385, + "grad_norm": 0.030509106413843804, + "learning_rate": 6.928034371643395e-06, + "loss": 0.0002, + "step": 5846 + }, + { + "epoch": 0.37681252819488303, + "grad_norm": 0.004954742198539419, + "learning_rate": 6.927318295739349e-06, + "loss": 0.0, + "step": 5847 + }, + { + "epoch": 0.3768769736418122, + "grad_norm": 0.0002230131182336926, + "learning_rate": 6.926602219835304e-06, + "loss": 0.0, + "step": 5848 + }, + { + "epoch": 0.3769414190887414, + "grad_norm": 0.023321539143309727, + "learning_rate": 6.925886143931257e-06, + "loss": 0.0001, + "step": 5849 + }, + { + "epoch": 0.37700586453567053, + "grad_norm": 0.2192969636607764, + "learning_rate": 6.925170068027211e-06, + "loss": 0.0005, + "step": 5850 + }, + { + "epoch": 0.3770703099825997, + "grad_norm": 0.0029399686461097315, + "learning_rate": 6.924453992123166e-06, + "loss": 0.0, + "step": 5851 + }, + { + "epoch": 0.3771347554295289, + "grad_norm": 0.006376825308723674, + "learning_rate": 6.92373791621912e-06, + "loss": 0.0, + "step": 5852 + }, + { + "epoch": 0.3771992008764581, + "grad_norm": 0.15974989372908052, + "learning_rate": 6.923021840315074e-06, + "loss": 0.0006, + "step": 5853 + }, + { + "epoch": 0.37726364632338727, + "grad_norm": 0.003535994812387964, + "learning_rate": 6.922305764411028e-06, + "loss": 0.0, + "step": 5854 + }, + { + "epoch": 0.37732809177031645, + "grad_norm": 0.001929273478040189, + "learning_rate": 6.921589688506982e-06, + "loss": 0.0, + "step": 5855 + }, + { + "epoch": 0.3773925372172456, + "grad_norm": 0.001803497544205884, + "learning_rate": 6.920873612602936e-06, + "loss": 0.0, + "step": 5856 + }, + { + "epoch": 0.37745698266417477, + "grad_norm": 1.0116954786061811, + "learning_rate": 6.920157536698891e-06, + "loss": 0.0011, + "step": 5857 + }, + { + "epoch": 0.37752142811110395, + "grad_norm": 0.0044865772161063605, + "learning_rate": 6.919441460794845e-06, + "loss": 0.0, + "step": 5858 + }, + { + "epoch": 0.37758587355803314, + "grad_norm": 0.002553314405904031, + "learning_rate": 6.918725384890798e-06, + "loss": 0.0, + "step": 5859 + }, + { + "epoch": 0.3776503190049623, + "grad_norm": 0.0049119126899543186, + "learning_rate": 6.918009308986753e-06, + "loss": 0.0, + "step": 5860 + }, + { + "epoch": 0.37771476445189145, + "grad_norm": 0.0008667354202076583, + "learning_rate": 6.917293233082707e-06, + "loss": 0.0, + "step": 5861 + }, + { + "epoch": 0.37777920989882063, + "grad_norm": 0.0016562458722985242, + "learning_rate": 6.916577157178661e-06, + "loss": 0.0, + "step": 5862 + }, + { + "epoch": 0.3778436553457498, + "grad_norm": 0.0004253058844486389, + "learning_rate": 6.915861081274616e-06, + "loss": 0.0, + "step": 5863 + }, + { + "epoch": 0.377908100792679, + "grad_norm": 0.010075668762335689, + "learning_rate": 6.915145005370569e-06, + "loss": 0.0001, + "step": 5864 + }, + { + "epoch": 0.3779725462396082, + "grad_norm": 0.004239878134591785, + "learning_rate": 6.914428929466524e-06, + "loss": 0.0, + "step": 5865 + }, + { + "epoch": 0.37803699168653737, + "grad_norm": 0.30177530528108326, + "learning_rate": 6.9137128535624785e-06, + "loss": 0.0011, + "step": 5866 + }, + { + "epoch": 0.3781014371334665, + "grad_norm": 0.00040066692664154734, + "learning_rate": 6.912996777658433e-06, + "loss": 0.0, + "step": 5867 + }, + { + "epoch": 0.3781658825803957, + "grad_norm": 0.10780252724976731, + "learning_rate": 6.912280701754387e-06, + "loss": 0.0004, + "step": 5868 + }, + { + "epoch": 0.37823032802732487, + "grad_norm": 0.002191261152124998, + "learning_rate": 6.911564625850341e-06, + "loss": 0.0, + "step": 5869 + }, + { + "epoch": 0.37829477347425405, + "grad_norm": 0.2787256869160595, + "learning_rate": 6.910848549946295e-06, + "loss": 0.0014, + "step": 5870 + }, + { + "epoch": 0.37835921892118324, + "grad_norm": 0.0006742879125156188, + "learning_rate": 6.910132474042249e-06, + "loss": 0.0, + "step": 5871 + }, + { + "epoch": 0.37842366436811237, + "grad_norm": 0.012795925913342132, + "learning_rate": 6.9094163981382034e-06, + "loss": 0.0001, + "step": 5872 + }, + { + "epoch": 0.37848810981504155, + "grad_norm": 0.0007479872914136376, + "learning_rate": 6.908700322234158e-06, + "loss": 0.0, + "step": 5873 + }, + { + "epoch": 0.37855255526197074, + "grad_norm": 0.00018037472474745242, + "learning_rate": 6.907984246330112e-06, + "loss": 0.0, + "step": 5874 + }, + { + "epoch": 0.3786170007088999, + "grad_norm": 0.0015442612656284755, + "learning_rate": 6.9072681704260655e-06, + "loss": 0.0, + "step": 5875 + }, + { + "epoch": 0.3786814461558291, + "grad_norm": 0.0024565796104232176, + "learning_rate": 6.90655209452202e-06, + "loss": 0.0, + "step": 5876 + }, + { + "epoch": 0.3787458916027583, + "grad_norm": 0.500224241993818, + "learning_rate": 6.905836018617974e-06, + "loss": 0.0036, + "step": 5877 + }, + { + "epoch": 0.3788103370496874, + "grad_norm": 0.00017855047645974685, + "learning_rate": 6.905119942713928e-06, + "loss": 0.0, + "step": 5878 + }, + { + "epoch": 0.3788747824966166, + "grad_norm": 0.012190953634215032, + "learning_rate": 6.904403866809883e-06, + "loss": 0.0001, + "step": 5879 + }, + { + "epoch": 0.3789392279435458, + "grad_norm": 0.004909180990656321, + "learning_rate": 6.903687790905836e-06, + "loss": 0.0, + "step": 5880 + }, + { + "epoch": 0.37900367339047497, + "grad_norm": 0.5589098802746703, + "learning_rate": 6.9029717150017904e-06, + "loss": 0.0005, + "step": 5881 + }, + { + "epoch": 0.37906811883740416, + "grad_norm": 0.003212166081683288, + "learning_rate": 6.902255639097745e-06, + "loss": 0.0, + "step": 5882 + }, + { + "epoch": 0.3791325642843333, + "grad_norm": 0.08818240476463624, + "learning_rate": 6.901539563193699e-06, + "loss": 0.0016, + "step": 5883 + }, + { + "epoch": 0.37919700973126247, + "grad_norm": 0.003632769423772264, + "learning_rate": 6.9008234872896525e-06, + "loss": 0.0, + "step": 5884 + }, + { + "epoch": 0.37926145517819165, + "grad_norm": 0.009019636505328735, + "learning_rate": 6.900107411385607e-06, + "loss": 0.0001, + "step": 5885 + }, + { + "epoch": 0.37932590062512084, + "grad_norm": 0.0004343660014682446, + "learning_rate": 6.899391335481561e-06, + "loss": 0.0, + "step": 5886 + }, + { + "epoch": 0.37939034607205, + "grad_norm": 0.00027411815184107786, + "learning_rate": 6.898675259577515e-06, + "loss": 0.0, + "step": 5887 + }, + { + "epoch": 0.3794547915189792, + "grad_norm": 0.018313065196746546, + "learning_rate": 6.8979591836734705e-06, + "loss": 0.0, + "step": 5888 + }, + { + "epoch": 0.37951923696590834, + "grad_norm": 0.05844586306158265, + "learning_rate": 6.897243107769425e-06, + "loss": 0.0001, + "step": 5889 + }, + { + "epoch": 0.3795836824128375, + "grad_norm": 0.008817164212300388, + "learning_rate": 6.896527031865379e-06, + "loss": 0.0, + "step": 5890 + }, + { + "epoch": 0.3796481278597667, + "grad_norm": 0.440502219983676, + "learning_rate": 6.8958109559613326e-06, + "loss": 0.0005, + "step": 5891 + }, + { + "epoch": 0.3797125733066959, + "grad_norm": 0.03877985442103465, + "learning_rate": 6.895094880057287e-06, + "loss": 0.0001, + "step": 5892 + }, + { + "epoch": 0.3797770187536251, + "grad_norm": 0.0006761241313574944, + "learning_rate": 6.894378804153241e-06, + "loss": 0.0, + "step": 5893 + }, + { + "epoch": 0.37984146420055426, + "grad_norm": 0.0003246361163867012, + "learning_rate": 6.8936627282491955e-06, + "loss": 0.0, + "step": 5894 + }, + { + "epoch": 0.3799059096474834, + "grad_norm": 0.001269483495536825, + "learning_rate": 6.89294665234515e-06, + "loss": 0.0, + "step": 5895 + }, + { + "epoch": 0.37997035509441257, + "grad_norm": 0.6018471230283085, + "learning_rate": 6.892230576441103e-06, + "loss": 0.003, + "step": 5896 + }, + { + "epoch": 0.38003480054134176, + "grad_norm": 0.00046990029288776074, + "learning_rate": 6.8915145005370575e-06, + "loss": 0.0, + "step": 5897 + }, + { + "epoch": 0.38009924598827094, + "grad_norm": 0.2449174500328802, + "learning_rate": 6.890798424633012e-06, + "loss": 0.0001, + "step": 5898 + }, + { + "epoch": 0.3801636914352001, + "grad_norm": 0.00953486729292568, + "learning_rate": 6.890082348728966e-06, + "loss": 0.0, + "step": 5899 + }, + { + "epoch": 0.38022813688212925, + "grad_norm": 0.0004494877435115907, + "learning_rate": 6.88936627282492e-06, + "loss": 0.0, + "step": 5900 + }, + { + "epoch": 0.38029258232905844, + "grad_norm": 0.007622277277941377, + "learning_rate": 6.888650196920874e-06, + "loss": 0.0, + "step": 5901 + }, + { + "epoch": 0.3803570277759876, + "grad_norm": 0.00021103624434805576, + "learning_rate": 6.887934121016828e-06, + "loss": 0.0, + "step": 5902 + }, + { + "epoch": 0.3804214732229168, + "grad_norm": 0.00270139356453862, + "learning_rate": 6.8872180451127825e-06, + "loss": 0.0, + "step": 5903 + }, + { + "epoch": 0.380485918669846, + "grad_norm": 0.01119123283242716, + "learning_rate": 6.886501969208737e-06, + "loss": 0.0001, + "step": 5904 + }, + { + "epoch": 0.3805503641167752, + "grad_norm": 0.0005282343466651458, + "learning_rate": 6.88578589330469e-06, + "loss": 0.0, + "step": 5905 + }, + { + "epoch": 0.3806148095637043, + "grad_norm": 2.001432859226168, + "learning_rate": 6.8850698174006445e-06, + "loss": 0.0158, + "step": 5906 + }, + { + "epoch": 0.3806792550106335, + "grad_norm": 0.001497510037133082, + "learning_rate": 6.884353741496599e-06, + "loss": 0.0, + "step": 5907 + }, + { + "epoch": 0.3807437004575627, + "grad_norm": 0.0058557848933884354, + "learning_rate": 6.883637665592553e-06, + "loss": 0.0, + "step": 5908 + }, + { + "epoch": 0.38080814590449186, + "grad_norm": 0.15156723462407826, + "learning_rate": 6.882921589688507e-06, + "loss": 0.0003, + "step": 5909 + }, + { + "epoch": 0.38087259135142104, + "grad_norm": 0.0022827647541258317, + "learning_rate": 6.882205513784461e-06, + "loss": 0.0, + "step": 5910 + }, + { + "epoch": 0.38093703679835017, + "grad_norm": 0.001730530080398193, + "learning_rate": 6.881489437880415e-06, + "loss": 0.0, + "step": 5911 + }, + { + "epoch": 0.38100148224527935, + "grad_norm": 0.0014464979963454568, + "learning_rate": 6.88077336197637e-06, + "loss": 0.0, + "step": 5912 + }, + { + "epoch": 0.38106592769220854, + "grad_norm": 0.0013056269425528884, + "learning_rate": 6.880057286072325e-06, + "loss": 0.0, + "step": 5913 + }, + { + "epoch": 0.3811303731391377, + "grad_norm": 0.000246217387139311, + "learning_rate": 6.879341210168279e-06, + "loss": 0.0, + "step": 5914 + }, + { + "epoch": 0.3811948185860669, + "grad_norm": 0.0029653896752515046, + "learning_rate": 6.878625134264233e-06, + "loss": 0.0, + "step": 5915 + }, + { + "epoch": 0.3812592640329961, + "grad_norm": 0.004897105078602228, + "learning_rate": 6.8779090583601875e-06, + "loss": 0.0001, + "step": 5916 + }, + { + "epoch": 0.3813237094799252, + "grad_norm": 0.0006053366262376849, + "learning_rate": 6.877192982456141e-06, + "loss": 0.0, + "step": 5917 + }, + { + "epoch": 0.3813881549268544, + "grad_norm": 0.0014126298196204864, + "learning_rate": 6.876476906552095e-06, + "loss": 0.0, + "step": 5918 + }, + { + "epoch": 0.3814526003737836, + "grad_norm": 0.020675918183340834, + "learning_rate": 6.8757608306480495e-06, + "loss": 0.0002, + "step": 5919 + }, + { + "epoch": 0.3815170458207128, + "grad_norm": 0.0027330285181974686, + "learning_rate": 6.875044754744004e-06, + "loss": 0.0, + "step": 5920 + }, + { + "epoch": 0.38158149126764196, + "grad_norm": 0.020531958035086353, + "learning_rate": 6.874328678839957e-06, + "loss": 0.0, + "step": 5921 + }, + { + "epoch": 0.3816459367145711, + "grad_norm": 0.49557236434042445, + "learning_rate": 6.873612602935912e-06, + "loss": 0.002, + "step": 5922 + }, + { + "epoch": 0.3817103821615003, + "grad_norm": 0.0013880445470229727, + "learning_rate": 6.872896527031866e-06, + "loss": 0.0, + "step": 5923 + }, + { + "epoch": 0.38177482760842946, + "grad_norm": 0.001162689409539696, + "learning_rate": 6.87218045112782e-06, + "loss": 0.0, + "step": 5924 + }, + { + "epoch": 0.38183927305535864, + "grad_norm": 0.11035111520052028, + "learning_rate": 6.8714643752237745e-06, + "loss": 0.0017, + "step": 5925 + }, + { + "epoch": 0.3819037185022878, + "grad_norm": 0.0007650463249493047, + "learning_rate": 6.870748299319728e-06, + "loss": 0.0, + "step": 5926 + }, + { + "epoch": 0.381968163949217, + "grad_norm": 0.019925055939437754, + "learning_rate": 6.870032223415682e-06, + "loss": 0.0001, + "step": 5927 + }, + { + "epoch": 0.38203260939614614, + "grad_norm": 0.014557531030572302, + "learning_rate": 6.8693161475116365e-06, + "loss": 0.0, + "step": 5928 + }, + { + "epoch": 0.3820970548430753, + "grad_norm": 0.0002573063824412536, + "learning_rate": 6.868600071607591e-06, + "loss": 0.0, + "step": 5929 + }, + { + "epoch": 0.3821615002900045, + "grad_norm": 0.0003335346718924549, + "learning_rate": 6.867883995703545e-06, + "loss": 0.0, + "step": 5930 + }, + { + "epoch": 0.3822259457369337, + "grad_norm": 0.0008118441796438887, + "learning_rate": 6.867167919799499e-06, + "loss": 0.0, + "step": 5931 + }, + { + "epoch": 0.3822903911838629, + "grad_norm": 0.005326369326512274, + "learning_rate": 6.866451843895453e-06, + "loss": 0.0, + "step": 5932 + }, + { + "epoch": 0.38235483663079206, + "grad_norm": 0.005278252984737066, + "learning_rate": 6.865735767991407e-06, + "loss": 0.0, + "step": 5933 + }, + { + "epoch": 0.3824192820777212, + "grad_norm": 0.03136676899392069, + "learning_rate": 6.8650196920873615e-06, + "loss": 0.0001, + "step": 5934 + }, + { + "epoch": 0.3824837275246504, + "grad_norm": 0.0009959009747822732, + "learning_rate": 6.864303616183317e-06, + "loss": 0.0, + "step": 5935 + }, + { + "epoch": 0.38254817297157956, + "grad_norm": 0.00042236544398543236, + "learning_rate": 6.863587540279271e-06, + "loss": 0.0, + "step": 5936 + }, + { + "epoch": 0.38261261841850874, + "grad_norm": 0.0006273076220670741, + "learning_rate": 6.862871464375224e-06, + "loss": 0.0, + "step": 5937 + }, + { + "epoch": 0.3826770638654379, + "grad_norm": 0.013919804187017291, + "learning_rate": 6.862155388471179e-06, + "loss": 0.0, + "step": 5938 + }, + { + "epoch": 0.38274150931236706, + "grad_norm": 0.006399515087818912, + "learning_rate": 6.861439312567133e-06, + "loss": 0.0, + "step": 5939 + }, + { + "epoch": 0.38280595475929624, + "grad_norm": 0.0009560729498972583, + "learning_rate": 6.860723236663087e-06, + "loss": 0.0, + "step": 5940 + }, + { + "epoch": 0.3828704002062254, + "grad_norm": 0.0574728807346303, + "learning_rate": 6.8600071607590416e-06, + "loss": 0.0002, + "step": 5941 + }, + { + "epoch": 0.3829348456531546, + "grad_norm": 0.005010008731110634, + "learning_rate": 6.859291084854995e-06, + "loss": 0.0, + "step": 5942 + }, + { + "epoch": 0.3829992911000838, + "grad_norm": 0.010932127051166089, + "learning_rate": 6.858575008950949e-06, + "loss": 0.0001, + "step": 5943 + }, + { + "epoch": 0.383063736547013, + "grad_norm": 0.24358331776633155, + "learning_rate": 6.857858933046904e-06, + "loss": 0.0002, + "step": 5944 + }, + { + "epoch": 0.3831281819939421, + "grad_norm": 0.005314563855596481, + "learning_rate": 6.857142857142858e-06, + "loss": 0.0, + "step": 5945 + }, + { + "epoch": 0.3831926274408713, + "grad_norm": 0.005676835426711824, + "learning_rate": 6.856426781238812e-06, + "loss": 0.0, + "step": 5946 + }, + { + "epoch": 0.3832570728878005, + "grad_norm": 0.0003028851649869988, + "learning_rate": 6.855710705334766e-06, + "loss": 0.0, + "step": 5947 + }, + { + "epoch": 0.38332151833472966, + "grad_norm": 0.29716372253412965, + "learning_rate": 6.85499462943072e-06, + "loss": 0.0332, + "step": 5948 + }, + { + "epoch": 0.38338596378165885, + "grad_norm": 0.001941066789563445, + "learning_rate": 6.854278553526674e-06, + "loss": 0.0, + "step": 5949 + }, + { + "epoch": 0.383450409228588, + "grad_norm": 0.21121318779958853, + "learning_rate": 6.8535624776226286e-06, + "loss": 0.0013, + "step": 5950 + }, + { + "epoch": 0.38351485467551716, + "grad_norm": 0.023995077734359705, + "learning_rate": 6.852846401718583e-06, + "loss": 0.0001, + "step": 5951 + }, + { + "epoch": 0.38357930012244634, + "grad_norm": 0.05211629073163899, + "learning_rate": 6.852130325814536e-06, + "loss": 0.0004, + "step": 5952 + }, + { + "epoch": 0.3836437455693755, + "grad_norm": 0.000493550789505138, + "learning_rate": 6.851414249910491e-06, + "loss": 0.0, + "step": 5953 + }, + { + "epoch": 0.3837081910163047, + "grad_norm": 0.0003923175907974961, + "learning_rate": 6.850698174006445e-06, + "loss": 0.0, + "step": 5954 + }, + { + "epoch": 0.3837726364632339, + "grad_norm": 0.009923200087034062, + "learning_rate": 6.849982098102399e-06, + "loss": 0.0, + "step": 5955 + }, + { + "epoch": 0.383837081910163, + "grad_norm": 0.03499818907090998, + "learning_rate": 6.8492660221983535e-06, + "loss": 0.0001, + "step": 5956 + }, + { + "epoch": 0.3839015273570922, + "grad_norm": 0.0037882630463259115, + "learning_rate": 6.848549946294307e-06, + "loss": 0.0, + "step": 5957 + }, + { + "epoch": 0.3839659728040214, + "grad_norm": 0.00421029141255445, + "learning_rate": 6.847833870390261e-06, + "loss": 0.0, + "step": 5958 + }, + { + "epoch": 0.3840304182509506, + "grad_norm": 0.0005011436316182269, + "learning_rate": 6.847117794486216e-06, + "loss": 0.0, + "step": 5959 + }, + { + "epoch": 0.38409486369787976, + "grad_norm": 0.014270384782207152, + "learning_rate": 6.846401718582171e-06, + "loss": 0.0, + "step": 5960 + }, + { + "epoch": 0.38415930914480895, + "grad_norm": 0.32118606075803946, + "learning_rate": 6.845685642678125e-06, + "loss": 0.0023, + "step": 5961 + }, + { + "epoch": 0.3842237545917381, + "grad_norm": 0.0011372247327576077, + "learning_rate": 6.844969566774079e-06, + "loss": 0.0, + "step": 5962 + }, + { + "epoch": 0.38428820003866726, + "grad_norm": 0.00047110262412149026, + "learning_rate": 6.844253490870033e-06, + "loss": 0.0, + "step": 5963 + }, + { + "epoch": 0.38435264548559644, + "grad_norm": 0.004175332756483595, + "learning_rate": 6.843537414965987e-06, + "loss": 0.0, + "step": 5964 + }, + { + "epoch": 0.38441709093252563, + "grad_norm": 0.00842632735103172, + "learning_rate": 6.842821339061941e-06, + "loss": 0.0001, + "step": 5965 + }, + { + "epoch": 0.3844815363794548, + "grad_norm": 0.001385774924796425, + "learning_rate": 6.842105263157896e-06, + "loss": 0.0, + "step": 5966 + }, + { + "epoch": 0.38454598182638394, + "grad_norm": 0.00960274895131003, + "learning_rate": 6.84138918725385e-06, + "loss": 0.0001, + "step": 5967 + }, + { + "epoch": 0.3846104272733131, + "grad_norm": 0.0014732819087795253, + "learning_rate": 6.840673111349803e-06, + "loss": 0.0, + "step": 5968 + }, + { + "epoch": 0.3846748727202423, + "grad_norm": 0.0019206416515151602, + "learning_rate": 6.839957035445758e-06, + "loss": 0.0, + "step": 5969 + }, + { + "epoch": 0.3847393181671715, + "grad_norm": 0.010897749206447984, + "learning_rate": 6.839240959541712e-06, + "loss": 0.0001, + "step": 5970 + }, + { + "epoch": 0.3848037636141007, + "grad_norm": 0.003925340168642234, + "learning_rate": 6.838524883637666e-06, + "loss": 0.0, + "step": 5971 + }, + { + "epoch": 0.38486820906102986, + "grad_norm": 0.012405496974591106, + "learning_rate": 6.837808807733621e-06, + "loss": 0.0, + "step": 5972 + }, + { + "epoch": 0.384932654507959, + "grad_norm": 0.06288781766963841, + "learning_rate": 6.837092731829574e-06, + "loss": 0.0001, + "step": 5973 + }, + { + "epoch": 0.3849970999548882, + "grad_norm": 0.09090745498393638, + "learning_rate": 6.836376655925528e-06, + "loss": 0.0, + "step": 5974 + }, + { + "epoch": 0.38506154540181736, + "grad_norm": 0.0006160974128895556, + "learning_rate": 6.835660580021483e-06, + "loss": 0.0, + "step": 5975 + }, + { + "epoch": 0.38512599084874655, + "grad_norm": 0.0007932396092801955, + "learning_rate": 6.834944504117437e-06, + "loss": 0.0, + "step": 5976 + }, + { + "epoch": 0.38519043629567573, + "grad_norm": 0.05446525313130436, + "learning_rate": 6.834228428213391e-06, + "loss": 0.0001, + "step": 5977 + }, + { + "epoch": 0.38525488174260486, + "grad_norm": 0.006795695546119066, + "learning_rate": 6.833512352309345e-06, + "loss": 0.0, + "step": 5978 + }, + { + "epoch": 0.38531932718953404, + "grad_norm": 0.2266163268767816, + "learning_rate": 6.832796276405299e-06, + "loss": 0.0017, + "step": 5979 + }, + { + "epoch": 0.38538377263646323, + "grad_norm": 0.1509233551229267, + "learning_rate": 6.832080200501253e-06, + "loss": 0.0018, + "step": 5980 + }, + { + "epoch": 0.3854482180833924, + "grad_norm": 0.0038038159387376977, + "learning_rate": 6.831364124597208e-06, + "loss": 0.0, + "step": 5981 + }, + { + "epoch": 0.3855126635303216, + "grad_norm": 0.11071187248613129, + "learning_rate": 6.830648048693163e-06, + "loss": 0.0001, + "step": 5982 + }, + { + "epoch": 0.3855771089772508, + "grad_norm": 0.008326152462626822, + "learning_rate": 6.829931972789117e-06, + "loss": 0.0001, + "step": 5983 + }, + { + "epoch": 0.3856415544241799, + "grad_norm": 0.0029565684972451584, + "learning_rate": 6.8292158968850705e-06, + "loss": 0.0, + "step": 5984 + }, + { + "epoch": 0.3857059998711091, + "grad_norm": 0.5216882714265499, + "learning_rate": 6.828499820981025e-06, + "loss": 0.0014, + "step": 5985 + }, + { + "epoch": 0.3857704453180383, + "grad_norm": 0.002250543081538071, + "learning_rate": 6.827783745076979e-06, + "loss": 0.0, + "step": 5986 + }, + { + "epoch": 0.38583489076496746, + "grad_norm": 0.2361621589188941, + "learning_rate": 6.827067669172933e-06, + "loss": 0.0011, + "step": 5987 + }, + { + "epoch": 0.38589933621189665, + "grad_norm": 0.031054470860727275, + "learning_rate": 6.826351593268888e-06, + "loss": 0.0001, + "step": 5988 + }, + { + "epoch": 0.3859637816588258, + "grad_norm": 0.014908333265461695, + "learning_rate": 6.825635517364841e-06, + "loss": 0.0001, + "step": 5989 + }, + { + "epoch": 0.38602822710575496, + "grad_norm": 0.042477362343784304, + "learning_rate": 6.8249194414607954e-06, + "loss": 0.0001, + "step": 5990 + }, + { + "epoch": 0.38609267255268415, + "grad_norm": 0.011556256262070665, + "learning_rate": 6.82420336555675e-06, + "loss": 0.0, + "step": 5991 + }, + { + "epoch": 0.38615711799961333, + "grad_norm": 0.0008571054739306326, + "learning_rate": 6.823487289652704e-06, + "loss": 0.0, + "step": 5992 + }, + { + "epoch": 0.3862215634465425, + "grad_norm": 0.00012599860103708978, + "learning_rate": 6.822771213748658e-06, + "loss": 0.0, + "step": 5993 + }, + { + "epoch": 0.3862860088934717, + "grad_norm": 0.016258295239014172, + "learning_rate": 6.822055137844612e-06, + "loss": 0.0001, + "step": 5994 + }, + { + "epoch": 0.38635045434040083, + "grad_norm": 0.007365234092956695, + "learning_rate": 6.821339061940566e-06, + "loss": 0.0, + "step": 5995 + }, + { + "epoch": 0.38641489978733, + "grad_norm": 0.010457210348021977, + "learning_rate": 6.82062298603652e-06, + "loss": 0.0, + "step": 5996 + }, + { + "epoch": 0.3864793452342592, + "grad_norm": 0.0017911283307170947, + "learning_rate": 6.819906910132475e-06, + "loss": 0.0, + "step": 5997 + }, + { + "epoch": 0.3865437906811884, + "grad_norm": 0.021666974075202527, + "learning_rate": 6.819190834228428e-06, + "loss": 0.0001, + "step": 5998 + }, + { + "epoch": 0.38660823612811757, + "grad_norm": 0.09290394888785891, + "learning_rate": 6.8184747583243824e-06, + "loss": 0.001, + "step": 5999 + }, + { + "epoch": 0.38667268157504675, + "grad_norm": 0.04081432866768783, + "learning_rate": 6.817758682420337e-06, + "loss": 0.0, + "step": 6000 + }, + { + "epoch": 0.3867371270219759, + "grad_norm": 0.0030581764431921246, + "learning_rate": 6.817042606516291e-06, + "loss": 0.0, + "step": 6001 + }, + { + "epoch": 0.38680157246890506, + "grad_norm": 0.04727984482479228, + "learning_rate": 6.816326530612245e-06, + "loss": 0.0001, + "step": 6002 + }, + { + "epoch": 0.38686601791583425, + "grad_norm": 0.07452768727150452, + "learning_rate": 6.815610454708199e-06, + "loss": 0.0001, + "step": 6003 + }, + { + "epoch": 0.38693046336276343, + "grad_norm": 0.20755748626077528, + "learning_rate": 6.814894378804153e-06, + "loss": 0.0005, + "step": 6004 + }, + { + "epoch": 0.3869949088096926, + "grad_norm": 0.006755925785793144, + "learning_rate": 6.814178302900108e-06, + "loss": 0.0, + "step": 6005 + }, + { + "epoch": 0.38705935425662175, + "grad_norm": 0.17547190254891795, + "learning_rate": 6.8134622269960625e-06, + "loss": 0.0002, + "step": 6006 + }, + { + "epoch": 0.38712379970355093, + "grad_norm": 0.012661615225328758, + "learning_rate": 6.812746151092017e-06, + "loss": 0.0002, + "step": 6007 + }, + { + "epoch": 0.3871882451504801, + "grad_norm": 0.003090067111566923, + "learning_rate": 6.812030075187971e-06, + "loss": 0.0, + "step": 6008 + }, + { + "epoch": 0.3872526905974093, + "grad_norm": 0.19586265328413646, + "learning_rate": 6.811313999283925e-06, + "loss": 0.0012, + "step": 6009 + }, + { + "epoch": 0.3873171360443385, + "grad_norm": 4.473975694094967, + "learning_rate": 6.810597923379879e-06, + "loss": 0.0279, + "step": 6010 + }, + { + "epoch": 0.38738158149126767, + "grad_norm": 0.2445356530106942, + "learning_rate": 6.809881847475833e-06, + "loss": 0.0051, + "step": 6011 + }, + { + "epoch": 0.3874460269381968, + "grad_norm": 0.032982293469763065, + "learning_rate": 6.8091657715717875e-06, + "loss": 0.0003, + "step": 6012 + }, + { + "epoch": 0.387510472385126, + "grad_norm": 0.009209936588663899, + "learning_rate": 6.808449695667742e-06, + "loss": 0.0, + "step": 6013 + }, + { + "epoch": 0.38757491783205517, + "grad_norm": 0.00518529117122379, + "learning_rate": 6.807733619763695e-06, + "loss": 0.0001, + "step": 6014 + }, + { + "epoch": 0.38763936327898435, + "grad_norm": 0.07908431500910323, + "learning_rate": 6.8070175438596495e-06, + "loss": 0.0003, + "step": 6015 + }, + { + "epoch": 0.38770380872591353, + "grad_norm": 0.27541477016357735, + "learning_rate": 6.806301467955604e-06, + "loss": 0.0022, + "step": 6016 + }, + { + "epoch": 0.38776825417284266, + "grad_norm": 0.006634024515463209, + "learning_rate": 6.805585392051558e-06, + "loss": 0.0, + "step": 6017 + }, + { + "epoch": 0.38783269961977185, + "grad_norm": 0.011857648203153355, + "learning_rate": 6.804869316147512e-06, + "loss": 0.0, + "step": 6018 + }, + { + "epoch": 0.38789714506670103, + "grad_norm": 0.0028286459308458976, + "learning_rate": 6.804153240243466e-06, + "loss": 0.0, + "step": 6019 + }, + { + "epoch": 0.3879615905136302, + "grad_norm": 0.01959234457512042, + "learning_rate": 6.80343716433942e-06, + "loss": 0.0, + "step": 6020 + }, + { + "epoch": 0.3880260359605594, + "grad_norm": 0.5422197019045307, + "learning_rate": 6.8027210884353745e-06, + "loss": 0.0033, + "step": 6021 + }, + { + "epoch": 0.3880904814074886, + "grad_norm": 0.2587556781468276, + "learning_rate": 6.802005012531329e-06, + "loss": 0.0003, + "step": 6022 + }, + { + "epoch": 0.3881549268544177, + "grad_norm": 0.008435287658794083, + "learning_rate": 6.801288936627283e-06, + "loss": 0.0, + "step": 6023 + }, + { + "epoch": 0.3882193723013469, + "grad_norm": 0.01238562473581905, + "learning_rate": 6.8005728607232365e-06, + "loss": 0.0, + "step": 6024 + }, + { + "epoch": 0.3882838177482761, + "grad_norm": 1.987109665357907, + "learning_rate": 6.799856784819191e-06, + "loss": 0.0106, + "step": 6025 + }, + { + "epoch": 0.38834826319520527, + "grad_norm": 0.04223822440592762, + "learning_rate": 6.799140708915145e-06, + "loss": 0.0001, + "step": 6026 + }, + { + "epoch": 0.38841270864213445, + "grad_norm": 0.002983615328333293, + "learning_rate": 6.798424633011099e-06, + "loss": 0.0, + "step": 6027 + }, + { + "epoch": 0.3884771540890636, + "grad_norm": 0.14209061717448843, + "learning_rate": 6.797708557107054e-06, + "loss": 0.0002, + "step": 6028 + }, + { + "epoch": 0.38854159953599277, + "grad_norm": 0.07302858039418998, + "learning_rate": 6.796992481203009e-06, + "loss": 0.0001, + "step": 6029 + }, + { + "epoch": 0.38860604498292195, + "grad_norm": 0.03682687955726151, + "learning_rate": 6.796276405298962e-06, + "loss": 0.0, + "step": 6030 + }, + { + "epoch": 0.38867049042985113, + "grad_norm": 0.009217186042876992, + "learning_rate": 6.795560329394917e-06, + "loss": 0.0001, + "step": 6031 + }, + { + "epoch": 0.3887349358767803, + "grad_norm": 0.00352494553698736, + "learning_rate": 6.794844253490871e-06, + "loss": 0.0, + "step": 6032 + }, + { + "epoch": 0.3887993813237095, + "grad_norm": 0.029486892316410652, + "learning_rate": 6.794128177586825e-06, + "loss": 0.0, + "step": 6033 + }, + { + "epoch": 0.38886382677063863, + "grad_norm": 0.00390147836480369, + "learning_rate": 6.7934121016827795e-06, + "loss": 0.0, + "step": 6034 + }, + { + "epoch": 0.3889282722175678, + "grad_norm": 0.013006879238566681, + "learning_rate": 6.792696025778733e-06, + "loss": 0.0, + "step": 6035 + }, + { + "epoch": 0.388992717664497, + "grad_norm": 0.11093205133006082, + "learning_rate": 6.791979949874687e-06, + "loss": 0.0002, + "step": 6036 + }, + { + "epoch": 0.3890571631114262, + "grad_norm": 0.06791830821969097, + "learning_rate": 6.7912638739706415e-06, + "loss": 0.0001, + "step": 6037 + }, + { + "epoch": 0.38912160855835537, + "grad_norm": 0.003451845629040953, + "learning_rate": 6.790547798066596e-06, + "loss": 0.0, + "step": 6038 + }, + { + "epoch": 0.38918605400528455, + "grad_norm": 0.09567374634863354, + "learning_rate": 6.78983172216255e-06, + "loss": 0.0001, + "step": 6039 + }, + { + "epoch": 0.3892504994522137, + "grad_norm": 0.08102874455585664, + "learning_rate": 6.789115646258504e-06, + "loss": 0.0001, + "step": 6040 + }, + { + "epoch": 0.38931494489914287, + "grad_norm": 0.09263870092366862, + "learning_rate": 6.788399570354458e-06, + "loss": 0.0002, + "step": 6041 + }, + { + "epoch": 0.38937939034607205, + "grad_norm": 0.058662957804021545, + "learning_rate": 6.787683494450412e-06, + "loss": 0.0001, + "step": 6042 + }, + { + "epoch": 0.38944383579300124, + "grad_norm": 0.028517596008494257, + "learning_rate": 6.7869674185463665e-06, + "loss": 0.0, + "step": 6043 + }, + { + "epoch": 0.3895082812399304, + "grad_norm": 0.4768442821733463, + "learning_rate": 6.786251342642321e-06, + "loss": 0.0004, + "step": 6044 + }, + { + "epoch": 0.38957272668685955, + "grad_norm": 0.07491564763077205, + "learning_rate": 6.785535266738274e-06, + "loss": 0.0002, + "step": 6045 + }, + { + "epoch": 0.38963717213378873, + "grad_norm": 0.13349859351138746, + "learning_rate": 6.7848191908342285e-06, + "loss": 0.0002, + "step": 6046 + }, + { + "epoch": 0.3897016175807179, + "grad_norm": 0.01829255571236091, + "learning_rate": 6.784103114930183e-06, + "loss": 0.0001, + "step": 6047 + }, + { + "epoch": 0.3897660630276471, + "grad_norm": 0.005630099985821416, + "learning_rate": 6.783387039026137e-06, + "loss": 0.0, + "step": 6048 + }, + { + "epoch": 0.3898305084745763, + "grad_norm": 0.008533366939692776, + "learning_rate": 6.7826709631220914e-06, + "loss": 0.0, + "step": 6049 + }, + { + "epoch": 0.38989495392150547, + "grad_norm": 0.21604861616295312, + "learning_rate": 6.781954887218045e-06, + "loss": 0.0008, + "step": 6050 + }, + { + "epoch": 0.3899593993684346, + "grad_norm": 0.04892616443141344, + "learning_rate": 6.781238811313999e-06, + "loss": 0.0001, + "step": 6051 + }, + { + "epoch": 0.3900238448153638, + "grad_norm": 0.003492646098469076, + "learning_rate": 6.780522735409954e-06, + "loss": 0.0, + "step": 6052 + }, + { + "epoch": 0.39008829026229297, + "grad_norm": 0.17101291092330484, + "learning_rate": 6.779806659505909e-06, + "loss": 0.0007, + "step": 6053 + }, + { + "epoch": 0.39015273570922215, + "grad_norm": 0.0006625030070719406, + "learning_rate": 6.779090583601863e-06, + "loss": 0.0, + "step": 6054 + }, + { + "epoch": 0.39021718115615134, + "grad_norm": 0.0407639185230089, + "learning_rate": 6.778374507697817e-06, + "loss": 0.0001, + "step": 6055 + }, + { + "epoch": 0.39028162660308047, + "grad_norm": 0.017097571873581942, + "learning_rate": 6.777658431793771e-06, + "loss": 0.0, + "step": 6056 + }, + { + "epoch": 0.39034607205000965, + "grad_norm": 0.0012163750468424425, + "learning_rate": 6.776942355889725e-06, + "loss": 0.0, + "step": 6057 + }, + { + "epoch": 0.39041051749693884, + "grad_norm": 0.008539081604194343, + "learning_rate": 6.776226279985679e-06, + "loss": 0.0, + "step": 6058 + }, + { + "epoch": 0.390474962943868, + "grad_norm": 0.02127820832464942, + "learning_rate": 6.7755102040816336e-06, + "loss": 0.0016, + "step": 6059 + }, + { + "epoch": 0.3905394083907972, + "grad_norm": 0.05713715860478264, + "learning_rate": 6.774794128177588e-06, + "loss": 0.0002, + "step": 6060 + }, + { + "epoch": 0.3906038538377264, + "grad_norm": 0.040865544077726086, + "learning_rate": 6.774078052273541e-06, + "loss": 0.0003, + "step": 6061 + }, + { + "epoch": 0.3906682992846555, + "grad_norm": 0.0008292018234182559, + "learning_rate": 6.773361976369496e-06, + "loss": 0.0, + "step": 6062 + }, + { + "epoch": 0.3907327447315847, + "grad_norm": 0.10168170810440107, + "learning_rate": 6.77264590046545e-06, + "loss": 0.0004, + "step": 6063 + }, + { + "epoch": 0.3907971901785139, + "grad_norm": 0.003782680501955245, + "learning_rate": 6.771929824561404e-06, + "loss": 0.0, + "step": 6064 + }, + { + "epoch": 0.39086163562544307, + "grad_norm": 0.07099556273375927, + "learning_rate": 6.7712137486573585e-06, + "loss": 0.0001, + "step": 6065 + }, + { + "epoch": 0.39092608107237226, + "grad_norm": 0.0079087437697562, + "learning_rate": 6.770497672753312e-06, + "loss": 0.0, + "step": 6066 + }, + { + "epoch": 0.3909905265193014, + "grad_norm": 0.0015870679381851005, + "learning_rate": 6.769781596849266e-06, + "loss": 0.0, + "step": 6067 + }, + { + "epoch": 0.39105497196623057, + "grad_norm": 0.23510664546596716, + "learning_rate": 6.7690655209452206e-06, + "loss": 0.001, + "step": 6068 + }, + { + "epoch": 0.39111941741315975, + "grad_norm": 0.00031801601169482763, + "learning_rate": 6.768349445041175e-06, + "loss": 0.0, + "step": 6069 + }, + { + "epoch": 0.39118386286008894, + "grad_norm": 0.004575363429178179, + "learning_rate": 6.767633369137129e-06, + "loss": 0.0, + "step": 6070 + }, + { + "epoch": 0.3912483083070181, + "grad_norm": 0.060214690750501815, + "learning_rate": 6.766917293233083e-06, + "loss": 0.0002, + "step": 6071 + }, + { + "epoch": 0.3913127537539473, + "grad_norm": 0.0010421286728905228, + "learning_rate": 6.766201217329037e-06, + "loss": 0.0, + "step": 6072 + }, + { + "epoch": 0.39137719920087644, + "grad_norm": 0.4869503651905972, + "learning_rate": 6.765485141424991e-06, + "loss": 0.0004, + "step": 6073 + }, + { + "epoch": 0.3914416446478056, + "grad_norm": 0.21821732891056522, + "learning_rate": 6.7647690655209455e-06, + "loss": 0.0003, + "step": 6074 + }, + { + "epoch": 0.3915060900947348, + "grad_norm": 0.006221830004070171, + "learning_rate": 6.764052989616901e-06, + "loss": 0.0, + "step": 6075 + }, + { + "epoch": 0.391570535541664, + "grad_norm": 0.0037303944601452836, + "learning_rate": 6.763336913712855e-06, + "loss": 0.0, + "step": 6076 + }, + { + "epoch": 0.3916349809885932, + "grad_norm": 0.00043963797693598783, + "learning_rate": 6.762620837808808e-06, + "loss": 0.0, + "step": 6077 + }, + { + "epoch": 0.39169942643552236, + "grad_norm": 0.3138164567095846, + "learning_rate": 6.761904761904763e-06, + "loss": 0.0012, + "step": 6078 + }, + { + "epoch": 0.3917638718824515, + "grad_norm": 0.011132950687805643, + "learning_rate": 6.761188686000717e-06, + "loss": 0.0, + "step": 6079 + }, + { + "epoch": 0.39182831732938067, + "grad_norm": 0.46684292043449677, + "learning_rate": 6.760472610096671e-06, + "loss": 0.0011, + "step": 6080 + }, + { + "epoch": 0.39189276277630986, + "grad_norm": 0.0036284189679277653, + "learning_rate": 6.759756534192626e-06, + "loss": 0.0, + "step": 6081 + }, + { + "epoch": 0.39195720822323904, + "grad_norm": 0.00039873388616156857, + "learning_rate": 6.759040458288579e-06, + "loss": 0.0, + "step": 6082 + }, + { + "epoch": 0.3920216536701682, + "grad_norm": 0.009515007808803775, + "learning_rate": 6.758324382384533e-06, + "loss": 0.0, + "step": 6083 + }, + { + "epoch": 0.39208609911709735, + "grad_norm": 0.05785692576468786, + "learning_rate": 6.757608306480488e-06, + "loss": 0.0002, + "step": 6084 + }, + { + "epoch": 0.39215054456402654, + "grad_norm": 0.00735022972663383, + "learning_rate": 6.756892230576442e-06, + "loss": 0.0, + "step": 6085 + }, + { + "epoch": 0.3922149900109557, + "grad_norm": 0.0009415286435785793, + "learning_rate": 6.756176154672396e-06, + "loss": 0.0, + "step": 6086 + }, + { + "epoch": 0.3922794354578849, + "grad_norm": 0.02875433070604497, + "learning_rate": 6.75546007876835e-06, + "loss": 0.0, + "step": 6087 + }, + { + "epoch": 0.3923438809048141, + "grad_norm": 0.04135847135380983, + "learning_rate": 6.754744002864304e-06, + "loss": 0.0, + "step": 6088 + }, + { + "epoch": 0.3924083263517433, + "grad_norm": 0.004634106539941361, + "learning_rate": 6.754027926960258e-06, + "loss": 0.0, + "step": 6089 + }, + { + "epoch": 0.3924727717986724, + "grad_norm": 0.37803271956724577, + "learning_rate": 6.753311851056213e-06, + "loss": 0.003, + "step": 6090 + }, + { + "epoch": 0.3925372172456016, + "grad_norm": 0.0013209306774567687, + "learning_rate": 6.752595775152166e-06, + "loss": 0.0, + "step": 6091 + }, + { + "epoch": 0.3926016626925308, + "grad_norm": 0.008602022225160995, + "learning_rate": 6.75187969924812e-06, + "loss": 0.0, + "step": 6092 + }, + { + "epoch": 0.39266610813945996, + "grad_norm": 0.0026656787122006495, + "learning_rate": 6.751163623344075e-06, + "loss": 0.0, + "step": 6093 + }, + { + "epoch": 0.39273055358638914, + "grad_norm": 0.12914930173706923, + "learning_rate": 6.750447547440029e-06, + "loss": 0.0003, + "step": 6094 + }, + { + "epoch": 0.39279499903331827, + "grad_norm": 0.10282103505984658, + "learning_rate": 6.749731471535983e-06, + "loss": 0.0001, + "step": 6095 + }, + { + "epoch": 0.39285944448024746, + "grad_norm": 0.013621648850027368, + "learning_rate": 6.749015395631937e-06, + "loss": 0.0, + "step": 6096 + }, + { + "epoch": 0.39292388992717664, + "grad_norm": 0.038448006543109814, + "learning_rate": 6.748299319727891e-06, + "loss": 0.0001, + "step": 6097 + }, + { + "epoch": 0.3929883353741058, + "grad_norm": 0.044204079857528784, + "learning_rate": 6.747583243823845e-06, + "loss": 0.0006, + "step": 6098 + }, + { + "epoch": 0.393052780821035, + "grad_norm": 0.046062306311024155, + "learning_rate": 6.7468671679198004e-06, + "loss": 0.0007, + "step": 6099 + }, + { + "epoch": 0.3931172262679642, + "grad_norm": 0.007857325413024335, + "learning_rate": 6.746151092015755e-06, + "loss": 0.0001, + "step": 6100 + }, + { + "epoch": 0.3931816717148933, + "grad_norm": 0.01512873154302197, + "learning_rate": 6.745435016111709e-06, + "loss": 0.0001, + "step": 6101 + }, + { + "epoch": 0.3932461171618225, + "grad_norm": 0.0032276045978629686, + "learning_rate": 6.744718940207663e-06, + "loss": 0.0, + "step": 6102 + }, + { + "epoch": 0.3933105626087517, + "grad_norm": 0.0009057025388732807, + "learning_rate": 6.744002864303617e-06, + "loss": 0.0, + "step": 6103 + }, + { + "epoch": 0.3933750080556809, + "grad_norm": 0.03844354321300612, + "learning_rate": 6.743286788399571e-06, + "loss": 0.0001, + "step": 6104 + }, + { + "epoch": 0.39343945350261006, + "grad_norm": 0.0012272262348689547, + "learning_rate": 6.742570712495525e-06, + "loss": 0.0, + "step": 6105 + }, + { + "epoch": 0.3935038989495392, + "grad_norm": 0.001039082765129518, + "learning_rate": 6.74185463659148e-06, + "loss": 0.0, + "step": 6106 + }, + { + "epoch": 0.3935683443964684, + "grad_norm": 0.11303522260833805, + "learning_rate": 6.741138560687433e-06, + "loss": 0.0003, + "step": 6107 + }, + { + "epoch": 0.39363278984339756, + "grad_norm": 0.14047053251100844, + "learning_rate": 6.7404224847833874e-06, + "loss": 0.0009, + "step": 6108 + }, + { + "epoch": 0.39369723529032674, + "grad_norm": 0.00028781397467615895, + "learning_rate": 6.739706408879342e-06, + "loss": 0.0, + "step": 6109 + }, + { + "epoch": 0.3937616807372559, + "grad_norm": 0.0004087873048829891, + "learning_rate": 6.738990332975296e-06, + "loss": 0.0, + "step": 6110 + }, + { + "epoch": 0.3938261261841851, + "grad_norm": 0.00040317603909862624, + "learning_rate": 6.73827425707125e-06, + "loss": 0.0, + "step": 6111 + }, + { + "epoch": 0.39389057163111424, + "grad_norm": 0.00040317603909862624, + "learning_rate": 6.73827425707125e-06, + "loss": 0.0043, + "step": 6112 + }, + { + "epoch": 0.3939550170780434, + "grad_norm": 0.0003019461882047249, + "learning_rate": 6.737558181167204e-06, + "loss": 0.0, + "step": 6113 + }, + { + "epoch": 0.3940194625249726, + "grad_norm": 0.006519207135955577, + "learning_rate": 6.736842105263158e-06, + "loss": 0.0, + "step": 6114 + }, + { + "epoch": 0.3940839079719018, + "grad_norm": 0.0011064302177114479, + "learning_rate": 6.736126029359112e-06, + "loss": 0.0, + "step": 6115 + }, + { + "epoch": 0.394148353418831, + "grad_norm": 0.0011027793071489889, + "learning_rate": 6.735409953455067e-06, + "loss": 0.0, + "step": 6116 + }, + { + "epoch": 0.39421279886576016, + "grad_norm": 0.003786054726373903, + "learning_rate": 6.734693877551021e-06, + "loss": 0.0, + "step": 6117 + }, + { + "epoch": 0.3942772443126893, + "grad_norm": 0.28807663983357745, + "learning_rate": 6.7339778016469744e-06, + "loss": 0.0002, + "step": 6118 + }, + { + "epoch": 0.3943416897596185, + "grad_norm": 0.19951675602869184, + "learning_rate": 6.733261725742929e-06, + "loss": 0.0021, + "step": 6119 + }, + { + "epoch": 0.39440613520654766, + "grad_norm": 0.0014406398795617093, + "learning_rate": 6.732545649838883e-06, + "loss": 0.0, + "step": 6120 + }, + { + "epoch": 0.39447058065347684, + "grad_norm": 0.05896528389795827, + "learning_rate": 6.731829573934837e-06, + "loss": 0.0002, + "step": 6121 + }, + { + "epoch": 0.394535026100406, + "grad_norm": 0.0004485638853455591, + "learning_rate": 6.731113498030792e-06, + "loss": 0.0, + "step": 6122 + }, + { + "epoch": 0.39459947154733516, + "grad_norm": 0.000655233336787404, + "learning_rate": 6.730397422126747e-06, + "loss": 0.0, + "step": 6123 + }, + { + "epoch": 0.39466391699426434, + "grad_norm": 1.2452657214560217, + "learning_rate": 6.7296813462227e-06, + "loss": 0.0059, + "step": 6124 + }, + { + "epoch": 0.3947283624411935, + "grad_norm": 0.006466952664939254, + "learning_rate": 6.7289652703186545e-06, + "loss": 0.0, + "step": 6125 + }, + { + "epoch": 0.3947928078881227, + "grad_norm": 0.11695563902920848, + "learning_rate": 6.728249194414609e-06, + "loss": 0.0009, + "step": 6126 + }, + { + "epoch": 0.3948572533350519, + "grad_norm": 0.0054357978734342165, + "learning_rate": 6.727533118510563e-06, + "loss": 0.0, + "step": 6127 + }, + { + "epoch": 0.3949216987819811, + "grad_norm": 0.010678078004628366, + "learning_rate": 6.726817042606517e-06, + "loss": 0.0, + "step": 6128 + }, + { + "epoch": 0.3949861442289102, + "grad_norm": 0.004239491570611307, + "learning_rate": 6.726100966702471e-06, + "loss": 0.0, + "step": 6129 + }, + { + "epoch": 0.3950505896758394, + "grad_norm": 0.4474075958280737, + "learning_rate": 6.725384890798425e-06, + "loss": 0.0036, + "step": 6130 + }, + { + "epoch": 0.3951150351227686, + "grad_norm": 0.008688036460937971, + "learning_rate": 6.7246688148943795e-06, + "loss": 0.0, + "step": 6131 + }, + { + "epoch": 0.39517948056969776, + "grad_norm": 0.0029086975973019197, + "learning_rate": 6.723952738990334e-06, + "loss": 0.0, + "step": 6132 + }, + { + "epoch": 0.39524392601662695, + "grad_norm": 0.00804262993046448, + "learning_rate": 6.723236663086288e-06, + "loss": 0.0001, + "step": 6133 + }, + { + "epoch": 0.3953083714635561, + "grad_norm": 0.001383582353238081, + "learning_rate": 6.7225205871822415e-06, + "loss": 0.0, + "step": 6134 + }, + { + "epoch": 0.39537281691048526, + "grad_norm": 0.002657203034535678, + "learning_rate": 6.721804511278196e-06, + "loss": 0.0, + "step": 6135 + }, + { + "epoch": 0.39543726235741444, + "grad_norm": 0.22318623860445258, + "learning_rate": 6.72108843537415e-06, + "loss": 0.0007, + "step": 6136 + }, + { + "epoch": 0.3955017078043436, + "grad_norm": 0.060305021991659505, + "learning_rate": 6.720372359470104e-06, + "loss": 0.0001, + "step": 6137 + }, + { + "epoch": 0.3955661532512728, + "grad_norm": 0.005253792241862154, + "learning_rate": 6.719656283566059e-06, + "loss": 0.0, + "step": 6138 + }, + { + "epoch": 0.395630598698202, + "grad_norm": 0.0004886858584370133, + "learning_rate": 6.718940207662012e-06, + "loss": 0.0, + "step": 6139 + }, + { + "epoch": 0.3956950441451311, + "grad_norm": 0.005571864209276483, + "learning_rate": 6.7182241317579665e-06, + "loss": 0.0, + "step": 6140 + }, + { + "epoch": 0.3957594895920603, + "grad_norm": 0.0005897999561558652, + "learning_rate": 6.717508055853921e-06, + "loss": 0.0, + "step": 6141 + }, + { + "epoch": 0.3958239350389895, + "grad_norm": 0.018974760515579954, + "learning_rate": 6.716791979949875e-06, + "loss": 0.0, + "step": 6142 + }, + { + "epoch": 0.3958883804859187, + "grad_norm": 0.005510618869750594, + "learning_rate": 6.716075904045829e-06, + "loss": 0.0, + "step": 6143 + }, + { + "epoch": 0.39595282593284786, + "grad_norm": 0.005582761552538098, + "learning_rate": 6.715359828141783e-06, + "loss": 0.0, + "step": 6144 + }, + { + "epoch": 0.396017271379777, + "grad_norm": 0.0009583769130436984, + "learning_rate": 6.714643752237737e-06, + "loss": 0.0, + "step": 6145 + }, + { + "epoch": 0.3960817168267062, + "grad_norm": 0.0028119638281370062, + "learning_rate": 6.713927676333692e-06, + "loss": 0.0, + "step": 6146 + }, + { + "epoch": 0.39614616227363536, + "grad_norm": 0.0350255340781061, + "learning_rate": 6.7132116004296465e-06, + "loss": 0.0001, + "step": 6147 + }, + { + "epoch": 0.39621060772056454, + "grad_norm": 0.0018648797668935515, + "learning_rate": 6.712495524525601e-06, + "loss": 0.0, + "step": 6148 + }, + { + "epoch": 0.39627505316749373, + "grad_norm": 0.0003898422824717099, + "learning_rate": 6.711779448621555e-06, + "loss": 0.0, + "step": 6149 + }, + { + "epoch": 0.3963394986144229, + "grad_norm": 0.005369384247029605, + "learning_rate": 6.711063372717509e-06, + "loss": 0.0, + "step": 6150 + }, + { + "epoch": 0.39640394406135204, + "grad_norm": 0.0025898006320916687, + "learning_rate": 6.710347296813463e-06, + "loss": 0.0, + "step": 6151 + }, + { + "epoch": 0.3964683895082812, + "grad_norm": 0.05196281984407206, + "learning_rate": 6.709631220909417e-06, + "loss": 0.0005, + "step": 6152 + }, + { + "epoch": 0.3965328349552104, + "grad_norm": 0.07452371645686365, + "learning_rate": 6.7089151450053715e-06, + "loss": 0.0, + "step": 6153 + }, + { + "epoch": 0.3965972804021396, + "grad_norm": 0.19650088216253572, + "learning_rate": 6.708199069101326e-06, + "loss": 0.0024, + "step": 6154 + }, + { + "epoch": 0.3966617258490688, + "grad_norm": 0.12044238442729664, + "learning_rate": 6.707482993197279e-06, + "loss": 0.0003, + "step": 6155 + }, + { + "epoch": 0.39672617129599796, + "grad_norm": 0.3482266044446364, + "learning_rate": 6.7067669172932335e-06, + "loss": 0.0012, + "step": 6156 + }, + { + "epoch": 0.3967906167429271, + "grad_norm": 0.011913191458644948, + "learning_rate": 6.706050841389188e-06, + "loss": 0.0, + "step": 6157 + }, + { + "epoch": 0.3968550621898563, + "grad_norm": 0.006744656352361722, + "learning_rate": 6.705334765485142e-06, + "loss": 0.0, + "step": 6158 + }, + { + "epoch": 0.39691950763678546, + "grad_norm": 0.0047692479281992144, + "learning_rate": 6.7046186895810964e-06, + "loss": 0.0, + "step": 6159 + }, + { + "epoch": 0.39698395308371465, + "grad_norm": 0.39355205601269155, + "learning_rate": 6.70390261367705e-06, + "loss": 0.0013, + "step": 6160 + }, + { + "epoch": 0.39704839853064383, + "grad_norm": 0.20822374123710258, + "learning_rate": 6.703186537773004e-06, + "loss": 0.001, + "step": 6161 + }, + { + "epoch": 0.39711284397757296, + "grad_norm": 0.00042890634310612283, + "learning_rate": 6.7024704618689585e-06, + "loss": 0.0, + "step": 6162 + }, + { + "epoch": 0.39717728942450214, + "grad_norm": 0.32812195731069266, + "learning_rate": 6.701754385964913e-06, + "loss": 0.0003, + "step": 6163 + }, + { + "epoch": 0.39724173487143133, + "grad_norm": 0.19006787676608586, + "learning_rate": 6.701038310060867e-06, + "loss": 0.0004, + "step": 6164 + }, + { + "epoch": 0.3973061803183605, + "grad_norm": 0.17109802580415745, + "learning_rate": 6.7003222341568205e-06, + "loss": 0.0005, + "step": 6165 + }, + { + "epoch": 0.3973706257652897, + "grad_norm": 0.009018840277326589, + "learning_rate": 6.699606158252775e-06, + "loss": 0.0001, + "step": 6166 + }, + { + "epoch": 0.3974350712122189, + "grad_norm": 0.0005974862827873541, + "learning_rate": 6.698890082348729e-06, + "loss": 0.0, + "step": 6167 + }, + { + "epoch": 0.397499516659148, + "grad_norm": 0.0005127417844990209, + "learning_rate": 6.6981740064446834e-06, + "loss": 0.0, + "step": 6168 + }, + { + "epoch": 0.3975639621060772, + "grad_norm": 0.013986048446789382, + "learning_rate": 6.697457930540638e-06, + "loss": 0.0001, + "step": 6169 + }, + { + "epoch": 0.3976284075530064, + "grad_norm": 0.2875345027993739, + "learning_rate": 6.696741854636593e-06, + "loss": 0.0023, + "step": 6170 + }, + { + "epoch": 0.39769285299993556, + "grad_norm": 0.00013710756560736585, + "learning_rate": 6.696025778732546e-06, + "loss": 0.0, + "step": 6171 + }, + { + "epoch": 0.39775729844686475, + "grad_norm": 0.0073792311350031, + "learning_rate": 6.695309702828501e-06, + "loss": 0.0, + "step": 6172 + }, + { + "epoch": 0.3978217438937939, + "grad_norm": 0.003827701575672846, + "learning_rate": 6.694593626924455e-06, + "loss": 0.0, + "step": 6173 + }, + { + "epoch": 0.39788618934072306, + "grad_norm": 0.0034136686566082096, + "learning_rate": 6.693877551020409e-06, + "loss": 0.0, + "step": 6174 + }, + { + "epoch": 0.39795063478765225, + "grad_norm": 0.02060817741945624, + "learning_rate": 6.6931614751163635e-06, + "loss": 0.0002, + "step": 6175 + }, + { + "epoch": 0.39801508023458143, + "grad_norm": 0.0006820699695873835, + "learning_rate": 6.692445399212317e-06, + "loss": 0.0, + "step": 6176 + }, + { + "epoch": 0.3980795256815106, + "grad_norm": 0.007219121822066729, + "learning_rate": 6.691729323308271e-06, + "loss": 0.0, + "step": 6177 + }, + { + "epoch": 0.3981439711284398, + "grad_norm": 0.0012653471260716737, + "learning_rate": 6.6910132474042256e-06, + "loss": 0.0, + "step": 6178 + }, + { + "epoch": 0.39820841657536893, + "grad_norm": 0.017223988011398236, + "learning_rate": 6.69029717150018e-06, + "loss": 0.0001, + "step": 6179 + }, + { + "epoch": 0.3982728620222981, + "grad_norm": 0.6595386799147767, + "learning_rate": 6.689581095596134e-06, + "loss": 0.0037, + "step": 6180 + }, + { + "epoch": 0.3983373074692273, + "grad_norm": 0.016393956086872264, + "learning_rate": 6.688865019692088e-06, + "loss": 0.0, + "step": 6181 + }, + { + "epoch": 0.3984017529161565, + "grad_norm": 0.006429886962872136, + "learning_rate": 6.688148943788042e-06, + "loss": 0.0, + "step": 6182 + }, + { + "epoch": 0.39846619836308567, + "grad_norm": 0.03475362813153784, + "learning_rate": 6.687432867883996e-06, + "loss": 0.0, + "step": 6183 + }, + { + "epoch": 0.3985306438100148, + "grad_norm": 0.002141625646072484, + "learning_rate": 6.6867167919799505e-06, + "loss": 0.0, + "step": 6184 + }, + { + "epoch": 0.398595089256944, + "grad_norm": 0.009711719164443825, + "learning_rate": 6.686000716075905e-06, + "loss": 0.0001, + "step": 6185 + }, + { + "epoch": 0.39865953470387316, + "grad_norm": 0.007483122794986775, + "learning_rate": 6.685284640171858e-06, + "loss": 0.0001, + "step": 6186 + }, + { + "epoch": 0.39872398015080235, + "grad_norm": 0.0013225585110462972, + "learning_rate": 6.6845685642678126e-06, + "loss": 0.0, + "step": 6187 + }, + { + "epoch": 0.39878842559773153, + "grad_norm": 0.0007087233184781159, + "learning_rate": 6.683852488363767e-06, + "loss": 0.0, + "step": 6188 + }, + { + "epoch": 0.3988528710446607, + "grad_norm": 0.005808769746201201, + "learning_rate": 6.683136412459721e-06, + "loss": 0.0, + "step": 6189 + }, + { + "epoch": 0.39891731649158985, + "grad_norm": 0.03269624359247558, + "learning_rate": 6.682420336555675e-06, + "loss": 0.0001, + "step": 6190 + }, + { + "epoch": 0.39898176193851903, + "grad_norm": 0.06306389772063731, + "learning_rate": 6.681704260651629e-06, + "loss": 0.0003, + "step": 6191 + }, + { + "epoch": 0.3990462073854482, + "grad_norm": 0.0007749667094711631, + "learning_rate": 6.680988184747583e-06, + "loss": 0.0, + "step": 6192 + }, + { + "epoch": 0.3991106528323774, + "grad_norm": 1.1743724405160312, + "learning_rate": 6.680272108843538e-06, + "loss": 0.0051, + "step": 6193 + }, + { + "epoch": 0.3991750982793066, + "grad_norm": 0.0006233756383026622, + "learning_rate": 6.679556032939493e-06, + "loss": 0.0, + "step": 6194 + }, + { + "epoch": 0.39923954372623577, + "grad_norm": 0.1845996974583024, + "learning_rate": 6.678839957035447e-06, + "loss": 0.0012, + "step": 6195 + }, + { + "epoch": 0.3993039891731649, + "grad_norm": 0.04961964414596718, + "learning_rate": 6.678123881131401e-06, + "loss": 0.0001, + "step": 6196 + }, + { + "epoch": 0.3993684346200941, + "grad_norm": 0.002305596368655089, + "learning_rate": 6.677407805227355e-06, + "loss": 0.0, + "step": 6197 + }, + { + "epoch": 0.39943288006702327, + "grad_norm": 0.03989655149228882, + "learning_rate": 6.676691729323309e-06, + "loss": 0.0, + "step": 6198 + }, + { + "epoch": 0.39949732551395245, + "grad_norm": 0.11639607532249155, + "learning_rate": 6.675975653419263e-06, + "loss": 0.0017, + "step": 6199 + }, + { + "epoch": 0.39956177096088163, + "grad_norm": 0.08423818726228123, + "learning_rate": 6.675259577515218e-06, + "loss": 0.0017, + "step": 6200 + }, + { + "epoch": 0.39962621640781076, + "grad_norm": 0.0009415812576501265, + "learning_rate": 6.674543501611172e-06, + "loss": 0.0, + "step": 6201 + }, + { + "epoch": 0.39969066185473995, + "grad_norm": 0.03231412558563971, + "learning_rate": 6.673827425707125e-06, + "loss": 0.0, + "step": 6202 + }, + { + "epoch": 0.39975510730166913, + "grad_norm": 0.022533085776875975, + "learning_rate": 6.67311134980308e-06, + "loss": 0.0001, + "step": 6203 + }, + { + "epoch": 0.3998195527485983, + "grad_norm": 0.0004932371042244677, + "learning_rate": 6.672395273899034e-06, + "loss": 0.0, + "step": 6204 + }, + { + "epoch": 0.3998839981955275, + "grad_norm": 0.0016411312170160164, + "learning_rate": 6.671679197994988e-06, + "loss": 0.0, + "step": 6205 + }, + { + "epoch": 0.3999484436424567, + "grad_norm": 0.00045268622319586543, + "learning_rate": 6.670963122090942e-06, + "loss": 0.0, + "step": 6206 + }, + { + "epoch": 0.4000128890893858, + "grad_norm": 0.06922491009062504, + "learning_rate": 6.670247046186896e-06, + "loss": 0.0001, + "step": 6207 + }, + { + "epoch": 0.400077334536315, + "grad_norm": 0.012176174861812075, + "learning_rate": 6.66953097028285e-06, + "loss": 0.0, + "step": 6208 + }, + { + "epoch": 0.4001417799832442, + "grad_norm": 0.02192805518688391, + "learning_rate": 6.668814894378805e-06, + "loss": 0.0, + "step": 6209 + }, + { + "epoch": 0.40020622543017337, + "grad_norm": 0.01031527857257621, + "learning_rate": 6.668098818474759e-06, + "loss": 0.0, + "step": 6210 + }, + { + "epoch": 0.40027067087710255, + "grad_norm": 0.2088355398948857, + "learning_rate": 6.667382742570712e-06, + "loss": 0.0004, + "step": 6211 + }, + { + "epoch": 0.4003351163240317, + "grad_norm": 0.006589784635158972, + "learning_rate": 6.666666666666667e-06, + "loss": 0.0, + "step": 6212 + }, + { + "epoch": 0.40039956177096087, + "grad_norm": 0.590162759370787, + "learning_rate": 6.665950590762621e-06, + "loss": 0.0016, + "step": 6213 + }, + { + "epoch": 0.40046400721789005, + "grad_norm": 0.4982781116509973, + "learning_rate": 6.665234514858575e-06, + "loss": 0.0037, + "step": 6214 + }, + { + "epoch": 0.40052845266481923, + "grad_norm": 0.12883591900735675, + "learning_rate": 6.6645184389545295e-06, + "loss": 0.0, + "step": 6215 + }, + { + "epoch": 0.4005928981117484, + "grad_norm": 0.15875782090355953, + "learning_rate": 6.663802363050485e-06, + "loss": 0.003, + "step": 6216 + }, + { + "epoch": 0.4006573435586776, + "grad_norm": 0.021264245585666988, + "learning_rate": 6.663086287146439e-06, + "loss": 0.0, + "step": 6217 + }, + { + "epoch": 0.40072178900560673, + "grad_norm": 0.088981737988645, + "learning_rate": 6.6623702112423924e-06, + "loss": 0.0001, + "step": 6218 + }, + { + "epoch": 0.4007862344525359, + "grad_norm": 0.002934769665413855, + "learning_rate": 6.661654135338347e-06, + "loss": 0.0, + "step": 6219 + }, + { + "epoch": 0.4008506798994651, + "grad_norm": 0.15586118601461513, + "learning_rate": 6.660938059434301e-06, + "loss": 0.0006, + "step": 6220 + }, + { + "epoch": 0.4009151253463943, + "grad_norm": 0.28255903730610116, + "learning_rate": 6.660221983530255e-06, + "loss": 0.0004, + "step": 6221 + }, + { + "epoch": 0.40097957079332347, + "grad_norm": 0.028550108747655678, + "learning_rate": 6.659505907626209e-06, + "loss": 0.0001, + "step": 6222 + }, + { + "epoch": 0.4010440162402526, + "grad_norm": 0.27616440845200096, + "learning_rate": 6.658789831722163e-06, + "loss": 0.002, + "step": 6223 + }, + { + "epoch": 0.4011084616871818, + "grad_norm": 0.008006083560591043, + "learning_rate": 6.658073755818117e-06, + "loss": 0.0, + "step": 6224 + }, + { + "epoch": 0.40117290713411097, + "grad_norm": 0.07311599032007556, + "learning_rate": 6.657357679914072e-06, + "loss": 0.0002, + "step": 6225 + }, + { + "epoch": 0.40123735258104015, + "grad_norm": 0.0009471099356845897, + "learning_rate": 6.656641604010026e-06, + "loss": 0.0, + "step": 6226 + }, + { + "epoch": 0.40130179802796934, + "grad_norm": 0.00239955307892596, + "learning_rate": 6.6559255281059794e-06, + "loss": 0.0, + "step": 6227 + }, + { + "epoch": 0.4013662434748985, + "grad_norm": 0.03330112643076966, + "learning_rate": 6.655209452201934e-06, + "loss": 0.0, + "step": 6228 + }, + { + "epoch": 0.40143068892182765, + "grad_norm": 0.003579085964833083, + "learning_rate": 6.654493376297888e-06, + "loss": 0.0, + "step": 6229 + }, + { + "epoch": 0.40149513436875683, + "grad_norm": 0.0001288371734857651, + "learning_rate": 6.653777300393842e-06, + "loss": 0.0, + "step": 6230 + }, + { + "epoch": 0.401559579815686, + "grad_norm": 0.0001809409710459574, + "learning_rate": 6.653061224489797e-06, + "loss": 0.0, + "step": 6231 + }, + { + "epoch": 0.4016240252626152, + "grad_norm": 0.0018872319576630108, + "learning_rate": 6.65234514858575e-06, + "loss": 0.0, + "step": 6232 + }, + { + "epoch": 0.4016884707095444, + "grad_norm": 0.00037414685857723075, + "learning_rate": 6.651629072681704e-06, + "loss": 0.0, + "step": 6233 + }, + { + "epoch": 0.40175291615647357, + "grad_norm": 0.005488616829534191, + "learning_rate": 6.650912996777659e-06, + "loss": 0.0, + "step": 6234 + }, + { + "epoch": 0.4018173616034027, + "grad_norm": 0.0043051522679139476, + "learning_rate": 6.650196920873613e-06, + "loss": 0.0, + "step": 6235 + }, + { + "epoch": 0.4018818070503319, + "grad_norm": 0.18108599082511184, + "learning_rate": 6.649480844969567e-06, + "loss": 0.0005, + "step": 6236 + }, + { + "epoch": 0.40194625249726107, + "grad_norm": 0.0002171191700035768, + "learning_rate": 6.648764769065521e-06, + "loss": 0.0, + "step": 6237 + }, + { + "epoch": 0.40201069794419025, + "grad_norm": 0.0021956388300873758, + "learning_rate": 6.648048693161475e-06, + "loss": 0.0, + "step": 6238 + }, + { + "epoch": 0.40207514339111944, + "grad_norm": 0.009351226362029775, + "learning_rate": 6.647332617257429e-06, + "loss": 0.0001, + "step": 6239 + }, + { + "epoch": 0.40213958883804857, + "grad_norm": 0.00015364747358863305, + "learning_rate": 6.6466165413533845e-06, + "loss": 0.0, + "step": 6240 + }, + { + "epoch": 0.40220403428497775, + "grad_norm": 0.0046499385674727165, + "learning_rate": 6.645900465449339e-06, + "loss": 0.0, + "step": 6241 + }, + { + "epoch": 0.40226847973190694, + "grad_norm": 0.0010105762902176386, + "learning_rate": 6.645184389545293e-06, + "loss": 0.0, + "step": 6242 + }, + { + "epoch": 0.4023329251788361, + "grad_norm": 0.039143908131220026, + "learning_rate": 6.6444683136412465e-06, + "loss": 0.0001, + "step": 6243 + }, + { + "epoch": 0.4023973706257653, + "grad_norm": 0.0024909498493526964, + "learning_rate": 6.643752237737201e-06, + "loss": 0.0, + "step": 6244 + }, + { + "epoch": 0.4024618160726945, + "grad_norm": 0.030333604358856733, + "learning_rate": 6.643036161833155e-06, + "loss": 0.0, + "step": 6245 + }, + { + "epoch": 0.4025262615196236, + "grad_norm": 0.0003121790638401315, + "learning_rate": 6.642320085929109e-06, + "loss": 0.0, + "step": 6246 + }, + { + "epoch": 0.4025907069665528, + "grad_norm": 0.05674777091108059, + "learning_rate": 6.641604010025064e-06, + "loss": 0.0, + "step": 6247 + }, + { + "epoch": 0.402655152413482, + "grad_norm": 0.029250476772840036, + "learning_rate": 6.640887934121017e-06, + "loss": 0.0, + "step": 6248 + }, + { + "epoch": 0.40271959786041117, + "grad_norm": 9.698981643813166e-05, + "learning_rate": 6.6401718582169715e-06, + "loss": 0.0, + "step": 6249 + }, + { + "epoch": 0.40278404330734036, + "grad_norm": 0.27877300990720555, + "learning_rate": 6.639455782312926e-06, + "loss": 0.0026, + "step": 6250 + }, + { + "epoch": 0.4028484887542695, + "grad_norm": 0.0027097435262068716, + "learning_rate": 6.63873970640888e-06, + "loss": 0.0, + "step": 6251 + }, + { + "epoch": 0.40291293420119867, + "grad_norm": 0.05563554116756958, + "learning_rate": 6.638023630504834e-06, + "loss": 0.0001, + "step": 6252 + }, + { + "epoch": 0.40297737964812785, + "grad_norm": 0.03642531035428272, + "learning_rate": 6.637307554600788e-06, + "loss": 0.0001, + "step": 6253 + }, + { + "epoch": 0.40304182509505704, + "grad_norm": 0.029172964117816117, + "learning_rate": 6.636591478696742e-06, + "loss": 0.0001, + "step": 6254 + }, + { + "epoch": 0.4031062705419862, + "grad_norm": 0.002905543593009424, + "learning_rate": 6.635875402792696e-06, + "loss": 0.0, + "step": 6255 + }, + { + "epoch": 0.4031707159889154, + "grad_norm": 0.012390442078684512, + "learning_rate": 6.635159326888651e-06, + "loss": 0.0, + "step": 6256 + }, + { + "epoch": 0.40323516143584454, + "grad_norm": 0.021038528188060623, + "learning_rate": 6.634443250984605e-06, + "loss": 0.0002, + "step": 6257 + }, + { + "epoch": 0.4032996068827737, + "grad_norm": 0.3627454641874893, + "learning_rate": 6.6337271750805585e-06, + "loss": 0.0011, + "step": 6258 + }, + { + "epoch": 0.4033640523297029, + "grad_norm": 0.004103812286494867, + "learning_rate": 6.633011099176513e-06, + "loss": 0.0, + "step": 6259 + }, + { + "epoch": 0.4034284977766321, + "grad_norm": 0.46436891426010923, + "learning_rate": 6.632295023272467e-06, + "loss": 0.0023, + "step": 6260 + }, + { + "epoch": 0.4034929432235613, + "grad_norm": 0.05719079838357991, + "learning_rate": 6.631578947368421e-06, + "loss": 0.0001, + "step": 6261 + }, + { + "epoch": 0.4035573886704904, + "grad_norm": 0.001427135059155809, + "learning_rate": 6.630862871464376e-06, + "loss": 0.0, + "step": 6262 + }, + { + "epoch": 0.4036218341174196, + "grad_norm": 0.00043134871254642114, + "learning_rate": 6.630146795560331e-06, + "loss": 0.0, + "step": 6263 + }, + { + "epoch": 0.40368627956434877, + "grad_norm": 0.005838851823537722, + "learning_rate": 6.629430719656284e-06, + "loss": 0.0, + "step": 6264 + }, + { + "epoch": 0.40375072501127796, + "grad_norm": 0.0031162694259560825, + "learning_rate": 6.6287146437522385e-06, + "loss": 0.0, + "step": 6265 + }, + { + "epoch": 0.40381517045820714, + "grad_norm": 0.16612453815009942, + "learning_rate": 6.627998567848193e-06, + "loss": 0.0005, + "step": 6266 + }, + { + "epoch": 0.4038796159051363, + "grad_norm": 8.110659193095382e-05, + "learning_rate": 6.627282491944147e-06, + "loss": 0.0, + "step": 6267 + }, + { + "epoch": 0.40394406135206545, + "grad_norm": 0.09589340309400302, + "learning_rate": 6.6265664160401014e-06, + "loss": 0.0001, + "step": 6268 + }, + { + "epoch": 0.40400850679899464, + "grad_norm": 0.0001381144958984862, + "learning_rate": 6.625850340136055e-06, + "loss": 0.0, + "step": 6269 + }, + { + "epoch": 0.4040729522459238, + "grad_norm": 0.0010787557333930568, + "learning_rate": 6.625134264232009e-06, + "loss": 0.0, + "step": 6270 + }, + { + "epoch": 0.404137397692853, + "grad_norm": 0.0003107536158255425, + "learning_rate": 6.6244181883279635e-06, + "loss": 0.0, + "step": 6271 + }, + { + "epoch": 0.4042018431397822, + "grad_norm": 0.008608214525285639, + "learning_rate": 6.623702112423918e-06, + "loss": 0.0, + "step": 6272 + }, + { + "epoch": 0.4042662885867114, + "grad_norm": 0.00040518102927398483, + "learning_rate": 6.622986036519872e-06, + "loss": 0.0, + "step": 6273 + }, + { + "epoch": 0.4043307340336405, + "grad_norm": 0.36758800740593717, + "learning_rate": 6.6222699606158255e-06, + "loss": 0.0007, + "step": 6274 + }, + { + "epoch": 0.4043951794805697, + "grad_norm": 0.012724112433642078, + "learning_rate": 6.62155388471178e-06, + "loss": 0.0001, + "step": 6275 + }, + { + "epoch": 0.4044596249274989, + "grad_norm": 0.0034801321594823327, + "learning_rate": 6.620837808807734e-06, + "loss": 0.0015, + "step": 6276 + }, + { + "epoch": 0.40452407037442806, + "grad_norm": 0.0022402441000457106, + "learning_rate": 6.6201217329036884e-06, + "loss": 0.0, + "step": 6277 + }, + { + "epoch": 0.40458851582135724, + "grad_norm": 0.001198815179465232, + "learning_rate": 6.619405656999643e-06, + "loss": 0.0, + "step": 6278 + }, + { + "epoch": 0.40465296126828637, + "grad_norm": 0.02394095008108672, + "learning_rate": 6.618689581095596e-06, + "loss": 0.0001, + "step": 6279 + }, + { + "epoch": 0.40471740671521556, + "grad_norm": 0.008677542084958455, + "learning_rate": 6.6179735051915505e-06, + "loss": 0.0001, + "step": 6280 + }, + { + "epoch": 0.40478185216214474, + "grad_norm": 0.006015602815613465, + "learning_rate": 6.617257429287505e-06, + "loss": 0.0, + "step": 6281 + }, + { + "epoch": 0.4048462976090739, + "grad_norm": 0.00022256715957847932, + "learning_rate": 6.616541353383459e-06, + "loss": 0.0, + "step": 6282 + }, + { + "epoch": 0.4049107430560031, + "grad_norm": 0.0021269805569372334, + "learning_rate": 6.6158252774794125e-06, + "loss": 0.0, + "step": 6283 + }, + { + "epoch": 0.4049751885029323, + "grad_norm": 0.0019124917188764595, + "learning_rate": 6.615109201575367e-06, + "loss": 0.0, + "step": 6284 + }, + { + "epoch": 0.4050396339498614, + "grad_norm": 0.0006371406991821277, + "learning_rate": 6.614393125671321e-06, + "loss": 0.0, + "step": 6285 + }, + { + "epoch": 0.4051040793967906, + "grad_norm": 0.012221513144843947, + "learning_rate": 6.613677049767276e-06, + "loss": 0.0001, + "step": 6286 + }, + { + "epoch": 0.4051685248437198, + "grad_norm": 0.003629192668777277, + "learning_rate": 6.612960973863231e-06, + "loss": 0.0, + "step": 6287 + }, + { + "epoch": 0.405232970290649, + "grad_norm": 0.03566383629410653, + "learning_rate": 6.612244897959185e-06, + "loss": 0.0001, + "step": 6288 + }, + { + "epoch": 0.40529741573757816, + "grad_norm": 0.011285903547055491, + "learning_rate": 6.611528822055139e-06, + "loss": 0.0, + "step": 6289 + }, + { + "epoch": 0.4053618611845073, + "grad_norm": 0.0027852915169590963, + "learning_rate": 6.610812746151093e-06, + "loss": 0.0, + "step": 6290 + }, + { + "epoch": 0.4054263066314365, + "grad_norm": 0.10941760050982087, + "learning_rate": 6.610096670247047e-06, + "loss": 0.0009, + "step": 6291 + }, + { + "epoch": 0.40549075207836566, + "grad_norm": 0.002644033621041055, + "learning_rate": 6.609380594343001e-06, + "loss": 0.0, + "step": 6292 + }, + { + "epoch": 0.40555519752529484, + "grad_norm": 0.00019279917264443554, + "learning_rate": 6.6086645184389555e-06, + "loss": 0.0, + "step": 6293 + }, + { + "epoch": 0.405619642972224, + "grad_norm": 0.0005731582641601562, + "learning_rate": 6.60794844253491e-06, + "loss": 0.0, + "step": 6294 + }, + { + "epoch": 0.4056840884191532, + "grad_norm": 0.006801553876835982, + "learning_rate": 6.607232366630863e-06, + "loss": 0.0, + "step": 6295 + }, + { + "epoch": 0.40574853386608234, + "grad_norm": 0.003933302052609157, + "learning_rate": 6.6065162907268176e-06, + "loss": 0.0, + "step": 6296 + }, + { + "epoch": 0.4058129793130115, + "grad_norm": 0.0006329129133961246, + "learning_rate": 6.605800214822772e-06, + "loss": 0.0, + "step": 6297 + }, + { + "epoch": 0.4058774247599407, + "grad_norm": 0.0004284577227252947, + "learning_rate": 6.605084138918726e-06, + "loss": 0.0, + "step": 6298 + }, + { + "epoch": 0.4059418702068699, + "grad_norm": 0.01899693347187632, + "learning_rate": 6.60436806301468e-06, + "loss": 0.0001, + "step": 6299 + }, + { + "epoch": 0.4060063156537991, + "grad_norm": 0.008696248565383551, + "learning_rate": 6.603651987110634e-06, + "loss": 0.0001, + "step": 6300 + }, + { + "epoch": 0.4060707611007282, + "grad_norm": 0.00046881937012844134, + "learning_rate": 6.602935911206588e-06, + "loss": 0.0, + "step": 6301 + }, + { + "epoch": 0.4061352065476574, + "grad_norm": 0.0021209476661694926, + "learning_rate": 6.6022198353025425e-06, + "loss": 0.0, + "step": 6302 + }, + { + "epoch": 0.4061996519945866, + "grad_norm": 0.0002067565192842928, + "learning_rate": 6.601503759398497e-06, + "loss": 0.0, + "step": 6303 + }, + { + "epoch": 0.40626409744151576, + "grad_norm": 0.005868360340397581, + "learning_rate": 6.60078768349445e-06, + "loss": 0.0, + "step": 6304 + }, + { + "epoch": 0.40632854288844494, + "grad_norm": 0.0006698871174751843, + "learning_rate": 6.6000716075904046e-06, + "loss": 0.0, + "step": 6305 + }, + { + "epoch": 0.40639298833537413, + "grad_norm": 0.008249500911233668, + "learning_rate": 6.599355531686359e-06, + "loss": 0.0, + "step": 6306 + }, + { + "epoch": 0.40645743378230326, + "grad_norm": 0.0003297776276702966, + "learning_rate": 6.598639455782313e-06, + "loss": 0.0, + "step": 6307 + }, + { + "epoch": 0.40652187922923244, + "grad_norm": 0.763014566614935, + "learning_rate": 6.5979233798782675e-06, + "loss": 0.0032, + "step": 6308 + }, + { + "epoch": 0.4065863246761616, + "grad_norm": 0.0030389339003442026, + "learning_rate": 6.597207303974221e-06, + "loss": 0.0, + "step": 6309 + }, + { + "epoch": 0.4066507701230908, + "grad_norm": 0.002413791142186581, + "learning_rate": 6.596491228070177e-06, + "loss": 0.0, + "step": 6310 + }, + { + "epoch": 0.40671521557002, + "grad_norm": 0.037547443349476, + "learning_rate": 6.59577515216613e-06, + "loss": 0.0001, + "step": 6311 + }, + { + "epoch": 0.4067796610169492, + "grad_norm": 0.0012816616887669533, + "learning_rate": 6.595059076262085e-06, + "loss": 0.0, + "step": 6312 + }, + { + "epoch": 0.4068441064638783, + "grad_norm": 0.4274243489749732, + "learning_rate": 6.594343000358039e-06, + "loss": 0.0025, + "step": 6313 + }, + { + "epoch": 0.4069085519108075, + "grad_norm": 0.0009116007250358199, + "learning_rate": 6.593626924453993e-06, + "loss": 0.0, + "step": 6314 + }, + { + "epoch": 0.4069729973577367, + "grad_norm": 0.4122105153435198, + "learning_rate": 6.592910848549947e-06, + "loss": 0.0007, + "step": 6315 + }, + { + "epoch": 0.40703744280466586, + "grad_norm": 0.032060547766800215, + "learning_rate": 6.592194772645901e-06, + "loss": 0.0001, + "step": 6316 + }, + { + "epoch": 0.40710188825159505, + "grad_norm": 0.0014138136504818558, + "learning_rate": 6.591478696741855e-06, + "loss": 0.0, + "step": 6317 + }, + { + "epoch": 0.4071663336985242, + "grad_norm": 0.020313332069530073, + "learning_rate": 6.59076262083781e-06, + "loss": 0.0, + "step": 6318 + }, + { + "epoch": 0.40723077914545336, + "grad_norm": 0.0225892251709381, + "learning_rate": 6.590046544933764e-06, + "loss": 0.0, + "step": 6319 + }, + { + "epoch": 0.40729522459238254, + "grad_norm": 0.06057017977650917, + "learning_rate": 6.589330469029717e-06, + "loss": 0.0002, + "step": 6320 + }, + { + "epoch": 0.4073596700393117, + "grad_norm": 0.005657030130166821, + "learning_rate": 6.588614393125672e-06, + "loss": 0.0, + "step": 6321 + }, + { + "epoch": 0.4074241154862409, + "grad_norm": 0.27193709574173774, + "learning_rate": 6.587898317221626e-06, + "loss": 0.0003, + "step": 6322 + }, + { + "epoch": 0.4074885609331701, + "grad_norm": 0.005401159223127921, + "learning_rate": 6.58718224131758e-06, + "loss": 0.0, + "step": 6323 + }, + { + "epoch": 0.4075530063800992, + "grad_norm": 0.190735859372399, + "learning_rate": 6.5864661654135345e-06, + "loss": 0.0008, + "step": 6324 + }, + { + "epoch": 0.4076174518270284, + "grad_norm": 0.02942071707522714, + "learning_rate": 6.585750089509488e-06, + "loss": 0.0, + "step": 6325 + }, + { + "epoch": 0.4076818972739576, + "grad_norm": 0.04759147327995806, + "learning_rate": 6.585034013605442e-06, + "loss": 0.0002, + "step": 6326 + }, + { + "epoch": 0.4077463427208868, + "grad_norm": 0.0783865454767804, + "learning_rate": 6.584317937701397e-06, + "loss": 0.0001, + "step": 6327 + }, + { + "epoch": 0.40781078816781596, + "grad_norm": 0.009902174526617688, + "learning_rate": 6.583601861797351e-06, + "loss": 0.0015, + "step": 6328 + }, + { + "epoch": 0.4078752336147451, + "grad_norm": 0.3001118059233158, + "learning_rate": 6.582885785893305e-06, + "loss": 0.0004, + "step": 6329 + }, + { + "epoch": 0.4079396790616743, + "grad_norm": 0.023572379667072773, + "learning_rate": 6.582169709989259e-06, + "loss": 0.0001, + "step": 6330 + }, + { + "epoch": 0.40800412450860346, + "grad_norm": 0.005017211603975058, + "learning_rate": 6.581453634085213e-06, + "loss": 0.0, + "step": 6331 + }, + { + "epoch": 0.40806856995553265, + "grad_norm": 0.03636838824935784, + "learning_rate": 6.580737558181167e-06, + "loss": 0.0, + "step": 6332 + }, + { + "epoch": 0.40813301540246183, + "grad_norm": 0.1252912317573341, + "learning_rate": 6.580021482277122e-06, + "loss": 0.0001, + "step": 6333 + }, + { + "epoch": 0.408197460849391, + "grad_norm": 0.0002858440281521137, + "learning_rate": 6.579305406373077e-06, + "loss": 0.0, + "step": 6334 + }, + { + "epoch": 0.40826190629632014, + "grad_norm": 0.008682301258015307, + "learning_rate": 6.578589330469031e-06, + "loss": 0.0001, + "step": 6335 + }, + { + "epoch": 0.4083263517432493, + "grad_norm": 0.0008786334737352812, + "learning_rate": 6.5778732545649844e-06, + "loss": 0.0, + "step": 6336 + }, + { + "epoch": 0.4083907971901785, + "grad_norm": 0.054658848205107594, + "learning_rate": 6.577157178660939e-06, + "loss": 0.0001, + "step": 6337 + }, + { + "epoch": 0.4084552426371077, + "grad_norm": 0.009756242819836084, + "learning_rate": 6.576441102756893e-06, + "loss": 0.0, + "step": 6338 + }, + { + "epoch": 0.4085196880840369, + "grad_norm": 0.24148468360646674, + "learning_rate": 6.575725026852847e-06, + "loss": 0.0006, + "step": 6339 + }, + { + "epoch": 0.40858413353096606, + "grad_norm": 0.007297027798895586, + "learning_rate": 6.575008950948802e-06, + "loss": 0.0, + "step": 6340 + }, + { + "epoch": 0.4086485789778952, + "grad_norm": 0.003341223520785786, + "learning_rate": 6.574292875044755e-06, + "loss": 0.0, + "step": 6341 + }, + { + "epoch": 0.4087130244248244, + "grad_norm": 0.0011016815095849727, + "learning_rate": 6.573576799140709e-06, + "loss": 0.0, + "step": 6342 + }, + { + "epoch": 0.40877746987175356, + "grad_norm": 0.027504932317853434, + "learning_rate": 6.572860723236664e-06, + "loss": 0.0001, + "step": 6343 + }, + { + "epoch": 0.40884191531868275, + "grad_norm": 0.01037361559934303, + "learning_rate": 6.572144647332618e-06, + "loss": 0.0, + "step": 6344 + }, + { + "epoch": 0.40890636076561193, + "grad_norm": 0.2539894847889759, + "learning_rate": 6.571428571428572e-06, + "loss": 0.0018, + "step": 6345 + }, + { + "epoch": 0.40897080621254106, + "grad_norm": 0.01065426405305903, + "learning_rate": 6.570712495524526e-06, + "loss": 0.0001, + "step": 6346 + }, + { + "epoch": 0.40903525165947024, + "grad_norm": 0.0003099955461148386, + "learning_rate": 6.56999641962048e-06, + "loss": 0.0, + "step": 6347 + }, + { + "epoch": 0.40909969710639943, + "grad_norm": 0.0006434004654581663, + "learning_rate": 6.569280343716434e-06, + "loss": 0.0, + "step": 6348 + }, + { + "epoch": 0.4091641425533286, + "grad_norm": 0.004250379747714681, + "learning_rate": 6.568564267812389e-06, + "loss": 0.0, + "step": 6349 + }, + { + "epoch": 0.4092285880002578, + "grad_norm": 0.0016217436448476318, + "learning_rate": 6.567848191908343e-06, + "loss": 0.0, + "step": 6350 + }, + { + "epoch": 0.409293033447187, + "grad_norm": 0.10920754880728271, + "learning_rate": 6.567132116004296e-06, + "loss": 0.0014, + "step": 6351 + }, + { + "epoch": 0.4093574788941161, + "grad_norm": 0.0021528177987047534, + "learning_rate": 6.566416040100251e-06, + "loss": 0.0, + "step": 6352 + }, + { + "epoch": 0.4094219243410453, + "grad_norm": 0.002116073833938389, + "learning_rate": 6.565699964196205e-06, + "loss": 0.0, + "step": 6353 + }, + { + "epoch": 0.4094863697879745, + "grad_norm": 0.29838004832477744, + "learning_rate": 6.564983888292159e-06, + "loss": 0.0012, + "step": 6354 + }, + { + "epoch": 0.40955081523490366, + "grad_norm": 0.001972656939178585, + "learning_rate": 6.5642678123881136e-06, + "loss": 0.0, + "step": 6355 + }, + { + "epoch": 0.40961526068183285, + "grad_norm": 0.051000456672673784, + "learning_rate": 6.563551736484067e-06, + "loss": 0.0004, + "step": 6356 + }, + { + "epoch": 0.409679706128762, + "grad_norm": 0.002971262747549498, + "learning_rate": 6.562835660580022e-06, + "loss": 0.0, + "step": 6357 + }, + { + "epoch": 0.40974415157569116, + "grad_norm": 0.001269607342146986, + "learning_rate": 6.5621195846759765e-06, + "loss": 0.0, + "step": 6358 + }, + { + "epoch": 0.40980859702262035, + "grad_norm": 0.0013160284007964068, + "learning_rate": 6.561403508771931e-06, + "loss": 0.0, + "step": 6359 + }, + { + "epoch": 0.40987304246954953, + "grad_norm": 0.003960992563566679, + "learning_rate": 6.560687432867885e-06, + "loss": 0.0, + "step": 6360 + }, + { + "epoch": 0.4099374879164787, + "grad_norm": 0.000711062058098579, + "learning_rate": 6.559971356963839e-06, + "loss": 0.0, + "step": 6361 + }, + { + "epoch": 0.4100019333634079, + "grad_norm": 0.0006166932865461904, + "learning_rate": 6.559255281059793e-06, + "loss": 0.0, + "step": 6362 + }, + { + "epoch": 0.41006637881033703, + "grad_norm": 0.0008622193241519165, + "learning_rate": 6.558539205155747e-06, + "loss": 0.0, + "step": 6363 + }, + { + "epoch": 0.4101308242572662, + "grad_norm": 0.022749650694368222, + "learning_rate": 6.557823129251701e-06, + "loss": 0.0, + "step": 6364 + }, + { + "epoch": 0.4101952697041954, + "grad_norm": 0.0001210514390543481, + "learning_rate": 6.557107053347656e-06, + "loss": 0.0, + "step": 6365 + }, + { + "epoch": 0.4102597151511246, + "grad_norm": 0.30534664406291645, + "learning_rate": 6.55639097744361e-06, + "loss": 0.0007, + "step": 6366 + }, + { + "epoch": 0.41032416059805377, + "grad_norm": 0.0009480997115783098, + "learning_rate": 6.5556749015395635e-06, + "loss": 0.0, + "step": 6367 + }, + { + "epoch": 0.4103886060449829, + "grad_norm": 0.0030546703447422857, + "learning_rate": 6.554958825635518e-06, + "loss": 0.0, + "step": 6368 + }, + { + "epoch": 0.4104530514919121, + "grad_norm": 3.549411554795448e-05, + "learning_rate": 6.554242749731472e-06, + "loss": 0.0, + "step": 6369 + }, + { + "epoch": 0.41051749693884126, + "grad_norm": 0.06431834983958828, + "learning_rate": 6.553526673827426e-06, + "loss": 0.0006, + "step": 6370 + }, + { + "epoch": 0.41058194238577045, + "grad_norm": 0.0015877173040344272, + "learning_rate": 6.552810597923381e-06, + "loss": 0.0, + "step": 6371 + }, + { + "epoch": 0.41064638783269963, + "grad_norm": 0.3677186573657693, + "learning_rate": 6.552094522019334e-06, + "loss": 0.0003, + "step": 6372 + }, + { + "epoch": 0.4107108332796288, + "grad_norm": 0.015533271144025876, + "learning_rate": 6.551378446115288e-06, + "loss": 0.0001, + "step": 6373 + }, + { + "epoch": 0.41077527872655795, + "grad_norm": 0.0001311708652832042, + "learning_rate": 6.550662370211243e-06, + "loss": 0.0, + "step": 6374 + }, + { + "epoch": 0.41083972417348713, + "grad_norm": 0.03579685409460785, + "learning_rate": 6.549946294307197e-06, + "loss": 0.0001, + "step": 6375 + }, + { + "epoch": 0.4109041696204163, + "grad_norm": 0.010680600901401511, + "learning_rate": 6.5492302184031505e-06, + "loss": 0.0, + "step": 6376 + }, + { + "epoch": 0.4109686150673455, + "grad_norm": 0.0014740273723926735, + "learning_rate": 6.548514142499105e-06, + "loss": 0.0, + "step": 6377 + }, + { + "epoch": 0.4110330605142747, + "grad_norm": 0.17344039901681835, + "learning_rate": 6.547798066595059e-06, + "loss": 0.0016, + "step": 6378 + }, + { + "epoch": 0.41109750596120387, + "grad_norm": 2.5766377467299022e-05, + "learning_rate": 6.547081990691013e-06, + "loss": 0.0, + "step": 6379 + }, + { + "epoch": 0.411161951408133, + "grad_norm": 0.4452945638691878, + "learning_rate": 6.5463659147869685e-06, + "loss": 0.0052, + "step": 6380 + }, + { + "epoch": 0.4112263968550622, + "grad_norm": 0.0008473765258398587, + "learning_rate": 6.545649838882923e-06, + "loss": 0.0, + "step": 6381 + }, + { + "epoch": 0.41129084230199137, + "grad_norm": 0.000153973536380325, + "learning_rate": 6.544933762978877e-06, + "loss": 0.0, + "step": 6382 + }, + { + "epoch": 0.41135528774892055, + "grad_norm": 0.0008808541464115172, + "learning_rate": 6.5442176870748305e-06, + "loss": 0.0, + "step": 6383 + }, + { + "epoch": 0.41141973319584974, + "grad_norm": 0.04910274289866252, + "learning_rate": 6.543501611170785e-06, + "loss": 0.0001, + "step": 6384 + }, + { + "epoch": 0.41148417864277886, + "grad_norm": 0.6920763852250857, + "learning_rate": 6.542785535266739e-06, + "loss": 0.0039, + "step": 6385 + }, + { + "epoch": 0.41154862408970805, + "grad_norm": 0.10766323025445916, + "learning_rate": 6.5420694593626934e-06, + "loss": 0.0002, + "step": 6386 + }, + { + "epoch": 0.41161306953663723, + "grad_norm": 0.0026613364721375463, + "learning_rate": 6.541353383458648e-06, + "loss": 0.0, + "step": 6387 + }, + { + "epoch": 0.4116775149835664, + "grad_norm": 0.007791129047614624, + "learning_rate": 6.540637307554601e-06, + "loss": 0.0, + "step": 6388 + }, + { + "epoch": 0.4117419604304956, + "grad_norm": 0.0023654626963615804, + "learning_rate": 6.5399212316505555e-06, + "loss": 0.0, + "step": 6389 + }, + { + "epoch": 0.4118064058774248, + "grad_norm": 0.007302414206805107, + "learning_rate": 6.53920515574651e-06, + "loss": 0.0001, + "step": 6390 + }, + { + "epoch": 0.4118708513243539, + "grad_norm": 0.045215327175843444, + "learning_rate": 6.538489079842464e-06, + "loss": 0.0, + "step": 6391 + }, + { + "epoch": 0.4119352967712831, + "grad_norm": 0.0042823292236435235, + "learning_rate": 6.5377730039384175e-06, + "loss": 0.0, + "step": 6392 + }, + { + "epoch": 0.4119997422182123, + "grad_norm": 0.009907462456016054, + "learning_rate": 6.537056928034372e-06, + "loss": 0.0, + "step": 6393 + }, + { + "epoch": 0.41206418766514147, + "grad_norm": 1.2236248238341578, + "learning_rate": 6.536340852130326e-06, + "loss": 0.0013, + "step": 6394 + }, + { + "epoch": 0.41212863311207065, + "grad_norm": 0.04217290112438516, + "learning_rate": 6.5356247762262804e-06, + "loss": 0.0002, + "step": 6395 + }, + { + "epoch": 0.4121930785589998, + "grad_norm": 0.004078594013241326, + "learning_rate": 6.534908700322235e-06, + "loss": 0.0, + "step": 6396 + }, + { + "epoch": 0.41225752400592897, + "grad_norm": 0.06957932522015041, + "learning_rate": 6.534192624418188e-06, + "loss": 0.0001, + "step": 6397 + }, + { + "epoch": 0.41232196945285815, + "grad_norm": 0.3205191480641675, + "learning_rate": 6.5334765485141425e-06, + "loss": 0.0037, + "step": 6398 + }, + { + "epoch": 0.41238641489978733, + "grad_norm": 0.010984131143033277, + "learning_rate": 6.532760472610097e-06, + "loss": 0.0001, + "step": 6399 + }, + { + "epoch": 0.4124508603467165, + "grad_norm": 0.3163248357698386, + "learning_rate": 6.532044396706051e-06, + "loss": 0.0022, + "step": 6400 + }, + { + "epoch": 0.4125153057936457, + "grad_norm": 0.0038705281878985156, + "learning_rate": 6.531328320802005e-06, + "loss": 0.0, + "step": 6401 + }, + { + "epoch": 0.41257975124057483, + "grad_norm": 0.000454026447775615, + "learning_rate": 6.530612244897959e-06, + "loss": 0.0, + "step": 6402 + }, + { + "epoch": 0.412644196687504, + "grad_norm": 0.018020890708024186, + "learning_rate": 6.529896168993915e-06, + "loss": 0.0001, + "step": 6403 + }, + { + "epoch": 0.4127086421344332, + "grad_norm": 0.0011114438458940314, + "learning_rate": 6.529180093089868e-06, + "loss": 0.0, + "step": 6404 + }, + { + "epoch": 0.4127730875813624, + "grad_norm": 1.4026249680627463, + "learning_rate": 6.528464017185823e-06, + "loss": 0.0068, + "step": 6405 + }, + { + "epoch": 0.41283753302829157, + "grad_norm": 0.001006156064450549, + "learning_rate": 6.527747941281777e-06, + "loss": 0.0, + "step": 6406 + }, + { + "epoch": 0.4129019784752207, + "grad_norm": 0.7100981903682718, + "learning_rate": 6.527031865377731e-06, + "loss": 0.0024, + "step": 6407 + }, + { + "epoch": 0.4129664239221499, + "grad_norm": 0.00165587619774657, + "learning_rate": 6.526315789473685e-06, + "loss": 0.0, + "step": 6408 + }, + { + "epoch": 0.41303086936907907, + "grad_norm": 0.27788876991628714, + "learning_rate": 6.525599713569639e-06, + "loss": 0.0012, + "step": 6409 + }, + { + "epoch": 0.41309531481600825, + "grad_norm": 0.0005789179176985119, + "learning_rate": 6.524883637665593e-06, + "loss": 0.0, + "step": 6410 + }, + { + "epoch": 0.41315976026293744, + "grad_norm": 0.005029905999029246, + "learning_rate": 6.5241675617615475e-06, + "loss": 0.0001, + "step": 6411 + }, + { + "epoch": 0.4132242057098666, + "grad_norm": 0.0034671998073162475, + "learning_rate": 6.523451485857502e-06, + "loss": 0.0, + "step": 6412 + }, + { + "epoch": 0.41328865115679575, + "grad_norm": 0.005740657103420485, + "learning_rate": 6.522735409953455e-06, + "loss": 0.0, + "step": 6413 + }, + { + "epoch": 0.41335309660372493, + "grad_norm": 0.00789786293433695, + "learning_rate": 6.5220193340494096e-06, + "loss": 0.0, + "step": 6414 + }, + { + "epoch": 0.4134175420506541, + "grad_norm": 0.04233837478909208, + "learning_rate": 6.521303258145364e-06, + "loss": 0.0, + "step": 6415 + }, + { + "epoch": 0.4134819874975833, + "grad_norm": 0.031302123632725096, + "learning_rate": 6.520587182241318e-06, + "loss": 0.0001, + "step": 6416 + }, + { + "epoch": 0.4135464329445125, + "grad_norm": 0.011742804966906406, + "learning_rate": 6.5198711063372725e-06, + "loss": 0.0, + "step": 6417 + }, + { + "epoch": 0.41361087839144167, + "grad_norm": 0.000555710875784751, + "learning_rate": 6.519155030433226e-06, + "loss": 0.0, + "step": 6418 + }, + { + "epoch": 0.4136753238383708, + "grad_norm": 0.1739312573212879, + "learning_rate": 6.51843895452918e-06, + "loss": 0.0005, + "step": 6419 + }, + { + "epoch": 0.4137397692853, + "grad_norm": 0.004575221895012938, + "learning_rate": 6.5177228786251345e-06, + "loss": 0.0, + "step": 6420 + }, + { + "epoch": 0.41380421473222917, + "grad_norm": 0.00011079464582073852, + "learning_rate": 6.517006802721089e-06, + "loss": 0.0, + "step": 6421 + }, + { + "epoch": 0.41386866017915835, + "grad_norm": 0.005995697259705144, + "learning_rate": 6.516290726817043e-06, + "loss": 0.0, + "step": 6422 + }, + { + "epoch": 0.41393310562608754, + "grad_norm": 0.0034095905482489468, + "learning_rate": 6.5155746509129966e-06, + "loss": 0.0, + "step": 6423 + }, + { + "epoch": 0.41399755107301667, + "grad_norm": 0.00577780198281472, + "learning_rate": 6.514858575008951e-06, + "loss": 0.0, + "step": 6424 + }, + { + "epoch": 0.41406199651994585, + "grad_norm": 0.46146711537749807, + "learning_rate": 6.514142499104905e-06, + "loss": 0.0012, + "step": 6425 + }, + { + "epoch": 0.41412644196687504, + "grad_norm": 0.0009439974016115886, + "learning_rate": 6.5134264232008595e-06, + "loss": 0.0, + "step": 6426 + }, + { + "epoch": 0.4141908874138042, + "grad_norm": 0.03680339894602934, + "learning_rate": 6.512710347296815e-06, + "loss": 0.0, + "step": 6427 + }, + { + "epoch": 0.4142553328607334, + "grad_norm": 0.0009701746796509103, + "learning_rate": 6.511994271392769e-06, + "loss": 0.0, + "step": 6428 + }, + { + "epoch": 0.4143197783076626, + "grad_norm": 0.0010483196330762464, + "learning_rate": 6.511278195488722e-06, + "loss": 0.0, + "step": 6429 + }, + { + "epoch": 0.4143842237545917, + "grad_norm": 0.0006480988198016862, + "learning_rate": 6.510562119584677e-06, + "loss": 0.0, + "step": 6430 + }, + { + "epoch": 0.4144486692015209, + "grad_norm": 0.07760918026875319, + "learning_rate": 6.509846043680631e-06, + "loss": 0.0002, + "step": 6431 + }, + { + "epoch": 0.4145131146484501, + "grad_norm": 0.12619666898186474, + "learning_rate": 6.509129967776585e-06, + "loss": 0.0003, + "step": 6432 + }, + { + "epoch": 0.41457756009537927, + "grad_norm": 0.06872515229381396, + "learning_rate": 6.5084138918725396e-06, + "loss": 0.0001, + "step": 6433 + }, + { + "epoch": 0.41464200554230846, + "grad_norm": 0.005405009744319238, + "learning_rate": 6.507697815968493e-06, + "loss": 0.0, + "step": 6434 + }, + { + "epoch": 0.4147064509892376, + "grad_norm": 0.0006958994388959971, + "learning_rate": 6.506981740064447e-06, + "loss": 0.0, + "step": 6435 + }, + { + "epoch": 0.41477089643616677, + "grad_norm": 0.00012114286637400527, + "learning_rate": 6.506265664160402e-06, + "loss": 0.0, + "step": 6436 + }, + { + "epoch": 0.41483534188309595, + "grad_norm": 0.016547558757412303, + "learning_rate": 6.505549588256356e-06, + "loss": 0.0, + "step": 6437 + }, + { + "epoch": 0.41489978733002514, + "grad_norm": 0.013458047052151412, + "learning_rate": 6.50483351235231e-06, + "loss": 0.0, + "step": 6438 + }, + { + "epoch": 0.4149642327769543, + "grad_norm": 0.0021026141925494257, + "learning_rate": 6.504117436448264e-06, + "loss": 0.0, + "step": 6439 + }, + { + "epoch": 0.4150286782238835, + "grad_norm": 0.00038938549783562923, + "learning_rate": 6.503401360544218e-06, + "loss": 0.0, + "step": 6440 + }, + { + "epoch": 0.41509312367081264, + "grad_norm": 0.0020553606478648863, + "learning_rate": 6.502685284640172e-06, + "loss": 0.0, + "step": 6441 + }, + { + "epoch": 0.4151575691177418, + "grad_norm": 0.0011236673450610109, + "learning_rate": 6.5019692087361265e-06, + "loss": 0.0, + "step": 6442 + }, + { + "epoch": 0.415222014564671, + "grad_norm": 0.0008312099867464999, + "learning_rate": 6.501253132832081e-06, + "loss": 0.0, + "step": 6443 + }, + { + "epoch": 0.4152864600116002, + "grad_norm": 0.016113894443693048, + "learning_rate": 6.500537056928034e-06, + "loss": 0.0, + "step": 6444 + }, + { + "epoch": 0.4153509054585294, + "grad_norm": 0.001129702907296028, + "learning_rate": 6.499820981023989e-06, + "loss": 0.0, + "step": 6445 + }, + { + "epoch": 0.4154153509054585, + "grad_norm": 0.0036121507193785907, + "learning_rate": 6.499104905119943e-06, + "loss": 0.0, + "step": 6446 + }, + { + "epoch": 0.4154797963523877, + "grad_norm": 0.4637558492255657, + "learning_rate": 6.498388829215897e-06, + "loss": 0.0018, + "step": 6447 + }, + { + "epoch": 0.41554424179931687, + "grad_norm": 0.002091072027737738, + "learning_rate": 6.4976727533118515e-06, + "loss": 0.0, + "step": 6448 + }, + { + "epoch": 0.41560868724624606, + "grad_norm": 0.005425796910282488, + "learning_rate": 6.496956677407805e-06, + "loss": 0.0, + "step": 6449 + }, + { + "epoch": 0.41567313269317524, + "grad_norm": 0.0006827120071557798, + "learning_rate": 6.49624060150376e-06, + "loss": 0.0, + "step": 6450 + }, + { + "epoch": 0.4157375781401044, + "grad_norm": 0.0009370663355223362, + "learning_rate": 6.495524525599714e-06, + "loss": 0.0, + "step": 6451 + }, + { + "epoch": 0.41580202358703355, + "grad_norm": 0.005268021478741142, + "learning_rate": 6.494808449695669e-06, + "loss": 0.0, + "step": 6452 + }, + { + "epoch": 0.41586646903396274, + "grad_norm": 0.0018895030845587368, + "learning_rate": 6.494092373791623e-06, + "loss": 0.0, + "step": 6453 + }, + { + "epoch": 0.4159309144808919, + "grad_norm": 0.005175601673834675, + "learning_rate": 6.493376297887577e-06, + "loss": 0.0, + "step": 6454 + }, + { + "epoch": 0.4159953599278211, + "grad_norm": 0.4407284520497393, + "learning_rate": 6.492660221983531e-06, + "loss": 0.0032, + "step": 6455 + }, + { + "epoch": 0.4160598053747503, + "grad_norm": 0.0007340834875259358, + "learning_rate": 6.491944146079485e-06, + "loss": 0.0, + "step": 6456 + }, + { + "epoch": 0.4161242508216795, + "grad_norm": 0.010261719996274735, + "learning_rate": 6.491228070175439e-06, + "loss": 0.0, + "step": 6457 + }, + { + "epoch": 0.4161886962686086, + "grad_norm": 0.0008106852255236457, + "learning_rate": 6.490511994271394e-06, + "loss": 0.0, + "step": 6458 + }, + { + "epoch": 0.4162531417155378, + "grad_norm": 0.010090045530888448, + "learning_rate": 6.489795918367348e-06, + "loss": 0.0, + "step": 6459 + }, + { + "epoch": 0.416317587162467, + "grad_norm": 0.008465445410221185, + "learning_rate": 6.489079842463301e-06, + "loss": 0.0, + "step": 6460 + }, + { + "epoch": 0.41638203260939616, + "grad_norm": 0.02899654347956042, + "learning_rate": 6.488363766559256e-06, + "loss": 0.0001, + "step": 6461 + }, + { + "epoch": 0.41644647805632534, + "grad_norm": 0.001056778135030049, + "learning_rate": 6.48764769065521e-06, + "loss": 0.0, + "step": 6462 + }, + { + "epoch": 0.41651092350325447, + "grad_norm": 0.3688163746079339, + "learning_rate": 6.486931614751164e-06, + "loss": 0.0006, + "step": 6463 + }, + { + "epoch": 0.41657536895018366, + "grad_norm": 0.005144527660957336, + "learning_rate": 6.486215538847119e-06, + "loss": 0.0, + "step": 6464 + }, + { + "epoch": 0.41663981439711284, + "grad_norm": 0.017784187385494993, + "learning_rate": 6.485499462943072e-06, + "loss": 0.0002, + "step": 6465 + }, + { + "epoch": 0.416704259844042, + "grad_norm": 0.06417901822958386, + "learning_rate": 6.484783387039026e-06, + "loss": 0.0001, + "step": 6466 + }, + { + "epoch": 0.4167687052909712, + "grad_norm": 0.0006121339328757751, + "learning_rate": 6.484067311134981e-06, + "loss": 0.0, + "step": 6467 + }, + { + "epoch": 0.4168331507379004, + "grad_norm": 0.18229139305275863, + "learning_rate": 6.483351235230935e-06, + "loss": 0.0004, + "step": 6468 + }, + { + "epoch": 0.4168975961848295, + "grad_norm": 0.041040044281895095, + "learning_rate": 6.482635159326889e-06, + "loss": 0.0001, + "step": 6469 + }, + { + "epoch": 0.4169620416317587, + "grad_norm": 0.021999025416732904, + "learning_rate": 6.481919083422843e-06, + "loss": 0.0001, + "step": 6470 + }, + { + "epoch": 0.4170264870786879, + "grad_norm": 0.002544240789084966, + "learning_rate": 6.481203007518797e-06, + "loss": 0.0, + "step": 6471 + }, + { + "epoch": 0.4170909325256171, + "grad_norm": 0.0026852858379955535, + "learning_rate": 6.480486931614751e-06, + "loss": 0.0, + "step": 6472 + }, + { + "epoch": 0.41715537797254626, + "grad_norm": 0.43469944439074326, + "learning_rate": 6.479770855710706e-06, + "loss": 0.0012, + "step": 6473 + }, + { + "epoch": 0.4172198234194754, + "grad_norm": 0.347051651967674, + "learning_rate": 6.479054779806661e-06, + "loss": 0.0006, + "step": 6474 + }, + { + "epoch": 0.4172842688664046, + "grad_norm": 0.25698550840535966, + "learning_rate": 6.478338703902615e-06, + "loss": 0.0005, + "step": 6475 + }, + { + "epoch": 0.41734871431333376, + "grad_norm": 0.2126062926753528, + "learning_rate": 6.4776226279985685e-06, + "loss": 0.0019, + "step": 6476 + }, + { + "epoch": 0.41741315976026294, + "grad_norm": 0.28151246910276284, + "learning_rate": 6.476906552094523e-06, + "loss": 0.0012, + "step": 6477 + }, + { + "epoch": 0.4174776052071921, + "grad_norm": 0.0017983784302793271, + "learning_rate": 6.476190476190477e-06, + "loss": 0.0, + "step": 6478 + }, + { + "epoch": 0.4175420506541213, + "grad_norm": 0.029167512556955478, + "learning_rate": 6.475474400286431e-06, + "loss": 0.0002, + "step": 6479 + }, + { + "epoch": 0.41760649610105044, + "grad_norm": 6.0273887407271015e-05, + "learning_rate": 6.474758324382386e-06, + "loss": 0.0, + "step": 6480 + }, + { + "epoch": 0.4176709415479796, + "grad_norm": 0.003980392115880526, + "learning_rate": 6.474042248478339e-06, + "loss": 0.0, + "step": 6481 + }, + { + "epoch": 0.4177353869949088, + "grad_norm": 0.0011862001687265543, + "learning_rate": 6.473326172574293e-06, + "loss": 0.0, + "step": 6482 + }, + { + "epoch": 0.417799832441838, + "grad_norm": 0.28784543359479664, + "learning_rate": 6.472610096670248e-06, + "loss": 0.0004, + "step": 6483 + }, + { + "epoch": 0.4178642778887672, + "grad_norm": 0.002859249041893461, + "learning_rate": 6.471894020766202e-06, + "loss": 0.0, + "step": 6484 + }, + { + "epoch": 0.4179287233356963, + "grad_norm": 0.0022861943502021026, + "learning_rate": 6.471177944862156e-06, + "loss": 0.0, + "step": 6485 + }, + { + "epoch": 0.4179931687826255, + "grad_norm": 0.0020342299031710404, + "learning_rate": 6.47046186895811e-06, + "loss": 0.0, + "step": 6486 + }, + { + "epoch": 0.4180576142295547, + "grad_norm": 8.07128737944767e-05, + "learning_rate": 6.469745793054064e-06, + "loss": 0.0, + "step": 6487 + }, + { + "epoch": 0.41812205967648386, + "grad_norm": 0.1435686221463562, + "learning_rate": 6.469029717150018e-06, + "loss": 0.0017, + "step": 6488 + }, + { + "epoch": 0.41818650512341304, + "grad_norm": 9.83520528507581e-05, + "learning_rate": 6.468313641245973e-06, + "loss": 0.0, + "step": 6489 + }, + { + "epoch": 0.41825095057034223, + "grad_norm": 5.192385256500709e-05, + "learning_rate": 6.467597565341926e-06, + "loss": 0.0, + "step": 6490 + }, + { + "epoch": 0.41831539601727136, + "grad_norm": 0.005567069063858515, + "learning_rate": 6.46688148943788e-06, + "loss": 0.0, + "step": 6491 + }, + { + "epoch": 0.41837984146420054, + "grad_norm": 0.00021615406446108586, + "learning_rate": 6.466165413533835e-06, + "loss": 0.0, + "step": 6492 + }, + { + "epoch": 0.4184442869111297, + "grad_norm": 0.0021729118234671005, + "learning_rate": 6.465449337629789e-06, + "loss": 0.0, + "step": 6493 + }, + { + "epoch": 0.4185087323580589, + "grad_norm": 0.006202733531252822, + "learning_rate": 6.464733261725743e-06, + "loss": 0.0, + "step": 6494 + }, + { + "epoch": 0.4185731778049881, + "grad_norm": 0.0008673489649001512, + "learning_rate": 6.464017185821697e-06, + "loss": 0.0, + "step": 6495 + }, + { + "epoch": 0.4186376232519173, + "grad_norm": 0.01283363549355941, + "learning_rate": 6.463301109917651e-06, + "loss": 0.0, + "step": 6496 + }, + { + "epoch": 0.4187020686988464, + "grad_norm": 0.0971751521920101, + "learning_rate": 6.462585034013606e-06, + "loss": 0.0001, + "step": 6497 + }, + { + "epoch": 0.4187665141457756, + "grad_norm": 0.037483021995562936, + "learning_rate": 6.4618689581095605e-06, + "loss": 0.0002, + "step": 6498 + }, + { + "epoch": 0.4188309595927048, + "grad_norm": 0.002619854872960852, + "learning_rate": 6.461152882205515e-06, + "loss": 0.0, + "step": 6499 + }, + { + "epoch": 0.41889540503963396, + "grad_norm": 0.4253153570613154, + "learning_rate": 6.460436806301469e-06, + "loss": 0.0033, + "step": 6500 + }, + { + "epoch": 0.41895985048656315, + "grad_norm": 0.30405405417039943, + "learning_rate": 6.459720730397423e-06, + "loss": 0.0014, + "step": 6501 + }, + { + "epoch": 0.4190242959334923, + "grad_norm": 0.061062530101131295, + "learning_rate": 6.459004654493377e-06, + "loss": 0.0, + "step": 6502 + }, + { + "epoch": 0.41908874138042146, + "grad_norm": 0.0022728235938479116, + "learning_rate": 6.458288578589331e-06, + "loss": 0.0, + "step": 6503 + }, + { + "epoch": 0.41915318682735064, + "grad_norm": 0.018741473727967656, + "learning_rate": 6.4575725026852854e-06, + "loss": 0.0015, + "step": 6504 + }, + { + "epoch": 0.41921763227427983, + "grad_norm": 0.00014293611391536147, + "learning_rate": 6.45685642678124e-06, + "loss": 0.0, + "step": 6505 + }, + { + "epoch": 0.419282077721209, + "grad_norm": 0.1856076392009028, + "learning_rate": 6.456140350877193e-06, + "loss": 0.0007, + "step": 6506 + }, + { + "epoch": 0.4193465231681382, + "grad_norm": 0.0038470830664775165, + "learning_rate": 6.4554242749731475e-06, + "loss": 0.0, + "step": 6507 + }, + { + "epoch": 0.4194109686150673, + "grad_norm": 0.0002562884533311774, + "learning_rate": 6.454708199069102e-06, + "loss": 0.0, + "step": 6508 + }, + { + "epoch": 0.4194754140619965, + "grad_norm": 0.1724127247382639, + "learning_rate": 6.453992123165056e-06, + "loss": 0.0002, + "step": 6509 + }, + { + "epoch": 0.4195398595089257, + "grad_norm": 0.00018884989328603747, + "learning_rate": 6.45327604726101e-06, + "loss": 0.0, + "step": 6510 + }, + { + "epoch": 0.4196043049558549, + "grad_norm": 0.005897050359999245, + "learning_rate": 6.452559971356964e-06, + "loss": 0.0, + "step": 6511 + }, + { + "epoch": 0.41966875040278406, + "grad_norm": 0.08656624785462537, + "learning_rate": 6.451843895452918e-06, + "loss": 0.001, + "step": 6512 + }, + { + "epoch": 0.4197331958497132, + "grad_norm": 0.0008088807705909768, + "learning_rate": 6.4511278195488724e-06, + "loss": 0.0, + "step": 6513 + }, + { + "epoch": 0.4197976412966424, + "grad_norm": 0.04547051038808133, + "learning_rate": 6.450411743644827e-06, + "loss": 0.0, + "step": 6514 + }, + { + "epoch": 0.41986208674357156, + "grad_norm": 6.935269685480056e-05, + "learning_rate": 6.449695667740781e-06, + "loss": 0.0, + "step": 6515 + }, + { + "epoch": 0.41992653219050075, + "grad_norm": 0.013221494184816038, + "learning_rate": 6.4489795918367345e-06, + "loss": 0.0, + "step": 6516 + }, + { + "epoch": 0.41999097763742993, + "grad_norm": 0.004337338260439512, + "learning_rate": 6.448263515932689e-06, + "loss": 0.0, + "step": 6517 + }, + { + "epoch": 0.4200554230843591, + "grad_norm": 0.0007477333484548366, + "learning_rate": 6.447547440028643e-06, + "loss": 0.0, + "step": 6518 + }, + { + "epoch": 0.42011986853128824, + "grad_norm": 0.22266532645213608, + "learning_rate": 6.446831364124597e-06, + "loss": 0.0007, + "step": 6519 + }, + { + "epoch": 0.4201843139782174, + "grad_norm": 0.001061479785357458, + "learning_rate": 6.4461152882205525e-06, + "loss": 0.0, + "step": 6520 + }, + { + "epoch": 0.4202487594251466, + "grad_norm": 0.01430381819889332, + "learning_rate": 6.445399212316507e-06, + "loss": 0.0001, + "step": 6521 + }, + { + "epoch": 0.4203132048720758, + "grad_norm": 0.2277960656102833, + "learning_rate": 6.44468313641246e-06, + "loss": 0.0082, + "step": 6522 + }, + { + "epoch": 0.420377650319005, + "grad_norm": 0.009277152777889666, + "learning_rate": 6.443967060508415e-06, + "loss": 0.0001, + "step": 6523 + }, + { + "epoch": 0.4204420957659341, + "grad_norm": 0.014832602078491002, + "learning_rate": 6.443250984604369e-06, + "loss": 0.0, + "step": 6524 + }, + { + "epoch": 0.4205065412128633, + "grad_norm": 0.000828742706597286, + "learning_rate": 6.442534908700323e-06, + "loss": 0.0, + "step": 6525 + }, + { + "epoch": 0.4205709866597925, + "grad_norm": 0.00023983841067913176, + "learning_rate": 6.4418188327962775e-06, + "loss": 0.0, + "step": 6526 + }, + { + "epoch": 0.42063543210672166, + "grad_norm": 0.0060839587052440545, + "learning_rate": 6.441102756892231e-06, + "loss": 0.0001, + "step": 6527 + }, + { + "epoch": 0.42069987755365085, + "grad_norm": 0.12049660035438453, + "learning_rate": 6.440386680988185e-06, + "loss": 0.0004, + "step": 6528 + }, + { + "epoch": 0.42076432300058003, + "grad_norm": 0.0002864290770283571, + "learning_rate": 6.4396706050841395e-06, + "loss": 0.0, + "step": 6529 + }, + { + "epoch": 0.42082876844750916, + "grad_norm": 0.008323643209694061, + "learning_rate": 6.438954529180094e-06, + "loss": 0.0, + "step": 6530 + }, + { + "epoch": 0.42089321389443834, + "grad_norm": 0.0008086087638468122, + "learning_rate": 6.438238453276048e-06, + "loss": 0.0, + "step": 6531 + }, + { + "epoch": 0.42095765934136753, + "grad_norm": 0.005185411844307165, + "learning_rate": 6.4375223773720016e-06, + "loss": 0.0, + "step": 6532 + }, + { + "epoch": 0.4210221047882967, + "grad_norm": 0.0006719877031553268, + "learning_rate": 6.436806301467956e-06, + "loss": 0.0, + "step": 6533 + }, + { + "epoch": 0.4210865502352259, + "grad_norm": 2.6934940144879813e-05, + "learning_rate": 6.43609022556391e-06, + "loss": 0.0, + "step": 6534 + }, + { + "epoch": 0.4211509956821551, + "grad_norm": 0.00015571736547918388, + "learning_rate": 6.4353741496598645e-06, + "loss": 0.0, + "step": 6535 + }, + { + "epoch": 0.4212154411290842, + "grad_norm": 0.00315484098134091, + "learning_rate": 6.434658073755819e-06, + "loss": 0.0, + "step": 6536 + }, + { + "epoch": 0.4212798865760134, + "grad_norm": 0.005351614273942891, + "learning_rate": 6.433941997851772e-06, + "loss": 0.0001, + "step": 6537 + }, + { + "epoch": 0.4213443320229426, + "grad_norm": 0.010094460236600215, + "learning_rate": 6.4332259219477265e-06, + "loss": 0.0, + "step": 6538 + }, + { + "epoch": 0.42140877746987176, + "grad_norm": 0.010835629956553901, + "learning_rate": 6.432509846043681e-06, + "loss": 0.0002, + "step": 6539 + }, + { + "epoch": 0.42147322291680095, + "grad_norm": 0.00319520008482836, + "learning_rate": 6.431793770139635e-06, + "loss": 0.0, + "step": 6540 + }, + { + "epoch": 0.4215376683637301, + "grad_norm": 0.010853856846134343, + "learning_rate": 6.431077694235589e-06, + "loss": 0.0, + "step": 6541 + }, + { + "epoch": 0.42160211381065926, + "grad_norm": 0.0114451280760472, + "learning_rate": 6.430361618331543e-06, + "loss": 0.0, + "step": 6542 + }, + { + "epoch": 0.42166655925758845, + "grad_norm": 0.02084433664255218, + "learning_rate": 6.429645542427498e-06, + "loss": 0.0001, + "step": 6543 + }, + { + "epoch": 0.42173100470451763, + "grad_norm": 0.0003189129349114517, + "learning_rate": 6.428929466523452e-06, + "loss": 0.0, + "step": 6544 + }, + { + "epoch": 0.4217954501514468, + "grad_norm": 0.0035933766135850926, + "learning_rate": 6.428213390619407e-06, + "loss": 0.0, + "step": 6545 + }, + { + "epoch": 0.421859895598376, + "grad_norm": 0.024217145235977393, + "learning_rate": 6.427497314715361e-06, + "loss": 0.0001, + "step": 6546 + }, + { + "epoch": 0.42192434104530513, + "grad_norm": 0.007078477096816555, + "learning_rate": 6.426781238811315e-06, + "loss": 0.0, + "step": 6547 + }, + { + "epoch": 0.4219887864922343, + "grad_norm": 0.16918780173284304, + "learning_rate": 6.426065162907269e-06, + "loss": 0.0017, + "step": 6548 + }, + { + "epoch": 0.4220532319391635, + "grad_norm": 0.00042276683359260573, + "learning_rate": 6.425349087003223e-06, + "loss": 0.0, + "step": 6549 + }, + { + "epoch": 0.4221176773860927, + "grad_norm": 0.005739171150647594, + "learning_rate": 6.424633011099177e-06, + "loss": 0.0, + "step": 6550 + }, + { + "epoch": 0.42218212283302187, + "grad_norm": 0.00012378405994403383, + "learning_rate": 6.4239169351951316e-06, + "loss": 0.0, + "step": 6551 + }, + { + "epoch": 0.422246568279951, + "grad_norm": 0.00235690386382886, + "learning_rate": 6.423200859291086e-06, + "loss": 0.0, + "step": 6552 + }, + { + "epoch": 0.4223110137268802, + "grad_norm": 7.031785182137648e-05, + "learning_rate": 6.422484783387039e-06, + "loss": 0.0, + "step": 6553 + }, + { + "epoch": 0.42237545917380936, + "grad_norm": 0.0026047197359357352, + "learning_rate": 6.421768707482994e-06, + "loss": 0.0, + "step": 6554 + }, + { + "epoch": 0.42243990462073855, + "grad_norm": 0.12465103807190193, + "learning_rate": 6.421052631578948e-06, + "loss": 0.0001, + "step": 6555 + }, + { + "epoch": 0.42250435006766773, + "grad_norm": 0.0011322028151738962, + "learning_rate": 6.420336555674902e-06, + "loss": 0.0, + "step": 6556 + }, + { + "epoch": 0.4225687955145969, + "grad_norm": 0.005403863207130577, + "learning_rate": 6.4196204797708565e-06, + "loss": 0.0001, + "step": 6557 + }, + { + "epoch": 0.42263324096152605, + "grad_norm": 0.09145143117725939, + "learning_rate": 6.41890440386681e-06, + "loss": 0.0008, + "step": 6558 + }, + { + "epoch": 0.42269768640845523, + "grad_norm": 0.0007512461604921746, + "learning_rate": 6.418188327962764e-06, + "loss": 0.0, + "step": 6559 + }, + { + "epoch": 0.4227621318553844, + "grad_norm": 0.001425860074836207, + "learning_rate": 6.4174722520587185e-06, + "loss": 0.0, + "step": 6560 + }, + { + "epoch": 0.4228265773023136, + "grad_norm": 1.1363235783018837, + "learning_rate": 6.416756176154673e-06, + "loss": 0.0049, + "step": 6561 + }, + { + "epoch": 0.4228910227492428, + "grad_norm": 0.001521368662538063, + "learning_rate": 6.416040100250627e-06, + "loss": 0.0, + "step": 6562 + }, + { + "epoch": 0.4229554681961719, + "grad_norm": 0.0006320624923546955, + "learning_rate": 6.415324024346581e-06, + "loss": 0.0, + "step": 6563 + }, + { + "epoch": 0.4230199136431011, + "grad_norm": 0.0170555938090754, + "learning_rate": 6.414607948442535e-06, + "loss": 0.0, + "step": 6564 + }, + { + "epoch": 0.4230843590900303, + "grad_norm": 0.007227821830375058, + "learning_rate": 6.413891872538489e-06, + "loss": 0.0, + "step": 6565 + }, + { + "epoch": 0.42314880453695947, + "grad_norm": 0.3460220977460297, + "learning_rate": 6.4131757966344435e-06, + "loss": 0.0011, + "step": 6566 + }, + { + "epoch": 0.42321324998388865, + "grad_norm": 0.0028084542812121535, + "learning_rate": 6.412459720730399e-06, + "loss": 0.0, + "step": 6567 + }, + { + "epoch": 0.42327769543081784, + "grad_norm": 0.049689493945287815, + "learning_rate": 6.411743644826353e-06, + "loss": 0.0002, + "step": 6568 + }, + { + "epoch": 0.42334214087774696, + "grad_norm": 0.018442280869811974, + "learning_rate": 6.411027568922306e-06, + "loss": 0.0, + "step": 6569 + }, + { + "epoch": 0.42340658632467615, + "grad_norm": 0.016176521402883174, + "learning_rate": 6.410311493018261e-06, + "loss": 0.0001, + "step": 6570 + }, + { + "epoch": 0.42347103177160533, + "grad_norm": 0.0045036594846243954, + "learning_rate": 6.409595417114215e-06, + "loss": 0.0001, + "step": 6571 + }, + { + "epoch": 0.4235354772185345, + "grad_norm": 0.000763490671469306, + "learning_rate": 6.408879341210169e-06, + "loss": 0.0, + "step": 6572 + }, + { + "epoch": 0.4235999226654637, + "grad_norm": 0.01358307692632028, + "learning_rate": 6.408163265306124e-06, + "loss": 0.0, + "step": 6573 + }, + { + "epoch": 0.4236643681123929, + "grad_norm": 0.07061323450311378, + "learning_rate": 6.407447189402077e-06, + "loss": 0.0001, + "step": 6574 + }, + { + "epoch": 0.423728813559322, + "grad_norm": 0.0009675888097842737, + "learning_rate": 6.406731113498031e-06, + "loss": 0.0, + "step": 6575 + }, + { + "epoch": 0.4237932590062512, + "grad_norm": 0.05086051961970249, + "learning_rate": 6.406015037593986e-06, + "loss": 0.0002, + "step": 6576 + }, + { + "epoch": 0.4238577044531804, + "grad_norm": 0.016505252166751583, + "learning_rate": 6.40529896168994e-06, + "loss": 0.0, + "step": 6577 + }, + { + "epoch": 0.42392214990010957, + "grad_norm": 0.0004049548514622685, + "learning_rate": 6.404582885785894e-06, + "loss": 0.0015, + "step": 6578 + }, + { + "epoch": 0.42398659534703875, + "grad_norm": 0.623974793736584, + "learning_rate": 6.403866809881848e-06, + "loss": 0.0023, + "step": 6579 + }, + { + "epoch": 0.4240510407939679, + "grad_norm": 4.7767857672534735, + "learning_rate": 6.403150733977802e-06, + "loss": 0.0369, + "step": 6580 + }, + { + "epoch": 0.42411548624089707, + "grad_norm": 0.0056384048468675904, + "learning_rate": 6.402434658073756e-06, + "loss": 0.0, + "step": 6581 + }, + { + "epoch": 0.42417993168782625, + "grad_norm": 0.041177984595075684, + "learning_rate": 6.401718582169711e-06, + "loss": 0.0, + "step": 6582 + }, + { + "epoch": 0.42424437713475543, + "grad_norm": 0.0028160901279018036, + "learning_rate": 6.401002506265664e-06, + "loss": 0.0, + "step": 6583 + }, + { + "epoch": 0.4243088225816846, + "grad_norm": 0.03627116547304098, + "learning_rate": 6.400286430361618e-06, + "loss": 0.0003, + "step": 6584 + }, + { + "epoch": 0.4243732680286138, + "grad_norm": 0.12108684335516592, + "learning_rate": 6.399570354457573e-06, + "loss": 0.0001, + "step": 6585 + }, + { + "epoch": 0.42443771347554293, + "grad_norm": 0.05032057374908024, + "learning_rate": 6.398854278553527e-06, + "loss": 0.0004, + "step": 6586 + }, + { + "epoch": 0.4245021589224721, + "grad_norm": 0.23586408913024615, + "learning_rate": 6.398138202649481e-06, + "loss": 0.0008, + "step": 6587 + }, + { + "epoch": 0.4245666043694013, + "grad_norm": 0.042724430901290876, + "learning_rate": 6.397422126745435e-06, + "loss": 0.0, + "step": 6588 + }, + { + "epoch": 0.4246310498163305, + "grad_norm": 0.007525076075820635, + "learning_rate": 6.396706050841389e-06, + "loss": 0.0, + "step": 6589 + }, + { + "epoch": 0.42469549526325967, + "grad_norm": 0.03318998658871642, + "learning_rate": 6.395989974937344e-06, + "loss": 0.0, + "step": 6590 + }, + { + "epoch": 0.4247599407101888, + "grad_norm": 0.032235455008593907, + "learning_rate": 6.395273899033298e-06, + "loss": 0.0002, + "step": 6591 + }, + { + "epoch": 0.424824386157118, + "grad_norm": 0.0873344546653782, + "learning_rate": 6.394557823129253e-06, + "loss": 0.0016, + "step": 6592 + }, + { + "epoch": 0.42488883160404717, + "grad_norm": 0.012324949783537603, + "learning_rate": 6.393841747225207e-06, + "loss": 0.0, + "step": 6593 + }, + { + "epoch": 0.42495327705097635, + "grad_norm": 0.029404377397181828, + "learning_rate": 6.393125671321161e-06, + "loss": 0.0003, + "step": 6594 + }, + { + "epoch": 0.42501772249790554, + "grad_norm": 0.004150097631532944, + "learning_rate": 6.392409595417115e-06, + "loss": 0.0, + "step": 6595 + }, + { + "epoch": 0.4250821679448347, + "grad_norm": 0.426125046406366, + "learning_rate": 6.391693519513069e-06, + "loss": 0.0014, + "step": 6596 + }, + { + "epoch": 0.42514661339176385, + "grad_norm": 0.007814042117983378, + "learning_rate": 6.390977443609023e-06, + "loss": 0.0001, + "step": 6597 + }, + { + "epoch": 0.42521105883869303, + "grad_norm": 0.016080993757632295, + "learning_rate": 6.390261367704978e-06, + "loss": 0.0, + "step": 6598 + }, + { + "epoch": 0.4252755042856222, + "grad_norm": 0.001690795443047797, + "learning_rate": 6.389545291800931e-06, + "loss": 0.0, + "step": 6599 + }, + { + "epoch": 0.4253399497325514, + "grad_norm": 0.16194163111905666, + "learning_rate": 6.388829215896885e-06, + "loss": 0.0003, + "step": 6600 + }, + { + "epoch": 0.4254043951794806, + "grad_norm": 0.011428128871439189, + "learning_rate": 6.38811313999284e-06, + "loss": 0.0, + "step": 6601 + }, + { + "epoch": 0.4254688406264097, + "grad_norm": 0.08761187696328585, + "learning_rate": 6.387397064088794e-06, + "loss": 0.0017, + "step": 6602 + }, + { + "epoch": 0.4255332860733389, + "grad_norm": 0.2612283581799962, + "learning_rate": 6.386680988184748e-06, + "loss": 0.0003, + "step": 6603 + }, + { + "epoch": 0.4255977315202681, + "grad_norm": 0.019915802669471502, + "learning_rate": 6.385964912280702e-06, + "loss": 0.0, + "step": 6604 + }, + { + "epoch": 0.42566217696719727, + "grad_norm": 0.02653936073343681, + "learning_rate": 6.385248836376656e-06, + "loss": 0.0, + "step": 6605 + }, + { + "epoch": 0.42572662241412645, + "grad_norm": 0.00020666814500324758, + "learning_rate": 6.38453276047261e-06, + "loss": 0.0, + "step": 6606 + }, + { + "epoch": 0.42579106786105564, + "grad_norm": 0.02526154973886526, + "learning_rate": 6.383816684568565e-06, + "loss": 0.0, + "step": 6607 + }, + { + "epoch": 0.42585551330798477, + "grad_norm": 0.007133051405255623, + "learning_rate": 6.383100608664519e-06, + "loss": 0.0, + "step": 6608 + }, + { + "epoch": 0.42591995875491395, + "grad_norm": 0.0021597927325183465, + "learning_rate": 6.382384532760472e-06, + "loss": 0.0, + "step": 6609 + }, + { + "epoch": 0.42598440420184314, + "grad_norm": 0.023160755933042062, + "learning_rate": 6.381668456856427e-06, + "loss": 0.0001, + "step": 6610 + }, + { + "epoch": 0.4260488496487723, + "grad_norm": 0.07757443802956861, + "learning_rate": 6.380952380952381e-06, + "loss": 0.0001, + "step": 6611 + }, + { + "epoch": 0.4261132950957015, + "grad_norm": 0.06177029019459271, + "learning_rate": 6.380236305048335e-06, + "loss": 0.0002, + "step": 6612 + }, + { + "epoch": 0.4261777405426307, + "grad_norm": 0.22313402180370537, + "learning_rate": 6.3795202291442905e-06, + "loss": 0.0012, + "step": 6613 + }, + { + "epoch": 0.4262421859895598, + "grad_norm": 0.000392622305714873, + "learning_rate": 6.378804153240245e-06, + "loss": 0.0, + "step": 6614 + }, + { + "epoch": 0.426306631436489, + "grad_norm": 0.00409946774879405, + "learning_rate": 6.378088077336198e-06, + "loss": 0.0, + "step": 6615 + }, + { + "epoch": 0.4263710768834182, + "grad_norm": 0.0188214650348023, + "learning_rate": 6.3773720014321525e-06, + "loss": 0.0001, + "step": 6616 + }, + { + "epoch": 0.42643552233034737, + "grad_norm": 0.05915115781377536, + "learning_rate": 6.376655925528107e-06, + "loss": 0.0005, + "step": 6617 + }, + { + "epoch": 0.42649996777727656, + "grad_norm": 0.004750804011270957, + "learning_rate": 6.375939849624061e-06, + "loss": 0.0, + "step": 6618 + }, + { + "epoch": 0.4265644132242057, + "grad_norm": 0.0009950607837926515, + "learning_rate": 6.375223773720015e-06, + "loss": 0.0, + "step": 6619 + }, + { + "epoch": 0.42662885867113487, + "grad_norm": 0.0003103790189822709, + "learning_rate": 6.374507697815969e-06, + "loss": 0.0, + "step": 6620 + }, + { + "epoch": 0.42669330411806405, + "grad_norm": 0.006672502559690551, + "learning_rate": 6.373791621911923e-06, + "loss": 0.0, + "step": 6621 + }, + { + "epoch": 0.42675774956499324, + "grad_norm": 0.0012132214678914587, + "learning_rate": 6.3730755460078774e-06, + "loss": 0.0, + "step": 6622 + }, + { + "epoch": 0.4268221950119224, + "grad_norm": 0.5459795569131483, + "learning_rate": 6.372359470103832e-06, + "loss": 0.001, + "step": 6623 + }, + { + "epoch": 0.4268866404588516, + "grad_norm": 0.1584840858127378, + "learning_rate": 6.371643394199786e-06, + "loss": 0.0018, + "step": 6624 + }, + { + "epoch": 0.42695108590578074, + "grad_norm": 0.02230071107940539, + "learning_rate": 6.3709273182957395e-06, + "loss": 0.0, + "step": 6625 + }, + { + "epoch": 0.4270155313527099, + "grad_norm": 0.2010288563453443, + "learning_rate": 6.370211242391694e-06, + "loss": 0.0002, + "step": 6626 + }, + { + "epoch": 0.4270799767996391, + "grad_norm": 0.0005539057811006905, + "learning_rate": 6.369495166487648e-06, + "loss": 0.0, + "step": 6627 + }, + { + "epoch": 0.4271444222465683, + "grad_norm": 8.601880093504294e-05, + "learning_rate": 6.368779090583602e-06, + "loss": 0.0, + "step": 6628 + }, + { + "epoch": 0.4272088676934975, + "grad_norm": 0.00694744794728398, + "learning_rate": 6.368063014679557e-06, + "loss": 0.0, + "step": 6629 + }, + { + "epoch": 0.4272733131404266, + "grad_norm": 0.00011373609568961924, + "learning_rate": 6.36734693877551e-06, + "loss": 0.0, + "step": 6630 + }, + { + "epoch": 0.4273377585873558, + "grad_norm": 0.2741989161173644, + "learning_rate": 6.3666308628714644e-06, + "loss": 0.0024, + "step": 6631 + }, + { + "epoch": 0.42740220403428497, + "grad_norm": 0.121421209699953, + "learning_rate": 6.365914786967419e-06, + "loss": 0.0003, + "step": 6632 + }, + { + "epoch": 0.42746664948121416, + "grad_norm": 0.0009714433687004506, + "learning_rate": 6.365198711063373e-06, + "loss": 0.0, + "step": 6633 + }, + { + "epoch": 0.42753109492814334, + "grad_norm": 0.10040541532491891, + "learning_rate": 6.364482635159327e-06, + "loss": 0.0007, + "step": 6634 + }, + { + "epoch": 0.4275955403750725, + "grad_norm": 6.250609963284367e-05, + "learning_rate": 6.363766559255281e-06, + "loss": 0.0, + "step": 6635 + }, + { + "epoch": 0.42765998582200165, + "grad_norm": 0.0007349660592156871, + "learning_rate": 6.363050483351235e-06, + "loss": 0.0, + "step": 6636 + }, + { + "epoch": 0.42772443126893084, + "grad_norm": 0.00017060009315793165, + "learning_rate": 6.36233440744719e-06, + "loss": 0.0, + "step": 6637 + }, + { + "epoch": 0.42778887671586, + "grad_norm": 0.002033436154090894, + "learning_rate": 6.3616183315431445e-06, + "loss": 0.0, + "step": 6638 + }, + { + "epoch": 0.4278533221627892, + "grad_norm": 0.001637918147364348, + "learning_rate": 6.360902255639099e-06, + "loss": 0.0, + "step": 6639 + }, + { + "epoch": 0.4279177676097184, + "grad_norm": 0.19539891238751805, + "learning_rate": 6.360186179735053e-06, + "loss": 0.0004, + "step": 6640 + }, + { + "epoch": 0.4279822130566475, + "grad_norm": 0.0018548896809036603, + "learning_rate": 6.359470103831007e-06, + "loss": 0.0, + "step": 6641 + }, + { + "epoch": 0.4280466585035767, + "grad_norm": 0.0013334698909709792, + "learning_rate": 6.358754027926961e-06, + "loss": 0.0, + "step": 6642 + }, + { + "epoch": 0.4281111039505059, + "grad_norm": 0.0028419573034907998, + "learning_rate": 6.358037952022915e-06, + "loss": 0.0, + "step": 6643 + }, + { + "epoch": 0.4281755493974351, + "grad_norm": 0.006881097440326114, + "learning_rate": 6.3573218761188695e-06, + "loss": 0.0, + "step": 6644 + }, + { + "epoch": 0.42823999484436426, + "grad_norm": 0.001116479305550302, + "learning_rate": 6.356605800214824e-06, + "loss": 0.0, + "step": 6645 + }, + { + "epoch": 0.42830444029129344, + "grad_norm": 0.00025453688824276247, + "learning_rate": 6.355889724310777e-06, + "loss": 0.0, + "step": 6646 + }, + { + "epoch": 0.42836888573822257, + "grad_norm": 0.005216877980391289, + "learning_rate": 6.3551736484067315e-06, + "loss": 0.0, + "step": 6647 + }, + { + "epoch": 0.42843333118515176, + "grad_norm": 0.001896177419504992, + "learning_rate": 6.354457572502686e-06, + "loss": 0.0, + "step": 6648 + }, + { + "epoch": 0.42849777663208094, + "grad_norm": 0.0002611695027218609, + "learning_rate": 6.35374149659864e-06, + "loss": 0.0, + "step": 6649 + }, + { + "epoch": 0.4285622220790101, + "grad_norm": 0.22977676603252645, + "learning_rate": 6.353025420694594e-06, + "loss": 0.0003, + "step": 6650 + }, + { + "epoch": 0.4286266675259393, + "grad_norm": 0.0017587722789177331, + "learning_rate": 6.352309344790548e-06, + "loss": 0.0, + "step": 6651 + }, + { + "epoch": 0.4286911129728685, + "grad_norm": 0.0020766966092716993, + "learning_rate": 6.351593268886502e-06, + "loss": 0.0, + "step": 6652 + }, + { + "epoch": 0.4287555584197976, + "grad_norm": 0.40843597414743027, + "learning_rate": 6.3508771929824565e-06, + "loss": 0.0014, + "step": 6653 + }, + { + "epoch": 0.4288200038667268, + "grad_norm": 0.013422847530458746, + "learning_rate": 6.350161117078411e-06, + "loss": 0.0, + "step": 6654 + }, + { + "epoch": 0.428884449313656, + "grad_norm": 0.7281807546593764, + "learning_rate": 6.349445041174365e-06, + "loss": 0.005, + "step": 6655 + }, + { + "epoch": 0.4289488947605852, + "grad_norm": 0.0007956452725589806, + "learning_rate": 6.3487289652703185e-06, + "loss": 0.0, + "step": 6656 + }, + { + "epoch": 0.42901334020751436, + "grad_norm": 0.008950084640330636, + "learning_rate": 6.348012889366273e-06, + "loss": 0.0, + "step": 6657 + }, + { + "epoch": 0.4290777856544435, + "grad_norm": 0.17089821406758554, + "learning_rate": 6.347296813462227e-06, + "loss": 0.0018, + "step": 6658 + }, + { + "epoch": 0.4291422311013727, + "grad_norm": 0.0030694031207953013, + "learning_rate": 6.346580737558181e-06, + "loss": 0.0, + "step": 6659 + }, + { + "epoch": 0.42920667654830186, + "grad_norm": 0.02444291912822385, + "learning_rate": 6.3458646616541366e-06, + "loss": 0.0, + "step": 6660 + }, + { + "epoch": 0.42927112199523104, + "grad_norm": 0.002884947247548892, + "learning_rate": 6.345148585750091e-06, + "loss": 0.0, + "step": 6661 + }, + { + "epoch": 0.4293355674421602, + "grad_norm": 0.005064100957951929, + "learning_rate": 6.344432509846044e-06, + "loss": 0.0, + "step": 6662 + }, + { + "epoch": 0.4294000128890894, + "grad_norm": 0.023081917055650943, + "learning_rate": 6.343716433941999e-06, + "loss": 0.0002, + "step": 6663 + }, + { + "epoch": 0.42946445833601854, + "grad_norm": 0.0009968242300762613, + "learning_rate": 6.343000358037953e-06, + "loss": 0.0, + "step": 6664 + }, + { + "epoch": 0.4295289037829477, + "grad_norm": 0.002290510605043889, + "learning_rate": 6.342284282133907e-06, + "loss": 0.0, + "step": 6665 + }, + { + "epoch": 0.4295933492298769, + "grad_norm": 0.00029481900990017935, + "learning_rate": 6.3415682062298615e-06, + "loss": 0.0, + "step": 6666 + }, + { + "epoch": 0.4296577946768061, + "grad_norm": 0.002503810212365343, + "learning_rate": 6.340852130325815e-06, + "loss": 0.0, + "step": 6667 + }, + { + "epoch": 0.4297222401237353, + "grad_norm": 0.08980436602116373, + "learning_rate": 6.340136054421769e-06, + "loss": 0.0002, + "step": 6668 + }, + { + "epoch": 0.4297866855706644, + "grad_norm": 0.011429338482220037, + "learning_rate": 6.3394199785177236e-06, + "loss": 0.0, + "step": 6669 + }, + { + "epoch": 0.4298511310175936, + "grad_norm": 0.009484022902911424, + "learning_rate": 6.338703902613678e-06, + "loss": 0.0, + "step": 6670 + }, + { + "epoch": 0.4299155764645228, + "grad_norm": 0.02464525902804644, + "learning_rate": 6.337987826709632e-06, + "loss": 0.0002, + "step": 6671 + }, + { + "epoch": 0.42998002191145196, + "grad_norm": 0.0070729571841776796, + "learning_rate": 6.337271750805586e-06, + "loss": 0.0, + "step": 6672 + }, + { + "epoch": 0.43004446735838114, + "grad_norm": 0.0005698593327859449, + "learning_rate": 6.33655567490154e-06, + "loss": 0.0, + "step": 6673 + }, + { + "epoch": 0.43010891280531033, + "grad_norm": 0.007270899548970992, + "learning_rate": 6.335839598997494e-06, + "loss": 0.0, + "step": 6674 + }, + { + "epoch": 0.43017335825223946, + "grad_norm": 0.0009913103341804657, + "learning_rate": 6.3351235230934485e-06, + "loss": 0.0, + "step": 6675 + }, + { + "epoch": 0.43023780369916864, + "grad_norm": 0.002595574146712181, + "learning_rate": 6.334407447189402e-06, + "loss": 0.0, + "step": 6676 + }, + { + "epoch": 0.4303022491460978, + "grad_norm": 0.14062348338474917, + "learning_rate": 6.333691371285356e-06, + "loss": 0.001, + "step": 6677 + }, + { + "epoch": 0.430366694593027, + "grad_norm": 0.000567302968704546, + "learning_rate": 6.3329752953813105e-06, + "loss": 0.0, + "step": 6678 + }, + { + "epoch": 0.4304311400399562, + "grad_norm": 0.0054263714955121115, + "learning_rate": 6.332259219477265e-06, + "loss": 0.0, + "step": 6679 + }, + { + "epoch": 0.4304955854868854, + "grad_norm": 0.016467133939146297, + "learning_rate": 6.331543143573219e-06, + "loss": 0.0001, + "step": 6680 + }, + { + "epoch": 0.4305600309338145, + "grad_norm": 0.0013694414101803056, + "learning_rate": 6.330827067669173e-06, + "loss": 0.0, + "step": 6681 + }, + { + "epoch": 0.4306244763807437, + "grad_norm": 0.0037395349074488956, + "learning_rate": 6.330110991765127e-06, + "loss": 0.0, + "step": 6682 + }, + { + "epoch": 0.4306889218276729, + "grad_norm": 0.29080137264148787, + "learning_rate": 6.329394915861082e-06, + "loss": 0.0009, + "step": 6683 + }, + { + "epoch": 0.43075336727460206, + "grad_norm": 0.002323678893051278, + "learning_rate": 6.328678839957036e-06, + "loss": 0.0, + "step": 6684 + }, + { + "epoch": 0.43081781272153125, + "grad_norm": 0.08255165084440778, + "learning_rate": 6.327962764052991e-06, + "loss": 0.0001, + "step": 6685 + }, + { + "epoch": 0.4308822581684604, + "grad_norm": 0.1910078417668657, + "learning_rate": 6.327246688148945e-06, + "loss": 0.0004, + "step": 6686 + }, + { + "epoch": 0.43094670361538956, + "grad_norm": 0.0015552556160902183, + "learning_rate": 6.326530612244899e-06, + "loss": 0.0, + "step": 6687 + }, + { + "epoch": 0.43101114906231874, + "grad_norm": 0.000931274017975527, + "learning_rate": 6.325814536340853e-06, + "loss": 0.0, + "step": 6688 + }, + { + "epoch": 0.43107559450924793, + "grad_norm": 0.026236450627385367, + "learning_rate": 6.325098460436807e-06, + "loss": 0.0002, + "step": 6689 + }, + { + "epoch": 0.4311400399561771, + "grad_norm": 0.007944311984236513, + "learning_rate": 6.324382384532761e-06, + "loss": 0.0, + "step": 6690 + }, + { + "epoch": 0.4312044854031063, + "grad_norm": 0.0017483911802880752, + "learning_rate": 6.323666308628716e-06, + "loss": 0.0, + "step": 6691 + }, + { + "epoch": 0.4312689308500354, + "grad_norm": 0.8183681514879361, + "learning_rate": 6.322950232724669e-06, + "loss": 0.0008, + "step": 6692 + }, + { + "epoch": 0.4313333762969646, + "grad_norm": 0.013792580667864334, + "learning_rate": 6.322234156820623e-06, + "loss": 0.0, + "step": 6693 + }, + { + "epoch": 0.4313978217438938, + "grad_norm": 0.015097850842103971, + "learning_rate": 6.321518080916578e-06, + "loss": 0.0, + "step": 6694 + }, + { + "epoch": 0.431462267190823, + "grad_norm": 0.006704856775930077, + "learning_rate": 6.320802005012532e-06, + "loss": 0.0, + "step": 6695 + }, + { + "epoch": 0.43152671263775216, + "grad_norm": 0.0008574753945796366, + "learning_rate": 6.320085929108486e-06, + "loss": 0.0, + "step": 6696 + }, + { + "epoch": 0.4315911580846813, + "grad_norm": 0.18016405765260005, + "learning_rate": 6.31936985320444e-06, + "loss": 0.0019, + "step": 6697 + }, + { + "epoch": 0.4316556035316105, + "grad_norm": 4.158763383683872, + "learning_rate": 6.318653777300394e-06, + "loss": 0.0134, + "step": 6698 + }, + { + "epoch": 0.43172004897853966, + "grad_norm": 0.04242575207314043, + "learning_rate": 6.317937701396348e-06, + "loss": 0.0001, + "step": 6699 + }, + { + "epoch": 0.43178449442546885, + "grad_norm": 0.0038851543432019396, + "learning_rate": 6.317221625492303e-06, + "loss": 0.0, + "step": 6700 + }, + { + "epoch": 0.43184893987239803, + "grad_norm": 0.15580466106012691, + "learning_rate": 6.316505549588257e-06, + "loss": 0.0009, + "step": 6701 + }, + { + "epoch": 0.4319133853193272, + "grad_norm": 0.005339445096540471, + "learning_rate": 6.31578947368421e-06, + "loss": 0.0, + "step": 6702 + }, + { + "epoch": 0.43197783076625634, + "grad_norm": 0.0005897817405355401, + "learning_rate": 6.315073397780165e-06, + "loss": 0.0, + "step": 6703 + }, + { + "epoch": 0.4320422762131855, + "grad_norm": 0.00029861629602622337, + "learning_rate": 6.314357321876119e-06, + "loss": 0.0, + "step": 6704 + }, + { + "epoch": 0.4321067216601147, + "grad_norm": 0.0011596472343831097, + "learning_rate": 6.313641245972073e-06, + "loss": 0.0, + "step": 6705 + }, + { + "epoch": 0.4321711671070439, + "grad_norm": 0.21832649072057134, + "learning_rate": 6.3129251700680275e-06, + "loss": 0.0002, + "step": 6706 + }, + { + "epoch": 0.4322356125539731, + "grad_norm": 8.891694569460509e-05, + "learning_rate": 6.312209094163983e-06, + "loss": 0.0, + "step": 6707 + }, + { + "epoch": 0.4323000580009022, + "grad_norm": 0.0010427760358763495, + "learning_rate": 6.311493018259936e-06, + "loss": 0.0, + "step": 6708 + }, + { + "epoch": 0.4323645034478314, + "grad_norm": 0.044251419583747786, + "learning_rate": 6.31077694235589e-06, + "loss": 0.0, + "step": 6709 + }, + { + "epoch": 0.4324289488947606, + "grad_norm": 0.000149714890507064, + "learning_rate": 6.310060866451845e-06, + "loss": 0.0, + "step": 6710 + }, + { + "epoch": 0.43249339434168976, + "grad_norm": 0.20032516548059928, + "learning_rate": 6.309344790547799e-06, + "loss": 0.0002, + "step": 6711 + }, + { + "epoch": 0.43255783978861895, + "grad_norm": 0.002627819770958189, + "learning_rate": 6.308628714643753e-06, + "loss": 0.0, + "step": 6712 + }, + { + "epoch": 0.43262228523554813, + "grad_norm": 0.01592602803687255, + "learning_rate": 6.307912638739707e-06, + "loss": 0.0, + "step": 6713 + }, + { + "epoch": 0.43268673068247726, + "grad_norm": 0.004949941235307722, + "learning_rate": 6.307196562835661e-06, + "loss": 0.0, + "step": 6714 + }, + { + "epoch": 0.43275117612940645, + "grad_norm": 0.4457405776687715, + "learning_rate": 6.306480486931615e-06, + "loss": 0.0016, + "step": 6715 + }, + { + "epoch": 0.43281562157633563, + "grad_norm": 0.0001713559314944813, + "learning_rate": 6.30576441102757e-06, + "loss": 0.0, + "step": 6716 + }, + { + "epoch": 0.4328800670232648, + "grad_norm": 0.52003317085252, + "learning_rate": 6.305048335123524e-06, + "loss": 0.0034, + "step": 6717 + }, + { + "epoch": 0.432944512470194, + "grad_norm": 0.24328987666119062, + "learning_rate": 6.304332259219477e-06, + "loss": 0.0004, + "step": 6718 + }, + { + "epoch": 0.4330089579171232, + "grad_norm": 6.05488712972811e-05, + "learning_rate": 6.303616183315432e-06, + "loss": 0.0, + "step": 6719 + }, + { + "epoch": 0.4330734033640523, + "grad_norm": 0.9154862295455783, + "learning_rate": 6.302900107411386e-06, + "loss": 0.0044, + "step": 6720 + }, + { + "epoch": 0.4331378488109815, + "grad_norm": 0.00011452810699505064, + "learning_rate": 6.30218403150734e-06, + "loss": 0.0, + "step": 6721 + }, + { + "epoch": 0.4332022942579107, + "grad_norm": 0.2684594141416232, + "learning_rate": 6.301467955603295e-06, + "loss": 0.0047, + "step": 6722 + }, + { + "epoch": 0.43326673970483986, + "grad_norm": 0.02207255766112272, + "learning_rate": 6.300751879699248e-06, + "loss": 0.0, + "step": 6723 + }, + { + "epoch": 0.43333118515176905, + "grad_norm": 0.0029896027307358413, + "learning_rate": 6.300035803795202e-06, + "loss": 0.0, + "step": 6724 + }, + { + "epoch": 0.4333956305986982, + "grad_norm": 0.16950898046179497, + "learning_rate": 6.299319727891157e-06, + "loss": 0.0003, + "step": 6725 + }, + { + "epoch": 0.43346007604562736, + "grad_norm": 0.29806840851315397, + "learning_rate": 6.298603651987111e-06, + "loss": 0.0017, + "step": 6726 + }, + { + "epoch": 0.43352452149255655, + "grad_norm": 0.016236714793457938, + "learning_rate": 6.297887576083065e-06, + "loss": 0.0, + "step": 6727 + }, + { + "epoch": 0.43358896693948573, + "grad_norm": 0.009315682470924137, + "learning_rate": 6.297171500179019e-06, + "loss": 0.0, + "step": 6728 + }, + { + "epoch": 0.4336534123864149, + "grad_norm": 0.03930989690288691, + "learning_rate": 6.296455424274973e-06, + "loss": 0.0001, + "step": 6729 + }, + { + "epoch": 0.4337178578333441, + "grad_norm": 0.17229336517397006, + "learning_rate": 6.295739348370928e-06, + "loss": 0.0018, + "step": 6730 + }, + { + "epoch": 0.43378230328027323, + "grad_norm": 0.010794653699986811, + "learning_rate": 6.2950232724668825e-06, + "loss": 0.0, + "step": 6731 + }, + { + "epoch": 0.4338467487272024, + "grad_norm": 0.09056349817328786, + "learning_rate": 6.294307196562837e-06, + "loss": 0.0001, + "step": 6732 + }, + { + "epoch": 0.4339111941741316, + "grad_norm": 0.0032560131138640283, + "learning_rate": 6.293591120658791e-06, + "loss": 0.0, + "step": 6733 + }, + { + "epoch": 0.4339756396210608, + "grad_norm": 0.2929552456604397, + "learning_rate": 6.2928750447547445e-06, + "loss": 0.0024, + "step": 6734 + }, + { + "epoch": 0.43404008506798997, + "grad_norm": 0.004358307485739401, + "learning_rate": 6.292158968850699e-06, + "loss": 0.0, + "step": 6735 + }, + { + "epoch": 0.4341045305149191, + "grad_norm": 0.7401282000325132, + "learning_rate": 6.291442892946653e-06, + "loss": 0.0031, + "step": 6736 + }, + { + "epoch": 0.4341689759618483, + "grad_norm": 0.024909603939854592, + "learning_rate": 6.290726817042607e-06, + "loss": 0.0001, + "step": 6737 + }, + { + "epoch": 0.43423342140877746, + "grad_norm": 0.005705359287954765, + "learning_rate": 6.290010741138562e-06, + "loss": 0.0, + "step": 6738 + }, + { + "epoch": 0.43429786685570665, + "grad_norm": 0.1156991258673197, + "learning_rate": 6.289294665234515e-06, + "loss": 0.0001, + "step": 6739 + }, + { + "epoch": 0.43436231230263583, + "grad_norm": 0.008699033419681219, + "learning_rate": 6.2885785893304694e-06, + "loss": 0.0, + "step": 6740 + }, + { + "epoch": 0.434426757749565, + "grad_norm": 0.6171667602531093, + "learning_rate": 6.287862513426424e-06, + "loss": 0.0039, + "step": 6741 + }, + { + "epoch": 0.43449120319649415, + "grad_norm": 0.16432638744027941, + "learning_rate": 6.287146437522378e-06, + "loss": 0.0004, + "step": 6742 + }, + { + "epoch": 0.43455564864342333, + "grad_norm": 0.02447242080053091, + "learning_rate": 6.286430361618332e-06, + "loss": 0.0, + "step": 6743 + }, + { + "epoch": 0.4346200940903525, + "grad_norm": 0.0921131984266586, + "learning_rate": 6.285714285714286e-06, + "loss": 0.0001, + "step": 6744 + }, + { + "epoch": 0.4346845395372817, + "grad_norm": 0.022735265434989764, + "learning_rate": 6.28499820981024e-06, + "loss": 0.0, + "step": 6745 + }, + { + "epoch": 0.4347489849842109, + "grad_norm": 0.0013172423891951862, + "learning_rate": 6.284282133906194e-06, + "loss": 0.0, + "step": 6746 + }, + { + "epoch": 0.43481343043114, + "grad_norm": 0.007643909237297049, + "learning_rate": 6.283566058002149e-06, + "loss": 0.0, + "step": 6747 + }, + { + "epoch": 0.4348778758780692, + "grad_norm": 0.04607741987970149, + "learning_rate": 6.282849982098103e-06, + "loss": 0.0001, + "step": 6748 + }, + { + "epoch": 0.4349423213249984, + "grad_norm": 0.06911486318143999, + "learning_rate": 6.2821339061940564e-06, + "loss": 0.0, + "step": 6749 + }, + { + "epoch": 0.43500676677192757, + "grad_norm": 0.02393797398342579, + "learning_rate": 6.281417830290011e-06, + "loss": 0.0, + "step": 6750 + }, + { + "epoch": 0.43507121221885675, + "grad_norm": 0.26460427623124094, + "learning_rate": 6.280701754385965e-06, + "loss": 0.0016, + "step": 6751 + }, + { + "epoch": 0.43513565766578594, + "grad_norm": 0.00026218383385574675, + "learning_rate": 6.279985678481919e-06, + "loss": 0.0, + "step": 6752 + }, + { + "epoch": 0.43520010311271506, + "grad_norm": 0.0026633181035528104, + "learning_rate": 6.279269602577874e-06, + "loss": 0.0, + "step": 6753 + }, + { + "epoch": 0.43526454855964425, + "grad_norm": 0.11784700149040153, + "learning_rate": 6.278553526673829e-06, + "loss": 0.0006, + "step": 6754 + }, + { + "epoch": 0.43532899400657343, + "grad_norm": 0.0017846129932184572, + "learning_rate": 6.277837450769782e-06, + "loss": 0.0, + "step": 6755 + }, + { + "epoch": 0.4353934394535026, + "grad_norm": 0.07136146578848694, + "learning_rate": 6.2771213748657365e-06, + "loss": 0.0001, + "step": 6756 + }, + { + "epoch": 0.4354578849004318, + "grad_norm": 0.17367550761601813, + "learning_rate": 6.276405298961691e-06, + "loss": 0.0003, + "step": 6757 + }, + { + "epoch": 0.435522330347361, + "grad_norm": 0.06442347118931648, + "learning_rate": 6.275689223057645e-06, + "loss": 0.0009, + "step": 6758 + }, + { + "epoch": 0.4355867757942901, + "grad_norm": 1.113797680383268, + "learning_rate": 6.2749731471535994e-06, + "loss": 0.0098, + "step": 6759 + }, + { + "epoch": 0.4356512212412193, + "grad_norm": 0.005809118892805713, + "learning_rate": 6.274257071249553e-06, + "loss": 0.0, + "step": 6760 + }, + { + "epoch": 0.4357156666881485, + "grad_norm": 1.4354561913321795, + "learning_rate": 6.273540995345507e-06, + "loss": 0.0023, + "step": 6761 + }, + { + "epoch": 0.43578011213507767, + "grad_norm": 0.052269633952662035, + "learning_rate": 6.2728249194414615e-06, + "loss": 0.0005, + "step": 6762 + }, + { + "epoch": 0.43584455758200685, + "grad_norm": 0.012401707536231203, + "learning_rate": 6.272108843537416e-06, + "loss": 0.0001, + "step": 6763 + }, + { + "epoch": 0.435909003028936, + "grad_norm": 0.0037154901687754456, + "learning_rate": 6.27139276763337e-06, + "loss": 0.0, + "step": 6764 + }, + { + "epoch": 0.43597344847586517, + "grad_norm": 0.0018004172015471341, + "learning_rate": 6.2706766917293235e-06, + "loss": 0.0, + "step": 6765 + }, + { + "epoch": 0.43603789392279435, + "grad_norm": 0.03574004342730524, + "learning_rate": 6.269960615825278e-06, + "loss": 0.0001, + "step": 6766 + }, + { + "epoch": 0.43610233936972354, + "grad_norm": 0.010076841435620492, + "learning_rate": 6.269244539921232e-06, + "loss": 0.0, + "step": 6767 + }, + { + "epoch": 0.4361667848166527, + "grad_norm": 0.15066047090537446, + "learning_rate": 6.268528464017186e-06, + "loss": 0.0016, + "step": 6768 + }, + { + "epoch": 0.4362312302635819, + "grad_norm": 0.15723401401466586, + "learning_rate": 6.267812388113141e-06, + "loss": 0.0007, + "step": 6769 + }, + { + "epoch": 0.43629567571051103, + "grad_norm": 0.29885035164145274, + "learning_rate": 6.267096312209094e-06, + "loss": 0.0011, + "step": 6770 + }, + { + "epoch": 0.4363601211574402, + "grad_norm": 0.06953103863759008, + "learning_rate": 6.2663802363050485e-06, + "loss": 0.0, + "step": 6771 + }, + { + "epoch": 0.4364245666043694, + "grad_norm": 0.0003913824342631887, + "learning_rate": 6.265664160401003e-06, + "loss": 0.0, + "step": 6772 + }, + { + "epoch": 0.4364890120512986, + "grad_norm": 5.8933252323782934e-05, + "learning_rate": 6.264948084496957e-06, + "loss": 0.0, + "step": 6773 + }, + { + "epoch": 0.43655345749822777, + "grad_norm": 0.003684676938295499, + "learning_rate": 6.2642320085929105e-06, + "loss": 0.0, + "step": 6774 + }, + { + "epoch": 0.4366179029451569, + "grad_norm": 0.00030307847703955963, + "learning_rate": 6.263515932688865e-06, + "loss": 0.0, + "step": 6775 + }, + { + "epoch": 0.4366823483920861, + "grad_norm": 0.007013575093070469, + "learning_rate": 6.262799856784819e-06, + "loss": 0.0, + "step": 6776 + }, + { + "epoch": 0.43674679383901527, + "grad_norm": 0.00314157343057512, + "learning_rate": 6.262083780880774e-06, + "loss": 0.0, + "step": 6777 + }, + { + "epoch": 0.43681123928594445, + "grad_norm": 0.0032058113583623357, + "learning_rate": 6.2613677049767286e-06, + "loss": 0.0, + "step": 6778 + }, + { + "epoch": 0.43687568473287364, + "grad_norm": 0.14535620811397174, + "learning_rate": 6.260651629072683e-06, + "loss": 0.0004, + "step": 6779 + }, + { + "epoch": 0.4369401301798028, + "grad_norm": 0.0010641447845441464, + "learning_rate": 6.259935553168637e-06, + "loss": 0.0, + "step": 6780 + }, + { + "epoch": 0.43700457562673195, + "grad_norm": 0.030892537690979292, + "learning_rate": 6.259219477264591e-06, + "loss": 0.0002, + "step": 6781 + }, + { + "epoch": 0.43706902107366113, + "grad_norm": 0.5230201366789412, + "learning_rate": 6.258503401360545e-06, + "loss": 0.0012, + "step": 6782 + }, + { + "epoch": 0.4371334665205903, + "grad_norm": 0.18978095448541774, + "learning_rate": 6.257787325456499e-06, + "loss": 0.0007, + "step": 6783 + }, + { + "epoch": 0.4371979119675195, + "grad_norm": 0.00987531260439076, + "learning_rate": 6.2570712495524535e-06, + "loss": 0.0, + "step": 6784 + }, + { + "epoch": 0.4372623574144487, + "grad_norm": 0.25283649745634335, + "learning_rate": 6.256355173648408e-06, + "loss": 0.0003, + "step": 6785 + }, + { + "epoch": 0.4373268028613778, + "grad_norm": 0.0061907236244896895, + "learning_rate": 6.255639097744361e-06, + "loss": 0.0, + "step": 6786 + }, + { + "epoch": 0.437391248308307, + "grad_norm": 0.2543641582962293, + "learning_rate": 6.2549230218403156e-06, + "loss": 0.0018, + "step": 6787 + }, + { + "epoch": 0.4374556937552362, + "grad_norm": 0.008615261739393556, + "learning_rate": 6.25420694593627e-06, + "loss": 0.0, + "step": 6788 + }, + { + "epoch": 0.43752013920216537, + "grad_norm": 0.01651753520272118, + "learning_rate": 6.253490870032224e-06, + "loss": 0.0002, + "step": 6789 + }, + { + "epoch": 0.43758458464909455, + "grad_norm": 0.21448941922489909, + "learning_rate": 6.252774794128178e-06, + "loss": 0.0007, + "step": 6790 + }, + { + "epoch": 0.43764903009602374, + "grad_norm": 3.2058399888517126e-05, + "learning_rate": 6.252058718224132e-06, + "loss": 0.0, + "step": 6791 + }, + { + "epoch": 0.43771347554295287, + "grad_norm": 0.7354880075909804, + "learning_rate": 6.251342642320086e-06, + "loss": 0.0028, + "step": 6792 + }, + { + "epoch": 0.43777792098988205, + "grad_norm": 0.0030102187090905903, + "learning_rate": 6.2506265664160405e-06, + "loss": 0.0, + "step": 6793 + }, + { + "epoch": 0.43784236643681124, + "grad_norm": 0.01540114091165498, + "learning_rate": 6.249910490511995e-06, + "loss": 0.0001, + "step": 6794 + }, + { + "epoch": 0.4379068118837404, + "grad_norm": 0.08756405771852145, + "learning_rate": 6.249194414607948e-06, + "loss": 0.0001, + "step": 6795 + }, + { + "epoch": 0.4379712573306696, + "grad_norm": 0.004207340449658778, + "learning_rate": 6.2484783387039025e-06, + "loss": 0.0, + "step": 6796 + }, + { + "epoch": 0.4380357027775988, + "grad_norm": 0.03770904945438654, + "learning_rate": 6.247762262799857e-06, + "loss": 0.0001, + "step": 6797 + }, + { + "epoch": 0.4381001482245279, + "grad_norm": 0.0003550056645818315, + "learning_rate": 6.247046186895811e-06, + "loss": 0.0, + "step": 6798 + }, + { + "epoch": 0.4381645936714571, + "grad_norm": 0.14162342297415867, + "learning_rate": 6.2463301109917654e-06, + "loss": 0.0019, + "step": 6799 + }, + { + "epoch": 0.4382290391183863, + "grad_norm": 0.00033271973675798647, + "learning_rate": 6.245614035087721e-06, + "loss": 0.0, + "step": 6800 + }, + { + "epoch": 0.43829348456531547, + "grad_norm": 1.0295957533116706, + "learning_rate": 6.244897959183675e-06, + "loss": 0.007, + "step": 6801 + }, + { + "epoch": 0.43835793001224466, + "grad_norm": 0.011322654464795303, + "learning_rate": 6.244181883279628e-06, + "loss": 0.0001, + "step": 6802 + }, + { + "epoch": 0.4384223754591738, + "grad_norm": 0.005792339884773891, + "learning_rate": 6.243465807375583e-06, + "loss": 0.0, + "step": 6803 + }, + { + "epoch": 0.43848682090610297, + "grad_norm": 0.7715247421327011, + "learning_rate": 6.242749731471537e-06, + "loss": 0.0032, + "step": 6804 + }, + { + "epoch": 0.43855126635303215, + "grad_norm": 0.029430314989175126, + "learning_rate": 6.242033655567491e-06, + "loss": 0.0001, + "step": 6805 + }, + { + "epoch": 0.43861571179996134, + "grad_norm": 0.0044577108409088175, + "learning_rate": 6.241317579663445e-06, + "loss": 0.0, + "step": 6806 + }, + { + "epoch": 0.4386801572468905, + "grad_norm": 0.0011134538704909022, + "learning_rate": 6.240601503759399e-06, + "loss": 0.0, + "step": 6807 + }, + { + "epoch": 0.4387446026938197, + "grad_norm": 0.00017366062586490197, + "learning_rate": 6.239885427855353e-06, + "loss": 0.0, + "step": 6808 + }, + { + "epoch": 0.43880904814074884, + "grad_norm": 0.006931424599151439, + "learning_rate": 6.239169351951308e-06, + "loss": 0.0, + "step": 6809 + }, + { + "epoch": 0.438873493587678, + "grad_norm": 0.06903922706188115, + "learning_rate": 6.238453276047262e-06, + "loss": 0.0001, + "step": 6810 + }, + { + "epoch": 0.4389379390346072, + "grad_norm": 0.0007704492971362574, + "learning_rate": 6.237737200143215e-06, + "loss": 0.0, + "step": 6811 + }, + { + "epoch": 0.4390023844815364, + "grad_norm": 0.4028598105360153, + "learning_rate": 6.23702112423917e-06, + "loss": 0.0006, + "step": 6812 + }, + { + "epoch": 0.4390668299284656, + "grad_norm": 0.008059346311853226, + "learning_rate": 6.236305048335124e-06, + "loss": 0.0, + "step": 6813 + }, + { + "epoch": 0.4391312753753947, + "grad_norm": 0.2791621410064719, + "learning_rate": 6.235588972431078e-06, + "loss": 0.0007, + "step": 6814 + }, + { + "epoch": 0.4391957208223239, + "grad_norm": 0.01712343117021725, + "learning_rate": 6.2348728965270325e-06, + "loss": 0.0, + "step": 6815 + }, + { + "epoch": 0.43926016626925307, + "grad_norm": 0.00298623967276542, + "learning_rate": 6.234156820622986e-06, + "loss": 0.0, + "step": 6816 + }, + { + "epoch": 0.43932461171618226, + "grad_norm": 0.0034664114644248696, + "learning_rate": 6.23344074471894e-06, + "loss": 0.0, + "step": 6817 + }, + { + "epoch": 0.43938905716311144, + "grad_norm": 0.03522052287221309, + "learning_rate": 6.232724668814895e-06, + "loss": 0.0003, + "step": 6818 + }, + { + "epoch": 0.4394535026100406, + "grad_norm": 0.05915418664026023, + "learning_rate": 6.232008592910849e-06, + "loss": 0.0001, + "step": 6819 + }, + { + "epoch": 0.43951794805696975, + "grad_norm": 0.00684155053848704, + "learning_rate": 6.231292517006803e-06, + "loss": 0.0, + "step": 6820 + }, + { + "epoch": 0.43958239350389894, + "grad_norm": 0.034032131900648584, + "learning_rate": 6.230576441102757e-06, + "loss": 0.0, + "step": 6821 + }, + { + "epoch": 0.4396468389508281, + "grad_norm": 0.006321578377653734, + "learning_rate": 6.229860365198711e-06, + "loss": 0.0, + "step": 6822 + }, + { + "epoch": 0.4397112843977573, + "grad_norm": 0.0879231922423166, + "learning_rate": 6.229144289294665e-06, + "loss": 0.0002, + "step": 6823 + }, + { + "epoch": 0.4397757298446865, + "grad_norm": 0.05236788173857305, + "learning_rate": 6.22842821339062e-06, + "loss": 0.0001, + "step": 6824 + }, + { + "epoch": 0.4398401752916156, + "grad_norm": 0.002943995240670316, + "learning_rate": 6.227712137486575e-06, + "loss": 0.0, + "step": 6825 + }, + { + "epoch": 0.4399046207385448, + "grad_norm": 0.022349262684986156, + "learning_rate": 6.226996061582529e-06, + "loss": 0.0001, + "step": 6826 + }, + { + "epoch": 0.439969066185474, + "grad_norm": 0.007797418739776279, + "learning_rate": 6.226279985678482e-06, + "loss": 0.0, + "step": 6827 + }, + { + "epoch": 0.4400335116324032, + "grad_norm": 0.749392223462473, + "learning_rate": 6.225563909774437e-06, + "loss": 0.0043, + "step": 6828 + }, + { + "epoch": 0.44009795707933236, + "grad_norm": 0.014145049204418786, + "learning_rate": 6.224847833870391e-06, + "loss": 0.0001, + "step": 6829 + }, + { + "epoch": 0.44016240252626154, + "grad_norm": 0.22435268993820356, + "learning_rate": 6.224131757966345e-06, + "loss": 0.0003, + "step": 6830 + }, + { + "epoch": 0.44022684797319067, + "grad_norm": 0.019355140889198114, + "learning_rate": 6.2234156820623e-06, + "loss": 0.0001, + "step": 6831 + }, + { + "epoch": 0.44029129342011986, + "grad_norm": 0.15658782559037665, + "learning_rate": 6.222699606158253e-06, + "loss": 0.0001, + "step": 6832 + }, + { + "epoch": 0.44035573886704904, + "grad_norm": 0.010001780505805401, + "learning_rate": 6.221983530254207e-06, + "loss": 0.0, + "step": 6833 + }, + { + "epoch": 0.4404201843139782, + "grad_norm": 0.03181574062344294, + "learning_rate": 6.221267454350162e-06, + "loss": 0.0, + "step": 6834 + }, + { + "epoch": 0.4404846297609074, + "grad_norm": 0.5931863116782081, + "learning_rate": 6.220551378446116e-06, + "loss": 0.0014, + "step": 6835 + }, + { + "epoch": 0.4405490752078366, + "grad_norm": 0.4585510156011477, + "learning_rate": 6.21983530254207e-06, + "loss": 0.0013, + "step": 6836 + }, + { + "epoch": 0.4406135206547657, + "grad_norm": 0.013945956708650028, + "learning_rate": 6.219119226638024e-06, + "loss": 0.0, + "step": 6837 + }, + { + "epoch": 0.4406779661016949, + "grad_norm": 0.023095446256535816, + "learning_rate": 6.218403150733978e-06, + "loss": 0.0, + "step": 6838 + }, + { + "epoch": 0.4407424115486241, + "grad_norm": 0.005353204918259331, + "learning_rate": 6.217687074829932e-06, + "loss": 0.0, + "step": 6839 + }, + { + "epoch": 0.4408068569955533, + "grad_norm": 0.03502575011971063, + "learning_rate": 6.216970998925887e-06, + "loss": 0.0002, + "step": 6840 + }, + { + "epoch": 0.44087130244248246, + "grad_norm": 0.12758947956133784, + "learning_rate": 6.216254923021841e-06, + "loss": 0.0005, + "step": 6841 + }, + { + "epoch": 0.4409357478894116, + "grad_norm": 0.006109814671694446, + "learning_rate": 6.215538847117794e-06, + "loss": 0.0016, + "step": 6842 + }, + { + "epoch": 0.4410001933363408, + "grad_norm": 0.0051795290955148395, + "learning_rate": 6.214822771213749e-06, + "loss": 0.0, + "step": 6843 + }, + { + "epoch": 0.44106463878326996, + "grad_norm": 0.0002441407996229203, + "learning_rate": 6.214106695309703e-06, + "loss": 0.0, + "step": 6844 + }, + { + "epoch": 0.44112908423019914, + "grad_norm": 0.1309906813181773, + "learning_rate": 6.213390619405657e-06, + "loss": 0.001, + "step": 6845 + }, + { + "epoch": 0.4411935296771283, + "grad_norm": 0.0005050727517481047, + "learning_rate": 6.2126745435016116e-06, + "loss": 0.0, + "step": 6846 + }, + { + "epoch": 0.4412579751240575, + "grad_norm": 0.005029458636800389, + "learning_rate": 6.211958467597567e-06, + "loss": 0.0, + "step": 6847 + }, + { + "epoch": 0.44132242057098664, + "grad_norm": 0.015254279423934458, + "learning_rate": 6.21124239169352e-06, + "loss": 0.0001, + "step": 6848 + }, + { + "epoch": 0.4413868660179158, + "grad_norm": 0.00022741485742966934, + "learning_rate": 6.2105263157894745e-06, + "loss": 0.0, + "step": 6849 + }, + { + "epoch": 0.441451311464845, + "grad_norm": 0.22921487814058641, + "learning_rate": 6.209810239885429e-06, + "loss": 0.0005, + "step": 6850 + }, + { + "epoch": 0.4415157569117742, + "grad_norm": 0.0032367808859397958, + "learning_rate": 6.209094163981383e-06, + "loss": 0.0, + "step": 6851 + }, + { + "epoch": 0.4415802023587034, + "grad_norm": 0.7078035888784612, + "learning_rate": 6.208378088077337e-06, + "loss": 0.0009, + "step": 6852 + }, + { + "epoch": 0.4416446478056325, + "grad_norm": 0.0015278402776132136, + "learning_rate": 6.207662012173291e-06, + "loss": 0.0, + "step": 6853 + }, + { + "epoch": 0.4417090932525617, + "grad_norm": 0.005985628339181594, + "learning_rate": 6.206945936269245e-06, + "loss": 0.0001, + "step": 6854 + }, + { + "epoch": 0.4417735386994909, + "grad_norm": 0.08176947446269071, + "learning_rate": 6.206229860365199e-06, + "loss": 0.0001, + "step": 6855 + }, + { + "epoch": 0.44183798414642006, + "grad_norm": 3.5829103841858606e-05, + "learning_rate": 6.205513784461154e-06, + "loss": 0.0, + "step": 6856 + }, + { + "epoch": 0.44190242959334924, + "grad_norm": 0.006482419460032692, + "learning_rate": 6.204797708557108e-06, + "loss": 0.0, + "step": 6857 + }, + { + "epoch": 0.44196687504027843, + "grad_norm": 0.0007300640455878939, + "learning_rate": 6.2040816326530614e-06, + "loss": 0.0, + "step": 6858 + }, + { + "epoch": 0.44203132048720756, + "grad_norm": 0.005625053429722351, + "learning_rate": 6.203365556749016e-06, + "loss": 0.0, + "step": 6859 + }, + { + "epoch": 0.44209576593413674, + "grad_norm": 0.0861411175000196, + "learning_rate": 6.20264948084497e-06, + "loss": 0.0003, + "step": 6860 + }, + { + "epoch": 0.4421602113810659, + "grad_norm": 0.004054258970734921, + "learning_rate": 6.201933404940924e-06, + "loss": 0.0, + "step": 6861 + }, + { + "epoch": 0.4422246568279951, + "grad_norm": 0.009220336750607218, + "learning_rate": 6.201217329036879e-06, + "loss": 0.0001, + "step": 6862 + }, + { + "epoch": 0.4422891022749243, + "grad_norm": 0.011453056003512678, + "learning_rate": 6.200501253132832e-06, + "loss": 0.0001, + "step": 6863 + }, + { + "epoch": 0.4423535477218534, + "grad_norm": 0.0011940250474132966, + "learning_rate": 6.199785177228786e-06, + "loss": 0.0, + "step": 6864 + }, + { + "epoch": 0.4424179931687826, + "grad_norm": 6.666298560305661e-05, + "learning_rate": 6.199069101324741e-06, + "loss": 0.0, + "step": 6865 + }, + { + "epoch": 0.4424824386157118, + "grad_norm": 0.02623876561333548, + "learning_rate": 6.198353025420695e-06, + "loss": 0.0001, + "step": 6866 + }, + { + "epoch": 0.442546884062641, + "grad_norm": 0.04930259728432476, + "learning_rate": 6.1976369495166484e-06, + "loss": 0.0005, + "step": 6867 + }, + { + "epoch": 0.44261132950957016, + "grad_norm": 0.006099219300208031, + "learning_rate": 6.196920873612603e-06, + "loss": 0.0, + "step": 6868 + }, + { + "epoch": 0.44267577495649935, + "grad_norm": 0.0012266434546431698, + "learning_rate": 6.196204797708557e-06, + "loss": 0.0, + "step": 6869 + }, + { + "epoch": 0.4427402204034285, + "grad_norm": 0.9596212412780556, + "learning_rate": 6.195488721804512e-06, + "loss": 0.0027, + "step": 6870 + }, + { + "epoch": 0.44280466585035766, + "grad_norm": 0.004923342711700275, + "learning_rate": 6.1947726459004665e-06, + "loss": 0.0, + "step": 6871 + }, + { + "epoch": 0.44286911129728684, + "grad_norm": 0.001109321347760395, + "learning_rate": 6.194056569996421e-06, + "loss": 0.0, + "step": 6872 + }, + { + "epoch": 0.44293355674421603, + "grad_norm": 0.0483371247384216, + "learning_rate": 6.193340494092375e-06, + "loss": 0.0002, + "step": 6873 + }, + { + "epoch": 0.4429980021911452, + "grad_norm": 0.005611801523459228, + "learning_rate": 6.1926244181883285e-06, + "loss": 0.0, + "step": 6874 + }, + { + "epoch": 0.4430624476380744, + "grad_norm": 0.004807293352621903, + "learning_rate": 6.191908342284283e-06, + "loss": 0.0, + "step": 6875 + }, + { + "epoch": 0.4431268930850035, + "grad_norm": 0.0006191547115299766, + "learning_rate": 6.191192266380237e-06, + "loss": 0.0, + "step": 6876 + }, + { + "epoch": 0.4431913385319327, + "grad_norm": 0.020956838583349454, + "learning_rate": 6.1904761904761914e-06, + "loss": 0.0, + "step": 6877 + }, + { + "epoch": 0.4432557839788619, + "grad_norm": 0.0281989780759363, + "learning_rate": 6.189760114572146e-06, + "loss": 0.0003, + "step": 6878 + }, + { + "epoch": 0.4433202294257911, + "grad_norm": 0.0013422409540706092, + "learning_rate": 6.189044038668099e-06, + "loss": 0.0, + "step": 6879 + }, + { + "epoch": 0.44338467487272026, + "grad_norm": 0.20020187181101248, + "learning_rate": 6.1883279627640535e-06, + "loss": 0.0001, + "step": 6880 + }, + { + "epoch": 0.4434491203196494, + "grad_norm": 0.0016931356119931837, + "learning_rate": 6.187611886860008e-06, + "loss": 0.0, + "step": 6881 + }, + { + "epoch": 0.4435135657665786, + "grad_norm": 0.03510104421217682, + "learning_rate": 6.186895810955962e-06, + "loss": 0.0001, + "step": 6882 + }, + { + "epoch": 0.44357801121350776, + "grad_norm": 0.0005879157791024848, + "learning_rate": 6.1861797350519155e-06, + "loss": 0.0, + "step": 6883 + }, + { + "epoch": 0.44364245666043695, + "grad_norm": 0.015057599267392894, + "learning_rate": 6.18546365914787e-06, + "loss": 0.0001, + "step": 6884 + }, + { + "epoch": 0.44370690210736613, + "grad_norm": 0.004394785597182588, + "learning_rate": 6.184747583243824e-06, + "loss": 0.0, + "step": 6885 + }, + { + "epoch": 0.4437713475542953, + "grad_norm": 0.20035424785335368, + "learning_rate": 6.184031507339778e-06, + "loss": 0.001, + "step": 6886 + }, + { + "epoch": 0.44383579300122444, + "grad_norm": 0.002954961677625816, + "learning_rate": 6.183315431435733e-06, + "loss": 0.0, + "step": 6887 + }, + { + "epoch": 0.44390023844815363, + "grad_norm": 0.14626720861118214, + "learning_rate": 6.182599355531686e-06, + "loss": 0.0003, + "step": 6888 + }, + { + "epoch": 0.4439646838950828, + "grad_norm": 0.004259351645734143, + "learning_rate": 6.1818832796276405e-06, + "loss": 0.0, + "step": 6889 + }, + { + "epoch": 0.444029129342012, + "grad_norm": 0.0006302645757763032, + "learning_rate": 6.181167203723595e-06, + "loss": 0.0, + "step": 6890 + }, + { + "epoch": 0.4440935747889412, + "grad_norm": 0.0005572306616088863, + "learning_rate": 6.180451127819549e-06, + "loss": 0.0, + "step": 6891 + }, + { + "epoch": 0.4441580202358703, + "grad_norm": 0.0032139911123859584, + "learning_rate": 6.179735051915503e-06, + "loss": 0.0, + "step": 6892 + }, + { + "epoch": 0.4442224656827995, + "grad_norm": 0.1437218464876338, + "learning_rate": 6.179018976011457e-06, + "loss": 0.0004, + "step": 6893 + }, + { + "epoch": 0.4442869111297287, + "grad_norm": 0.00045007094295306676, + "learning_rate": 6.178302900107413e-06, + "loss": 0.0, + "step": 6894 + }, + { + "epoch": 0.44435135657665786, + "grad_norm": 0.0007318578584300964, + "learning_rate": 6.177586824203366e-06, + "loss": 0.0, + "step": 6895 + }, + { + "epoch": 0.44441580202358705, + "grad_norm": 0.003502809933033344, + "learning_rate": 6.1768707482993206e-06, + "loss": 0.0, + "step": 6896 + }, + { + "epoch": 0.44448024747051623, + "grad_norm": 0.06071923318971043, + "learning_rate": 6.176154672395275e-06, + "loss": 0.0001, + "step": 6897 + }, + { + "epoch": 0.44454469291744536, + "grad_norm": 0.0004200751505464457, + "learning_rate": 6.175438596491229e-06, + "loss": 0.0, + "step": 6898 + }, + { + "epoch": 0.44460913836437455, + "grad_norm": 0.03086781277013602, + "learning_rate": 6.174722520587183e-06, + "loss": 0.0001, + "step": 6899 + }, + { + "epoch": 0.44467358381130373, + "grad_norm": 0.18499814256496003, + "learning_rate": 6.174006444683137e-06, + "loss": 0.0007, + "step": 6900 + }, + { + "epoch": 0.4447380292582329, + "grad_norm": 0.0012860809106687132, + "learning_rate": 6.173290368779091e-06, + "loss": 0.0, + "step": 6901 + }, + { + "epoch": 0.4448024747051621, + "grad_norm": 0.011116800170123724, + "learning_rate": 6.1725742928750455e-06, + "loss": 0.0, + "step": 6902 + }, + { + "epoch": 0.4448669201520912, + "grad_norm": 0.0315108827698995, + "learning_rate": 6.171858216971e-06, + "loss": 0.0002, + "step": 6903 + }, + { + "epoch": 0.4449313655990204, + "grad_norm": 0.00132297479172432, + "learning_rate": 6.171142141066953e-06, + "loss": 0.0, + "step": 6904 + }, + { + "epoch": 0.4449958110459496, + "grad_norm": 0.06148439258298901, + "learning_rate": 6.1704260651629076e-06, + "loss": 0.0002, + "step": 6905 + }, + { + "epoch": 0.4450602564928788, + "grad_norm": 0.00796708085715272, + "learning_rate": 6.169709989258862e-06, + "loss": 0.0, + "step": 6906 + }, + { + "epoch": 0.44512470193980797, + "grad_norm": 0.21879294518851708, + "learning_rate": 6.168993913354816e-06, + "loss": 0.0004, + "step": 6907 + }, + { + "epoch": 0.44518914738673715, + "grad_norm": 6.952659856934143e-05, + "learning_rate": 6.1682778374507705e-06, + "loss": 0.0, + "step": 6908 + }, + { + "epoch": 0.4452535928336663, + "grad_norm": 0.0029485427620246254, + "learning_rate": 6.167561761546724e-06, + "loss": 0.0, + "step": 6909 + }, + { + "epoch": 0.44531803828059546, + "grad_norm": 0.00095567100834392, + "learning_rate": 6.166845685642678e-06, + "loss": 0.0, + "step": 6910 + }, + { + "epoch": 0.44538248372752465, + "grad_norm": 0.00023555212651686218, + "learning_rate": 6.1661296097386325e-06, + "loss": 0.0, + "step": 6911 + }, + { + "epoch": 0.44544692917445383, + "grad_norm": 0.003228572952052681, + "learning_rate": 6.165413533834587e-06, + "loss": 0.0, + "step": 6912 + }, + { + "epoch": 0.445511374621383, + "grad_norm": 0.008805185668932146, + "learning_rate": 6.164697457930541e-06, + "loss": 0.0, + "step": 6913 + }, + { + "epoch": 0.4455758200683122, + "grad_norm": 0.00209328623167358, + "learning_rate": 6.1639813820264945e-06, + "loss": 0.0, + "step": 6914 + }, + { + "epoch": 0.44564026551524133, + "grad_norm": 4.6677382605160086e-05, + "learning_rate": 6.163265306122449e-06, + "loss": 0.0, + "step": 6915 + }, + { + "epoch": 0.4457047109621705, + "grad_norm": 6.16949104243802e-05, + "learning_rate": 6.162549230218403e-06, + "loss": 0.0, + "step": 6916 + }, + { + "epoch": 0.4457691564090997, + "grad_norm": 0.034629646127696145, + "learning_rate": 6.161833154314358e-06, + "loss": 0.0001, + "step": 6917 + }, + { + "epoch": 0.4458336018560289, + "grad_norm": 0.0003335354281628114, + "learning_rate": 6.161117078410313e-06, + "loss": 0.0, + "step": 6918 + }, + { + "epoch": 0.44589804730295807, + "grad_norm": 0.0004405627311754201, + "learning_rate": 6.160401002506267e-06, + "loss": 0.0, + "step": 6919 + }, + { + "epoch": 0.4459624927498872, + "grad_norm": 0.0006141990923472445, + "learning_rate": 6.15968492660222e-06, + "loss": 0.0, + "step": 6920 + }, + { + "epoch": 0.4460269381968164, + "grad_norm": 0.004484797434344207, + "learning_rate": 6.158968850698175e-06, + "loss": 0.0001, + "step": 6921 + }, + { + "epoch": 0.44609138364374556, + "grad_norm": 0.0006767052006918171, + "learning_rate": 6.158252774794129e-06, + "loss": 0.0, + "step": 6922 + }, + { + "epoch": 0.44615582909067475, + "grad_norm": 0.0006632304789780592, + "learning_rate": 6.157536698890083e-06, + "loss": 0.0, + "step": 6923 + }, + { + "epoch": 0.44622027453760393, + "grad_norm": 0.6850470870255448, + "learning_rate": 6.1568206229860375e-06, + "loss": 0.002, + "step": 6924 + }, + { + "epoch": 0.4462847199845331, + "grad_norm": 0.01536181478069005, + "learning_rate": 6.156104547081991e-06, + "loss": 0.0, + "step": 6925 + }, + { + "epoch": 0.44634916543146225, + "grad_norm": 0.000819613932033979, + "learning_rate": 6.155388471177945e-06, + "loss": 0.0, + "step": 6926 + }, + { + "epoch": 0.44641361087839143, + "grad_norm": 0.0013558397768012507, + "learning_rate": 6.1546723952739e-06, + "loss": 0.0, + "step": 6927 + }, + { + "epoch": 0.4464780563253206, + "grad_norm": 0.00011668090304934545, + "learning_rate": 6.153956319369854e-06, + "loss": 0.0, + "step": 6928 + }, + { + "epoch": 0.4465425017722498, + "grad_norm": 0.02503661579297557, + "learning_rate": 6.153240243465808e-06, + "loss": 0.0002, + "step": 6929 + }, + { + "epoch": 0.446606947219179, + "grad_norm": 0.003287645970191306, + "learning_rate": 6.152524167561762e-06, + "loss": 0.0, + "step": 6930 + }, + { + "epoch": 0.4466713926661081, + "grad_norm": 0.003287645970191306, + "learning_rate": 6.152524167561762e-06, + "loss": 0.0059, + "step": 6931 + }, + { + "epoch": 0.4467358381130373, + "grad_norm": 0.0013659329784108567, + "learning_rate": 6.151808091657716e-06, + "loss": 0.0, + "step": 6932 + }, + { + "epoch": 0.4468002835599665, + "grad_norm": 0.02730988103668324, + "learning_rate": 6.15109201575367e-06, + "loss": 0.0002, + "step": 6933 + }, + { + "epoch": 0.44686472900689567, + "grad_norm": 0.0002492699167945417, + "learning_rate": 6.1503759398496245e-06, + "loss": 0.0, + "step": 6934 + }, + { + "epoch": 0.44692917445382485, + "grad_norm": 0.0004506669312678534, + "learning_rate": 6.149659863945579e-06, + "loss": 0.0, + "step": 6935 + }, + { + "epoch": 0.44699361990075404, + "grad_norm": 0.8233311651435545, + "learning_rate": 6.148943788041532e-06, + "loss": 0.0042, + "step": 6936 + }, + { + "epoch": 0.44705806534768316, + "grad_norm": 0.00037711894076924535, + "learning_rate": 6.148227712137487e-06, + "loss": 0.0, + "step": 6937 + }, + { + "epoch": 0.44712251079461235, + "grad_norm": 0.022929007680163573, + "learning_rate": 6.147511636233441e-06, + "loss": 0.0001, + "step": 6938 + }, + { + "epoch": 0.44718695624154153, + "grad_norm": 0.03777062891841532, + "learning_rate": 6.146795560329395e-06, + "loss": 0.0001, + "step": 6939 + }, + { + "epoch": 0.4472514016884707, + "grad_norm": 0.1337892295669292, + "learning_rate": 6.1460794844253495e-06, + "loss": 0.0004, + "step": 6940 + }, + { + "epoch": 0.4473158471353999, + "grad_norm": 0.024741520669856795, + "learning_rate": 6.145363408521305e-06, + "loss": 0.0001, + "step": 6941 + }, + { + "epoch": 0.44738029258232903, + "grad_norm": 0.006022601529771838, + "learning_rate": 6.144647332617258e-06, + "loss": 0.0, + "step": 6942 + }, + { + "epoch": 0.4474447380292582, + "grad_norm": 0.03185848800646121, + "learning_rate": 6.143931256713212e-06, + "loss": 0.0003, + "step": 6943 + }, + { + "epoch": 0.4475091834761874, + "grad_norm": 0.04492633735220352, + "learning_rate": 6.143215180809167e-06, + "loss": 0.0005, + "step": 6944 + }, + { + "epoch": 0.4475736289231166, + "grad_norm": 0.0009602092421418444, + "learning_rate": 6.142499104905121e-06, + "loss": 0.0, + "step": 6945 + }, + { + "epoch": 0.44763807437004577, + "grad_norm": 0.00023928778128441458, + "learning_rate": 6.141783029001075e-06, + "loss": 0.0, + "step": 6946 + }, + { + "epoch": 0.44770251981697495, + "grad_norm": 0.09243969869578006, + "learning_rate": 6.141066953097029e-06, + "loss": 0.0001, + "step": 6947 + }, + { + "epoch": 0.4477669652639041, + "grad_norm": 0.020509047130477586, + "learning_rate": 6.140350877192983e-06, + "loss": 0.0002, + "step": 6948 + }, + { + "epoch": 0.44783141071083327, + "grad_norm": 0.5742998130825934, + "learning_rate": 6.139634801288937e-06, + "loss": 0.0026, + "step": 6949 + }, + { + "epoch": 0.44789585615776245, + "grad_norm": 0.0006601064763192048, + "learning_rate": 6.138918725384892e-06, + "loss": 0.0, + "step": 6950 + }, + { + "epoch": 0.44796030160469164, + "grad_norm": 0.4013238219805938, + "learning_rate": 6.138202649480846e-06, + "loss": 0.0024, + "step": 6951 + }, + { + "epoch": 0.4480247470516208, + "grad_norm": 0.19748266008165033, + "learning_rate": 6.137486573576799e-06, + "loss": 0.0003, + "step": 6952 + }, + { + "epoch": 0.44808919249855, + "grad_norm": 0.005614501578779723, + "learning_rate": 6.136770497672754e-06, + "loss": 0.0, + "step": 6953 + }, + { + "epoch": 0.44815363794547913, + "grad_norm": 0.0018398338943028784, + "learning_rate": 6.136054421768708e-06, + "loss": 0.0, + "step": 6954 + }, + { + "epoch": 0.4482180833924083, + "grad_norm": 0.006431620033200353, + "learning_rate": 6.135338345864662e-06, + "loss": 0.0, + "step": 6955 + }, + { + "epoch": 0.4482825288393375, + "grad_norm": 5.3012932827666375e-05, + "learning_rate": 6.1346222699606166e-06, + "loss": 0.0, + "step": 6956 + }, + { + "epoch": 0.4483469742862667, + "grad_norm": 0.0054546841273027434, + "learning_rate": 6.13390619405657e-06, + "loss": 0.0, + "step": 6957 + }, + { + "epoch": 0.44841141973319587, + "grad_norm": 0.008955214094707408, + "learning_rate": 6.133190118152524e-06, + "loss": 0.0, + "step": 6958 + }, + { + "epoch": 0.448475865180125, + "grad_norm": 0.012356203690497503, + "learning_rate": 6.132474042248479e-06, + "loss": 0.0, + "step": 6959 + }, + { + "epoch": 0.4485403106270542, + "grad_norm": 0.38077338788568676, + "learning_rate": 6.131757966344433e-06, + "loss": 0.0019, + "step": 6960 + }, + { + "epoch": 0.44860475607398337, + "grad_norm": 0.3053363835218518, + "learning_rate": 6.131041890440386e-06, + "loss": 0.0017, + "step": 6961 + }, + { + "epoch": 0.44866920152091255, + "grad_norm": 0.0024220880437371517, + "learning_rate": 6.130325814536341e-06, + "loss": 0.0, + "step": 6962 + }, + { + "epoch": 0.44873364696784174, + "grad_norm": 0.0015772679082248853, + "learning_rate": 6.129609738632295e-06, + "loss": 0.0, + "step": 6963 + }, + { + "epoch": 0.4487980924147709, + "grad_norm": 0.00033190575499703344, + "learning_rate": 6.128893662728249e-06, + "loss": 0.0, + "step": 6964 + }, + { + "epoch": 0.44886253786170005, + "grad_norm": 0.00012355856685873574, + "learning_rate": 6.128177586824204e-06, + "loss": 0.0, + "step": 6965 + }, + { + "epoch": 0.44892698330862923, + "grad_norm": 0.0008402414323257852, + "learning_rate": 6.127461510920159e-06, + "loss": 0.0, + "step": 6966 + }, + { + "epoch": 0.4489914287555584, + "grad_norm": 0.3629500817741936, + "learning_rate": 6.126745435016113e-06, + "loss": 0.0032, + "step": 6967 + }, + { + "epoch": 0.4490558742024876, + "grad_norm": 2.327572136077638, + "learning_rate": 6.1260293591120665e-06, + "loss": 0.009, + "step": 6968 + }, + { + "epoch": 0.4491203196494168, + "grad_norm": 0.02112311101313786, + "learning_rate": 6.125313283208021e-06, + "loss": 0.0002, + "step": 6969 + }, + { + "epoch": 0.4491847650963459, + "grad_norm": 0.014130007366206468, + "learning_rate": 6.124597207303975e-06, + "loss": 0.0, + "step": 6970 + }, + { + "epoch": 0.4492492105432751, + "grad_norm": 0.0053104441258905654, + "learning_rate": 6.123881131399929e-06, + "loss": 0.0001, + "step": 6971 + }, + { + "epoch": 0.4493136559902043, + "grad_norm": 0.10688876165916378, + "learning_rate": 6.123165055495884e-06, + "loss": 0.0002, + "step": 6972 + }, + { + "epoch": 0.44937810143713347, + "grad_norm": 0.004312108841936607, + "learning_rate": 6.122448979591837e-06, + "loss": 0.0, + "step": 6973 + }, + { + "epoch": 0.44944254688406265, + "grad_norm": 0.0020536256520046975, + "learning_rate": 6.121732903687791e-06, + "loss": 0.0, + "step": 6974 + }, + { + "epoch": 0.44950699233099184, + "grad_norm": 0.4778005434065899, + "learning_rate": 6.121016827783746e-06, + "loss": 0.0004, + "step": 6975 + }, + { + "epoch": 0.44957143777792097, + "grad_norm": 0.3517645890823827, + "learning_rate": 6.1203007518797e-06, + "loss": 0.0019, + "step": 6976 + }, + { + "epoch": 0.44963588322485015, + "grad_norm": 0.00961769649574364, + "learning_rate": 6.1195846759756534e-06, + "loss": 0.0, + "step": 6977 + }, + { + "epoch": 0.44970032867177934, + "grad_norm": 0.02834457866455974, + "learning_rate": 6.118868600071608e-06, + "loss": 0.0, + "step": 6978 + }, + { + "epoch": 0.4497647741187085, + "grad_norm": 0.3496750604736784, + "learning_rate": 6.118152524167562e-06, + "loss": 0.0005, + "step": 6979 + }, + { + "epoch": 0.4498292195656377, + "grad_norm": 0.5185237666966998, + "learning_rate": 6.117436448263516e-06, + "loss": 0.0009, + "step": 6980 + }, + { + "epoch": 0.44989366501256683, + "grad_norm": 0.0019981095150621467, + "learning_rate": 6.116720372359471e-06, + "loss": 0.0, + "step": 6981 + }, + { + "epoch": 0.449958110459496, + "grad_norm": 0.001970621272454851, + "learning_rate": 6.116004296455424e-06, + "loss": 0.0, + "step": 6982 + }, + { + "epoch": 0.4500225559064252, + "grad_norm": 0.14547553760578696, + "learning_rate": 6.115288220551378e-06, + "loss": 0.0003, + "step": 6983 + }, + { + "epoch": 0.4500870013533544, + "grad_norm": 0.04114063122287838, + "learning_rate": 6.114572144647333e-06, + "loss": 0.0, + "step": 6984 + }, + { + "epoch": 0.4501514468002836, + "grad_norm": 0.007233528345344776, + "learning_rate": 6.113856068743287e-06, + "loss": 0.0, + "step": 6985 + }, + { + "epoch": 0.45021589224721276, + "grad_norm": 0.0026296207033014393, + "learning_rate": 6.113139992839241e-06, + "loss": 0.0, + "step": 6986 + }, + { + "epoch": 0.4502803376941419, + "grad_norm": 0.03319251543124218, + "learning_rate": 6.112423916935195e-06, + "loss": 0.0002, + "step": 6987 + }, + { + "epoch": 0.45034478314107107, + "grad_norm": 0.0024029586808359893, + "learning_rate": 6.111707841031151e-06, + "loss": 0.0, + "step": 6988 + }, + { + "epoch": 0.45040922858800025, + "grad_norm": 0.001096244893310349, + "learning_rate": 6.110991765127104e-06, + "loss": 0.0, + "step": 6989 + }, + { + "epoch": 0.45047367403492944, + "grad_norm": 0.47845158051066183, + "learning_rate": 6.1102756892230585e-06, + "loss": 0.0026, + "step": 6990 + }, + { + "epoch": 0.4505381194818586, + "grad_norm": 0.005456332068239849, + "learning_rate": 6.109559613319013e-06, + "loss": 0.0001, + "step": 6991 + }, + { + "epoch": 0.4506025649287878, + "grad_norm": 0.023044872550530067, + "learning_rate": 6.108843537414967e-06, + "loss": 0.0, + "step": 6992 + }, + { + "epoch": 0.45066701037571694, + "grad_norm": 9.10151264291464e-05, + "learning_rate": 6.1081274615109205e-06, + "loss": 0.0, + "step": 6993 + }, + { + "epoch": 0.4507314558226461, + "grad_norm": 0.0011771712542816663, + "learning_rate": 6.107411385606875e-06, + "loss": 0.0, + "step": 6994 + }, + { + "epoch": 0.4507959012695753, + "grad_norm": 0.004017593902929894, + "learning_rate": 6.106695309702829e-06, + "loss": 0.0, + "step": 6995 + }, + { + "epoch": 0.4508603467165045, + "grad_norm": 0.16323827150517303, + "learning_rate": 6.1059792337987834e-06, + "loss": 0.0008, + "step": 6996 + }, + { + "epoch": 0.4509247921634337, + "grad_norm": 0.027595080223723742, + "learning_rate": 6.105263157894738e-06, + "loss": 0.0, + "step": 6997 + }, + { + "epoch": 0.4509892376103628, + "grad_norm": 0.0021043324610431602, + "learning_rate": 6.104547081990691e-06, + "loss": 0.0, + "step": 6998 + }, + { + "epoch": 0.451053683057292, + "grad_norm": 0.03236437900145626, + "learning_rate": 6.1038310060866455e-06, + "loss": 0.0001, + "step": 6999 + }, + { + "epoch": 0.45111812850422117, + "grad_norm": 0.4955685155462287, + "learning_rate": 6.1031149301826e-06, + "loss": 0.0036, + "step": 7000 + }, + { + "epoch": 0.45118257395115036, + "grad_norm": 0.0005264324608317756, + "learning_rate": 6.102398854278554e-06, + "loss": 0.0, + "step": 7001 + }, + { + "epoch": 0.45124701939807954, + "grad_norm": 0.03288913141526833, + "learning_rate": 6.101682778374508e-06, + "loss": 0.0001, + "step": 7002 + }, + { + "epoch": 0.4513114648450087, + "grad_norm": 0.001246752781181037, + "learning_rate": 6.100966702470462e-06, + "loss": 0.0015, + "step": 7003 + }, + { + "epoch": 0.45137591029193785, + "grad_norm": 0.0005902236479268267, + "learning_rate": 6.100250626566416e-06, + "loss": 0.0, + "step": 7004 + }, + { + "epoch": 0.45144035573886704, + "grad_norm": 0.2308556251414973, + "learning_rate": 6.09953455066237e-06, + "loss": 0.0011, + "step": 7005 + }, + { + "epoch": 0.4515048011857962, + "grad_norm": 0.0017562515855380186, + "learning_rate": 6.098818474758325e-06, + "loss": 0.0, + "step": 7006 + }, + { + "epoch": 0.4515692466327254, + "grad_norm": 0.01532581818107362, + "learning_rate": 6.098102398854279e-06, + "loss": 0.0, + "step": 7007 + }, + { + "epoch": 0.4516336920796546, + "grad_norm": 0.08927833375484105, + "learning_rate": 6.0973863229502325e-06, + "loss": 0.0003, + "step": 7008 + }, + { + "epoch": 0.4516981375265837, + "grad_norm": 0.0284605009468016, + "learning_rate": 6.096670247046187e-06, + "loss": 0.0, + "step": 7009 + }, + { + "epoch": 0.4517625829735129, + "grad_norm": 0.0016943289754871211, + "learning_rate": 6.095954171142141e-06, + "loss": 0.0, + "step": 7010 + }, + { + "epoch": 0.4518270284204421, + "grad_norm": 0.02218787044607103, + "learning_rate": 6.095238095238096e-06, + "loss": 0.0, + "step": 7011 + }, + { + "epoch": 0.4518914738673713, + "grad_norm": 0.014370586866855587, + "learning_rate": 6.0945220193340505e-06, + "loss": 0.0001, + "step": 7012 + }, + { + "epoch": 0.45195591931430046, + "grad_norm": 0.0029353212413927815, + "learning_rate": 6.093805943430005e-06, + "loss": 0.0, + "step": 7013 + }, + { + "epoch": 0.45202036476122964, + "grad_norm": 0.009811081740184684, + "learning_rate": 6.093089867525958e-06, + "loss": 0.0, + "step": 7014 + }, + { + "epoch": 0.45208481020815877, + "grad_norm": 0.05393490263073934, + "learning_rate": 6.0923737916219126e-06, + "loss": 0.0002, + "step": 7015 + }, + { + "epoch": 0.45214925565508796, + "grad_norm": 2.5784644625886863, + "learning_rate": 6.091657715717867e-06, + "loss": 0.0189, + "step": 7016 + }, + { + "epoch": 0.45221370110201714, + "grad_norm": 0.0029356539960954304, + "learning_rate": 6.090941639813821e-06, + "loss": 0.0, + "step": 7017 + }, + { + "epoch": 0.4522781465489463, + "grad_norm": 0.00022978794218516798, + "learning_rate": 6.0902255639097755e-06, + "loss": 0.0, + "step": 7018 + }, + { + "epoch": 0.4523425919958755, + "grad_norm": 0.029644232123119035, + "learning_rate": 6.089509488005729e-06, + "loss": 0.0002, + "step": 7019 + }, + { + "epoch": 0.45240703744280464, + "grad_norm": 0.0034083100602888576, + "learning_rate": 6.088793412101683e-06, + "loss": 0.0, + "step": 7020 + }, + { + "epoch": 0.4524714828897338, + "grad_norm": 0.004447110035412839, + "learning_rate": 6.0880773361976375e-06, + "loss": 0.0, + "step": 7021 + }, + { + "epoch": 0.452535928336663, + "grad_norm": 0.13061681728651678, + "learning_rate": 6.087361260293592e-06, + "loss": 0.0003, + "step": 7022 + }, + { + "epoch": 0.4526003737835922, + "grad_norm": 0.03177697500649157, + "learning_rate": 6.086645184389546e-06, + "loss": 0.0, + "step": 7023 + }, + { + "epoch": 0.4526648192305214, + "grad_norm": 0.06309334915132048, + "learning_rate": 6.0859291084854996e-06, + "loss": 0.0003, + "step": 7024 + }, + { + "epoch": 0.45272926467745056, + "grad_norm": 0.04666786572863354, + "learning_rate": 6.085213032581454e-06, + "loss": 0.0001, + "step": 7025 + }, + { + "epoch": 0.4527937101243797, + "grad_norm": 0.030821576426459607, + "learning_rate": 6.084496956677408e-06, + "loss": 0.0001, + "step": 7026 + }, + { + "epoch": 0.4528581555713089, + "grad_norm": 0.010487922371730633, + "learning_rate": 6.0837808807733625e-06, + "loss": 0.0, + "step": 7027 + }, + { + "epoch": 0.45292260101823806, + "grad_norm": 0.2080304755817776, + "learning_rate": 6.083064804869317e-06, + "loss": 0.0021, + "step": 7028 + }, + { + "epoch": 0.45298704646516724, + "grad_norm": 0.009692660458068866, + "learning_rate": 6.08234872896527e-06, + "loss": 0.0016, + "step": 7029 + }, + { + "epoch": 0.4530514919120964, + "grad_norm": 0.004954783683027185, + "learning_rate": 6.0816326530612245e-06, + "loss": 0.0, + "step": 7030 + }, + { + "epoch": 0.4531159373590256, + "grad_norm": 0.003751176911784929, + "learning_rate": 6.080916577157179e-06, + "loss": 0.0, + "step": 7031 + }, + { + "epoch": 0.45318038280595474, + "grad_norm": 0.04467092785293174, + "learning_rate": 6.080200501253133e-06, + "loss": 0.0004, + "step": 7032 + }, + { + "epoch": 0.4532448282528839, + "grad_norm": 0.20824716792808376, + "learning_rate": 6.079484425349087e-06, + "loss": 0.0005, + "step": 7033 + }, + { + "epoch": 0.4533092736998131, + "grad_norm": 7.700961625131927e-05, + "learning_rate": 6.078768349445041e-06, + "loss": 0.0, + "step": 7034 + }, + { + "epoch": 0.4533737191467423, + "grad_norm": 0.03432253720668772, + "learning_rate": 6.078052273540996e-06, + "loss": 0.0, + "step": 7035 + }, + { + "epoch": 0.4534381645936715, + "grad_norm": 0.00040768886861480716, + "learning_rate": 6.07733619763695e-06, + "loss": 0.0, + "step": 7036 + }, + { + "epoch": 0.4535026100406006, + "grad_norm": 0.009148011904105437, + "learning_rate": 6.076620121732905e-06, + "loss": 0.0, + "step": 7037 + }, + { + "epoch": 0.4535670554875298, + "grad_norm": 0.42105234257474816, + "learning_rate": 6.075904045828859e-06, + "loss": 0.0032, + "step": 7038 + }, + { + "epoch": 0.453631500934459, + "grad_norm": 0.003082251194585121, + "learning_rate": 6.075187969924813e-06, + "loss": 0.0, + "step": 7039 + }, + { + "epoch": 0.45369594638138816, + "grad_norm": 0.0007968014546186246, + "learning_rate": 6.074471894020767e-06, + "loss": 0.0, + "step": 7040 + }, + { + "epoch": 0.45376039182831734, + "grad_norm": 0.0013500992183111478, + "learning_rate": 6.073755818116721e-06, + "loss": 0.0, + "step": 7041 + }, + { + "epoch": 0.45382483727524653, + "grad_norm": 0.020758522620687073, + "learning_rate": 6.073039742212675e-06, + "loss": 0.0001, + "step": 7042 + }, + { + "epoch": 0.45388928272217566, + "grad_norm": 0.003780350314592039, + "learning_rate": 6.0723236663086295e-06, + "loss": 0.0, + "step": 7043 + }, + { + "epoch": 0.45395372816910484, + "grad_norm": 0.0025714049356480063, + "learning_rate": 6.071607590404584e-06, + "loss": 0.0, + "step": 7044 + }, + { + "epoch": 0.454018173616034, + "grad_norm": 0.000688096630702408, + "learning_rate": 6.070891514500537e-06, + "loss": 0.0, + "step": 7045 + }, + { + "epoch": 0.4540826190629632, + "grad_norm": 0.0006434500711874743, + "learning_rate": 6.070175438596492e-06, + "loss": 0.0, + "step": 7046 + }, + { + "epoch": 0.4541470645098924, + "grad_norm": 0.35732653654378393, + "learning_rate": 6.069459362692446e-06, + "loss": 0.0023, + "step": 7047 + }, + { + "epoch": 0.4542115099568215, + "grad_norm": 0.01280457731163026, + "learning_rate": 6.0687432867884e-06, + "loss": 0.0, + "step": 7048 + }, + { + "epoch": 0.4542759554037507, + "grad_norm": 0.1197270360691481, + "learning_rate": 6.0680272108843545e-06, + "loss": 0.0004, + "step": 7049 + }, + { + "epoch": 0.4543404008506799, + "grad_norm": 0.0010014052039343515, + "learning_rate": 6.067311134980308e-06, + "loss": 0.0, + "step": 7050 + }, + { + "epoch": 0.4544048462976091, + "grad_norm": 0.03635723904725859, + "learning_rate": 6.066595059076262e-06, + "loss": 0.0002, + "step": 7051 + }, + { + "epoch": 0.45446929174453826, + "grad_norm": 0.00575358947033153, + "learning_rate": 6.0658789831722165e-06, + "loss": 0.0, + "step": 7052 + }, + { + "epoch": 0.45453373719146745, + "grad_norm": 0.013448601461061216, + "learning_rate": 6.065162907268171e-06, + "loss": 0.0001, + "step": 7053 + }, + { + "epoch": 0.4545981826383966, + "grad_norm": 0.00041888456590139113, + "learning_rate": 6.064446831364125e-06, + "loss": 0.0, + "step": 7054 + }, + { + "epoch": 0.45466262808532576, + "grad_norm": 0.002986199622612587, + "learning_rate": 6.063730755460079e-06, + "loss": 0.0, + "step": 7055 + }, + { + "epoch": 0.45472707353225494, + "grad_norm": 0.0008021949438291668, + "learning_rate": 6.063014679556033e-06, + "loss": 0.0, + "step": 7056 + }, + { + "epoch": 0.45479151897918413, + "grad_norm": 0.3501131487920616, + "learning_rate": 6.062298603651987e-06, + "loss": 0.0012, + "step": 7057 + }, + { + "epoch": 0.4548559644261133, + "grad_norm": 0.001603722886898084, + "learning_rate": 6.061582527747942e-06, + "loss": 0.0, + "step": 7058 + }, + { + "epoch": 0.4549204098730425, + "grad_norm": 0.1883643315255252, + "learning_rate": 6.060866451843897e-06, + "loss": 0.0003, + "step": 7059 + }, + { + "epoch": 0.4549848553199716, + "grad_norm": 0.0004600154112903025, + "learning_rate": 6.060150375939851e-06, + "loss": 0.0, + "step": 7060 + }, + { + "epoch": 0.4550493007669008, + "grad_norm": 0.008173502769027983, + "learning_rate": 6.059434300035804e-06, + "loss": 0.0, + "step": 7061 + }, + { + "epoch": 0.45511374621383, + "grad_norm": 0.02577972412109375, + "learning_rate": 6.058718224131759e-06, + "loss": 0.0001, + "step": 7062 + }, + { + "epoch": 0.4551781916607592, + "grad_norm": 0.010985050193318776, + "learning_rate": 6.058002148227713e-06, + "loss": 0.0, + "step": 7063 + }, + { + "epoch": 0.45524263710768836, + "grad_norm": 0.00025358798746525133, + "learning_rate": 6.057286072323667e-06, + "loss": 0.0, + "step": 7064 + }, + { + "epoch": 0.4553070825546175, + "grad_norm": 0.001464238886327982, + "learning_rate": 6.0565699964196216e-06, + "loss": 0.0, + "step": 7065 + }, + { + "epoch": 0.4553715280015467, + "grad_norm": 0.018947541797622022, + "learning_rate": 6.055853920515575e-06, + "loss": 0.0001, + "step": 7066 + }, + { + "epoch": 0.45543597344847586, + "grad_norm": 0.0005999839663420513, + "learning_rate": 6.055137844611529e-06, + "loss": 0.0, + "step": 7067 + }, + { + "epoch": 0.45550041889540505, + "grad_norm": 0.1648804562727237, + "learning_rate": 6.054421768707484e-06, + "loss": 0.0012, + "step": 7068 + }, + { + "epoch": 0.45556486434233423, + "grad_norm": 0.5202259495779435, + "learning_rate": 6.053705692803438e-06, + "loss": 0.0021, + "step": 7069 + }, + { + "epoch": 0.4556293097892634, + "grad_norm": 0.003008793348343684, + "learning_rate": 6.052989616899392e-06, + "loss": 0.0, + "step": 7070 + }, + { + "epoch": 0.45569375523619254, + "grad_norm": 0.07646857151479122, + "learning_rate": 6.052273540995346e-06, + "loss": 0.0002, + "step": 7071 + }, + { + "epoch": 0.45575820068312173, + "grad_norm": 0.2913056023367716, + "learning_rate": 6.0515574650913e-06, + "loss": 0.002, + "step": 7072 + }, + { + "epoch": 0.4558226461300509, + "grad_norm": 0.09931329124014251, + "learning_rate": 6.050841389187254e-06, + "loss": 0.0005, + "step": 7073 + }, + { + "epoch": 0.4558870915769801, + "grad_norm": 0.2881209788590056, + "learning_rate": 6.0501253132832086e-06, + "loss": 0.0089, + "step": 7074 + }, + { + "epoch": 0.4559515370239093, + "grad_norm": 0.00044719869471577915, + "learning_rate": 6.049409237379162e-06, + "loss": 0.0, + "step": 7075 + }, + { + "epoch": 0.4560159824708384, + "grad_norm": 0.011173389599324287, + "learning_rate": 6.048693161475116e-06, + "loss": 0.0, + "step": 7076 + }, + { + "epoch": 0.4560804279177676, + "grad_norm": 0.0010514280422994013, + "learning_rate": 6.047977085571071e-06, + "loss": 0.0, + "step": 7077 + }, + { + "epoch": 0.4561448733646968, + "grad_norm": 0.0006105020922317698, + "learning_rate": 6.047261009667025e-06, + "loss": 0.0, + "step": 7078 + }, + { + "epoch": 0.45620931881162596, + "grad_norm": 0.20810970980521848, + "learning_rate": 6.046544933762979e-06, + "loss": 0.0003, + "step": 7079 + }, + { + "epoch": 0.45627376425855515, + "grad_norm": 0.00014491230861546294, + "learning_rate": 6.045828857858933e-06, + "loss": 0.0, + "step": 7080 + }, + { + "epoch": 0.45633820970548433, + "grad_norm": 0.0008180527801640292, + "learning_rate": 6.045112781954889e-06, + "loss": 0.0, + "step": 7081 + }, + { + "epoch": 0.45640265515241346, + "grad_norm": 0.015151065614577604, + "learning_rate": 6.044396706050842e-06, + "loss": 0.0, + "step": 7082 + }, + { + "epoch": 0.45646710059934265, + "grad_norm": 0.00016913031952718266, + "learning_rate": 6.043680630146796e-06, + "loss": 0.0, + "step": 7083 + }, + { + "epoch": 0.45653154604627183, + "grad_norm": 0.0003290981387745927, + "learning_rate": 6.042964554242751e-06, + "loss": 0.0, + "step": 7084 + }, + { + "epoch": 0.456595991493201, + "grad_norm": 0.00033238180508284173, + "learning_rate": 6.042248478338705e-06, + "loss": 0.0, + "step": 7085 + }, + { + "epoch": 0.4566604369401302, + "grad_norm": 0.0005725779904502857, + "learning_rate": 6.041532402434659e-06, + "loss": 0.0, + "step": 7086 + }, + { + "epoch": 0.4567248823870593, + "grad_norm": 0.0006684580100672684, + "learning_rate": 6.040816326530613e-06, + "loss": 0.0, + "step": 7087 + }, + { + "epoch": 0.4567893278339885, + "grad_norm": 0.0015680347505695766, + "learning_rate": 6.040100250626567e-06, + "loss": 0.0, + "step": 7088 + }, + { + "epoch": 0.4568537732809177, + "grad_norm": 0.00016943457185629763, + "learning_rate": 6.039384174722521e-06, + "loss": 0.0, + "step": 7089 + }, + { + "epoch": 0.4569182187278469, + "grad_norm": 0.005080476489226899, + "learning_rate": 6.038668098818476e-06, + "loss": 0.0, + "step": 7090 + }, + { + "epoch": 0.45698266417477607, + "grad_norm": 0.011049966479080763, + "learning_rate": 6.037952022914429e-06, + "loss": 0.0002, + "step": 7091 + }, + { + "epoch": 0.45704710962170525, + "grad_norm": 0.0004842771125507965, + "learning_rate": 6.037235947010383e-06, + "loss": 0.0, + "step": 7092 + }, + { + "epoch": 0.4571115550686344, + "grad_norm": 3.9384781038392525e-05, + "learning_rate": 6.036519871106338e-06, + "loss": 0.0, + "step": 7093 + }, + { + "epoch": 0.45717600051556356, + "grad_norm": 0.005493382939640472, + "learning_rate": 6.035803795202292e-06, + "loss": 0.0, + "step": 7094 + }, + { + "epoch": 0.45724044596249275, + "grad_norm": 0.016864852974129232, + "learning_rate": 6.035087719298246e-06, + "loss": 0.0001, + "step": 7095 + }, + { + "epoch": 0.45730489140942193, + "grad_norm": 0.23987302508294103, + "learning_rate": 6.0343716433942e-06, + "loss": 0.0055, + "step": 7096 + }, + { + "epoch": 0.4573693368563511, + "grad_norm": 0.000661137041265991, + "learning_rate": 6.033655567490154e-06, + "loss": 0.0, + "step": 7097 + }, + { + "epoch": 0.4574337823032803, + "grad_norm": 0.1169914314041797, + "learning_rate": 6.032939491586108e-06, + "loss": 0.0018, + "step": 7098 + }, + { + "epoch": 0.45749822775020943, + "grad_norm": 0.09465142347291379, + "learning_rate": 6.032223415682063e-06, + "loss": 0.0002, + "step": 7099 + }, + { + "epoch": 0.4575626731971386, + "grad_norm": 0.0004188542693633139, + "learning_rate": 6.031507339778017e-06, + "loss": 0.0, + "step": 7100 + }, + { + "epoch": 0.4576271186440678, + "grad_norm": 0.0006338906192689149, + "learning_rate": 6.03079126387397e-06, + "loss": 0.0, + "step": 7101 + }, + { + "epoch": 0.457691564090997, + "grad_norm": 0.24523413098807104, + "learning_rate": 6.030075187969925e-06, + "loss": 0.0021, + "step": 7102 + }, + { + "epoch": 0.45775600953792617, + "grad_norm": 0.005114259183133491, + "learning_rate": 6.029359112065879e-06, + "loss": 0.0, + "step": 7103 + }, + { + "epoch": 0.4578204549848553, + "grad_norm": 0.00010116153271195064, + "learning_rate": 6.028643036161833e-06, + "loss": 0.0, + "step": 7104 + }, + { + "epoch": 0.4578849004317845, + "grad_norm": 0.0012903867681714094, + "learning_rate": 6.0279269602577884e-06, + "loss": 0.0, + "step": 7105 + }, + { + "epoch": 0.45794934587871367, + "grad_norm": 0.0012879498751937105, + "learning_rate": 6.027210884353743e-06, + "loss": 0.0, + "step": 7106 + }, + { + "epoch": 0.45801379132564285, + "grad_norm": 0.02153369947324202, + "learning_rate": 6.026494808449696e-06, + "loss": 0.0001, + "step": 7107 + }, + { + "epoch": 0.45807823677257203, + "grad_norm": 0.0064330967485958694, + "learning_rate": 6.0257787325456505e-06, + "loss": 0.0, + "step": 7108 + }, + { + "epoch": 0.4581426822195012, + "grad_norm": 0.00024972203715889577, + "learning_rate": 6.025062656641605e-06, + "loss": 0.0, + "step": 7109 + }, + { + "epoch": 0.45820712766643035, + "grad_norm": 0.0016818562335592555, + "learning_rate": 6.024346580737559e-06, + "loss": 0.0, + "step": 7110 + }, + { + "epoch": 0.45827157311335953, + "grad_norm": 0.0129347845643351, + "learning_rate": 6.023630504833513e-06, + "loss": 0.0, + "step": 7111 + }, + { + "epoch": 0.4583360185602887, + "grad_norm": 5.749007999088142e-05, + "learning_rate": 6.022914428929467e-06, + "loss": 0.0, + "step": 7112 + }, + { + "epoch": 0.4584004640072179, + "grad_norm": 0.0006387188346334157, + "learning_rate": 6.022198353025421e-06, + "loss": 0.0, + "step": 7113 + }, + { + "epoch": 0.4584649094541471, + "grad_norm": 0.00012755880001202847, + "learning_rate": 6.0214822771213754e-06, + "loss": 0.0, + "step": 7114 + }, + { + "epoch": 0.4585293549010762, + "grad_norm": 0.10691374328866679, + "learning_rate": 6.02076620121733e-06, + "loss": 0.0002, + "step": 7115 + }, + { + "epoch": 0.4585938003480054, + "grad_norm": 0.9480175376851292, + "learning_rate": 6.020050125313284e-06, + "loss": 0.0168, + "step": 7116 + }, + { + "epoch": 0.4586582457949346, + "grad_norm": 0.0006098464369457968, + "learning_rate": 6.0193340494092375e-06, + "loss": 0.0, + "step": 7117 + }, + { + "epoch": 0.45872269124186377, + "grad_norm": 0.0047673415025089256, + "learning_rate": 6.018617973505192e-06, + "loss": 0.0, + "step": 7118 + }, + { + "epoch": 0.45878713668879295, + "grad_norm": 0.01061895586109674, + "learning_rate": 6.017901897601146e-06, + "loss": 0.0001, + "step": 7119 + }, + { + "epoch": 0.45885158213572214, + "grad_norm": 0.00033589021288196004, + "learning_rate": 6.0171858216971e-06, + "loss": 0.0, + "step": 7120 + }, + { + "epoch": 0.45891602758265126, + "grad_norm": 0.0008578054963712787, + "learning_rate": 6.016469745793055e-06, + "loss": 0.0, + "step": 7121 + }, + { + "epoch": 0.45898047302958045, + "grad_norm": 0.07172960847940046, + "learning_rate": 6.015753669889008e-06, + "loss": 0.0001, + "step": 7122 + }, + { + "epoch": 0.45904491847650963, + "grad_norm": 0.0005887507011680352, + "learning_rate": 6.015037593984962e-06, + "loss": 0.0, + "step": 7123 + }, + { + "epoch": 0.4591093639234388, + "grad_norm": 0.002714727540164013, + "learning_rate": 6.014321518080917e-06, + "loss": 0.0, + "step": 7124 + }, + { + "epoch": 0.459173809370368, + "grad_norm": 0.004810577714593613, + "learning_rate": 6.013605442176871e-06, + "loss": 0.0, + "step": 7125 + }, + { + "epoch": 0.45923825481729713, + "grad_norm": 0.09211610013209857, + "learning_rate": 6.012889366272825e-06, + "loss": 0.0001, + "step": 7126 + }, + { + "epoch": 0.4593027002642263, + "grad_norm": 0.04762401536160834, + "learning_rate": 6.012173290368779e-06, + "loss": 0.0001, + "step": 7127 + }, + { + "epoch": 0.4593671457111555, + "grad_norm": 0.0002858773479675718, + "learning_rate": 6.011457214464734e-06, + "loss": 0.0, + "step": 7128 + }, + { + "epoch": 0.4594315911580847, + "grad_norm": 0.0007525722764745775, + "learning_rate": 6.010741138560688e-06, + "loss": 0.0, + "step": 7129 + }, + { + "epoch": 0.45949603660501387, + "grad_norm": 1.6808105972308205, + "learning_rate": 6.0100250626566425e-06, + "loss": 0.018, + "step": 7130 + }, + { + "epoch": 0.45956048205194305, + "grad_norm": 0.001529241299410118, + "learning_rate": 6.009308986752597e-06, + "loss": 0.0, + "step": 7131 + }, + { + "epoch": 0.4596249274988722, + "grad_norm": 0.0038925792922026225, + "learning_rate": 6.008592910848551e-06, + "loss": 0.0, + "step": 7132 + }, + { + "epoch": 0.45968937294580137, + "grad_norm": 0.010454839195876581, + "learning_rate": 6.0078768349445046e-06, + "loss": 0.0001, + "step": 7133 + }, + { + "epoch": 0.45975381839273055, + "grad_norm": 0.0007752854575114376, + "learning_rate": 6.007160759040459e-06, + "loss": 0.0, + "step": 7134 + }, + { + "epoch": 0.45981826383965974, + "grad_norm": 0.007434410935910509, + "learning_rate": 6.006444683136413e-06, + "loss": 0.0001, + "step": 7135 + }, + { + "epoch": 0.4598827092865889, + "grad_norm": 0.6166326950570551, + "learning_rate": 6.0057286072323675e-06, + "loss": 0.0026, + "step": 7136 + }, + { + "epoch": 0.4599471547335181, + "grad_norm": 0.0003005682636650433, + "learning_rate": 6.005012531328322e-06, + "loss": 0.0, + "step": 7137 + }, + { + "epoch": 0.46001160018044723, + "grad_norm": 0.00016686412122810733, + "learning_rate": 6.004296455424275e-06, + "loss": 0.0, + "step": 7138 + }, + { + "epoch": 0.4600760456273764, + "grad_norm": 0.0008105308466117006, + "learning_rate": 6.0035803795202295e-06, + "loss": 0.0, + "step": 7139 + }, + { + "epoch": 0.4601404910743056, + "grad_norm": 0.002631408133243594, + "learning_rate": 6.002864303616184e-06, + "loss": 0.0, + "step": 7140 + }, + { + "epoch": 0.4602049365212348, + "grad_norm": 0.00022067880593967695, + "learning_rate": 6.002148227712138e-06, + "loss": 0.0, + "step": 7141 + }, + { + "epoch": 0.46026938196816397, + "grad_norm": 0.001211258225825929, + "learning_rate": 6.001432151808092e-06, + "loss": 0.0, + "step": 7142 + }, + { + "epoch": 0.4603338274150931, + "grad_norm": 9.802932496178768e-05, + "learning_rate": 6.000716075904046e-06, + "loss": 0.0, + "step": 7143 + }, + { + "epoch": 0.4603982728620223, + "grad_norm": 0.26474249319163073, + "learning_rate": 6e-06, + "loss": 0.0026, + "step": 7144 + }, + { + "epoch": 0.46046271830895147, + "grad_norm": 1.265212050520982, + "learning_rate": 5.9992839240959545e-06, + "loss": 0.0032, + "step": 7145 + }, + { + "epoch": 0.46052716375588065, + "grad_norm": 0.004371471218572817, + "learning_rate": 5.998567848191909e-06, + "loss": 0.0, + "step": 7146 + }, + { + "epoch": 0.46059160920280984, + "grad_norm": 0.00043372477915415046, + "learning_rate": 5.997851772287863e-06, + "loss": 0.0, + "step": 7147 + }, + { + "epoch": 0.460656054649739, + "grad_norm": 0.008678506284997306, + "learning_rate": 5.9971356963838165e-06, + "loss": 0.0, + "step": 7148 + }, + { + "epoch": 0.46072050009666815, + "grad_norm": 0.0003419930026615307, + "learning_rate": 5.996419620479771e-06, + "loss": 0.0, + "step": 7149 + }, + { + "epoch": 0.46078494554359734, + "grad_norm": 0.002855349159979639, + "learning_rate": 5.995703544575725e-06, + "loss": 0.0, + "step": 7150 + }, + { + "epoch": 0.4608493909905265, + "grad_norm": 0.003908428454548936, + "learning_rate": 5.994987468671679e-06, + "loss": 0.0, + "step": 7151 + }, + { + "epoch": 0.4609138364374557, + "grad_norm": 0.0010189799207077413, + "learning_rate": 5.9942713927676345e-06, + "loss": 0.0, + "step": 7152 + }, + { + "epoch": 0.4609782818843849, + "grad_norm": 0.02421507927741126, + "learning_rate": 5.993555316863589e-06, + "loss": 0.0001, + "step": 7153 + }, + { + "epoch": 0.461042727331314, + "grad_norm": 8.896939169050152e-05, + "learning_rate": 5.992839240959542e-06, + "loss": 0.0, + "step": 7154 + }, + { + "epoch": 0.4611071727782432, + "grad_norm": 0.029285788425192732, + "learning_rate": 5.992123165055497e-06, + "loss": 0.0001, + "step": 7155 + }, + { + "epoch": 0.4611716182251724, + "grad_norm": 0.034353822516338714, + "learning_rate": 5.991407089151451e-06, + "loss": 0.0, + "step": 7156 + }, + { + "epoch": 0.46123606367210157, + "grad_norm": 0.06933937937293207, + "learning_rate": 5.990691013247405e-06, + "loss": 0.0001, + "step": 7157 + }, + { + "epoch": 0.46130050911903075, + "grad_norm": 0.002533742700116563, + "learning_rate": 5.9899749373433595e-06, + "loss": 0.0, + "step": 7158 + }, + { + "epoch": 0.46136495456595994, + "grad_norm": 0.03438283799772264, + "learning_rate": 5.989258861439313e-06, + "loss": 0.0002, + "step": 7159 + }, + { + "epoch": 0.46142940001288907, + "grad_norm": 0.009976091964890367, + "learning_rate": 5.988542785535267e-06, + "loss": 0.0, + "step": 7160 + }, + { + "epoch": 0.46149384545981825, + "grad_norm": 0.013725964591191813, + "learning_rate": 5.9878267096312215e-06, + "loss": 0.0, + "step": 7161 + }, + { + "epoch": 0.46155829090674744, + "grad_norm": 0.022082188905863047, + "learning_rate": 5.987110633727176e-06, + "loss": 0.0001, + "step": 7162 + }, + { + "epoch": 0.4616227363536766, + "grad_norm": 0.01672156348965313, + "learning_rate": 5.98639455782313e-06, + "loss": 0.0, + "step": 7163 + }, + { + "epoch": 0.4616871818006058, + "grad_norm": 0.007685841599548759, + "learning_rate": 5.985678481919084e-06, + "loss": 0.0, + "step": 7164 + }, + { + "epoch": 0.46175162724753493, + "grad_norm": 0.010648891229751439, + "learning_rate": 5.984962406015038e-06, + "loss": 0.0, + "step": 7165 + }, + { + "epoch": 0.4618160726944641, + "grad_norm": 0.08365923854229963, + "learning_rate": 5.984246330110992e-06, + "loss": 0.0017, + "step": 7166 + }, + { + "epoch": 0.4618805181413933, + "grad_norm": 0.08341798140164661, + "learning_rate": 5.9835302542069465e-06, + "loss": 0.0017, + "step": 7167 + }, + { + "epoch": 0.4619449635883225, + "grad_norm": 0.10906040554440197, + "learning_rate": 5.9828141783029e-06, + "loss": 0.0001, + "step": 7168 + }, + { + "epoch": 0.4620094090352517, + "grad_norm": 0.0016372742490571707, + "learning_rate": 5.982098102398854e-06, + "loss": 0.0, + "step": 7169 + }, + { + "epoch": 0.46207385448218086, + "grad_norm": 0.0005011299067149563, + "learning_rate": 5.9813820264948085e-06, + "loss": 0.0, + "step": 7170 + }, + { + "epoch": 0.46213829992911, + "grad_norm": 0.0173133371179596, + "learning_rate": 5.980665950590763e-06, + "loss": 0.0, + "step": 7171 + }, + { + "epoch": 0.46220274537603917, + "grad_norm": 0.0011703360208209618, + "learning_rate": 5.979949874686717e-06, + "loss": 0.0, + "step": 7172 + }, + { + "epoch": 0.46226719082296835, + "grad_norm": 0.0040470612125930484, + "learning_rate": 5.979233798782671e-06, + "loss": 0.0, + "step": 7173 + }, + { + "epoch": 0.46233163626989754, + "grad_norm": 0.0007731407965276777, + "learning_rate": 5.978517722878625e-06, + "loss": 0.0, + "step": 7174 + }, + { + "epoch": 0.4623960817168267, + "grad_norm": 0.005962191522255315, + "learning_rate": 5.97780164697458e-06, + "loss": 0.0001, + "step": 7175 + }, + { + "epoch": 0.4624605271637559, + "grad_norm": 0.04625911729908732, + "learning_rate": 5.977085571070534e-06, + "loss": 0.0001, + "step": 7176 + }, + { + "epoch": 0.46252497261068504, + "grad_norm": 0.002839065374832336, + "learning_rate": 5.976369495166489e-06, + "loss": 0.0, + "step": 7177 + }, + { + "epoch": 0.4625894180576142, + "grad_norm": 0.0056488077167803855, + "learning_rate": 5.975653419262443e-06, + "loss": 0.0001, + "step": 7178 + }, + { + "epoch": 0.4626538635045434, + "grad_norm": 2.9853239298381764, + "learning_rate": 5.974937343358397e-06, + "loss": 0.0079, + "step": 7179 + }, + { + "epoch": 0.4627183089514726, + "grad_norm": 0.15088591204430143, + "learning_rate": 5.974221267454351e-06, + "loss": 0.0007, + "step": 7180 + }, + { + "epoch": 0.4627827543984018, + "grad_norm": 0.004880895834493149, + "learning_rate": 5.973505191550305e-06, + "loss": 0.0, + "step": 7181 + }, + { + "epoch": 0.4628471998453309, + "grad_norm": 0.00042946334087172727, + "learning_rate": 5.972789115646259e-06, + "loss": 0.0, + "step": 7182 + }, + { + "epoch": 0.4629116452922601, + "grad_norm": 0.035591940752936105, + "learning_rate": 5.9720730397422136e-06, + "loss": 0.0001, + "step": 7183 + }, + { + "epoch": 0.46297609073918927, + "grad_norm": 0.0010547751112222126, + "learning_rate": 5.971356963838167e-06, + "loss": 0.0, + "step": 7184 + }, + { + "epoch": 0.46304053618611846, + "grad_norm": 0.06196757870758435, + "learning_rate": 5.970640887934121e-06, + "loss": 0.0001, + "step": 7185 + }, + { + "epoch": 0.46310498163304764, + "grad_norm": 0.0026980154325516037, + "learning_rate": 5.969924812030076e-06, + "loss": 0.0, + "step": 7186 + }, + { + "epoch": 0.4631694270799768, + "grad_norm": 0.22517366429317423, + "learning_rate": 5.96920873612603e-06, + "loss": 0.0002, + "step": 7187 + }, + { + "epoch": 0.46323387252690595, + "grad_norm": 0.004543474395993957, + "learning_rate": 5.968492660221984e-06, + "loss": 0.0, + "step": 7188 + }, + { + "epoch": 0.46329831797383514, + "grad_norm": 0.008096930609374857, + "learning_rate": 5.967776584317938e-06, + "loss": 0.0, + "step": 7189 + }, + { + "epoch": 0.4633627634207643, + "grad_norm": 8.040914560860149e-05, + "learning_rate": 5.967060508413892e-06, + "loss": 0.0, + "step": 7190 + }, + { + "epoch": 0.4634272088676935, + "grad_norm": 0.0038613975120519836, + "learning_rate": 5.966344432509846e-06, + "loss": 0.0, + "step": 7191 + }, + { + "epoch": 0.4634916543146227, + "grad_norm": 0.009150825453134418, + "learning_rate": 5.9656283566058006e-06, + "loss": 0.0, + "step": 7192 + }, + { + "epoch": 0.4635560997615518, + "grad_norm": 0.006888794586705892, + "learning_rate": 5.964912280701755e-06, + "loss": 0.0, + "step": 7193 + }, + { + "epoch": 0.463620545208481, + "grad_norm": 0.0015190788830026104, + "learning_rate": 5.964196204797708e-06, + "loss": 0.0, + "step": 7194 + }, + { + "epoch": 0.4636849906554102, + "grad_norm": 0.0010511826214208134, + "learning_rate": 5.963480128893663e-06, + "loss": 0.0, + "step": 7195 + }, + { + "epoch": 0.4637494361023394, + "grad_norm": 0.006848881538616532, + "learning_rate": 5.962764052989617e-06, + "loss": 0.0, + "step": 7196 + }, + { + "epoch": 0.46381388154926856, + "grad_norm": 0.19715999381198668, + "learning_rate": 5.962047977085571e-06, + "loss": 0.0018, + "step": 7197 + }, + { + "epoch": 0.46387832699619774, + "grad_norm": 9.671389949897078e-05, + "learning_rate": 5.961331901181526e-06, + "loss": 0.0, + "step": 7198 + }, + { + "epoch": 0.46394277244312687, + "grad_norm": 0.010491111385630709, + "learning_rate": 5.960615825277481e-06, + "loss": 0.0, + "step": 7199 + }, + { + "epoch": 0.46400721789005606, + "grad_norm": 0.000629562530891665, + "learning_rate": 5.959899749373434e-06, + "loss": 0.0, + "step": 7200 + }, + { + "epoch": 0.46407166333698524, + "grad_norm": 0.002112236082025321, + "learning_rate": 5.959183673469388e-06, + "loss": 0.0, + "step": 7201 + }, + { + "epoch": 0.4641361087839144, + "grad_norm": 0.2710224448099513, + "learning_rate": 5.958467597565343e-06, + "loss": 0.0002, + "step": 7202 + }, + { + "epoch": 0.4642005542308436, + "grad_norm": 0.02375578089601274, + "learning_rate": 5.957751521661297e-06, + "loss": 0.0017, + "step": 7203 + }, + { + "epoch": 0.46426499967777274, + "grad_norm": 0.0011988023769871627, + "learning_rate": 5.957035445757251e-06, + "loss": 0.0, + "step": 7204 + }, + { + "epoch": 0.4643294451247019, + "grad_norm": 0.003208027864138399, + "learning_rate": 5.956319369853205e-06, + "loss": 0.0, + "step": 7205 + }, + { + "epoch": 0.4643938905716311, + "grad_norm": 0.0009489984323486449, + "learning_rate": 5.955603293949159e-06, + "loss": 0.0, + "step": 7206 + }, + { + "epoch": 0.4644583360185603, + "grad_norm": 0.00045291492100388596, + "learning_rate": 5.954887218045113e-06, + "loss": 0.0, + "step": 7207 + }, + { + "epoch": 0.4645227814654895, + "grad_norm": 0.015716493476830035, + "learning_rate": 5.954171142141068e-06, + "loss": 0.0001, + "step": 7208 + }, + { + "epoch": 0.46458722691241866, + "grad_norm": 0.010753894111213871, + "learning_rate": 5.953455066237022e-06, + "loss": 0.0, + "step": 7209 + }, + { + "epoch": 0.4646516723593478, + "grad_norm": 0.0021181276175598375, + "learning_rate": 5.952738990332975e-06, + "loss": 0.0, + "step": 7210 + }, + { + "epoch": 0.464716117806277, + "grad_norm": 0.0005319157505790057, + "learning_rate": 5.95202291442893e-06, + "loss": 0.0, + "step": 7211 + }, + { + "epoch": 0.46478056325320616, + "grad_norm": 4.463178627000733e-05, + "learning_rate": 5.951306838524884e-06, + "loss": 0.0, + "step": 7212 + }, + { + "epoch": 0.46484500870013534, + "grad_norm": 0.0013662977275578781, + "learning_rate": 5.950590762620838e-06, + "loss": 0.0, + "step": 7213 + }, + { + "epoch": 0.4649094541470645, + "grad_norm": 0.010369585677639376, + "learning_rate": 5.949874686716793e-06, + "loss": 0.0, + "step": 7214 + }, + { + "epoch": 0.4649738995939937, + "grad_norm": 0.03391908857152616, + "learning_rate": 5.949158610812746e-06, + "loss": 0.0004, + "step": 7215 + }, + { + "epoch": 0.46503834504092284, + "grad_norm": 0.06855665758350678, + "learning_rate": 5.9484425349087e-06, + "loss": 0.0001, + "step": 7216 + }, + { + "epoch": 0.465102790487852, + "grad_norm": 0.21561523982086514, + "learning_rate": 5.947726459004655e-06, + "loss": 0.001, + "step": 7217 + }, + { + "epoch": 0.4651672359347812, + "grad_norm": 0.0034353563543390304, + "learning_rate": 5.947010383100609e-06, + "loss": 0.0, + "step": 7218 + }, + { + "epoch": 0.4652316813817104, + "grad_norm": 0.00018852596750020293, + "learning_rate": 5.946294307196563e-06, + "loss": 0.0, + "step": 7219 + }, + { + "epoch": 0.4652961268286396, + "grad_norm": 0.009362767432748718, + "learning_rate": 5.945578231292517e-06, + "loss": 0.0, + "step": 7220 + }, + { + "epoch": 0.4653605722755687, + "grad_norm": 0.022573569619495887, + "learning_rate": 5.944862155388471e-06, + "loss": 0.0001, + "step": 7221 + }, + { + "epoch": 0.4654250177224979, + "grad_norm": 0.006736289318203268, + "learning_rate": 5.944146079484426e-06, + "loss": 0.0, + "step": 7222 + }, + { + "epoch": 0.4654894631694271, + "grad_norm": 0.5131869329603391, + "learning_rate": 5.9434300035803804e-06, + "loss": 0.0035, + "step": 7223 + }, + { + "epoch": 0.46555390861635626, + "grad_norm": 0.15121273651669012, + "learning_rate": 5.942713927676335e-06, + "loss": 0.0005, + "step": 7224 + }, + { + "epoch": 0.46561835406328544, + "grad_norm": 0.000143468918569542, + "learning_rate": 5.941997851772289e-06, + "loss": 0.0, + "step": 7225 + }, + { + "epoch": 0.46568279951021463, + "grad_norm": 0.003423290574634767, + "learning_rate": 5.9412817758682425e-06, + "loss": 0.0, + "step": 7226 + }, + { + "epoch": 0.46574724495714376, + "grad_norm": 0.05099336860487578, + "learning_rate": 5.940565699964197e-06, + "loss": 0.0004, + "step": 7227 + }, + { + "epoch": 0.46581169040407294, + "grad_norm": 0.07674497419352205, + "learning_rate": 5.939849624060151e-06, + "loss": 0.0017, + "step": 7228 + }, + { + "epoch": 0.4658761358510021, + "grad_norm": 0.16468423587705824, + "learning_rate": 5.939133548156105e-06, + "loss": 0.0013, + "step": 7229 + }, + { + "epoch": 0.4659405812979313, + "grad_norm": 0.0024757373314606267, + "learning_rate": 5.93841747225206e-06, + "loss": 0.0, + "step": 7230 + }, + { + "epoch": 0.4660050267448605, + "grad_norm": 0.0039586155872088965, + "learning_rate": 5.937701396348013e-06, + "loss": 0.0, + "step": 7231 + }, + { + "epoch": 0.4660694721917896, + "grad_norm": 0.01858411356660055, + "learning_rate": 5.9369853204439674e-06, + "loss": 0.0, + "step": 7232 + }, + { + "epoch": 0.4661339176387188, + "grad_norm": 0.21115758681710384, + "learning_rate": 5.936269244539922e-06, + "loss": 0.0026, + "step": 7233 + }, + { + "epoch": 0.466198363085648, + "grad_norm": 0.10160640537815353, + "learning_rate": 5.935553168635876e-06, + "loss": 0.0002, + "step": 7234 + }, + { + "epoch": 0.4662628085325772, + "grad_norm": 0.045725959999042595, + "learning_rate": 5.93483709273183e-06, + "loss": 0.0, + "step": 7235 + }, + { + "epoch": 0.46632725397950636, + "grad_norm": 0.01414354404282994, + "learning_rate": 5.934121016827784e-06, + "loss": 0.0, + "step": 7236 + }, + { + "epoch": 0.46639169942643555, + "grad_norm": 0.01251770166379137, + "learning_rate": 5.933404940923738e-06, + "loss": 0.0001, + "step": 7237 + }, + { + "epoch": 0.4664561448733647, + "grad_norm": 0.0007334224021861777, + "learning_rate": 5.932688865019692e-06, + "loss": 0.0, + "step": 7238 + }, + { + "epoch": 0.46652059032029386, + "grad_norm": 0.0022743016070544573, + "learning_rate": 5.931972789115647e-06, + "loss": 0.0, + "step": 7239 + }, + { + "epoch": 0.46658503576722304, + "grad_norm": 0.00039270465816523245, + "learning_rate": 5.931256713211601e-06, + "loss": 0.0, + "step": 7240 + }, + { + "epoch": 0.46664948121415223, + "grad_norm": 0.00010237405302461057, + "learning_rate": 5.930540637307554e-06, + "loss": 0.0, + "step": 7241 + }, + { + "epoch": 0.4667139266610814, + "grad_norm": 0.004509319091979509, + "learning_rate": 5.929824561403509e-06, + "loss": 0.0, + "step": 7242 + }, + { + "epoch": 0.46677837210801054, + "grad_norm": 0.12337622011710904, + "learning_rate": 5.929108485499463e-06, + "loss": 0.0001, + "step": 7243 + }, + { + "epoch": 0.4668428175549397, + "grad_norm": 0.005279184753878209, + "learning_rate": 5.928392409595417e-06, + "loss": 0.0, + "step": 7244 + }, + { + "epoch": 0.4669072630018689, + "grad_norm": 0.013528141218017584, + "learning_rate": 5.9276763336913725e-06, + "loss": 0.0, + "step": 7245 + }, + { + "epoch": 0.4669717084487981, + "grad_norm": 7.838986385026141e-05, + "learning_rate": 5.926960257787327e-06, + "loss": 0.0, + "step": 7246 + }, + { + "epoch": 0.4670361538957273, + "grad_norm": 0.09289410924673282, + "learning_rate": 5.92624418188328e-06, + "loss": 0.0, + "step": 7247 + }, + { + "epoch": 0.46710059934265646, + "grad_norm": 0.005147704458191427, + "learning_rate": 5.9255281059792345e-06, + "loss": 0.0, + "step": 7248 + }, + { + "epoch": 0.4671650447895856, + "grad_norm": 0.0005078345397182034, + "learning_rate": 5.924812030075189e-06, + "loss": 0.0, + "step": 7249 + }, + { + "epoch": 0.4672294902365148, + "grad_norm": 0.01140905101926907, + "learning_rate": 5.924095954171143e-06, + "loss": 0.0001, + "step": 7250 + }, + { + "epoch": 0.46729393568344396, + "grad_norm": 0.013532734656177478, + "learning_rate": 5.923379878267097e-06, + "loss": 0.0001, + "step": 7251 + }, + { + "epoch": 0.46735838113037315, + "grad_norm": 0.00450951372079799, + "learning_rate": 5.922663802363051e-06, + "loss": 0.0, + "step": 7252 + }, + { + "epoch": 0.46742282657730233, + "grad_norm": 0.00013456809584500636, + "learning_rate": 5.921947726459005e-06, + "loss": 0.0, + "step": 7253 + }, + { + "epoch": 0.4674872720242315, + "grad_norm": 0.5415825747969696, + "learning_rate": 5.9212316505549595e-06, + "loss": 0.0032, + "step": 7254 + }, + { + "epoch": 0.46755171747116064, + "grad_norm": 0.06273022360028993, + "learning_rate": 5.920515574650914e-06, + "loss": 0.0002, + "step": 7255 + }, + { + "epoch": 0.46761616291808983, + "grad_norm": 0.001915030388477191, + "learning_rate": 5.919799498746868e-06, + "loss": 0.0, + "step": 7256 + }, + { + "epoch": 0.467680608365019, + "grad_norm": 0.0018932465533284114, + "learning_rate": 5.9190834228428215e-06, + "loss": 0.0, + "step": 7257 + }, + { + "epoch": 0.4677450538119482, + "grad_norm": 0.01038187083953511, + "learning_rate": 5.918367346938776e-06, + "loss": 0.0, + "step": 7258 + }, + { + "epoch": 0.4678094992588774, + "grad_norm": 0.00021900495299448367, + "learning_rate": 5.91765127103473e-06, + "loss": 0.0, + "step": 7259 + }, + { + "epoch": 0.4678739447058065, + "grad_norm": 0.0002388793545880022, + "learning_rate": 5.916935195130684e-06, + "loss": 0.0, + "step": 7260 + }, + { + "epoch": 0.4679383901527357, + "grad_norm": 0.0033289252996606057, + "learning_rate": 5.916219119226638e-06, + "loss": 0.0, + "step": 7261 + }, + { + "epoch": 0.4680028355996649, + "grad_norm": 0.00018771824976225135, + "learning_rate": 5.915503043322592e-06, + "loss": 0.0, + "step": 7262 + }, + { + "epoch": 0.46806728104659406, + "grad_norm": 0.07404355653908364, + "learning_rate": 5.9147869674185465e-06, + "loss": 0.0008, + "step": 7263 + }, + { + "epoch": 0.46813172649352325, + "grad_norm": 0.003018865199686008, + "learning_rate": 5.914070891514501e-06, + "loss": 0.0, + "step": 7264 + }, + { + "epoch": 0.46819617194045243, + "grad_norm": 0.04145828291970412, + "learning_rate": 5.913354815610455e-06, + "loss": 0.0006, + "step": 7265 + }, + { + "epoch": 0.46826061738738156, + "grad_norm": 0.002445120612271508, + "learning_rate": 5.9126387397064085e-06, + "loss": 0.0, + "step": 7266 + }, + { + "epoch": 0.46832506283431075, + "grad_norm": 0.0041065608074551405, + "learning_rate": 5.911922663802363e-06, + "loss": 0.0, + "step": 7267 + }, + { + "epoch": 0.46838950828123993, + "grad_norm": 0.03801747734846256, + "learning_rate": 5.911206587898318e-06, + "loss": 0.0001, + "step": 7268 + }, + { + "epoch": 0.4684539537281691, + "grad_norm": 0.00017918018740986495, + "learning_rate": 5.910490511994272e-06, + "loss": 0.0, + "step": 7269 + }, + { + "epoch": 0.4685183991750983, + "grad_norm": 0.008305111975488961, + "learning_rate": 5.9097744360902265e-06, + "loss": 0.0001, + "step": 7270 + }, + { + "epoch": 0.46858284462202743, + "grad_norm": 3.966849481567199e-05, + "learning_rate": 5.909058360186181e-06, + "loss": 0.0, + "step": 7271 + }, + { + "epoch": 0.4686472900689566, + "grad_norm": 0.0004967530768057412, + "learning_rate": 5.908342284282135e-06, + "loss": 0.0, + "step": 7272 + }, + { + "epoch": 0.4687117355158858, + "grad_norm": 0.0757589549519109, + "learning_rate": 5.907626208378089e-06, + "loss": 0.0002, + "step": 7273 + }, + { + "epoch": 0.468776180962815, + "grad_norm": 0.0013153308624478225, + "learning_rate": 5.906910132474043e-06, + "loss": 0.0, + "step": 7274 + }, + { + "epoch": 0.46884062640974417, + "grad_norm": 0.004487714706241631, + "learning_rate": 5.906194056569997e-06, + "loss": 0.0, + "step": 7275 + }, + { + "epoch": 0.46890507185667335, + "grad_norm": 0.0024251842236077178, + "learning_rate": 5.9054779806659515e-06, + "loss": 0.0, + "step": 7276 + }, + { + "epoch": 0.4689695173036025, + "grad_norm": 0.0162244810554148, + "learning_rate": 5.904761904761905e-06, + "loss": 0.0001, + "step": 7277 + }, + { + "epoch": 0.46903396275053166, + "grad_norm": 0.00018407225832327176, + "learning_rate": 5.904045828857859e-06, + "loss": 0.0, + "step": 7278 + }, + { + "epoch": 0.46909840819746085, + "grad_norm": 0.0008793784795991423, + "learning_rate": 5.9033297529538135e-06, + "loss": 0.0, + "step": 7279 + }, + { + "epoch": 0.46916285364439003, + "grad_norm": 0.045148619317254515, + "learning_rate": 5.902613677049768e-06, + "loss": 0.0002, + "step": 7280 + }, + { + "epoch": 0.4692272990913192, + "grad_norm": 0.0007002317138202523, + "learning_rate": 5.901897601145722e-06, + "loss": 0.0, + "step": 7281 + }, + { + "epoch": 0.46929174453824835, + "grad_norm": 0.0076014641681480645, + "learning_rate": 5.901181525241676e-06, + "loss": 0.0, + "step": 7282 + }, + { + "epoch": 0.46935618998517753, + "grad_norm": 0.058544814019823664, + "learning_rate": 5.90046544933763e-06, + "loss": 0.0002, + "step": 7283 + }, + { + "epoch": 0.4694206354321067, + "grad_norm": 0.2589595880628915, + "learning_rate": 5.899749373433584e-06, + "loss": 0.0019, + "step": 7284 + }, + { + "epoch": 0.4694850808790359, + "grad_norm": 0.0009875575563172685, + "learning_rate": 5.8990332975295385e-06, + "loss": 0.0, + "step": 7285 + }, + { + "epoch": 0.4695495263259651, + "grad_norm": 0.0001364893657174719, + "learning_rate": 5.898317221625493e-06, + "loss": 0.0, + "step": 7286 + }, + { + "epoch": 0.46961397177289427, + "grad_norm": 0.08705876659342433, + "learning_rate": 5.897601145721446e-06, + "loss": 0.0007, + "step": 7287 + }, + { + "epoch": 0.4696784172198234, + "grad_norm": 0.01562531292125164, + "learning_rate": 5.8968850698174005e-06, + "loss": 0.0001, + "step": 7288 + }, + { + "epoch": 0.4697428626667526, + "grad_norm": 0.04099352701129466, + "learning_rate": 5.896168993913355e-06, + "loss": 0.0001, + "step": 7289 + }, + { + "epoch": 0.46980730811368177, + "grad_norm": 0.0005863534760000729, + "learning_rate": 5.895452918009309e-06, + "loss": 0.0, + "step": 7290 + }, + { + "epoch": 0.46987175356061095, + "grad_norm": 0.0015909966007760811, + "learning_rate": 5.8947368421052634e-06, + "loss": 0.0, + "step": 7291 + }, + { + "epoch": 0.46993619900754013, + "grad_norm": 0.00274800952267154, + "learning_rate": 5.8940207662012186e-06, + "loss": 0.0, + "step": 7292 + }, + { + "epoch": 0.4700006444544693, + "grad_norm": 0.000492930772659004, + "learning_rate": 5.893304690297173e-06, + "loss": 0.0, + "step": 7293 + }, + { + "epoch": 0.47006508990139845, + "grad_norm": 0.0025907573420449314, + "learning_rate": 5.892588614393126e-06, + "loss": 0.0, + "step": 7294 + }, + { + "epoch": 0.47012953534832763, + "grad_norm": 0.0007235664111460894, + "learning_rate": 5.891872538489081e-06, + "loss": 0.0, + "step": 7295 + }, + { + "epoch": 0.4701939807952568, + "grad_norm": 0.003975885038461543, + "learning_rate": 5.891156462585035e-06, + "loss": 0.0, + "step": 7296 + }, + { + "epoch": 0.470258426242186, + "grad_norm": 0.0025255628655265244, + "learning_rate": 5.890440386680989e-06, + "loss": 0.0, + "step": 7297 + }, + { + "epoch": 0.4703228716891152, + "grad_norm": 0.005506941121865925, + "learning_rate": 5.889724310776943e-06, + "loss": 0.0001, + "step": 7298 + }, + { + "epoch": 0.4703873171360443, + "grad_norm": 0.0001799831213966565, + "learning_rate": 5.889008234872897e-06, + "loss": 0.0, + "step": 7299 + }, + { + "epoch": 0.4704517625829735, + "grad_norm": 0.004868009039231323, + "learning_rate": 5.888292158968851e-06, + "loss": 0.0015, + "step": 7300 + }, + { + "epoch": 0.4705162080299027, + "grad_norm": 0.4330565587309389, + "learning_rate": 5.8875760830648056e-06, + "loss": 0.0004, + "step": 7301 + }, + { + "epoch": 0.47058065347683187, + "grad_norm": 0.017751362441838763, + "learning_rate": 5.88686000716076e-06, + "loss": 0.0, + "step": 7302 + }, + { + "epoch": 0.47064509892376105, + "grad_norm": 0.00020847490324210836, + "learning_rate": 5.886143931256713e-06, + "loss": 0.0, + "step": 7303 + }, + { + "epoch": 0.47070954437069024, + "grad_norm": 0.007923028419122423, + "learning_rate": 5.885427855352668e-06, + "loss": 0.0, + "step": 7304 + }, + { + "epoch": 0.47077398981761936, + "grad_norm": 0.005294704298977462, + "learning_rate": 5.884711779448622e-06, + "loss": 0.0, + "step": 7305 + }, + { + "epoch": 0.47083843526454855, + "grad_norm": 0.0007913525318603742, + "learning_rate": 5.883995703544576e-06, + "loss": 0.0, + "step": 7306 + }, + { + "epoch": 0.47090288071147773, + "grad_norm": 0.009260764334228884, + "learning_rate": 5.8832796276405305e-06, + "loss": 0.0, + "step": 7307 + }, + { + "epoch": 0.4709673261584069, + "grad_norm": 0.14998565371771327, + "learning_rate": 5.882563551736484e-06, + "loss": 0.0003, + "step": 7308 + }, + { + "epoch": 0.4710317716053361, + "grad_norm": 0.002568590896597837, + "learning_rate": 5.881847475832438e-06, + "loss": 0.0, + "step": 7309 + }, + { + "epoch": 0.47109621705226523, + "grad_norm": 0.0001356063438065277, + "learning_rate": 5.8811313999283926e-06, + "loss": 0.0, + "step": 7310 + }, + { + "epoch": 0.4711606624991944, + "grad_norm": 3.737375653479307e-05, + "learning_rate": 5.880415324024347e-06, + "loss": 0.0, + "step": 7311 + }, + { + "epoch": 0.4712251079461236, + "grad_norm": 0.0018837255037034548, + "learning_rate": 5.879699248120301e-06, + "loss": 0.0, + "step": 7312 + }, + { + "epoch": 0.4712895533930528, + "grad_norm": 9.369458189287065e-05, + "learning_rate": 5.878983172216255e-06, + "loss": 0.0, + "step": 7313 + }, + { + "epoch": 0.47135399883998197, + "grad_norm": 0.004181875470268751, + "learning_rate": 5.878267096312209e-06, + "loss": 0.0, + "step": 7314 + }, + { + "epoch": 0.47141844428691115, + "grad_norm": 0.003124371841280807, + "learning_rate": 5.877551020408164e-06, + "loss": 0.0001, + "step": 7315 + }, + { + "epoch": 0.4714828897338403, + "grad_norm": 0.00016681583676826113, + "learning_rate": 5.876834944504118e-06, + "loss": 0.0, + "step": 7316 + }, + { + "epoch": 0.47154733518076947, + "grad_norm": 0.013830684057848068, + "learning_rate": 5.876118868600073e-06, + "loss": 0.0, + "step": 7317 + }, + { + "epoch": 0.47161178062769865, + "grad_norm": 0.013160734314818012, + "learning_rate": 5.875402792696027e-06, + "loss": 0.0001, + "step": 7318 + }, + { + "epoch": 0.47167622607462784, + "grad_norm": 0.005155490320092713, + "learning_rate": 5.87468671679198e-06, + "loss": 0.0, + "step": 7319 + }, + { + "epoch": 0.471740671521557, + "grad_norm": 0.0018278137970520977, + "learning_rate": 5.873970640887935e-06, + "loss": 0.0, + "step": 7320 + }, + { + "epoch": 0.47180511696848615, + "grad_norm": 0.003185959559079041, + "learning_rate": 5.873254564983889e-06, + "loss": 0.0, + "step": 7321 + }, + { + "epoch": 0.47186956241541533, + "grad_norm": 0.2694356997754317, + "learning_rate": 5.872538489079843e-06, + "loss": 0.0024, + "step": 7322 + }, + { + "epoch": 0.4719340078623445, + "grad_norm": 0.01864031511807212, + "learning_rate": 5.871822413175798e-06, + "loss": 0.0, + "step": 7323 + }, + { + "epoch": 0.4719984533092737, + "grad_norm": 0.015568650232301293, + "learning_rate": 5.871106337271751e-06, + "loss": 0.0001, + "step": 7324 + }, + { + "epoch": 0.4720628987562029, + "grad_norm": 0.0058521904086244305, + "learning_rate": 5.870390261367705e-06, + "loss": 0.0, + "step": 7325 + }, + { + "epoch": 0.47212734420313207, + "grad_norm": 0.09893792709546372, + "learning_rate": 5.86967418546366e-06, + "loss": 0.0001, + "step": 7326 + }, + { + "epoch": 0.4721917896500612, + "grad_norm": 0.3626702410404047, + "learning_rate": 5.868958109559614e-06, + "loss": 0.0022, + "step": 7327 + }, + { + "epoch": 0.4722562350969904, + "grad_norm": 0.24238985590879442, + "learning_rate": 5.868242033655568e-06, + "loss": 0.0003, + "step": 7328 + }, + { + "epoch": 0.47232068054391957, + "grad_norm": 0.2921772246160459, + "learning_rate": 5.867525957751522e-06, + "loss": 0.0022, + "step": 7329 + }, + { + "epoch": 0.47238512599084875, + "grad_norm": 0.10111720327515572, + "learning_rate": 5.866809881847476e-06, + "loss": 0.0017, + "step": 7330 + }, + { + "epoch": 0.47244957143777794, + "grad_norm": 0.0007373990260450756, + "learning_rate": 5.86609380594343e-06, + "loss": 0.0, + "step": 7331 + }, + { + "epoch": 0.4725140168847071, + "grad_norm": 0.014592344951044412, + "learning_rate": 5.865377730039385e-06, + "loss": 0.0002, + "step": 7332 + }, + { + "epoch": 0.47257846233163625, + "grad_norm": 0.004748893437132448, + "learning_rate": 5.864661654135339e-06, + "loss": 0.0, + "step": 7333 + }, + { + "epoch": 0.47264290777856544, + "grad_norm": 0.04088056414933486, + "learning_rate": 5.863945578231292e-06, + "loss": 0.0001, + "step": 7334 + }, + { + "epoch": 0.4727073532254946, + "grad_norm": 0.0006974730055049175, + "learning_rate": 5.863229502327247e-06, + "loss": 0.0, + "step": 7335 + }, + { + "epoch": 0.4727717986724238, + "grad_norm": 0.0060704542827779, + "learning_rate": 5.862513426423201e-06, + "loss": 0.0, + "step": 7336 + }, + { + "epoch": 0.472836244119353, + "grad_norm": 6.82556858305384e-05, + "learning_rate": 5.861797350519155e-06, + "loss": 0.0, + "step": 7337 + }, + { + "epoch": 0.4729006895662821, + "grad_norm": 0.01232797728501841, + "learning_rate": 5.86108127461511e-06, + "loss": 0.0001, + "step": 7338 + }, + { + "epoch": 0.4729651350132113, + "grad_norm": 0.14040671069080024, + "learning_rate": 5.860365198711065e-06, + "loss": 0.0013, + "step": 7339 + }, + { + "epoch": 0.4730295804601405, + "grad_norm": 0.0069158175830979236, + "learning_rate": 5.859649122807018e-06, + "loss": 0.0, + "step": 7340 + }, + { + "epoch": 0.47309402590706967, + "grad_norm": 0.15098211468237663, + "learning_rate": 5.8589330469029724e-06, + "loss": 0.0014, + "step": 7341 + }, + { + "epoch": 0.47315847135399886, + "grad_norm": 0.0005686017696016296, + "learning_rate": 5.858216970998927e-06, + "loss": 0.0, + "step": 7342 + }, + { + "epoch": 0.47322291680092804, + "grad_norm": 0.06047954006858101, + "learning_rate": 5.857500895094881e-06, + "loss": 0.0001, + "step": 7343 + }, + { + "epoch": 0.47328736224785717, + "grad_norm": 0.07052647298794551, + "learning_rate": 5.856784819190835e-06, + "loss": 0.0008, + "step": 7344 + }, + { + "epoch": 0.47335180769478635, + "grad_norm": 8.880895053995195e-05, + "learning_rate": 5.856068743286789e-06, + "loss": 0.0, + "step": 7345 + }, + { + "epoch": 0.47341625314171554, + "grad_norm": 0.0009487713006594181, + "learning_rate": 5.855352667382743e-06, + "loss": 0.0, + "step": 7346 + }, + { + "epoch": 0.4734806985886447, + "grad_norm": 0.2606289649908769, + "learning_rate": 5.854636591478697e-06, + "loss": 0.0014, + "step": 7347 + }, + { + "epoch": 0.4735451440355739, + "grad_norm": 0.19189682199600822, + "learning_rate": 5.853920515574652e-06, + "loss": 0.0015, + "step": 7348 + }, + { + "epoch": 0.47360958948250304, + "grad_norm": 0.00025634485228848494, + "learning_rate": 5.853204439670606e-06, + "loss": 0.0, + "step": 7349 + }, + { + "epoch": 0.4736740349294322, + "grad_norm": 0.0021115394978189358, + "learning_rate": 5.8524883637665594e-06, + "loss": 0.0, + "step": 7350 + }, + { + "epoch": 0.4737384803763614, + "grad_norm": 0.008847833623355518, + "learning_rate": 5.851772287862514e-06, + "loss": 0.0001, + "step": 7351 + }, + { + "epoch": 0.4738029258232906, + "grad_norm": 0.1189699239903375, + "learning_rate": 5.851056211958468e-06, + "loss": 0.0004, + "step": 7352 + }, + { + "epoch": 0.4738673712702198, + "grad_norm": 0.005188761681522458, + "learning_rate": 5.850340136054422e-06, + "loss": 0.0, + "step": 7353 + }, + { + "epoch": 0.47393181671714896, + "grad_norm": 0.009026635366319133, + "learning_rate": 5.849624060150377e-06, + "loss": 0.0, + "step": 7354 + }, + { + "epoch": 0.4739962621640781, + "grad_norm": 0.00019702236665921518, + "learning_rate": 5.84890798424633e-06, + "loss": 0.0, + "step": 7355 + }, + { + "epoch": 0.47406070761100727, + "grad_norm": 3.311441360306938, + "learning_rate": 5.848191908342284e-06, + "loss": 0.0171, + "step": 7356 + }, + { + "epoch": 0.47412515305793645, + "grad_norm": 0.001216040447727667, + "learning_rate": 5.847475832438239e-06, + "loss": 0.0, + "step": 7357 + }, + { + "epoch": 0.47418959850486564, + "grad_norm": 0.17368396932281072, + "learning_rate": 5.846759756534193e-06, + "loss": 0.0028, + "step": 7358 + }, + { + "epoch": 0.4742540439517948, + "grad_norm": 0.156202195722558, + "learning_rate": 5.846043680630146e-06, + "loss": 0.0011, + "step": 7359 + }, + { + "epoch": 0.47431848939872395, + "grad_norm": 0.04618093117781858, + "learning_rate": 5.845327604726101e-06, + "loss": 0.0002, + "step": 7360 + }, + { + "epoch": 0.47438293484565314, + "grad_norm": 0.01748293277886574, + "learning_rate": 5.844611528822055e-06, + "loss": 0.0, + "step": 7361 + }, + { + "epoch": 0.4744473802925823, + "grad_norm": 0.0077936942194119655, + "learning_rate": 5.84389545291801e-06, + "loss": 0.0, + "step": 7362 + }, + { + "epoch": 0.4745118257395115, + "grad_norm": 0.0030220017533081076, + "learning_rate": 5.8431793770139645e-06, + "loss": 0.0, + "step": 7363 + }, + { + "epoch": 0.4745762711864407, + "grad_norm": 0.020481092922284175, + "learning_rate": 5.842463301109919e-06, + "loss": 0.0, + "step": 7364 + }, + { + "epoch": 0.4746407166333699, + "grad_norm": 0.047249753805527046, + "learning_rate": 5.841747225205873e-06, + "loss": 0.0004, + "step": 7365 + }, + { + "epoch": 0.474705162080299, + "grad_norm": 0.21330669411996359, + "learning_rate": 5.8410311493018265e-06, + "loss": 0.0006, + "step": 7366 + }, + { + "epoch": 0.4747696075272282, + "grad_norm": 2.6491441151497485e-05, + "learning_rate": 5.840315073397781e-06, + "loss": 0.0, + "step": 7367 + }, + { + "epoch": 0.4748340529741574, + "grad_norm": 0.0006470988794403943, + "learning_rate": 5.839598997493735e-06, + "loss": 0.0, + "step": 7368 + }, + { + "epoch": 0.47489849842108656, + "grad_norm": 6.626651095656053e-05, + "learning_rate": 5.838882921589689e-06, + "loss": 0.0, + "step": 7369 + }, + { + "epoch": 0.47496294386801574, + "grad_norm": 0.0010031601606788707, + "learning_rate": 5.838166845685644e-06, + "loss": 0.0, + "step": 7370 + }, + { + "epoch": 0.4750273893149449, + "grad_norm": 0.0006513101402277502, + "learning_rate": 5.837450769781597e-06, + "loss": 0.0, + "step": 7371 + }, + { + "epoch": 0.47509183476187405, + "grad_norm": 0.007908438327495618, + "learning_rate": 5.8367346938775515e-06, + "loss": 0.0, + "step": 7372 + }, + { + "epoch": 0.47515628020880324, + "grad_norm": 0.000893835609315527, + "learning_rate": 5.836018617973506e-06, + "loss": 0.0, + "step": 7373 + }, + { + "epoch": 0.4752207256557324, + "grad_norm": 0.0033915587171132375, + "learning_rate": 5.83530254206946e-06, + "loss": 0.0, + "step": 7374 + }, + { + "epoch": 0.4752851711026616, + "grad_norm": 0.01097830771040835, + "learning_rate": 5.8345864661654135e-06, + "loss": 0.0, + "step": 7375 + }, + { + "epoch": 0.4753496165495908, + "grad_norm": 0.0011861722306901786, + "learning_rate": 5.833870390261368e-06, + "loss": 0.0, + "step": 7376 + }, + { + "epoch": 0.4754140619965199, + "grad_norm": 0.039472695668676026, + "learning_rate": 5.833154314357322e-06, + "loss": 0.0001, + "step": 7377 + }, + { + "epoch": 0.4754785074434491, + "grad_norm": 0.00013474575537218255, + "learning_rate": 5.832438238453276e-06, + "loss": 0.0, + "step": 7378 + }, + { + "epoch": 0.4755429528903783, + "grad_norm": 0.004523991526979276, + "learning_rate": 5.831722162549231e-06, + "loss": 0.0, + "step": 7379 + }, + { + "epoch": 0.4756073983373075, + "grad_norm": 0.006835058807200215, + "learning_rate": 5.831006086645184e-06, + "loss": 0.0, + "step": 7380 + }, + { + "epoch": 0.47567184378423666, + "grad_norm": 0.05759036522811417, + "learning_rate": 5.8302900107411385e-06, + "loss": 0.0001, + "step": 7381 + }, + { + "epoch": 0.47573628923116584, + "grad_norm": 0.4775274076385219, + "learning_rate": 5.829573934837093e-06, + "loss": 0.0034, + "step": 7382 + }, + { + "epoch": 0.47580073467809497, + "grad_norm": 0.0005595710209434688, + "learning_rate": 5.828857858933047e-06, + "loss": 0.0, + "step": 7383 + }, + { + "epoch": 0.47586518012502416, + "grad_norm": 0.002950148607580095, + "learning_rate": 5.828141783029001e-06, + "loss": 0.0, + "step": 7384 + }, + { + "epoch": 0.47592962557195334, + "grad_norm": 0.10752587919006612, + "learning_rate": 5.8274257071249565e-06, + "loss": 0.0007, + "step": 7385 + }, + { + "epoch": 0.4759940710188825, + "grad_norm": 0.005091958235020094, + "learning_rate": 5.826709631220911e-06, + "loss": 0.0001, + "step": 7386 + }, + { + "epoch": 0.4760585164658117, + "grad_norm": 0.003841617842285557, + "learning_rate": 5.825993555316864e-06, + "loss": 0.0, + "step": 7387 + }, + { + "epoch": 0.47612296191274084, + "grad_norm": 0.0006383723237262383, + "learning_rate": 5.8252774794128185e-06, + "loss": 0.0, + "step": 7388 + }, + { + "epoch": 0.47618740735967, + "grad_norm": 0.004039966120740329, + "learning_rate": 5.824561403508773e-06, + "loss": 0.0, + "step": 7389 + }, + { + "epoch": 0.4762518528065992, + "grad_norm": 0.0010477007617992421, + "learning_rate": 5.823845327604727e-06, + "loss": 0.0, + "step": 7390 + }, + { + "epoch": 0.4763162982535284, + "grad_norm": 1.387736674906414, + "learning_rate": 5.823129251700681e-06, + "loss": 0.0068, + "step": 7391 + }, + { + "epoch": 0.4763807437004576, + "grad_norm": 0.07167502330915866, + "learning_rate": 5.822413175796635e-06, + "loss": 0.0002, + "step": 7392 + }, + { + "epoch": 0.47644518914738676, + "grad_norm": 0.014098552902959926, + "learning_rate": 5.821697099892589e-06, + "loss": 0.0, + "step": 7393 + }, + { + "epoch": 0.4765096345943159, + "grad_norm": 0.08331748195728007, + "learning_rate": 5.8209810239885435e-06, + "loss": 0.0001, + "step": 7394 + }, + { + "epoch": 0.4765740800412451, + "grad_norm": 0.08048932115789725, + "learning_rate": 5.820264948084498e-06, + "loss": 0.0002, + "step": 7395 + }, + { + "epoch": 0.47663852548817426, + "grad_norm": 0.8311032058054546, + "learning_rate": 5.819548872180451e-06, + "loss": 0.0028, + "step": 7396 + }, + { + "epoch": 0.47670297093510344, + "grad_norm": 0.20586471478306612, + "learning_rate": 5.8188327962764055e-06, + "loss": 0.0005, + "step": 7397 + }, + { + "epoch": 0.4767674163820326, + "grad_norm": 0.322638659886776, + "learning_rate": 5.81811672037236e-06, + "loss": 0.0031, + "step": 7398 + }, + { + "epoch": 0.4768318618289618, + "grad_norm": 0.011732446097699342, + "learning_rate": 5.817400644468314e-06, + "loss": 0.0001, + "step": 7399 + }, + { + "epoch": 0.47689630727589094, + "grad_norm": 0.15127922701807855, + "learning_rate": 5.8166845685642684e-06, + "loss": 0.0001, + "step": 7400 + }, + { + "epoch": 0.4769607527228201, + "grad_norm": 0.216969181012013, + "learning_rate": 5.815968492660222e-06, + "loss": 0.0025, + "step": 7401 + }, + { + "epoch": 0.4770251981697493, + "grad_norm": 0.30436968734146624, + "learning_rate": 5.815252416756176e-06, + "loss": 0.0004, + "step": 7402 + }, + { + "epoch": 0.4770896436166785, + "grad_norm": 0.07910840864342857, + "learning_rate": 5.8145363408521305e-06, + "loss": 0.0001, + "step": 7403 + }, + { + "epoch": 0.4771540890636077, + "grad_norm": 0.001726383251496407, + "learning_rate": 5.813820264948085e-06, + "loss": 0.0, + "step": 7404 + }, + { + "epoch": 0.4772185345105368, + "grad_norm": 0.0003675535003440943, + "learning_rate": 5.813104189044039e-06, + "loss": 0.0, + "step": 7405 + }, + { + "epoch": 0.477282979957466, + "grad_norm": 0.001882370654428137, + "learning_rate": 5.8123881131399925e-06, + "loss": 0.0, + "step": 7406 + }, + { + "epoch": 0.4773474254043952, + "grad_norm": 0.04665171072155993, + "learning_rate": 5.811672037235947e-06, + "loss": 0.0002, + "step": 7407 + }, + { + "epoch": 0.47741187085132436, + "grad_norm": 0.006549475831629919, + "learning_rate": 5.810955961331902e-06, + "loss": 0.0, + "step": 7408 + }, + { + "epoch": 0.47747631629825354, + "grad_norm": 0.004427687294087215, + "learning_rate": 5.810239885427856e-06, + "loss": 0.0, + "step": 7409 + }, + { + "epoch": 0.47754076174518273, + "grad_norm": 0.0017522578296534845, + "learning_rate": 5.8095238095238106e-06, + "loss": 0.0, + "step": 7410 + }, + { + "epoch": 0.47760520719211186, + "grad_norm": 0.3626807387078549, + "learning_rate": 5.808807733619765e-06, + "loss": 0.0017, + "step": 7411 + }, + { + "epoch": 0.47766965263904104, + "grad_norm": 0.0008852981396097059, + "learning_rate": 5.808091657715718e-06, + "loss": 0.0, + "step": 7412 + }, + { + "epoch": 0.4777340980859702, + "grad_norm": 0.023258903396705985, + "learning_rate": 5.807375581811673e-06, + "loss": 0.0, + "step": 7413 + }, + { + "epoch": 0.4777985435328994, + "grad_norm": 0.005061872213993539, + "learning_rate": 5.806659505907627e-06, + "loss": 0.0001, + "step": 7414 + }, + { + "epoch": 0.4778629889798286, + "grad_norm": 0.0009047437859875983, + "learning_rate": 5.805943430003581e-06, + "loss": 0.0, + "step": 7415 + }, + { + "epoch": 0.4779274344267577, + "grad_norm": 0.00862082379217804, + "learning_rate": 5.8052273540995355e-06, + "loss": 0.0, + "step": 7416 + }, + { + "epoch": 0.4779918798736869, + "grad_norm": 0.001022643294842138, + "learning_rate": 5.804511278195489e-06, + "loss": 0.0, + "step": 7417 + }, + { + "epoch": 0.4780563253206161, + "grad_norm": 0.00018940896883695436, + "learning_rate": 5.803795202291443e-06, + "loss": 0.0, + "step": 7418 + }, + { + "epoch": 0.4781207707675453, + "grad_norm": 0.002504940827306221, + "learning_rate": 5.8030791263873976e-06, + "loss": 0.0, + "step": 7419 + }, + { + "epoch": 0.47818521621447446, + "grad_norm": 0.00035435915281225466, + "learning_rate": 5.802363050483352e-06, + "loss": 0.0, + "step": 7420 + }, + { + "epoch": 0.47824966166140365, + "grad_norm": 0.0021682544293959963, + "learning_rate": 5.801646974579306e-06, + "loss": 0.0, + "step": 7421 + }, + { + "epoch": 0.4783141071083328, + "grad_norm": 0.01594937015862623, + "learning_rate": 5.80093089867526e-06, + "loss": 0.0001, + "step": 7422 + }, + { + "epoch": 0.47837855255526196, + "grad_norm": 0.0018651042764992, + "learning_rate": 5.800214822771214e-06, + "loss": 0.0, + "step": 7423 + }, + { + "epoch": 0.47844299800219114, + "grad_norm": 0.001665721738655338, + "learning_rate": 5.799498746867168e-06, + "loss": 0.0, + "step": 7424 + }, + { + "epoch": 0.47850744344912033, + "grad_norm": 0.0003566205222117585, + "learning_rate": 5.7987826709631225e-06, + "loss": 0.0, + "step": 7425 + }, + { + "epoch": 0.4785718888960495, + "grad_norm": 0.0008448971831598651, + "learning_rate": 5.798066595059077e-06, + "loss": 0.0, + "step": 7426 + }, + { + "epoch": 0.47863633434297864, + "grad_norm": 7.599035001878121e-05, + "learning_rate": 5.79735051915503e-06, + "loss": 0.0, + "step": 7427 + }, + { + "epoch": 0.4787007797899078, + "grad_norm": 0.06246002789145786, + "learning_rate": 5.7966344432509846e-06, + "loss": 0.0006, + "step": 7428 + }, + { + "epoch": 0.478765225236837, + "grad_norm": 0.0005704727887440845, + "learning_rate": 5.795918367346939e-06, + "loss": 0.0, + "step": 7429 + }, + { + "epoch": 0.4788296706837662, + "grad_norm": 9.001205747052077e-05, + "learning_rate": 5.795202291442893e-06, + "loss": 0.0, + "step": 7430 + }, + { + "epoch": 0.4788941161306954, + "grad_norm": 1.6705578758492294, + "learning_rate": 5.7944862155388475e-06, + "loss": 0.0206, + "step": 7431 + }, + { + "epoch": 0.47895856157762456, + "grad_norm": 0.0011345647547070011, + "learning_rate": 5.793770139634803e-06, + "loss": 0.0, + "step": 7432 + }, + { + "epoch": 0.4790230070245537, + "grad_norm": 0.14501953125, + "learning_rate": 5.793054063730756e-06, + "loss": 0.0003, + "step": 7433 + }, + { + "epoch": 0.4790874524714829, + "grad_norm": 0.8865466181852298, + "learning_rate": 5.79233798782671e-06, + "loss": 0.0128, + "step": 7434 + }, + { + "epoch": 0.47915189791841206, + "grad_norm": 0.3714381777882189, + "learning_rate": 5.791621911922665e-06, + "loss": 0.003, + "step": 7435 + }, + { + "epoch": 0.47921634336534125, + "grad_norm": 0.5642342588606207, + "learning_rate": 5.790905836018619e-06, + "loss": 0.0019, + "step": 7436 + }, + { + "epoch": 0.47928078881227043, + "grad_norm": 0.016275023984882227, + "learning_rate": 5.790189760114573e-06, + "loss": 0.0002, + "step": 7437 + }, + { + "epoch": 0.4793452342591996, + "grad_norm": 0.008938399006915438, + "learning_rate": 5.789473684210527e-06, + "loss": 0.0, + "step": 7438 + }, + { + "epoch": 0.47940967970612874, + "grad_norm": 0.0010685410566123547, + "learning_rate": 5.788757608306481e-06, + "loss": 0.0, + "step": 7439 + }, + { + "epoch": 0.47947412515305793, + "grad_norm": 0.23940940258627813, + "learning_rate": 5.788041532402435e-06, + "loss": 0.0005, + "step": 7440 + }, + { + "epoch": 0.4795385705999871, + "grad_norm": 1.1296119464180416, + "learning_rate": 5.78732545649839e-06, + "loss": 0.0082, + "step": 7441 + }, + { + "epoch": 0.4796030160469163, + "grad_norm": 0.0049990550644581685, + "learning_rate": 5.786609380594344e-06, + "loss": 0.0, + "step": 7442 + }, + { + "epoch": 0.4796674614938455, + "grad_norm": 0.0003853361686928715, + "learning_rate": 5.785893304690297e-06, + "loss": 0.0, + "step": 7443 + }, + { + "epoch": 0.4797319069407746, + "grad_norm": 0.012548110204785096, + "learning_rate": 5.785177228786252e-06, + "loss": 0.0001, + "step": 7444 + }, + { + "epoch": 0.4797963523877038, + "grad_norm": 0.01280574895032373, + "learning_rate": 5.784461152882206e-06, + "loss": 0.0, + "step": 7445 + }, + { + "epoch": 0.479860797834633, + "grad_norm": 0.04450895395433987, + "learning_rate": 5.78374507697816e-06, + "loss": 0.0001, + "step": 7446 + }, + { + "epoch": 0.47992524328156216, + "grad_norm": 0.21640394877508198, + "learning_rate": 5.7830290010741145e-06, + "loss": 0.002, + "step": 7447 + }, + { + "epoch": 0.47998968872849135, + "grad_norm": 0.023149261464048686, + "learning_rate": 5.782312925170068e-06, + "loss": 0.0, + "step": 7448 + }, + { + "epoch": 0.48005413417542053, + "grad_norm": 0.07784700405213824, + "learning_rate": 5.781596849266022e-06, + "loss": 0.0001, + "step": 7449 + }, + { + "epoch": 0.48011857962234966, + "grad_norm": 0.004143772080952122, + "learning_rate": 5.780880773361977e-06, + "loss": 0.0, + "step": 7450 + }, + { + "epoch": 0.48018302506927885, + "grad_norm": 0.02690683351179182, + "learning_rate": 5.780164697457931e-06, + "loss": 0.0001, + "step": 7451 + }, + { + "epoch": 0.48024747051620803, + "grad_norm": 0.15777312226534332, + "learning_rate": 5.779448621553884e-06, + "loss": 0.0001, + "step": 7452 + }, + { + "epoch": 0.4803119159631372, + "grad_norm": 0.011854422022662701, + "learning_rate": 5.778732545649839e-06, + "loss": 0.0001, + "step": 7453 + }, + { + "epoch": 0.4803763614100664, + "grad_norm": 0.0246195620217399, + "learning_rate": 5.778016469745793e-06, + "loss": 0.0, + "step": 7454 + }, + { + "epoch": 0.48044080685699553, + "grad_norm": 0.004674714612297223, + "learning_rate": 5.777300393841748e-06, + "loss": 0.0, + "step": 7455 + }, + { + "epoch": 0.4805052523039247, + "grad_norm": 0.08145114731413798, + "learning_rate": 5.776584317937702e-06, + "loss": 0.0001, + "step": 7456 + }, + { + "epoch": 0.4805696977508539, + "grad_norm": 0.0011909823012698556, + "learning_rate": 5.775868242033657e-06, + "loss": 0.0, + "step": 7457 + }, + { + "epoch": 0.4806341431977831, + "grad_norm": 0.0018303589815469893, + "learning_rate": 5.775152166129611e-06, + "loss": 0.0, + "step": 7458 + }, + { + "epoch": 0.48069858864471227, + "grad_norm": 0.010239236917028723, + "learning_rate": 5.7744360902255644e-06, + "loss": 0.0, + "step": 7459 + }, + { + "epoch": 0.48076303409164145, + "grad_norm": 0.09835660615437058, + "learning_rate": 5.773720014321519e-06, + "loss": 0.0001, + "step": 7460 + }, + { + "epoch": 0.4808274795385706, + "grad_norm": 0.014118260546048035, + "learning_rate": 5.773003938417473e-06, + "loss": 0.0001, + "step": 7461 + }, + { + "epoch": 0.48089192498549976, + "grad_norm": 0.000602907625883965, + "learning_rate": 5.772287862513427e-06, + "loss": 0.0, + "step": 7462 + }, + { + "epoch": 0.48095637043242895, + "grad_norm": 0.06684985026054438, + "learning_rate": 5.771571786609382e-06, + "loss": 0.0001, + "step": 7463 + }, + { + "epoch": 0.48102081587935813, + "grad_norm": 0.17200650256293518, + "learning_rate": 5.770855710705335e-06, + "loss": 0.0016, + "step": 7464 + }, + { + "epoch": 0.4810852613262873, + "grad_norm": 0.021725604178362307, + "learning_rate": 5.770139634801289e-06, + "loss": 0.0, + "step": 7465 + }, + { + "epoch": 0.48114970677321645, + "grad_norm": 0.01244272677275712, + "learning_rate": 5.769423558897244e-06, + "loss": 0.0, + "step": 7466 + }, + { + "epoch": 0.48121415222014563, + "grad_norm": 0.0024778840976099475, + "learning_rate": 5.768707482993198e-06, + "loss": 0.0, + "step": 7467 + }, + { + "epoch": 0.4812785976670748, + "grad_norm": 0.00267979224785924, + "learning_rate": 5.7679914070891514e-06, + "loss": 0.0, + "step": 7468 + }, + { + "epoch": 0.481343043114004, + "grad_norm": 0.003459435935471662, + "learning_rate": 5.767275331185106e-06, + "loss": 0.0, + "step": 7469 + }, + { + "epoch": 0.4814074885609332, + "grad_norm": 0.44937154410443325, + "learning_rate": 5.76655925528106e-06, + "loss": 0.0028, + "step": 7470 + }, + { + "epoch": 0.48147193400786237, + "grad_norm": 0.1190897293505687, + "learning_rate": 5.765843179377014e-06, + "loss": 0.0003, + "step": 7471 + }, + { + "epoch": 0.4815363794547915, + "grad_norm": 0.00193991626554029, + "learning_rate": 5.765127103472969e-06, + "loss": 0.0, + "step": 7472 + }, + { + "epoch": 0.4816008249017207, + "grad_norm": 0.0002928079372750345, + "learning_rate": 5.764411027568922e-06, + "loss": 0.0, + "step": 7473 + }, + { + "epoch": 0.48166527034864987, + "grad_norm": 0.0026289588066395926, + "learning_rate": 5.763694951664876e-06, + "loss": 0.0, + "step": 7474 + }, + { + "epoch": 0.48172971579557905, + "grad_norm": 0.016364185159167613, + "learning_rate": 5.762978875760831e-06, + "loss": 0.0002, + "step": 7475 + }, + { + "epoch": 0.48179416124250823, + "grad_norm": 0.002886073119532732, + "learning_rate": 5.762262799856785e-06, + "loss": 0.0, + "step": 7476 + }, + { + "epoch": 0.4818586066894374, + "grad_norm": 0.0005045980107262203, + "learning_rate": 5.761546723952739e-06, + "loss": 0.0, + "step": 7477 + }, + { + "epoch": 0.48192305213636655, + "grad_norm": 0.012034130486031228, + "learning_rate": 5.760830648048694e-06, + "loss": 0.0001, + "step": 7478 + }, + { + "epoch": 0.48198749758329573, + "grad_norm": 0.0009000554618051204, + "learning_rate": 5.760114572144649e-06, + "loss": 0.0, + "step": 7479 + }, + { + "epoch": 0.4820519430302249, + "grad_norm": 0.00965203255101447, + "learning_rate": 5.759398496240602e-06, + "loss": 0.0, + "step": 7480 + }, + { + "epoch": 0.4821163884771541, + "grad_norm": 0.05475154813132867, + "learning_rate": 5.7586824203365565e-06, + "loss": 0.0001, + "step": 7481 + }, + { + "epoch": 0.4821808339240833, + "grad_norm": 0.020473049849451586, + "learning_rate": 5.757966344432511e-06, + "loss": 0.0002, + "step": 7482 + }, + { + "epoch": 0.4822452793710124, + "grad_norm": 0.0010466034143123027, + "learning_rate": 5.757250268528465e-06, + "loss": 0.0, + "step": 7483 + }, + { + "epoch": 0.4823097248179416, + "grad_norm": 0.018919086369527306, + "learning_rate": 5.7565341926244185e-06, + "loss": 0.0001, + "step": 7484 + }, + { + "epoch": 0.4823741702648708, + "grad_norm": 0.0019212618847136709, + "learning_rate": 5.755818116720373e-06, + "loss": 0.0, + "step": 7485 + }, + { + "epoch": 0.48243861571179997, + "grad_norm": 0.00398771180909543, + "learning_rate": 5.755102040816327e-06, + "loss": 0.0, + "step": 7486 + }, + { + "epoch": 0.48250306115872915, + "grad_norm": 0.009733597569587743, + "learning_rate": 5.754385964912281e-06, + "loss": 0.0, + "step": 7487 + }, + { + "epoch": 0.48256750660565834, + "grad_norm": 0.00010193001177631241, + "learning_rate": 5.753669889008236e-06, + "loss": 0.0, + "step": 7488 + }, + { + "epoch": 0.48263195205258747, + "grad_norm": 0.0004880647422603623, + "learning_rate": 5.752953813104189e-06, + "loss": 0.0, + "step": 7489 + }, + { + "epoch": 0.48269639749951665, + "grad_norm": 0.00360417876237237, + "learning_rate": 5.7522377372001435e-06, + "loss": 0.0, + "step": 7490 + }, + { + "epoch": 0.48276084294644583, + "grad_norm": 0.0004049090707836464, + "learning_rate": 5.751521661296098e-06, + "loss": 0.0, + "step": 7491 + }, + { + "epoch": 0.482825288393375, + "grad_norm": 0.0013167952446200002, + "learning_rate": 5.750805585392052e-06, + "loss": 0.0, + "step": 7492 + }, + { + "epoch": 0.4828897338403042, + "grad_norm": 0.025174958117207765, + "learning_rate": 5.750089509488006e-06, + "loss": 0.0002, + "step": 7493 + }, + { + "epoch": 0.48295417928723333, + "grad_norm": 0.06014975165900907, + "learning_rate": 5.74937343358396e-06, + "loss": 0.0016, + "step": 7494 + }, + { + "epoch": 0.4830186247341625, + "grad_norm": 0.012060249113546211, + "learning_rate": 5.748657357679914e-06, + "loss": 0.0, + "step": 7495 + }, + { + "epoch": 0.4830830701810917, + "grad_norm": 0.12713788599629475, + "learning_rate": 5.747941281775868e-06, + "loss": 0.0028, + "step": 7496 + }, + { + "epoch": 0.4831475156280209, + "grad_norm": 0.005913938901135187, + "learning_rate": 5.747225205871823e-06, + "loss": 0.0, + "step": 7497 + }, + { + "epoch": 0.48321196107495007, + "grad_norm": 0.00039260180087013206, + "learning_rate": 5.746509129967777e-06, + "loss": 0.0, + "step": 7498 + }, + { + "epoch": 0.48327640652187925, + "grad_norm": 0.014482109685575993, + "learning_rate": 5.7457930540637305e-06, + "loss": 0.0001, + "step": 7499 + }, + { + "epoch": 0.4833408519688084, + "grad_norm": 0.00447910521815926, + "learning_rate": 5.745076978159685e-06, + "loss": 0.0, + "step": 7500 + }, + { + "epoch": 0.48340529741573757, + "grad_norm": 0.026631144486073698, + "learning_rate": 5.744360902255639e-06, + "loss": 0.0001, + "step": 7501 + }, + { + "epoch": 0.48346974286266675, + "grad_norm": 0.012365717619568232, + "learning_rate": 5.743644826351594e-06, + "loss": 0.0001, + "step": 7502 + }, + { + "epoch": 0.48353418830959594, + "grad_norm": 0.000551421378977375, + "learning_rate": 5.7429287504475485e-06, + "loss": 0.0, + "step": 7503 + }, + { + "epoch": 0.4835986337565251, + "grad_norm": 0.007876399816532313, + "learning_rate": 5.742212674543503e-06, + "loss": 0.0001, + "step": 7504 + }, + { + "epoch": 0.48366307920345425, + "grad_norm": 0.001752821937067081, + "learning_rate": 5.741496598639456e-06, + "loss": 0.0, + "step": 7505 + }, + { + "epoch": 0.48372752465038343, + "grad_norm": 0.002184141243038027, + "learning_rate": 5.7407805227354105e-06, + "loss": 0.0, + "step": 7506 + }, + { + "epoch": 0.4837919700973126, + "grad_norm": 0.19810261064370907, + "learning_rate": 5.740064446831365e-06, + "loss": 0.0009, + "step": 7507 + }, + { + "epoch": 0.4838564155442418, + "grad_norm": 0.0006581602418458326, + "learning_rate": 5.739348370927319e-06, + "loss": 0.0, + "step": 7508 + }, + { + "epoch": 0.483920860991171, + "grad_norm": 0.00032189703491337544, + "learning_rate": 5.7386322950232734e-06, + "loss": 0.0, + "step": 7509 + }, + { + "epoch": 0.48398530643810017, + "grad_norm": 0.0007444887274812117, + "learning_rate": 5.737916219119227e-06, + "loss": 0.0, + "step": 7510 + }, + { + "epoch": 0.4840497518850293, + "grad_norm": 0.0661404739703034, + "learning_rate": 5.737200143215181e-06, + "loss": 0.0002, + "step": 7511 + }, + { + "epoch": 0.4841141973319585, + "grad_norm": 0.020757209588938764, + "learning_rate": 5.7364840673111355e-06, + "loss": 0.0001, + "step": 7512 + }, + { + "epoch": 0.48417864277888767, + "grad_norm": 0.005867169331984068, + "learning_rate": 5.73576799140709e-06, + "loss": 0.0, + "step": 7513 + }, + { + "epoch": 0.48424308822581685, + "grad_norm": 0.00047232161720890626, + "learning_rate": 5.735051915503044e-06, + "loss": 0.0, + "step": 7514 + }, + { + "epoch": 0.48430753367274604, + "grad_norm": 0.006958581875660543, + "learning_rate": 5.7343358395989975e-06, + "loss": 0.0, + "step": 7515 + }, + { + "epoch": 0.4843719791196752, + "grad_norm": 0.0033243687316280453, + "learning_rate": 5.733619763694952e-06, + "loss": 0.0, + "step": 7516 + }, + { + "epoch": 0.48443642456660435, + "grad_norm": 0.00025208480732531735, + "learning_rate": 5.732903687790906e-06, + "loss": 0.0, + "step": 7517 + }, + { + "epoch": 0.48450087001353354, + "grad_norm": 0.014381225038362599, + "learning_rate": 5.7321876118868604e-06, + "loss": 0.0, + "step": 7518 + }, + { + "epoch": 0.4845653154604627, + "grad_norm": 0.2902895555370628, + "learning_rate": 5.731471535982815e-06, + "loss": 0.0006, + "step": 7519 + }, + { + "epoch": 0.4846297609073919, + "grad_norm": 0.004500232171590274, + "learning_rate": 5.730755460078768e-06, + "loss": 0.0, + "step": 7520 + }, + { + "epoch": 0.4846942063543211, + "grad_norm": 0.005529903035349548, + "learning_rate": 5.7300393841747225e-06, + "loss": 0.0, + "step": 7521 + }, + { + "epoch": 0.4847586518012502, + "grad_norm": 0.0014221680833822147, + "learning_rate": 5.729323308270677e-06, + "loss": 0.0, + "step": 7522 + }, + { + "epoch": 0.4848230972481794, + "grad_norm": 0.0029934765826047783, + "learning_rate": 5.728607232366631e-06, + "loss": 0.0, + "step": 7523 + }, + { + "epoch": 0.4848875426951086, + "grad_norm": 0.0015283611343524148, + "learning_rate": 5.727891156462585e-06, + "loss": 0.0, + "step": 7524 + }, + { + "epoch": 0.48495198814203777, + "grad_norm": 0.0019634857246066248, + "learning_rate": 5.7271750805585405e-06, + "loss": 0.0, + "step": 7525 + }, + { + "epoch": 0.48501643358896696, + "grad_norm": 0.0034480413744822237, + "learning_rate": 5.726459004654494e-06, + "loss": 0.0, + "step": 7526 + }, + { + "epoch": 0.48508087903589614, + "grad_norm": 0.0026394266389676234, + "learning_rate": 5.725742928750448e-06, + "loss": 0.0, + "step": 7527 + }, + { + "epoch": 0.48514532448282527, + "grad_norm": 0.014411129078745335, + "learning_rate": 5.7250268528464026e-06, + "loss": 0.0001, + "step": 7528 + }, + { + "epoch": 0.48520976992975445, + "grad_norm": 0.0067901182709101905, + "learning_rate": 5.724310776942357e-06, + "loss": 0.0, + "step": 7529 + }, + { + "epoch": 0.48527421537668364, + "grad_norm": 0.002589366357544819, + "learning_rate": 5.723594701038311e-06, + "loss": 0.0, + "step": 7530 + }, + { + "epoch": 0.4853386608236128, + "grad_norm": 0.2295686716096039, + "learning_rate": 5.722878625134265e-06, + "loss": 0.0002, + "step": 7531 + }, + { + "epoch": 0.485403106270542, + "grad_norm": 0.005061924499310147, + "learning_rate": 5.722162549230219e-06, + "loss": 0.0, + "step": 7532 + }, + { + "epoch": 0.48546755171747114, + "grad_norm": 0.011882524196712532, + "learning_rate": 5.721446473326173e-06, + "loss": 0.0001, + "step": 7533 + }, + { + "epoch": 0.4855319971644003, + "grad_norm": 0.16392287489124577, + "learning_rate": 5.7207303974221275e-06, + "loss": 0.0008, + "step": 7534 + }, + { + "epoch": 0.4855964426113295, + "grad_norm": 0.002512980230831525, + "learning_rate": 5.720014321518082e-06, + "loss": 0.0, + "step": 7535 + }, + { + "epoch": 0.4856608880582587, + "grad_norm": 0.006464325307909982, + "learning_rate": 5.719298245614035e-06, + "loss": 0.0, + "step": 7536 + }, + { + "epoch": 0.4857253335051879, + "grad_norm": 0.001662718931743277, + "learning_rate": 5.7185821697099896e-06, + "loss": 0.0, + "step": 7537 + }, + { + "epoch": 0.48578977895211706, + "grad_norm": 0.00312735401489503, + "learning_rate": 5.717866093805944e-06, + "loss": 0.0, + "step": 7538 + }, + { + "epoch": 0.4858542243990462, + "grad_norm": 0.002918325996476907, + "learning_rate": 5.717150017901898e-06, + "loss": 0.0, + "step": 7539 + }, + { + "epoch": 0.48591866984597537, + "grad_norm": 0.0013583321002518408, + "learning_rate": 5.7164339419978525e-06, + "loss": 0.0, + "step": 7540 + }, + { + "epoch": 0.48598311529290456, + "grad_norm": 0.03570830831840738, + "learning_rate": 5.715717866093806e-06, + "loss": 0.0001, + "step": 7541 + }, + { + "epoch": 0.48604756073983374, + "grad_norm": 0.06065757499082424, + "learning_rate": 5.71500179018976e-06, + "loss": 0.0001, + "step": 7542 + }, + { + "epoch": 0.4861120061867629, + "grad_norm": 0.00027401369607215015, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.0, + "step": 7543 + }, + { + "epoch": 0.48617645163369205, + "grad_norm": 0.40300723802844124, + "learning_rate": 5.713569638381669e-06, + "loss": 0.0053, + "step": 7544 + }, + { + "epoch": 0.48624089708062124, + "grad_norm": 0.0010218532965069267, + "learning_rate": 5.712853562477622e-06, + "loss": 0.0, + "step": 7545 + }, + { + "epoch": 0.4863053425275504, + "grad_norm": 0.00012427245394306858, + "learning_rate": 5.7121374865735766e-06, + "loss": 0.0, + "step": 7546 + }, + { + "epoch": 0.4863697879744796, + "grad_norm": 0.00135554513866112, + "learning_rate": 5.711421410669531e-06, + "loss": 0.0, + "step": 7547 + }, + { + "epoch": 0.4864342334214088, + "grad_norm": 0.009465878989607369, + "learning_rate": 5.710705334765485e-06, + "loss": 0.0, + "step": 7548 + }, + { + "epoch": 0.486498678868338, + "grad_norm": 0.003342775812316762, + "learning_rate": 5.70998925886144e-06, + "loss": 0.0001, + "step": 7549 + }, + { + "epoch": 0.4865631243152671, + "grad_norm": 0.722024424887394, + "learning_rate": 5.709273182957395e-06, + "loss": 0.0034, + "step": 7550 + }, + { + "epoch": 0.4866275697621963, + "grad_norm": 0.0004114479573313148, + "learning_rate": 5.708557107053349e-06, + "loss": 0.0, + "step": 7551 + }, + { + "epoch": 0.4866920152091255, + "grad_norm": 0.0004321755849311948, + "learning_rate": 5.707841031149302e-06, + "loss": 0.0, + "step": 7552 + }, + { + "epoch": 0.48675646065605466, + "grad_norm": 0.0028718780582452424, + "learning_rate": 5.707124955245257e-06, + "loss": 0.0, + "step": 7553 + }, + { + "epoch": 0.48682090610298384, + "grad_norm": 0.9203624601679466, + "learning_rate": 5.706408879341211e-06, + "loss": 0.0046, + "step": 7554 + }, + { + "epoch": 0.486885351549913, + "grad_norm": 0.0048342640045114255, + "learning_rate": 5.705692803437165e-06, + "loss": 0.0, + "step": 7555 + }, + { + "epoch": 0.48694979699684215, + "grad_norm": 0.0002541909663165527, + "learning_rate": 5.7049767275331195e-06, + "loss": 0.0, + "step": 7556 + }, + { + "epoch": 0.48701424244377134, + "grad_norm": 0.0004487357840125776, + "learning_rate": 5.704260651629073e-06, + "loss": 0.0, + "step": 7557 + }, + { + "epoch": 0.4870786878907005, + "grad_norm": 0.0008346864542774917, + "learning_rate": 5.703544575725027e-06, + "loss": 0.0, + "step": 7558 + }, + { + "epoch": 0.4871431333376297, + "grad_norm": 0.0018916479246447236, + "learning_rate": 5.702828499820982e-06, + "loss": 0.0, + "step": 7559 + }, + { + "epoch": 0.4872075787845589, + "grad_norm": 0.018288931008625598, + "learning_rate": 5.702112423916936e-06, + "loss": 0.0, + "step": 7560 + }, + { + "epoch": 0.487272024231488, + "grad_norm": 0.0010530625038883763, + "learning_rate": 5.701396348012889e-06, + "loss": 0.0, + "step": 7561 + }, + { + "epoch": 0.4873364696784172, + "grad_norm": 0.007322550871804302, + "learning_rate": 5.700680272108844e-06, + "loss": 0.0, + "step": 7562 + }, + { + "epoch": 0.4874009151253464, + "grad_norm": 0.003065408485538047, + "learning_rate": 5.699964196204798e-06, + "loss": 0.0, + "step": 7563 + }, + { + "epoch": 0.4874653605722756, + "grad_norm": 0.0006956848235609778, + "learning_rate": 5.699248120300752e-06, + "loss": 0.0, + "step": 7564 + }, + { + "epoch": 0.48752980601920476, + "grad_norm": 0.1693866565782438, + "learning_rate": 5.6985320443967065e-06, + "loss": 0.001, + "step": 7565 + }, + { + "epoch": 0.48759425146613394, + "grad_norm": 0.1538983159029183, + "learning_rate": 5.69781596849266e-06, + "loss": 0.0002, + "step": 7566 + }, + { + "epoch": 0.48765869691306307, + "grad_norm": 0.0006069839695752836, + "learning_rate": 5.697099892588614e-06, + "loss": 0.0, + "step": 7567 + }, + { + "epoch": 0.48772314235999226, + "grad_norm": 0.0033471887203188194, + "learning_rate": 5.696383816684569e-06, + "loss": 0.0, + "step": 7568 + }, + { + "epoch": 0.48778758780692144, + "grad_norm": 0.06231746804088351, + "learning_rate": 5.695667740780523e-06, + "loss": 0.0006, + "step": 7569 + }, + { + "epoch": 0.4878520332538506, + "grad_norm": 0.016381977460167466, + "learning_rate": 5.694951664876477e-06, + "loss": 0.0001, + "step": 7570 + }, + { + "epoch": 0.4879164787007798, + "grad_norm": 0.003621855047769446, + "learning_rate": 5.694235588972431e-06, + "loss": 0.0, + "step": 7571 + }, + { + "epoch": 0.48798092414770894, + "grad_norm": 0.00014088177846377143, + "learning_rate": 5.693519513068387e-06, + "loss": 0.0, + "step": 7572 + }, + { + "epoch": 0.4880453695946381, + "grad_norm": 0.0014828029465861036, + "learning_rate": 5.69280343716434e-06, + "loss": 0.0, + "step": 7573 + }, + { + "epoch": 0.4881098150415673, + "grad_norm": 0.012641590761369413, + "learning_rate": 5.692087361260294e-06, + "loss": 0.0001, + "step": 7574 + }, + { + "epoch": 0.4881742604884965, + "grad_norm": 0.001850971111009934, + "learning_rate": 5.691371285356249e-06, + "loss": 0.0, + "step": 7575 + }, + { + "epoch": 0.4882387059354257, + "grad_norm": 0.046230391549455706, + "learning_rate": 5.690655209452203e-06, + "loss": 0.0001, + "step": 7576 + }, + { + "epoch": 0.48830315138235486, + "grad_norm": 0.01488584860814521, + "learning_rate": 5.689939133548157e-06, + "loss": 0.0001, + "step": 7577 + }, + { + "epoch": 0.488367596829284, + "grad_norm": 0.000514240191141193, + "learning_rate": 5.689223057644111e-06, + "loss": 0.0, + "step": 7578 + }, + { + "epoch": 0.4884320422762132, + "grad_norm": 0.008130484260185939, + "learning_rate": 5.688506981740065e-06, + "loss": 0.0001, + "step": 7579 + }, + { + "epoch": 0.48849648772314236, + "grad_norm": 0.0030046709164151357, + "learning_rate": 5.687790905836019e-06, + "loss": 0.0, + "step": 7580 + }, + { + "epoch": 0.48856093317007154, + "grad_norm": 0.015805491853670892, + "learning_rate": 5.687074829931974e-06, + "loss": 0.0002, + "step": 7581 + }, + { + "epoch": 0.4886253786170007, + "grad_norm": 0.011213436765041867, + "learning_rate": 5.686358754027927e-06, + "loss": 0.0, + "step": 7582 + }, + { + "epoch": 0.48868982406392986, + "grad_norm": 0.0007413171695860233, + "learning_rate": 5.685642678123881e-06, + "loss": 0.0, + "step": 7583 + }, + { + "epoch": 0.48875426951085904, + "grad_norm": 0.032078999380731485, + "learning_rate": 5.684926602219836e-06, + "loss": 0.0001, + "step": 7584 + }, + { + "epoch": 0.4888187149577882, + "grad_norm": 0.015428289726630193, + "learning_rate": 5.68421052631579e-06, + "loss": 0.0001, + "step": 7585 + }, + { + "epoch": 0.4888831604047174, + "grad_norm": 0.7430837339681301, + "learning_rate": 5.683494450411744e-06, + "loss": 0.003, + "step": 7586 + }, + { + "epoch": 0.4889476058516466, + "grad_norm": 0.16889645305455697, + "learning_rate": 5.682778374507698e-06, + "loss": 0.0006, + "step": 7587 + }, + { + "epoch": 0.4890120512985758, + "grad_norm": 0.01572568683559918, + "learning_rate": 5.682062298603652e-06, + "loss": 0.0, + "step": 7588 + }, + { + "epoch": 0.4890764967455049, + "grad_norm": 0.06375657091566687, + "learning_rate": 5.681346222699606e-06, + "loss": 0.0001, + "step": 7589 + }, + { + "epoch": 0.4891409421924341, + "grad_norm": 0.0013207851323213868, + "learning_rate": 5.680630146795561e-06, + "loss": 0.0, + "step": 7590 + }, + { + "epoch": 0.4892053876393633, + "grad_norm": 0.0013833316345360025, + "learning_rate": 5.679914070891515e-06, + "loss": 0.0, + "step": 7591 + }, + { + "epoch": 0.48926983308629246, + "grad_norm": 0.002289494076961141, + "learning_rate": 5.679197994987468e-06, + "loss": 0.0, + "step": 7592 + }, + { + "epoch": 0.48933427853322164, + "grad_norm": 0.0002545245494613917, + "learning_rate": 5.678481919083423e-06, + "loss": 0.0, + "step": 7593 + }, + { + "epoch": 0.48939872398015083, + "grad_norm": 0.001060228307932159, + "learning_rate": 5.677765843179377e-06, + "loss": 0.0, + "step": 7594 + }, + { + "epoch": 0.48946316942707996, + "grad_norm": 0.03487591100642847, + "learning_rate": 5.677049767275332e-06, + "loss": 0.0, + "step": 7595 + }, + { + "epoch": 0.48952761487400914, + "grad_norm": 0.0168614849164812, + "learning_rate": 5.676333691371286e-06, + "loss": 0.0001, + "step": 7596 + }, + { + "epoch": 0.4895920603209383, + "grad_norm": 0.0006910856059586328, + "learning_rate": 5.675617615467241e-06, + "loss": 0.0, + "step": 7597 + }, + { + "epoch": 0.4896565057678675, + "grad_norm": 0.07767658589564769, + "learning_rate": 5.674901539563194e-06, + "loss": 0.0, + "step": 7598 + }, + { + "epoch": 0.4897209512147967, + "grad_norm": 0.3195438699631821, + "learning_rate": 5.6741854636591485e-06, + "loss": 0.0018, + "step": 7599 + }, + { + "epoch": 0.4897853966617258, + "grad_norm": 0.002169986000435176, + "learning_rate": 5.673469387755103e-06, + "loss": 0.0, + "step": 7600 + }, + { + "epoch": 0.489849842108655, + "grad_norm": 0.012983810109652043, + "learning_rate": 5.672753311851057e-06, + "loss": 0.0, + "step": 7601 + }, + { + "epoch": 0.4899142875555842, + "grad_norm": 0.014353244432383392, + "learning_rate": 5.672037235947011e-06, + "loss": 0.0, + "step": 7602 + }, + { + "epoch": 0.4899787330025134, + "grad_norm": 0.00011045339360843307, + "learning_rate": 5.671321160042965e-06, + "loss": 0.0, + "step": 7603 + }, + { + "epoch": 0.49004317844944256, + "grad_norm": 0.006119750306557415, + "learning_rate": 5.670605084138919e-06, + "loss": 0.0, + "step": 7604 + }, + { + "epoch": 0.49010762389637175, + "grad_norm": 0.003246663160360342, + "learning_rate": 5.669889008234873e-06, + "loss": 0.0, + "step": 7605 + }, + { + "epoch": 0.4901720693433009, + "grad_norm": 0.062110801071613486, + "learning_rate": 5.669172932330828e-06, + "loss": 0.0001, + "step": 7606 + }, + { + "epoch": 0.49023651479023006, + "grad_norm": 0.007737362321899938, + "learning_rate": 5.668456856426782e-06, + "loss": 0.0, + "step": 7607 + }, + { + "epoch": 0.49030096023715924, + "grad_norm": 0.020342826642074038, + "learning_rate": 5.6677407805227355e-06, + "loss": 0.0001, + "step": 7608 + }, + { + "epoch": 0.49036540568408843, + "grad_norm": 0.002374675857821564, + "learning_rate": 5.66702470461869e-06, + "loss": 0.0, + "step": 7609 + }, + { + "epoch": 0.4904298511310176, + "grad_norm": 0.002491284824095385, + "learning_rate": 5.666308628714644e-06, + "loss": 0.0, + "step": 7610 + }, + { + "epoch": 0.49049429657794674, + "grad_norm": 1.0187017932555893, + "learning_rate": 5.665592552810598e-06, + "loss": 0.003, + "step": 7611 + }, + { + "epoch": 0.4905587420248759, + "grad_norm": 0.05487809644575838, + "learning_rate": 5.664876476906553e-06, + "loss": 0.0003, + "step": 7612 + }, + { + "epoch": 0.4906231874718051, + "grad_norm": 0.0018616346784588265, + "learning_rate": 5.664160401002506e-06, + "loss": 0.0, + "step": 7613 + }, + { + "epoch": 0.4906876329187343, + "grad_norm": 0.0015321618605294287, + "learning_rate": 5.66344432509846e-06, + "loss": 0.0, + "step": 7614 + }, + { + "epoch": 0.4907520783656635, + "grad_norm": 0.025696763498570488, + "learning_rate": 5.662728249194415e-06, + "loss": 0.0, + "step": 7615 + }, + { + "epoch": 0.49081652381259266, + "grad_norm": 0.05154074278488259, + "learning_rate": 5.662012173290369e-06, + "loss": 0.0003, + "step": 7616 + }, + { + "epoch": 0.4908809692595218, + "grad_norm": 0.26117784201754574, + "learning_rate": 5.661296097386323e-06, + "loss": 0.0008, + "step": 7617 + }, + { + "epoch": 0.490945414706451, + "grad_norm": 0.21229055685764633, + "learning_rate": 5.660580021482277e-06, + "loss": 0.0006, + "step": 7618 + }, + { + "epoch": 0.49100986015338016, + "grad_norm": 0.0019154365838764172, + "learning_rate": 5.659863945578232e-06, + "loss": 0.0, + "step": 7619 + }, + { + "epoch": 0.49107430560030935, + "grad_norm": 0.000521643832761043, + "learning_rate": 5.659147869674186e-06, + "loss": 0.0, + "step": 7620 + }, + { + "epoch": 0.49113875104723853, + "grad_norm": 0.05246131011336328, + "learning_rate": 5.6584317937701405e-06, + "loss": 0.0002, + "step": 7621 + }, + { + "epoch": 0.49120319649416766, + "grad_norm": 0.023859120947491402, + "learning_rate": 5.657715717866095e-06, + "loss": 0.0001, + "step": 7622 + }, + { + "epoch": 0.49126764194109684, + "grad_norm": 0.07066070248157924, + "learning_rate": 5.656999641962049e-06, + "loss": 0.0002, + "step": 7623 + }, + { + "epoch": 0.49133208738802603, + "grad_norm": 0.0058702492762727244, + "learning_rate": 5.6562835660580025e-06, + "loss": 0.0, + "step": 7624 + }, + { + "epoch": 0.4913965328349552, + "grad_norm": 0.45628538974263216, + "learning_rate": 5.655567490153957e-06, + "loss": 0.001, + "step": 7625 + }, + { + "epoch": 0.4914609782818844, + "grad_norm": 0.016563145807952723, + "learning_rate": 5.654851414249911e-06, + "loss": 0.0, + "step": 7626 + }, + { + "epoch": 0.4915254237288136, + "grad_norm": 0.0007029439357273081, + "learning_rate": 5.6541353383458654e-06, + "loss": 0.0, + "step": 7627 + }, + { + "epoch": 0.4915898691757427, + "grad_norm": 0.07840164793822463, + "learning_rate": 5.65341926244182e-06, + "loss": 0.0, + "step": 7628 + }, + { + "epoch": 0.4916543146226719, + "grad_norm": 0.3056592361995422, + "learning_rate": 5.652703186537773e-06, + "loss": 0.0023, + "step": 7629 + }, + { + "epoch": 0.4917187600696011, + "grad_norm": 0.6774211016599851, + "learning_rate": 5.6519871106337275e-06, + "loss": 0.0035, + "step": 7630 + }, + { + "epoch": 0.49178320551653026, + "grad_norm": 0.001993808920682025, + "learning_rate": 5.651271034729682e-06, + "loss": 0.0, + "step": 7631 + }, + { + "epoch": 0.49184765096345945, + "grad_norm": 0.1596749738737264, + "learning_rate": 5.650554958825636e-06, + "loss": 0.0003, + "step": 7632 + }, + { + "epoch": 0.49191209641038863, + "grad_norm": 0.018059339301017427, + "learning_rate": 5.64983888292159e-06, + "loss": 0.0001, + "step": 7633 + }, + { + "epoch": 0.49197654185731776, + "grad_norm": 0.0013676482324627582, + "learning_rate": 5.649122807017544e-06, + "loss": 0.0, + "step": 7634 + }, + { + "epoch": 0.49204098730424695, + "grad_norm": 0.001527761549636789, + "learning_rate": 5.648406731113498e-06, + "loss": 0.0, + "step": 7635 + }, + { + "epoch": 0.49210543275117613, + "grad_norm": 0.002563567830487596, + "learning_rate": 5.6476906552094524e-06, + "loss": 0.0, + "step": 7636 + }, + { + "epoch": 0.4921698781981053, + "grad_norm": 0.0020392195153546198, + "learning_rate": 5.646974579305407e-06, + "loss": 0.0, + "step": 7637 + }, + { + "epoch": 0.4922343236450345, + "grad_norm": 0.0025428419743069013, + "learning_rate": 5.646258503401361e-06, + "loss": 0.0, + "step": 7638 + }, + { + "epoch": 0.49229876909196363, + "grad_norm": 0.0010233870824352465, + "learning_rate": 5.6455424274973145e-06, + "loss": 0.0, + "step": 7639 + }, + { + "epoch": 0.4923632145388928, + "grad_norm": 0.16308028959442436, + "learning_rate": 5.644826351593269e-06, + "loss": 0.0005, + "step": 7640 + }, + { + "epoch": 0.492427659985822, + "grad_norm": 0.0002236146238672205, + "learning_rate": 5.644110275689223e-06, + "loss": 0.0, + "step": 7641 + }, + { + "epoch": 0.4924921054327512, + "grad_norm": 0.003699660007362483, + "learning_rate": 5.643394199785178e-06, + "loss": 0.0, + "step": 7642 + }, + { + "epoch": 0.49255655087968037, + "grad_norm": 0.6511107573406089, + "learning_rate": 5.6426781238811325e-06, + "loss": 0.0012, + "step": 7643 + }, + { + "epoch": 0.49262099632660955, + "grad_norm": 0.0006555271681799672, + "learning_rate": 5.641962047977087e-06, + "loss": 0.0, + "step": 7644 + }, + { + "epoch": 0.4926854417735387, + "grad_norm": 0.011247004639613586, + "learning_rate": 5.64124597207304e-06, + "loss": 0.0, + "step": 7645 + }, + { + "epoch": 0.49274988722046786, + "grad_norm": 0.04122928475379016, + "learning_rate": 5.6405298961689946e-06, + "loss": 0.0, + "step": 7646 + }, + { + "epoch": 0.49281433266739705, + "grad_norm": 0.09820465599572177, + "learning_rate": 5.639813820264949e-06, + "loss": 0.0003, + "step": 7647 + }, + { + "epoch": 0.49287877811432623, + "grad_norm": 0.0005418565121424015, + "learning_rate": 5.639097744360903e-06, + "loss": 0.0, + "step": 7648 + }, + { + "epoch": 0.4929432235612554, + "grad_norm": 0.25866958451243477, + "learning_rate": 5.6383816684568575e-06, + "loss": 0.0004, + "step": 7649 + }, + { + "epoch": 0.49300766900818455, + "grad_norm": 0.014861191542048305, + "learning_rate": 5.637665592552811e-06, + "loss": 0.0, + "step": 7650 + }, + { + "epoch": 0.49307211445511373, + "grad_norm": 0.06393503994418094, + "learning_rate": 5.636949516648765e-06, + "loss": 0.0002, + "step": 7651 + }, + { + "epoch": 0.4931365599020429, + "grad_norm": 0.001295256191406105, + "learning_rate": 5.6362334407447195e-06, + "loss": 0.0, + "step": 7652 + }, + { + "epoch": 0.4932010053489721, + "grad_norm": 0.0013651152885221776, + "learning_rate": 5.635517364840674e-06, + "loss": 0.0, + "step": 7653 + }, + { + "epoch": 0.4932654507959013, + "grad_norm": 0.034053694591434605, + "learning_rate": 5.634801288936628e-06, + "loss": 0.0, + "step": 7654 + }, + { + "epoch": 0.49332989624283047, + "grad_norm": 0.025483034313675483, + "learning_rate": 5.6340852130325816e-06, + "loss": 0.0001, + "step": 7655 + }, + { + "epoch": 0.4933943416897596, + "grad_norm": 0.0867213163984139, + "learning_rate": 5.633369137128536e-06, + "loss": 0.0002, + "step": 7656 + }, + { + "epoch": 0.4934587871366888, + "grad_norm": 0.036528468614227996, + "learning_rate": 5.63265306122449e-06, + "loss": 0.0005, + "step": 7657 + }, + { + "epoch": 0.49352323258361797, + "grad_norm": 0.014582704541776105, + "learning_rate": 5.6319369853204445e-06, + "loss": 0.0001, + "step": 7658 + }, + { + "epoch": 0.49358767803054715, + "grad_norm": 0.006312064480264101, + "learning_rate": 5.631220909416398e-06, + "loss": 0.0, + "step": 7659 + }, + { + "epoch": 0.49365212347747633, + "grad_norm": 0.05966863374292694, + "learning_rate": 5.630504833512352e-06, + "loss": 0.0001, + "step": 7660 + }, + { + "epoch": 0.49371656892440546, + "grad_norm": 0.01641082586326572, + "learning_rate": 5.6297887576083065e-06, + "loss": 0.0001, + "step": 7661 + }, + { + "epoch": 0.49378101437133465, + "grad_norm": 0.003940255907692563, + "learning_rate": 5.629072681704261e-06, + "loss": 0.0, + "step": 7662 + }, + { + "epoch": 0.49384545981826383, + "grad_norm": 0.006613115214583254, + "learning_rate": 5.628356605800215e-06, + "loss": 0.0001, + "step": 7663 + }, + { + "epoch": 0.493909905265193, + "grad_norm": 0.05100384626993441, + "learning_rate": 5.6276405298961686e-06, + "loss": 0.0001, + "step": 7664 + }, + { + "epoch": 0.4939743507121222, + "grad_norm": 0.003475097216405046, + "learning_rate": 5.6269244539921246e-06, + "loss": 0.0, + "step": 7665 + }, + { + "epoch": 0.4940387961590514, + "grad_norm": 0.002349933148403365, + "learning_rate": 5.626208378088078e-06, + "loss": 0.0, + "step": 7666 + }, + { + "epoch": 0.4941032416059805, + "grad_norm": 0.1691805409207571, + "learning_rate": 5.625492302184032e-06, + "loss": 0.0004, + "step": 7667 + }, + { + "epoch": 0.4941676870529097, + "grad_norm": 0.04184740078663928, + "learning_rate": 5.624776226279987e-06, + "loss": 0.0001, + "step": 7668 + }, + { + "epoch": 0.4942321324998389, + "grad_norm": 0.015622903452228139, + "learning_rate": 5.624060150375941e-06, + "loss": 0.0, + "step": 7669 + }, + { + "epoch": 0.49429657794676807, + "grad_norm": 0.006198852245547676, + "learning_rate": 5.623344074471895e-06, + "loss": 0.0, + "step": 7670 + }, + { + "epoch": 0.49436102339369725, + "grad_norm": 0.03239944533858724, + "learning_rate": 5.622627998567849e-06, + "loss": 0.0001, + "step": 7671 + }, + { + "epoch": 0.49442546884062644, + "grad_norm": 0.23790876792287802, + "learning_rate": 5.621911922663803e-06, + "loss": 0.004, + "step": 7672 + }, + { + "epoch": 0.49448991428755557, + "grad_norm": 0.002371005246239983, + "learning_rate": 5.621195846759757e-06, + "loss": 0.0, + "step": 7673 + }, + { + "epoch": 0.49455435973448475, + "grad_norm": 0.00392991030615517, + "learning_rate": 5.6204797708557115e-06, + "loss": 0.0, + "step": 7674 + }, + { + "epoch": 0.49461880518141393, + "grad_norm": 0.4288448915211329, + "learning_rate": 5.619763694951665e-06, + "loss": 0.0046, + "step": 7675 + }, + { + "epoch": 0.4946832506283431, + "grad_norm": 0.05516306764421135, + "learning_rate": 5.619047619047619e-06, + "loss": 0.0003, + "step": 7676 + }, + { + "epoch": 0.4947476960752723, + "grad_norm": 0.010246722793248482, + "learning_rate": 5.618331543143574e-06, + "loss": 0.0, + "step": 7677 + }, + { + "epoch": 0.49481214152220143, + "grad_norm": 0.007946841214768584, + "learning_rate": 5.617615467239528e-06, + "loss": 0.0, + "step": 7678 + }, + { + "epoch": 0.4948765869691306, + "grad_norm": 0.0548576853204545, + "learning_rate": 5.616899391335482e-06, + "loss": 0.0, + "step": 7679 + }, + { + "epoch": 0.4949410324160598, + "grad_norm": 0.07455733516548389, + "learning_rate": 5.616183315431436e-06, + "loss": 0.0001, + "step": 7680 + }, + { + "epoch": 0.495005477862989, + "grad_norm": 0.001946500852743602, + "learning_rate": 5.61546723952739e-06, + "loss": 0.0, + "step": 7681 + }, + { + "epoch": 0.49506992330991817, + "grad_norm": 0.11966813281237758, + "learning_rate": 5.614751163623344e-06, + "loss": 0.0003, + "step": 7682 + }, + { + "epoch": 0.49513436875684735, + "grad_norm": 0.22977987073403688, + "learning_rate": 5.6140350877192985e-06, + "loss": 0.0012, + "step": 7683 + }, + { + "epoch": 0.4951988142037765, + "grad_norm": 0.004786080663721793, + "learning_rate": 5.613319011815253e-06, + "loss": 0.0, + "step": 7684 + }, + { + "epoch": 0.49526325965070567, + "grad_norm": 0.3058660284807045, + "learning_rate": 5.612602935911206e-06, + "loss": 0.0025, + "step": 7685 + }, + { + "epoch": 0.49532770509763485, + "grad_norm": 0.0006387131833626681, + "learning_rate": 5.611886860007161e-06, + "loss": 0.0, + "step": 7686 + }, + { + "epoch": 0.49539215054456404, + "grad_norm": 0.007263654905943296, + "learning_rate": 5.611170784103115e-06, + "loss": 0.0, + "step": 7687 + }, + { + "epoch": 0.4954565959914932, + "grad_norm": 0.00047506028153626915, + "learning_rate": 5.610454708199069e-06, + "loss": 0.0, + "step": 7688 + }, + { + "epoch": 0.49552104143842235, + "grad_norm": 0.0012289411413441314, + "learning_rate": 5.609738632295024e-06, + "loss": 0.0, + "step": 7689 + }, + { + "epoch": 0.49558548688535153, + "grad_norm": 0.040993396378150186, + "learning_rate": 5.609022556390979e-06, + "loss": 0.0001, + "step": 7690 + }, + { + "epoch": 0.4956499323322807, + "grad_norm": 0.0013355882791165105, + "learning_rate": 5.608306480486932e-06, + "loss": 0.0, + "step": 7691 + }, + { + "epoch": 0.4957143777792099, + "grad_norm": 0.010719311534290885, + "learning_rate": 5.607590404582886e-06, + "loss": 0.0, + "step": 7692 + }, + { + "epoch": 0.4957788232261391, + "grad_norm": 0.0038407025845055135, + "learning_rate": 5.606874328678841e-06, + "loss": 0.0, + "step": 7693 + }, + { + "epoch": 0.49584326867306827, + "grad_norm": 0.4340892057394482, + "learning_rate": 5.606158252774795e-06, + "loss": 0.0044, + "step": 7694 + }, + { + "epoch": 0.4959077141199974, + "grad_norm": 0.005255564610214897, + "learning_rate": 5.605442176870749e-06, + "loss": 0.0, + "step": 7695 + }, + { + "epoch": 0.4959721595669266, + "grad_norm": 0.018044455183344257, + "learning_rate": 5.604726100966703e-06, + "loss": 0.0002, + "step": 7696 + }, + { + "epoch": 0.49603660501385577, + "grad_norm": 0.0006821521166229848, + "learning_rate": 5.604010025062657e-06, + "loss": 0.0, + "step": 7697 + }, + { + "epoch": 0.49610105046078495, + "grad_norm": 0.12202850007171458, + "learning_rate": 5.603293949158611e-06, + "loss": 0.0002, + "step": 7698 + }, + { + "epoch": 0.49616549590771414, + "grad_norm": 0.036351122742084956, + "learning_rate": 5.602577873254566e-06, + "loss": 0.0, + "step": 7699 + }, + { + "epoch": 0.49622994135464327, + "grad_norm": 0.00016657967509917158, + "learning_rate": 5.60186179735052e-06, + "loss": 0.0, + "step": 7700 + }, + { + "epoch": 0.49629438680157245, + "grad_norm": 0.1313316661645494, + "learning_rate": 5.601145721446473e-06, + "loss": 0.0003, + "step": 7701 + }, + { + "epoch": 0.49635883224850164, + "grad_norm": 0.5427525590777412, + "learning_rate": 5.600429645542428e-06, + "loss": 0.001, + "step": 7702 + }, + { + "epoch": 0.4964232776954308, + "grad_norm": 0.004619553697276954, + "learning_rate": 5.599713569638382e-06, + "loss": 0.0, + "step": 7703 + }, + { + "epoch": 0.49648772314236, + "grad_norm": 0.12349671304702227, + "learning_rate": 5.598997493734336e-06, + "loss": 0.0016, + "step": 7704 + }, + { + "epoch": 0.4965521685892892, + "grad_norm": 0.001022775411367935, + "learning_rate": 5.5982814178302906e-06, + "loss": 0.0, + "step": 7705 + }, + { + "epoch": 0.4966166140362183, + "grad_norm": 0.11159717941088838, + "learning_rate": 5.597565341926244e-06, + "loss": 0.0002, + "step": 7706 + }, + { + "epoch": 0.4966810594831475, + "grad_norm": 0.006720570718083302, + "learning_rate": 5.596849266022198e-06, + "loss": 0.0, + "step": 7707 + }, + { + "epoch": 0.4967455049300767, + "grad_norm": 0.0001619117415859008, + "learning_rate": 5.596133190118153e-06, + "loss": 0.0, + "step": 7708 + }, + { + "epoch": 0.49680995037700587, + "grad_norm": 1.5955664810823738, + "learning_rate": 5.595417114214107e-06, + "loss": 0.0081, + "step": 7709 + }, + { + "epoch": 0.49687439582393506, + "grad_norm": 0.008624999726623077, + "learning_rate": 5.594701038310061e-06, + "loss": 0.0, + "step": 7710 + }, + { + "epoch": 0.49693884127086424, + "grad_norm": 0.011486251104675875, + "learning_rate": 5.593984962406015e-06, + "loss": 0.0, + "step": 7711 + }, + { + "epoch": 0.49700328671779337, + "grad_norm": 0.2950930319796222, + "learning_rate": 5.59326888650197e-06, + "loss": 0.0009, + "step": 7712 + }, + { + "epoch": 0.49706773216472255, + "grad_norm": 0.0014471563901175158, + "learning_rate": 5.592552810597924e-06, + "loss": 0.0, + "step": 7713 + }, + { + "epoch": 0.49713217761165174, + "grad_norm": 0.0038007229908679743, + "learning_rate": 5.591836734693878e-06, + "loss": 0.0, + "step": 7714 + }, + { + "epoch": 0.4971966230585809, + "grad_norm": 0.0018911992897892018, + "learning_rate": 5.591120658789833e-06, + "loss": 0.0, + "step": 7715 + }, + { + "epoch": 0.4972610685055101, + "grad_norm": 0.017360700248121624, + "learning_rate": 5.590404582885787e-06, + "loss": 0.0, + "step": 7716 + }, + { + "epoch": 0.49732551395243924, + "grad_norm": 0.000842689886037626, + "learning_rate": 5.5896885069817405e-06, + "loss": 0.0, + "step": 7717 + }, + { + "epoch": 0.4973899593993684, + "grad_norm": 0.10780282530014913, + "learning_rate": 5.588972431077695e-06, + "loss": 0.0002, + "step": 7718 + }, + { + "epoch": 0.4974544048462976, + "grad_norm": 0.009681021889705044, + "learning_rate": 5.588256355173649e-06, + "loss": 0.0001, + "step": 7719 + }, + { + "epoch": 0.4975188502932268, + "grad_norm": 0.0017426617240457378, + "learning_rate": 5.587540279269603e-06, + "loss": 0.0, + "step": 7720 + }, + { + "epoch": 0.497583295740156, + "grad_norm": 1.5181758690231042, + "learning_rate": 5.586824203365558e-06, + "loss": 0.0113, + "step": 7721 + }, + { + "epoch": 0.49764774118708516, + "grad_norm": 0.006614844277389507, + "learning_rate": 5.586108127461511e-06, + "loss": 0.0001, + "step": 7722 + }, + { + "epoch": 0.4977121866340143, + "grad_norm": 0.2634574442064215, + "learning_rate": 5.585392051557465e-06, + "loss": 0.0012, + "step": 7723 + }, + { + "epoch": 0.49777663208094347, + "grad_norm": 0.26991512770438303, + "learning_rate": 5.58467597565342e-06, + "loss": 0.0023, + "step": 7724 + }, + { + "epoch": 0.49784107752787266, + "grad_norm": 0.009235564462089122, + "learning_rate": 5.583959899749374e-06, + "loss": 0.0, + "step": 7725 + }, + { + "epoch": 0.49790552297480184, + "grad_norm": 0.2566201641688113, + "learning_rate": 5.583243823845328e-06, + "loss": 0.0005, + "step": 7726 + }, + { + "epoch": 0.497969968421731, + "grad_norm": 0.005090404771266792, + "learning_rate": 5.582527747941282e-06, + "loss": 0.0, + "step": 7727 + }, + { + "epoch": 0.49803441386866015, + "grad_norm": 0.27343225474095073, + "learning_rate": 5.581811672037236e-06, + "loss": 0.002, + "step": 7728 + }, + { + "epoch": 0.49809885931558934, + "grad_norm": 0.031385309490975594, + "learning_rate": 5.58109559613319e-06, + "loss": 0.0, + "step": 7729 + }, + { + "epoch": 0.4981633047625185, + "grad_norm": 0.023604299895657106, + "learning_rate": 5.580379520229145e-06, + "loss": 0.0, + "step": 7730 + }, + { + "epoch": 0.4982277502094477, + "grad_norm": 0.010524595936297206, + "learning_rate": 5.579663444325099e-06, + "loss": 0.0, + "step": 7731 + }, + { + "epoch": 0.4982921956563769, + "grad_norm": 0.08313716178764882, + "learning_rate": 5.578947368421052e-06, + "loss": 0.0001, + "step": 7732 + }, + { + "epoch": 0.4983566411033061, + "grad_norm": 0.20455374205723056, + "learning_rate": 5.578231292517007e-06, + "loss": 0.0002, + "step": 7733 + }, + { + "epoch": 0.4984210865502352, + "grad_norm": 0.006954444476851246, + "learning_rate": 5.577515216612961e-06, + "loss": 0.0, + "step": 7734 + }, + { + "epoch": 0.4984855319971644, + "grad_norm": 0.1379288863464268, + "learning_rate": 5.576799140708916e-06, + "loss": 0.0012, + "step": 7735 + }, + { + "epoch": 0.4985499774440936, + "grad_norm": 0.22782729888330902, + "learning_rate": 5.5760830648048704e-06, + "loss": 0.0003, + "step": 7736 + }, + { + "epoch": 0.49861442289102276, + "grad_norm": 0.004750422647156393, + "learning_rate": 5.575366988900825e-06, + "loss": 0.0, + "step": 7737 + }, + { + "epoch": 0.49867886833795194, + "grad_norm": 0.02539140787201507, + "learning_rate": 5.574650912996778e-06, + "loss": 0.0, + "step": 7738 + }, + { + "epoch": 0.49874331378488107, + "grad_norm": 0.0074664136120375325, + "learning_rate": 5.5739348370927325e-06, + "loss": 0.0, + "step": 7739 + }, + { + "epoch": 0.49880775923181025, + "grad_norm": 0.040856026752530014, + "learning_rate": 5.573218761188687e-06, + "loss": 0.0005, + "step": 7740 + }, + { + "epoch": 0.49887220467873944, + "grad_norm": 0.023396444870685918, + "learning_rate": 5.572502685284641e-06, + "loss": 0.0016, + "step": 7741 + }, + { + "epoch": 0.4989366501256686, + "grad_norm": 0.003881107778788916, + "learning_rate": 5.571786609380595e-06, + "loss": 0.0, + "step": 7742 + }, + { + "epoch": 0.4990010955725978, + "grad_norm": 0.004024760296975562, + "learning_rate": 5.571070533476549e-06, + "loss": 0.0, + "step": 7743 + }, + { + "epoch": 0.499065541019527, + "grad_norm": 0.005052507845867952, + "learning_rate": 5.570354457572503e-06, + "loss": 0.0001, + "step": 7744 + }, + { + "epoch": 0.4991299864664561, + "grad_norm": 0.006618753422869714, + "learning_rate": 5.5696383816684574e-06, + "loss": 0.0, + "step": 7745 + }, + { + "epoch": 0.4991944319133853, + "grad_norm": 0.013298532013776608, + "learning_rate": 5.568922305764412e-06, + "loss": 0.0, + "step": 7746 + }, + { + "epoch": 0.4992588773603145, + "grad_norm": 0.008402784516505377, + "learning_rate": 5.568206229860366e-06, + "loss": 0.0, + "step": 7747 + }, + { + "epoch": 0.4993233228072437, + "grad_norm": 0.028681718139751507, + "learning_rate": 5.5674901539563195e-06, + "loss": 0.0, + "step": 7748 + }, + { + "epoch": 0.49938776825417286, + "grad_norm": 0.0004790018685773785, + "learning_rate": 5.566774078052274e-06, + "loss": 0.0, + "step": 7749 + }, + { + "epoch": 0.49945221370110204, + "grad_norm": 0.00017217928028040734, + "learning_rate": 5.566058002148228e-06, + "loss": 0.0, + "step": 7750 + }, + { + "epoch": 0.4995166591480312, + "grad_norm": 0.004187983856702524, + "learning_rate": 5.565341926244182e-06, + "loss": 0.0, + "step": 7751 + }, + { + "epoch": 0.49958110459496036, + "grad_norm": 0.00661046670208516, + "learning_rate": 5.564625850340136e-06, + "loss": 0.0001, + "step": 7752 + }, + { + "epoch": 0.49964555004188954, + "grad_norm": 0.0028452297455472866, + "learning_rate": 5.56390977443609e-06, + "loss": 0.0, + "step": 7753 + }, + { + "epoch": 0.4997099954888187, + "grad_norm": 0.08567421038959883, + "learning_rate": 5.5631936985320444e-06, + "loss": 0.0003, + "step": 7754 + }, + { + "epoch": 0.4997744409357479, + "grad_norm": 0.000638038899302093, + "learning_rate": 5.562477622627999e-06, + "loss": 0.0, + "step": 7755 + }, + { + "epoch": 0.49983888638267704, + "grad_norm": 0.00032599765353913423, + "learning_rate": 5.561761546723953e-06, + "loss": 0.0, + "step": 7756 + }, + { + "epoch": 0.4999033318296062, + "grad_norm": 0.005907647457967332, + "learning_rate": 5.5610454708199065e-06, + "loss": 0.0, + "step": 7757 + }, + { + "epoch": 0.4999677772765354, + "grad_norm": 0.005853467747557248, + "learning_rate": 5.560329394915861e-06, + "loss": 0.0, + "step": 7758 + }, + { + "epoch": 0.5000322227234646, + "grad_norm": 0.001166157403481343, + "learning_rate": 5.559613319011816e-06, + "loss": 0.0, + "step": 7759 + }, + { + "epoch": 0.5000966681703938, + "grad_norm": 0.006691685794746546, + "learning_rate": 5.55889724310777e-06, + "loss": 0.0, + "step": 7760 + }, + { + "epoch": 0.500161113617323, + "grad_norm": 0.00032993997002244194, + "learning_rate": 5.5581811672037245e-06, + "loss": 0.0, + "step": 7761 + }, + { + "epoch": 0.5002255590642521, + "grad_norm": 0.010774148663307965, + "learning_rate": 5.557465091299679e-06, + "loss": 0.0001, + "step": 7762 + }, + { + "epoch": 0.5002900045111813, + "grad_norm": 0.04316844108864388, + "learning_rate": 5.556749015395633e-06, + "loss": 0.0001, + "step": 7763 + }, + { + "epoch": 0.5003544499581105, + "grad_norm": 0.1035563865516481, + "learning_rate": 5.5560329394915866e-06, + "loss": 0.0002, + "step": 7764 + }, + { + "epoch": 0.5004188954050396, + "grad_norm": 0.0013261507056000824, + "learning_rate": 5.555316863587541e-06, + "loss": 0.0, + "step": 7765 + }, + { + "epoch": 0.5004833408519688, + "grad_norm": 0.012714541640589294, + "learning_rate": 5.554600787683495e-06, + "loss": 0.0, + "step": 7766 + }, + { + "epoch": 0.500547786298898, + "grad_norm": 0.0031739563583639747, + "learning_rate": 5.5538847117794495e-06, + "loss": 0.0, + "step": 7767 + }, + { + "epoch": 0.5006122317458271, + "grad_norm": 0.11012735207591887, + "learning_rate": 5.553168635875403e-06, + "loss": 0.0001, + "step": 7768 + }, + { + "epoch": 0.5006766771927563, + "grad_norm": 0.015187950687640926, + "learning_rate": 5.552452559971357e-06, + "loss": 0.0001, + "step": 7769 + }, + { + "epoch": 0.5007411226396855, + "grad_norm": 0.013135091208567084, + "learning_rate": 5.5517364840673115e-06, + "loss": 0.0, + "step": 7770 + }, + { + "epoch": 0.5008055680866147, + "grad_norm": 0.2000327962569176, + "learning_rate": 5.551020408163266e-06, + "loss": 0.0003, + "step": 7771 + }, + { + "epoch": 0.5008700135335439, + "grad_norm": 0.00024363627308754418, + "learning_rate": 5.55030433225922e-06, + "loss": 0.0, + "step": 7772 + }, + { + "epoch": 0.5009344589804731, + "grad_norm": 4.0486426170484195e-05, + "learning_rate": 5.5495882563551736e-06, + "loss": 0.0, + "step": 7773 + }, + { + "epoch": 0.5009989044274022, + "grad_norm": 0.0044095567704248275, + "learning_rate": 5.548872180451128e-06, + "loss": 0.0, + "step": 7774 + }, + { + "epoch": 0.5010633498743314, + "grad_norm": 0.06624869983000084, + "learning_rate": 5.548156104547082e-06, + "loss": 0.0001, + "step": 7775 + }, + { + "epoch": 0.5011277953212605, + "grad_norm": 0.26731744305720906, + "learning_rate": 5.5474400286430365e-06, + "loss": 0.0012, + "step": 7776 + }, + { + "epoch": 0.5011922407681897, + "grad_norm": 0.0031713670464031084, + "learning_rate": 5.546723952738991e-06, + "loss": 0.0, + "step": 7777 + }, + { + "epoch": 0.5012566862151189, + "grad_norm": 0.10094297798692436, + "learning_rate": 5.546007876834944e-06, + "loss": 0.0002, + "step": 7778 + }, + { + "epoch": 0.5013211316620481, + "grad_norm": 0.0006766827932490896, + "learning_rate": 5.5452918009308985e-06, + "loss": 0.0, + "step": 7779 + }, + { + "epoch": 0.5013855771089772, + "grad_norm": 0.006504950923653288, + "learning_rate": 5.544575725026853e-06, + "loss": 0.0, + "step": 7780 + }, + { + "epoch": 0.5014500225559064, + "grad_norm": 0.002359928133011971, + "learning_rate": 5.543859649122807e-06, + "loss": 0.0, + "step": 7781 + }, + { + "epoch": 0.5015144680028356, + "grad_norm": 0.001259427859195719, + "learning_rate": 5.543143573218762e-06, + "loss": 0.0, + "step": 7782 + }, + { + "epoch": 0.5015789134497648, + "grad_norm": 4.103638976019595e-05, + "learning_rate": 5.5424274973147166e-06, + "loss": 0.0, + "step": 7783 + }, + { + "epoch": 0.501643358896694, + "grad_norm": 0.008854051233046384, + "learning_rate": 5.54171142141067e-06, + "loss": 0.0, + "step": 7784 + }, + { + "epoch": 0.5017078043436232, + "grad_norm": 0.0033936376934568737, + "learning_rate": 5.540995345506624e-06, + "loss": 0.0, + "step": 7785 + }, + { + "epoch": 0.5017722497905523, + "grad_norm": 0.023507155038777785, + "learning_rate": 5.540279269602579e-06, + "loss": 0.0, + "step": 7786 + }, + { + "epoch": 0.5018366952374814, + "grad_norm": 0.005942011229836717, + "learning_rate": 5.539563193698533e-06, + "loss": 0.0, + "step": 7787 + }, + { + "epoch": 0.5019011406844106, + "grad_norm": 0.02232280920114333, + "learning_rate": 5.538847117794487e-06, + "loss": 0.0001, + "step": 7788 + }, + { + "epoch": 0.5019655861313398, + "grad_norm": 0.0016590937530407013, + "learning_rate": 5.538131041890441e-06, + "loss": 0.0, + "step": 7789 + }, + { + "epoch": 0.502030031578269, + "grad_norm": 0.011643822116127455, + "learning_rate": 5.537414965986395e-06, + "loss": 0.0001, + "step": 7790 + }, + { + "epoch": 0.5020944770251982, + "grad_norm": 0.0005359369657540056, + "learning_rate": 5.536698890082349e-06, + "loss": 0.0, + "step": 7791 + }, + { + "epoch": 0.5021589224721273, + "grad_norm": 0.06418886703555608, + "learning_rate": 5.5359828141783035e-06, + "loss": 0.0, + "step": 7792 + }, + { + "epoch": 0.5022233679190565, + "grad_norm": 0.0025205363208524005, + "learning_rate": 5.535266738274258e-06, + "loss": 0.0, + "step": 7793 + }, + { + "epoch": 0.5022878133659857, + "grad_norm": 0.47469825069465993, + "learning_rate": 5.534550662370211e-06, + "loss": 0.0027, + "step": 7794 + }, + { + "epoch": 0.5023522588129149, + "grad_norm": 0.0067098508005370774, + "learning_rate": 5.533834586466166e-06, + "loss": 0.0, + "step": 7795 + }, + { + "epoch": 0.5024167042598441, + "grad_norm": 7.736586171454487e-05, + "learning_rate": 5.53311851056212e-06, + "loss": 0.0, + "step": 7796 + }, + { + "epoch": 0.5024811497067733, + "grad_norm": 0.0212810879829306, + "learning_rate": 5.532402434658074e-06, + "loss": 0.0, + "step": 7797 + }, + { + "epoch": 0.5025455951537023, + "grad_norm": 0.027915426011045134, + "learning_rate": 5.5316863587540285e-06, + "loss": 0.0001, + "step": 7798 + }, + { + "epoch": 0.5026100406006315, + "grad_norm": 0.0005632651672147035, + "learning_rate": 5.530970282849982e-06, + "loss": 0.0, + "step": 7799 + }, + { + "epoch": 0.5026744860475607, + "grad_norm": 0.20493163371728026, + "learning_rate": 5.530254206945936e-06, + "loss": 0.0008, + "step": 7800 + }, + { + "epoch": 0.5027389314944899, + "grad_norm": 0.015775780970922863, + "learning_rate": 5.5295381310418905e-06, + "loss": 0.0, + "step": 7801 + }, + { + "epoch": 0.5028033769414191, + "grad_norm": 0.06814150491643632, + "learning_rate": 5.528822055137845e-06, + "loss": 0.0001, + "step": 7802 + }, + { + "epoch": 0.5028678223883483, + "grad_norm": 0.18345450132435454, + "learning_rate": 5.528105979233799e-06, + "loss": 0.0005, + "step": 7803 + }, + { + "epoch": 0.5029322678352774, + "grad_norm": 0.008875562764333902, + "learning_rate": 5.527389903329753e-06, + "loss": 0.0001, + "step": 7804 + }, + { + "epoch": 0.5029967132822066, + "grad_norm": 0.00011833865029197304, + "learning_rate": 5.526673827425708e-06, + "loss": 0.0, + "step": 7805 + }, + { + "epoch": 0.5030611587291358, + "grad_norm": 0.001142839209205189, + "learning_rate": 5.525957751521662e-06, + "loss": 0.0, + "step": 7806 + }, + { + "epoch": 0.503125604176065, + "grad_norm": 0.0007741564340708393, + "learning_rate": 5.525241675617616e-06, + "loss": 0.0, + "step": 7807 + }, + { + "epoch": 0.5031900496229942, + "grad_norm": 0.06842654198181584, + "learning_rate": 5.524525599713571e-06, + "loss": 0.0002, + "step": 7808 + }, + { + "epoch": 0.5032544950699233, + "grad_norm": 0.0018370367967294632, + "learning_rate": 5.523809523809525e-06, + "loss": 0.0, + "step": 7809 + }, + { + "epoch": 0.5033189405168524, + "grad_norm": 0.0042387088958715, + "learning_rate": 5.523093447905478e-06, + "loss": 0.0, + "step": 7810 + }, + { + "epoch": 0.5033833859637816, + "grad_norm": 0.0014034877772028946, + "learning_rate": 5.522377372001433e-06, + "loss": 0.0, + "step": 7811 + }, + { + "epoch": 0.5034478314107108, + "grad_norm": 0.047558174581854164, + "learning_rate": 5.521661296097387e-06, + "loss": 0.0003, + "step": 7812 + }, + { + "epoch": 0.50351227685764, + "grad_norm": 0.11587947569488101, + "learning_rate": 5.520945220193341e-06, + "loss": 0.0001, + "step": 7813 + }, + { + "epoch": 0.5035767223045692, + "grad_norm": 0.0008404481830644785, + "learning_rate": 5.520229144289296e-06, + "loss": 0.0, + "step": 7814 + }, + { + "epoch": 0.5036411677514984, + "grad_norm": 0.027745073397463797, + "learning_rate": 5.519513068385249e-06, + "loss": 0.0002, + "step": 7815 + }, + { + "epoch": 0.5037056131984275, + "grad_norm": 0.0015664983895771825, + "learning_rate": 5.518796992481203e-06, + "loss": 0.0, + "step": 7816 + }, + { + "epoch": 0.5037700586453567, + "grad_norm": 4.358205768194176e-05, + "learning_rate": 5.518080916577158e-06, + "loss": 0.0, + "step": 7817 + }, + { + "epoch": 0.5038345040922859, + "grad_norm": 0.7133459022193046, + "learning_rate": 5.517364840673112e-06, + "loss": 0.0042, + "step": 7818 + }, + { + "epoch": 0.5038989495392151, + "grad_norm": 0.00675914167301981, + "learning_rate": 5.516648764769066e-06, + "loss": 0.0, + "step": 7819 + }, + { + "epoch": 0.5039633949861442, + "grad_norm": 0.4145019286793012, + "learning_rate": 5.51593268886502e-06, + "loss": 0.0018, + "step": 7820 + }, + { + "epoch": 0.5040278404330734, + "grad_norm": 0.0005312810452736112, + "learning_rate": 5.515216612960974e-06, + "loss": 0.0, + "step": 7821 + }, + { + "epoch": 0.5040922858800025, + "grad_norm": 0.24971380839961985, + "learning_rate": 5.514500537056928e-06, + "loss": 0.0043, + "step": 7822 + }, + { + "epoch": 0.5041567313269317, + "grad_norm": 0.11865125663479083, + "learning_rate": 5.5137844611528826e-06, + "loss": 0.0014, + "step": 7823 + }, + { + "epoch": 0.5042211767738609, + "grad_norm": 0.005578891068497698, + "learning_rate": 5.513068385248837e-06, + "loss": 0.0, + "step": 7824 + }, + { + "epoch": 0.5042856222207901, + "grad_norm": 0.00012931750021159341, + "learning_rate": 5.51235230934479e-06, + "loss": 0.0, + "step": 7825 + }, + { + "epoch": 0.5043500676677193, + "grad_norm": 0.6063908845861173, + "learning_rate": 5.511636233440745e-06, + "loss": 0.0016, + "step": 7826 + }, + { + "epoch": 0.5044145131146485, + "grad_norm": 0.00775264018639695, + "learning_rate": 5.510920157536699e-06, + "loss": 0.0, + "step": 7827 + }, + { + "epoch": 0.5044789585615777, + "grad_norm": 0.1251495777689951, + "learning_rate": 5.510204081632653e-06, + "loss": 0.0017, + "step": 7828 + }, + { + "epoch": 0.5045434040085068, + "grad_norm": 0.08034870037365004, + "learning_rate": 5.509488005728608e-06, + "loss": 0.0001, + "step": 7829 + }, + { + "epoch": 0.504607849455436, + "grad_norm": 0.18931312894291646, + "learning_rate": 5.508771929824563e-06, + "loss": 0.001, + "step": 7830 + }, + { + "epoch": 0.5046722949023651, + "grad_norm": 0.0001198111221057338, + "learning_rate": 5.508055853920516e-06, + "loss": 0.0, + "step": 7831 + }, + { + "epoch": 0.5047367403492943, + "grad_norm": 0.0008712099359854654, + "learning_rate": 5.50733977801647e-06, + "loss": 0.0, + "step": 7832 + }, + { + "epoch": 0.5048011857962235, + "grad_norm": 0.024833433088498218, + "learning_rate": 5.506623702112425e-06, + "loss": 0.0002, + "step": 7833 + }, + { + "epoch": 0.5048656312431526, + "grad_norm": 0.0011160004670191623, + "learning_rate": 5.505907626208379e-06, + "loss": 0.0, + "step": 7834 + }, + { + "epoch": 0.5049300766900818, + "grad_norm": 0.2044019086283718, + "learning_rate": 5.505191550304333e-06, + "loss": 0.0021, + "step": 7835 + }, + { + "epoch": 0.504994522137011, + "grad_norm": 0.0008533538186981164, + "learning_rate": 5.504475474400287e-06, + "loss": 0.0, + "step": 7836 + }, + { + "epoch": 0.5050589675839402, + "grad_norm": 0.000822287982651814, + "learning_rate": 5.503759398496241e-06, + "loss": 0.0, + "step": 7837 + }, + { + "epoch": 0.5051234130308694, + "grad_norm": 0.000461616928902722, + "learning_rate": 5.503043322592195e-06, + "loss": 0.0, + "step": 7838 + }, + { + "epoch": 0.5051878584777986, + "grad_norm": 0.0006680393711834995, + "learning_rate": 5.50232724668815e-06, + "loss": 0.0, + "step": 7839 + }, + { + "epoch": 0.5052523039247278, + "grad_norm": 8.234770240082646e-05, + "learning_rate": 5.501611170784104e-06, + "loss": 0.0, + "step": 7840 + }, + { + "epoch": 0.5053167493716569, + "grad_norm": 0.011275463051318033, + "learning_rate": 5.500895094880057e-06, + "loss": 0.0001, + "step": 7841 + }, + { + "epoch": 0.5053811948185861, + "grad_norm": 8.605929278122782e-05, + "learning_rate": 5.500179018976012e-06, + "loss": 0.0, + "step": 7842 + }, + { + "epoch": 0.5054456402655152, + "grad_norm": 0.678472155824014, + "learning_rate": 5.499462943071966e-06, + "loss": 0.0057, + "step": 7843 + }, + { + "epoch": 0.5055100857124444, + "grad_norm": 0.0004380668417542172, + "learning_rate": 5.49874686716792e-06, + "loss": 0.0, + "step": 7844 + }, + { + "epoch": 0.5055745311593736, + "grad_norm": 0.00023279857462329666, + "learning_rate": 5.498030791263875e-06, + "loss": 0.0, + "step": 7845 + }, + { + "epoch": 0.5056389766063027, + "grad_norm": 0.351107005576601, + "learning_rate": 5.497314715359828e-06, + "loss": 0.0008, + "step": 7846 + }, + { + "epoch": 0.5057034220532319, + "grad_norm": 0.014867512331617283, + "learning_rate": 5.496598639455782e-06, + "loss": 0.0, + "step": 7847 + }, + { + "epoch": 0.5057678675001611, + "grad_norm": 0.00021601504494369152, + "learning_rate": 5.495882563551737e-06, + "loss": 0.0, + "step": 7848 + }, + { + "epoch": 0.5058323129470903, + "grad_norm": 0.000571257955598517, + "learning_rate": 5.495166487647691e-06, + "loss": 0.0, + "step": 7849 + }, + { + "epoch": 0.5058967583940195, + "grad_norm": 0.10286979470954646, + "learning_rate": 5.494450411743644e-06, + "loss": 0.0003, + "step": 7850 + }, + { + "epoch": 0.5059612038409487, + "grad_norm": 0.011815902087395896, + "learning_rate": 5.493734335839599e-06, + "loss": 0.0001, + "step": 7851 + }, + { + "epoch": 0.5060256492878779, + "grad_norm": 0.004256208408072257, + "learning_rate": 5.493018259935554e-06, + "loss": 0.0, + "step": 7852 + }, + { + "epoch": 0.506090094734807, + "grad_norm": 0.0010554367991475037, + "learning_rate": 5.492302184031508e-06, + "loss": 0.0, + "step": 7853 + }, + { + "epoch": 0.5061545401817361, + "grad_norm": 0.0008940419510676588, + "learning_rate": 5.4915861081274624e-06, + "loss": 0.0, + "step": 7854 + }, + { + "epoch": 0.5062189856286653, + "grad_norm": 0.15635816883053835, + "learning_rate": 5.490870032223417e-06, + "loss": 0.0007, + "step": 7855 + }, + { + "epoch": 0.5062834310755945, + "grad_norm": 0.002856546237428052, + "learning_rate": 5.490153956319371e-06, + "loss": 0.0, + "step": 7856 + }, + { + "epoch": 0.5063478765225237, + "grad_norm": 0.13532431522261668, + "learning_rate": 5.4894378804153245e-06, + "loss": 0.0003, + "step": 7857 + }, + { + "epoch": 0.5064123219694528, + "grad_norm": 0.8924651293527359, + "learning_rate": 5.488721804511279e-06, + "loss": 0.0041, + "step": 7858 + }, + { + "epoch": 0.506476767416382, + "grad_norm": 0.0003431231341416375, + "learning_rate": 5.488005728607233e-06, + "loss": 0.0, + "step": 7859 + }, + { + "epoch": 0.5065412128633112, + "grad_norm": 0.14257876513611784, + "learning_rate": 5.487289652703187e-06, + "loss": 0.0004, + "step": 7860 + }, + { + "epoch": 0.5066056583102404, + "grad_norm": 0.5874093706149864, + "learning_rate": 5.486573576799142e-06, + "loss": 0.0022, + "step": 7861 + }, + { + "epoch": 0.5066701037571696, + "grad_norm": 0.0017878685428287893, + "learning_rate": 5.485857500895095e-06, + "loss": 0.0, + "step": 7862 + }, + { + "epoch": 0.5067345492040988, + "grad_norm": 0.041144057837388136, + "learning_rate": 5.4851414249910494e-06, + "loss": 0.0004, + "step": 7863 + }, + { + "epoch": 0.506798994651028, + "grad_norm": 0.003932462138094369, + "learning_rate": 5.484425349087004e-06, + "loss": 0.0, + "step": 7864 + }, + { + "epoch": 0.506863440097957, + "grad_norm": 0.0006095626648205814, + "learning_rate": 5.483709273182958e-06, + "loss": 0.0, + "step": 7865 + }, + { + "epoch": 0.5069278855448862, + "grad_norm": 0.0048959003447374555, + "learning_rate": 5.4829931972789115e-06, + "loss": 0.0, + "step": 7866 + }, + { + "epoch": 0.5069923309918154, + "grad_norm": 0.018113076815429438, + "learning_rate": 5.482277121374866e-06, + "loss": 0.0001, + "step": 7867 + }, + { + "epoch": 0.5070567764387446, + "grad_norm": 0.010513576606503637, + "learning_rate": 5.48156104547082e-06, + "loss": 0.0, + "step": 7868 + }, + { + "epoch": 0.5071212218856738, + "grad_norm": 0.003912950009431698, + "learning_rate": 5.480844969566774e-06, + "loss": 0.0, + "step": 7869 + }, + { + "epoch": 0.507185667332603, + "grad_norm": 0.0023382469396162698, + "learning_rate": 5.480128893662729e-06, + "loss": 0.0, + "step": 7870 + }, + { + "epoch": 0.5072501127795321, + "grad_norm": 0.021264051917344883, + "learning_rate": 5.479412817758682e-06, + "loss": 0.0, + "step": 7871 + }, + { + "epoch": 0.5073145582264613, + "grad_norm": 0.04142688589003656, + "learning_rate": 5.4786967418546364e-06, + "loss": 0.0001, + "step": 7872 + }, + { + "epoch": 0.5073790036733905, + "grad_norm": 0.34118988635976827, + "learning_rate": 5.477980665950591e-06, + "loss": 0.0026, + "step": 7873 + }, + { + "epoch": 0.5074434491203197, + "grad_norm": 0.1676696231890971, + "learning_rate": 5.477264590046545e-06, + "loss": 0.0003, + "step": 7874 + }, + { + "epoch": 0.5075078945672489, + "grad_norm": 0.07809003405108669, + "learning_rate": 5.4765485141425e-06, + "loss": 0.0005, + "step": 7875 + }, + { + "epoch": 0.507572340014178, + "grad_norm": 0.016567306202172546, + "learning_rate": 5.4758324382384545e-06, + "loss": 0.0001, + "step": 7876 + }, + { + "epoch": 0.5076367854611071, + "grad_norm": 0.7576097826830628, + "learning_rate": 5.475116362334409e-06, + "loss": 0.0029, + "step": 7877 + }, + { + "epoch": 0.5077012309080363, + "grad_norm": 0.14010409729777418, + "learning_rate": 5.474400286430362e-06, + "loss": 0.0001, + "step": 7878 + }, + { + "epoch": 0.5077656763549655, + "grad_norm": 0.005552410463870406, + "learning_rate": 5.4736842105263165e-06, + "loss": 0.0, + "step": 7879 + }, + { + "epoch": 0.5078301218018947, + "grad_norm": 0.006620169711987851, + "learning_rate": 5.472968134622271e-06, + "loss": 0.0, + "step": 7880 + }, + { + "epoch": 0.5078945672488239, + "grad_norm": 1.0878805985322575, + "learning_rate": 5.472252058718225e-06, + "loss": 0.0043, + "step": 7881 + }, + { + "epoch": 0.507959012695753, + "grad_norm": 0.003664526663129976, + "learning_rate": 5.4715359828141786e-06, + "loss": 0.0, + "step": 7882 + }, + { + "epoch": 0.5080234581426822, + "grad_norm": 0.051065456594272696, + "learning_rate": 5.470819906910133e-06, + "loss": 0.0001, + "step": 7883 + }, + { + "epoch": 0.5080879035896114, + "grad_norm": 0.0010883020255493963, + "learning_rate": 5.470103831006087e-06, + "loss": 0.0, + "step": 7884 + }, + { + "epoch": 0.5081523490365406, + "grad_norm": 0.3437544432266218, + "learning_rate": 5.4693877551020415e-06, + "loss": 0.0007, + "step": 7885 + }, + { + "epoch": 0.5082167944834698, + "grad_norm": 0.09143956119452665, + "learning_rate": 5.468671679197996e-06, + "loss": 0.0001, + "step": 7886 + }, + { + "epoch": 0.5082812399303989, + "grad_norm": 0.0005676533329750051, + "learning_rate": 5.467955603293949e-06, + "loss": 0.0, + "step": 7887 + }, + { + "epoch": 0.508345685377328, + "grad_norm": 0.14704188747214672, + "learning_rate": 5.4672395273899035e-06, + "loss": 0.0001, + "step": 7888 + }, + { + "epoch": 0.5084101308242572, + "grad_norm": 0.0025846929900272945, + "learning_rate": 5.466523451485858e-06, + "loss": 0.0, + "step": 7889 + }, + { + "epoch": 0.5084745762711864, + "grad_norm": 0.009612976558434051, + "learning_rate": 5.465807375581812e-06, + "loss": 0.0, + "step": 7890 + }, + { + "epoch": 0.5085390217181156, + "grad_norm": 0.2334955803368132, + "learning_rate": 5.465091299677766e-06, + "loss": 0.0007, + "step": 7891 + }, + { + "epoch": 0.5086034671650448, + "grad_norm": 0.039201922748705605, + "learning_rate": 5.46437522377372e-06, + "loss": 0.0001, + "step": 7892 + }, + { + "epoch": 0.508667912611974, + "grad_norm": 0.06696498251734098, + "learning_rate": 5.463659147869674e-06, + "loss": 0.0001, + "step": 7893 + }, + { + "epoch": 0.5087323580589032, + "grad_norm": 0.020123397663725916, + "learning_rate": 5.4629430719656285e-06, + "loss": 0.0001, + "step": 7894 + }, + { + "epoch": 0.5087968035058323, + "grad_norm": 0.010950162090299425, + "learning_rate": 5.462226996061583e-06, + "loss": 0.0001, + "step": 7895 + }, + { + "epoch": 0.5088612489527615, + "grad_norm": 0.005105569536457743, + "learning_rate": 5.461510920157537e-06, + "loss": 0.0, + "step": 7896 + }, + { + "epoch": 0.5089256943996907, + "grad_norm": 0.0021593518982048537, + "learning_rate": 5.4607948442534905e-06, + "loss": 0.0, + "step": 7897 + }, + { + "epoch": 0.5089901398466198, + "grad_norm": 0.007026752353424155, + "learning_rate": 5.460078768349445e-06, + "loss": 0.0, + "step": 7898 + }, + { + "epoch": 0.509054585293549, + "grad_norm": 0.0012650142913674938, + "learning_rate": 5.4593626924454e-06, + "loss": 0.0, + "step": 7899 + }, + { + "epoch": 0.5091190307404782, + "grad_norm": 0.014156076340900966, + "learning_rate": 5.458646616541354e-06, + "loss": 0.0001, + "step": 7900 + }, + { + "epoch": 0.5091834761874073, + "grad_norm": 0.17339007764593933, + "learning_rate": 5.4579305406373086e-06, + "loss": 0.0008, + "step": 7901 + }, + { + "epoch": 0.5092479216343365, + "grad_norm": 0.0112315500039506, + "learning_rate": 5.457214464733263e-06, + "loss": 0.0, + "step": 7902 + }, + { + "epoch": 0.5093123670812657, + "grad_norm": 0.01594386663439557, + "learning_rate": 5.456498388829216e-06, + "loss": 0.0002, + "step": 7903 + }, + { + "epoch": 0.5093768125281949, + "grad_norm": 0.024997468723359482, + "learning_rate": 5.455782312925171e-06, + "loss": 0.0001, + "step": 7904 + }, + { + "epoch": 0.5094412579751241, + "grad_norm": 0.005159837593019308, + "learning_rate": 5.455066237021125e-06, + "loss": 0.0, + "step": 7905 + }, + { + "epoch": 0.5095057034220533, + "grad_norm": 0.013976639008599155, + "learning_rate": 5.454350161117079e-06, + "loss": 0.0001, + "step": 7906 + }, + { + "epoch": 0.5095701488689824, + "grad_norm": 1.2407112226143289, + "learning_rate": 5.4536340852130335e-06, + "loss": 0.0017, + "step": 7907 + }, + { + "epoch": 0.5096345943159116, + "grad_norm": 0.0009534155375335451, + "learning_rate": 5.452918009308987e-06, + "loss": 0.0, + "step": 7908 + }, + { + "epoch": 0.5096990397628407, + "grad_norm": 0.0010277082009812432, + "learning_rate": 5.452201933404941e-06, + "loss": 0.0, + "step": 7909 + }, + { + "epoch": 0.5097634852097699, + "grad_norm": 0.012268255597643768, + "learning_rate": 5.4514858575008955e-06, + "loss": 0.0, + "step": 7910 + }, + { + "epoch": 0.5098279306566991, + "grad_norm": 0.05539452184990833, + "learning_rate": 5.45076978159685e-06, + "loss": 0.0003, + "step": 7911 + }, + { + "epoch": 0.5098923761036283, + "grad_norm": 0.002352447691784602, + "learning_rate": 5.450053705692804e-06, + "loss": 0.0, + "step": 7912 + }, + { + "epoch": 0.5099568215505574, + "grad_norm": 0.000641193713048714, + "learning_rate": 5.449337629788758e-06, + "loss": 0.0, + "step": 7913 + }, + { + "epoch": 0.5100212669974866, + "grad_norm": 0.026912041162248948, + "learning_rate": 5.448621553884712e-06, + "loss": 0.0001, + "step": 7914 + }, + { + "epoch": 0.5100857124444158, + "grad_norm": 0.007920404774023341, + "learning_rate": 5.447905477980666e-06, + "loss": 0.0001, + "step": 7915 + }, + { + "epoch": 0.510150157891345, + "grad_norm": 0.252571013657588, + "learning_rate": 5.4471894020766205e-06, + "loss": 0.0003, + "step": 7916 + }, + { + "epoch": 0.5102146033382742, + "grad_norm": 0.006732229568060559, + "learning_rate": 5.446473326172575e-06, + "loss": 0.0, + "step": 7917 + }, + { + "epoch": 0.5102790487852034, + "grad_norm": 0.008280544471657686, + "learning_rate": 5.445757250268528e-06, + "loss": 0.0, + "step": 7918 + }, + { + "epoch": 0.5103434942321325, + "grad_norm": 0.003950611251451125, + "learning_rate": 5.4450411743644825e-06, + "loss": 0.0, + "step": 7919 + }, + { + "epoch": 0.5104079396790617, + "grad_norm": 0.20482560914732528, + "learning_rate": 5.444325098460437e-06, + "loss": 0.0017, + "step": 7920 + }, + { + "epoch": 0.5104723851259908, + "grad_norm": 0.13401776278697505, + "learning_rate": 5.443609022556391e-06, + "loss": 0.0002, + "step": 7921 + }, + { + "epoch": 0.51053683057292, + "grad_norm": 0.07180763760137501, + "learning_rate": 5.442892946652346e-06, + "loss": 0.0002, + "step": 7922 + }, + { + "epoch": 0.5106012760198492, + "grad_norm": 0.015309862017110912, + "learning_rate": 5.442176870748301e-06, + "loss": 0.0, + "step": 7923 + }, + { + "epoch": 0.5106657214667784, + "grad_norm": 0.0011142132569021373, + "learning_rate": 5.441460794844254e-06, + "loss": 0.0, + "step": 7924 + }, + { + "epoch": 0.5107301669137075, + "grad_norm": 0.0024562087064645146, + "learning_rate": 5.440744718940208e-06, + "loss": 0.0, + "step": 7925 + }, + { + "epoch": 0.5107946123606367, + "grad_norm": 0.0013992305215337778, + "learning_rate": 5.440028643036163e-06, + "loss": 0.0, + "step": 7926 + }, + { + "epoch": 0.5108590578075659, + "grad_norm": 0.0032199139412421437, + "learning_rate": 5.439312567132117e-06, + "loss": 0.0, + "step": 7927 + }, + { + "epoch": 0.5109235032544951, + "grad_norm": 0.23834775559403137, + "learning_rate": 5.438596491228071e-06, + "loss": 0.0019, + "step": 7928 + }, + { + "epoch": 0.5109879487014243, + "grad_norm": 0.0745276654029238, + "learning_rate": 5.437880415324025e-06, + "loss": 0.0001, + "step": 7929 + }, + { + "epoch": 0.5110523941483535, + "grad_norm": 0.004944950771004873, + "learning_rate": 5.437164339419979e-06, + "loss": 0.0, + "step": 7930 + }, + { + "epoch": 0.5111168395952826, + "grad_norm": 0.00038942084221351607, + "learning_rate": 5.436448263515933e-06, + "loss": 0.0, + "step": 7931 + }, + { + "epoch": 0.5111812850422117, + "grad_norm": 0.0005447171405046528, + "learning_rate": 5.435732187611888e-06, + "loss": 0.0, + "step": 7932 + }, + { + "epoch": 0.5112457304891409, + "grad_norm": 0.002356542261505252, + "learning_rate": 5.435016111707842e-06, + "loss": 0.0, + "step": 7933 + }, + { + "epoch": 0.5113101759360701, + "grad_norm": 0.0012706526169710428, + "learning_rate": 5.434300035803795e-06, + "loss": 0.0, + "step": 7934 + }, + { + "epoch": 0.5113746213829993, + "grad_norm": 0.07874067125905353, + "learning_rate": 5.43358395989975e-06, + "loss": 0.0001, + "step": 7935 + }, + { + "epoch": 0.5114390668299285, + "grad_norm": 0.001029995249023795, + "learning_rate": 5.432867883995704e-06, + "loss": 0.0, + "step": 7936 + }, + { + "epoch": 0.5115035122768576, + "grad_norm": 0.001838396980398961, + "learning_rate": 5.432151808091658e-06, + "loss": 0.0, + "step": 7937 + }, + { + "epoch": 0.5115679577237868, + "grad_norm": 0.0005883734590697582, + "learning_rate": 5.4314357321876125e-06, + "loss": 0.0, + "step": 7938 + }, + { + "epoch": 0.511632403170716, + "grad_norm": 0.0172669531723459, + "learning_rate": 5.430719656283566e-06, + "loss": 0.0, + "step": 7939 + }, + { + "epoch": 0.5116968486176452, + "grad_norm": 0.0009709076387414066, + "learning_rate": 5.43000358037952e-06, + "loss": 0.0, + "step": 7940 + }, + { + "epoch": 0.5117612940645744, + "grad_norm": 0.008330469998503403, + "learning_rate": 5.4292875044754746e-06, + "loss": 0.0001, + "step": 7941 + }, + { + "epoch": 0.5118257395115036, + "grad_norm": 0.008222742107848081, + "learning_rate": 5.428571428571429e-06, + "loss": 0.0, + "step": 7942 + }, + { + "epoch": 0.5118901849584326, + "grad_norm": 0.0035724153632476397, + "learning_rate": 5.427855352667382e-06, + "loss": 0.0, + "step": 7943 + }, + { + "epoch": 0.5119546304053618, + "grad_norm": 0.0916269789696049, + "learning_rate": 5.427139276763337e-06, + "loss": 0.0002, + "step": 7944 + }, + { + "epoch": 0.512019075852291, + "grad_norm": 0.005102800168367895, + "learning_rate": 5.426423200859291e-06, + "loss": 0.0, + "step": 7945 + }, + { + "epoch": 0.5120835212992202, + "grad_norm": 0.11785411775680807, + "learning_rate": 5.425707124955246e-06, + "loss": 0.0001, + "step": 7946 + }, + { + "epoch": 0.5121479667461494, + "grad_norm": 0.006188772612846142, + "learning_rate": 5.4249910490512e-06, + "loss": 0.0, + "step": 7947 + }, + { + "epoch": 0.5122124121930786, + "grad_norm": 0.0010159282271246758, + "learning_rate": 5.424274973147155e-06, + "loss": 0.0, + "step": 7948 + }, + { + "epoch": 0.5122768576400077, + "grad_norm": 0.05386086260127591, + "learning_rate": 5.423558897243109e-06, + "loss": 0.0002, + "step": 7949 + }, + { + "epoch": 0.5123413030869369, + "grad_norm": 0.15529134635092198, + "learning_rate": 5.422842821339062e-06, + "loss": 0.0002, + "step": 7950 + }, + { + "epoch": 0.5124057485338661, + "grad_norm": 0.01658001019660191, + "learning_rate": 5.422126745435017e-06, + "loss": 0.0001, + "step": 7951 + }, + { + "epoch": 0.5124701939807953, + "grad_norm": 0.0009362448979712939, + "learning_rate": 5.421410669530971e-06, + "loss": 0.0, + "step": 7952 + }, + { + "epoch": 0.5125346394277245, + "grad_norm": 0.00837422541313603, + "learning_rate": 5.420694593626925e-06, + "loss": 0.0001, + "step": 7953 + }, + { + "epoch": 0.5125990848746536, + "grad_norm": 0.015030309507146898, + "learning_rate": 5.41997851772288e-06, + "loss": 0.0, + "step": 7954 + }, + { + "epoch": 0.5126635303215827, + "grad_norm": 0.00020856912114475138, + "learning_rate": 5.419262441818833e-06, + "loss": 0.0, + "step": 7955 + }, + { + "epoch": 0.5127279757685119, + "grad_norm": 0.04200720795641049, + "learning_rate": 5.418546365914787e-06, + "loss": 0.0001, + "step": 7956 + }, + { + "epoch": 0.5127924212154411, + "grad_norm": 3.0653002645703564, + "learning_rate": 5.417830290010742e-06, + "loss": 0.0219, + "step": 7957 + }, + { + "epoch": 0.5128568666623703, + "grad_norm": 0.0005120299741213488, + "learning_rate": 5.417114214106696e-06, + "loss": 0.0, + "step": 7958 + }, + { + "epoch": 0.5129213121092995, + "grad_norm": 0.03933544465942909, + "learning_rate": 5.416398138202649e-06, + "loss": 0.0001, + "step": 7959 + }, + { + "epoch": 0.5129857575562287, + "grad_norm": 0.0012020741314226874, + "learning_rate": 5.415682062298604e-06, + "loss": 0.0, + "step": 7960 + }, + { + "epoch": 0.5130502030031578, + "grad_norm": 0.0005658398741220849, + "learning_rate": 5.414965986394558e-06, + "loss": 0.0, + "step": 7961 + }, + { + "epoch": 0.513114648450087, + "grad_norm": 0.017119137317332913, + "learning_rate": 5.414249910490512e-06, + "loss": 0.0002, + "step": 7962 + }, + { + "epoch": 0.5131790938970162, + "grad_norm": 0.002558508419807508, + "learning_rate": 5.413533834586467e-06, + "loss": 0.0, + "step": 7963 + }, + { + "epoch": 0.5132435393439454, + "grad_norm": 0.0348350584973022, + "learning_rate": 5.41281775868242e-06, + "loss": 0.0002, + "step": 7964 + }, + { + "epoch": 0.5133079847908745, + "grad_norm": 0.0015763089749261437, + "learning_rate": 5.412101682778374e-06, + "loss": 0.0, + "step": 7965 + }, + { + "epoch": 0.5133724302378037, + "grad_norm": 0.073376589482896, + "learning_rate": 5.411385606874329e-06, + "loss": 0.0001, + "step": 7966 + }, + { + "epoch": 0.5134368756847328, + "grad_norm": 0.008660317254924959, + "learning_rate": 5.410669530970283e-06, + "loss": 0.0, + "step": 7967 + }, + { + "epoch": 0.513501321131662, + "grad_norm": 0.00013401005881552363, + "learning_rate": 5.409953455066237e-06, + "loss": 0.0, + "step": 7968 + }, + { + "epoch": 0.5135657665785912, + "grad_norm": 0.009542936602791867, + "learning_rate": 5.409237379162192e-06, + "loss": 0.0, + "step": 7969 + }, + { + "epoch": 0.5136302120255204, + "grad_norm": 0.06508845696056229, + "learning_rate": 5.408521303258147e-06, + "loss": 0.0002, + "step": 7970 + }, + { + "epoch": 0.5136946574724496, + "grad_norm": 0.00033976029903329643, + "learning_rate": 5.4078052273541e-06, + "loss": 0.0, + "step": 7971 + }, + { + "epoch": 0.5137591029193788, + "grad_norm": 0.0012616412870784784, + "learning_rate": 5.4070891514500544e-06, + "loss": 0.0, + "step": 7972 + }, + { + "epoch": 0.513823548366308, + "grad_norm": 0.03339331166490907, + "learning_rate": 5.406373075546009e-06, + "loss": 0.0001, + "step": 7973 + }, + { + "epoch": 0.5138879938132371, + "grad_norm": 0.11922746760816738, + "learning_rate": 5.405656999641963e-06, + "loss": 0.0002, + "step": 7974 + }, + { + "epoch": 0.5139524392601663, + "grad_norm": 0.018733981766930537, + "learning_rate": 5.4049409237379165e-06, + "loss": 0.0001, + "step": 7975 + }, + { + "epoch": 0.5140168847070954, + "grad_norm": 0.1374454530979648, + "learning_rate": 5.404224847833871e-06, + "loss": 0.0002, + "step": 7976 + }, + { + "epoch": 0.5140813301540246, + "grad_norm": 0.2097619022628654, + "learning_rate": 5.403508771929825e-06, + "loss": 0.001, + "step": 7977 + }, + { + "epoch": 0.5141457756009538, + "grad_norm": 0.0008893098012604791, + "learning_rate": 5.402792696025779e-06, + "loss": 0.0, + "step": 7978 + }, + { + "epoch": 0.5142102210478829, + "grad_norm": 0.00014497031483067468, + "learning_rate": 5.402076620121734e-06, + "loss": 0.0, + "step": 7979 + }, + { + "epoch": 0.5142746664948121, + "grad_norm": 0.002657034458652046, + "learning_rate": 5.401360544217687e-06, + "loss": 0.0, + "step": 7980 + }, + { + "epoch": 0.5143391119417413, + "grad_norm": 0.001948556525665166, + "learning_rate": 5.4006444683136414e-06, + "loss": 0.0, + "step": 7981 + }, + { + "epoch": 0.5144035573886705, + "grad_norm": 0.0023215001057442755, + "learning_rate": 5.399928392409596e-06, + "loss": 0.0, + "step": 7982 + }, + { + "epoch": 0.5144680028355997, + "grad_norm": 0.008492666356822762, + "learning_rate": 5.39921231650555e-06, + "loss": 0.0, + "step": 7983 + }, + { + "epoch": 0.5145324482825289, + "grad_norm": 0.006237242050533532, + "learning_rate": 5.398496240601504e-06, + "loss": 0.0, + "step": 7984 + }, + { + "epoch": 0.514596893729458, + "grad_norm": 0.2957292205100141, + "learning_rate": 5.397780164697458e-06, + "loss": 0.0025, + "step": 7985 + }, + { + "epoch": 0.5146613391763872, + "grad_norm": 0.0006000581207181238, + "learning_rate": 5.397064088793412e-06, + "loss": 0.0, + "step": 7986 + }, + { + "epoch": 0.5147257846233164, + "grad_norm": 0.0009272405372127528, + "learning_rate": 5.396348012889366e-06, + "loss": 0.0, + "step": 7987 + }, + { + "epoch": 0.5147902300702455, + "grad_norm": 0.22983715021964995, + "learning_rate": 5.395631936985321e-06, + "loss": 0.0005, + "step": 7988 + }, + { + "epoch": 0.5148546755171747, + "grad_norm": 0.0018526735076136297, + "learning_rate": 5.394915861081275e-06, + "loss": 0.0, + "step": 7989 + }, + { + "epoch": 0.5149191209641039, + "grad_norm": 5.234039816139064e-05, + "learning_rate": 5.3941997851772284e-06, + "loss": 0.0, + "step": 7990 + }, + { + "epoch": 0.514983566411033, + "grad_norm": 1.0890406449039203, + "learning_rate": 5.393483709273183e-06, + "loss": 0.0013, + "step": 7991 + }, + { + "epoch": 0.5150480118579622, + "grad_norm": 0.0010511426046532501, + "learning_rate": 5.392767633369138e-06, + "loss": 0.0, + "step": 7992 + }, + { + "epoch": 0.5151124573048914, + "grad_norm": 0.0011704403448952318, + "learning_rate": 5.392051557465092e-06, + "loss": 0.0, + "step": 7993 + }, + { + "epoch": 0.5151769027518206, + "grad_norm": 0.001237749221018552, + "learning_rate": 5.3913354815610465e-06, + "loss": 0.0, + "step": 7994 + }, + { + "epoch": 0.5152413481987498, + "grad_norm": 0.03349309412923939, + "learning_rate": 5.390619405657001e-06, + "loss": 0.0002, + "step": 7995 + }, + { + "epoch": 0.515305793645679, + "grad_norm": 0.16808593343973655, + "learning_rate": 5.389903329752954e-06, + "loss": 0.0034, + "step": 7996 + }, + { + "epoch": 0.5153702390926082, + "grad_norm": 0.0013906726617820943, + "learning_rate": 5.3891872538489085e-06, + "loss": 0.0, + "step": 7997 + }, + { + "epoch": 0.5154346845395373, + "grad_norm": 0.0045494031450296355, + "learning_rate": 5.388471177944863e-06, + "loss": 0.0, + "step": 7998 + }, + { + "epoch": 0.5154991299864664, + "grad_norm": 0.007636372474605849, + "learning_rate": 5.387755102040817e-06, + "loss": 0.0, + "step": 7999 + }, + { + "epoch": 0.5155635754333956, + "grad_norm": 0.0015319027312418868, + "learning_rate": 5.387039026136771e-06, + "loss": 0.0, + "step": 8000 + }, + { + "epoch": 0.5156280208803248, + "grad_norm": 0.004652866648377999, + "learning_rate": 5.386322950232725e-06, + "loss": 0.0, + "step": 8001 + }, + { + "epoch": 0.515692466327254, + "grad_norm": 0.0018567965467686757, + "learning_rate": 5.385606874328679e-06, + "loss": 0.0, + "step": 8002 + }, + { + "epoch": 0.5157569117741831, + "grad_norm": 0.038382413997460466, + "learning_rate": 5.3848907984246335e-06, + "loss": 0.0001, + "step": 8003 + }, + { + "epoch": 0.5158213572211123, + "grad_norm": 0.0003577640989233756, + "learning_rate": 5.384174722520588e-06, + "loss": 0.0, + "step": 8004 + }, + { + "epoch": 0.5158858026680415, + "grad_norm": 0.19394153080352525, + "learning_rate": 5.383458646616542e-06, + "loss": 0.0002, + "step": 8005 + }, + { + "epoch": 0.5159502481149707, + "grad_norm": 0.0043557760796160425, + "learning_rate": 5.3827425707124955e-06, + "loss": 0.0, + "step": 8006 + }, + { + "epoch": 0.5160146935618999, + "grad_norm": 0.26720070499559867, + "learning_rate": 5.38202649480845e-06, + "loss": 0.0005, + "step": 8007 + }, + { + "epoch": 0.5160791390088291, + "grad_norm": 0.1252867271194741, + "learning_rate": 5.381310418904404e-06, + "loss": 0.0002, + "step": 8008 + }, + { + "epoch": 0.5161435844557583, + "grad_norm": 0.00033245680958482986, + "learning_rate": 5.380594343000358e-06, + "loss": 0.0, + "step": 8009 + }, + { + "epoch": 0.5162080299026873, + "grad_norm": 0.0025579589688075007, + "learning_rate": 5.379878267096313e-06, + "loss": 0.0, + "step": 8010 + }, + { + "epoch": 0.5162724753496165, + "grad_norm": 0.009223071042598662, + "learning_rate": 5.379162191192266e-06, + "loss": 0.0, + "step": 8011 + }, + { + "epoch": 0.5163369207965457, + "grad_norm": 0.5300760762477967, + "learning_rate": 5.3784461152882205e-06, + "loss": 0.0006, + "step": 8012 + }, + { + "epoch": 0.5164013662434749, + "grad_norm": 0.011043418132426576, + "learning_rate": 5.377730039384175e-06, + "loss": 0.0, + "step": 8013 + }, + { + "epoch": 0.5164658116904041, + "grad_norm": 0.00021837471094835563, + "learning_rate": 5.377013963480129e-06, + "loss": 0.0, + "step": 8014 + }, + { + "epoch": 0.5165302571373332, + "grad_norm": 0.3276306356442377, + "learning_rate": 5.376297887576083e-06, + "loss": 0.0008, + "step": 8015 + }, + { + "epoch": 0.5165947025842624, + "grad_norm": 0.2765679994672126, + "learning_rate": 5.3755818116720385e-06, + "loss": 0.0008, + "step": 8016 + }, + { + "epoch": 0.5166591480311916, + "grad_norm": 0.18362511711818005, + "learning_rate": 5.374865735767992e-06, + "loss": 0.001, + "step": 8017 + }, + { + "epoch": 0.5167235934781208, + "grad_norm": 0.002038531219077466, + "learning_rate": 5.374149659863946e-06, + "loss": 0.0, + "step": 8018 + }, + { + "epoch": 0.51678803892505, + "grad_norm": 0.00524951653881258, + "learning_rate": 5.3734335839599006e-06, + "loss": 0.0, + "step": 8019 + }, + { + "epoch": 0.5168524843719792, + "grad_norm": 0.003735285914029635, + "learning_rate": 5.372717508055855e-06, + "loss": 0.0, + "step": 8020 + }, + { + "epoch": 0.5169169298189082, + "grad_norm": 0.048634071984891634, + "learning_rate": 5.372001432151809e-06, + "loss": 0.0001, + "step": 8021 + }, + { + "epoch": 0.5169813752658374, + "grad_norm": 0.007575055206033924, + "learning_rate": 5.371285356247763e-06, + "loss": 0.0, + "step": 8022 + }, + { + "epoch": 0.5170458207127666, + "grad_norm": 0.0896404079922022, + "learning_rate": 5.370569280343717e-06, + "loss": 0.001, + "step": 8023 + }, + { + "epoch": 0.5171102661596958, + "grad_norm": 0.0009642160243532904, + "learning_rate": 5.369853204439671e-06, + "loss": 0.0, + "step": 8024 + }, + { + "epoch": 0.517174711606625, + "grad_norm": 9.580468636606124e-05, + "learning_rate": 5.3691371285356255e-06, + "loss": 0.0, + "step": 8025 + }, + { + "epoch": 0.5172391570535542, + "grad_norm": 0.00038877102939403603, + "learning_rate": 5.36842105263158e-06, + "loss": 0.0, + "step": 8026 + }, + { + "epoch": 0.5173036025004834, + "grad_norm": 0.017995206744111487, + "learning_rate": 5.367704976727533e-06, + "loss": 0.0, + "step": 8027 + }, + { + "epoch": 0.5173680479474125, + "grad_norm": 0.00025071646488426043, + "learning_rate": 5.3669889008234875e-06, + "loss": 0.0, + "step": 8028 + }, + { + "epoch": 0.5174324933943417, + "grad_norm": 0.5368235711987875, + "learning_rate": 5.366272824919442e-06, + "loss": 0.003, + "step": 8029 + }, + { + "epoch": 0.5174969388412709, + "grad_norm": 0.5099628110958586, + "learning_rate": 5.365556749015396e-06, + "loss": 0.0036, + "step": 8030 + }, + { + "epoch": 0.5175613842882001, + "grad_norm": 0.016237080453299548, + "learning_rate": 5.3648406731113504e-06, + "loss": 0.0001, + "step": 8031 + }, + { + "epoch": 0.5176258297351292, + "grad_norm": 0.037514827573428736, + "learning_rate": 5.364124597207304e-06, + "loss": 0.0002, + "step": 8032 + }, + { + "epoch": 0.5176902751820583, + "grad_norm": 0.007461747262392446, + "learning_rate": 5.363408521303258e-06, + "loss": 0.0, + "step": 8033 + }, + { + "epoch": 0.5177547206289875, + "grad_norm": 0.003969606536505866, + "learning_rate": 5.3626924453992125e-06, + "loss": 0.0, + "step": 8034 + }, + { + "epoch": 0.5178191660759167, + "grad_norm": 0.0012055136054845725, + "learning_rate": 5.361976369495167e-06, + "loss": 0.0, + "step": 8035 + }, + { + "epoch": 0.5178836115228459, + "grad_norm": 0.04324137427823979, + "learning_rate": 5.36126029359112e-06, + "loss": 0.0002, + "step": 8036 + }, + { + "epoch": 0.5179480569697751, + "grad_norm": 0.022892261141917725, + "learning_rate": 5.3605442176870745e-06, + "loss": 0.0, + "step": 8037 + }, + { + "epoch": 0.5180125024167043, + "grad_norm": 0.0011791094418522764, + "learning_rate": 5.359828141783029e-06, + "loss": 0.0, + "step": 8038 + }, + { + "epoch": 0.5180769478636335, + "grad_norm": 0.01512018866252014, + "learning_rate": 5.359112065878984e-06, + "loss": 0.0, + "step": 8039 + }, + { + "epoch": 0.5181413933105626, + "grad_norm": 0.17008817142783822, + "learning_rate": 5.358395989974938e-06, + "loss": 0.0019, + "step": 8040 + }, + { + "epoch": 0.5182058387574918, + "grad_norm": 0.06252434748028268, + "learning_rate": 5.357679914070893e-06, + "loss": 0.0003, + "step": 8041 + }, + { + "epoch": 0.518270284204421, + "grad_norm": 0.0023996196919451067, + "learning_rate": 5.356963838166847e-06, + "loss": 0.0, + "step": 8042 + }, + { + "epoch": 0.5183347296513501, + "grad_norm": 0.0007908011827102855, + "learning_rate": 5.3562477622628e-06, + "loss": 0.0, + "step": 8043 + }, + { + "epoch": 0.5183991750982793, + "grad_norm": 0.02134104283681624, + "learning_rate": 5.355531686358755e-06, + "loss": 0.0003, + "step": 8044 + }, + { + "epoch": 0.5184636205452084, + "grad_norm": 0.03731752513019512, + "learning_rate": 5.354815610454709e-06, + "loss": 0.0001, + "step": 8045 + }, + { + "epoch": 0.5185280659921376, + "grad_norm": 0.04644805216348859, + "learning_rate": 5.354099534550663e-06, + "loss": 0.0003, + "step": 8046 + }, + { + "epoch": 0.5185925114390668, + "grad_norm": 0.35393408777423346, + "learning_rate": 5.3533834586466175e-06, + "loss": 0.0017, + "step": 8047 + }, + { + "epoch": 0.518656956885996, + "grad_norm": 0.007836727577587311, + "learning_rate": 5.352667382742571e-06, + "loss": 0.0, + "step": 8048 + }, + { + "epoch": 0.5187214023329252, + "grad_norm": 0.002620074872959019, + "learning_rate": 5.351951306838525e-06, + "loss": 0.0, + "step": 8049 + }, + { + "epoch": 0.5187858477798544, + "grad_norm": 0.0004816107082796707, + "learning_rate": 5.35123523093448e-06, + "loss": 0.0, + "step": 8050 + }, + { + "epoch": 0.5188502932267836, + "grad_norm": 0.00032189141713238927, + "learning_rate": 5.350519155030434e-06, + "loss": 0.0, + "step": 8051 + }, + { + "epoch": 0.5189147386737127, + "grad_norm": 0.001981909159571749, + "learning_rate": 5.349803079126387e-06, + "loss": 0.0, + "step": 8052 + }, + { + "epoch": 0.5189791841206419, + "grad_norm": 0.004482247778658294, + "learning_rate": 5.349087003222342e-06, + "loss": 0.0, + "step": 8053 + }, + { + "epoch": 0.519043629567571, + "grad_norm": 0.0429455602448583, + "learning_rate": 5.348370927318296e-06, + "loss": 0.0001, + "step": 8054 + }, + { + "epoch": 0.5191080750145002, + "grad_norm": 0.00010109813583239728, + "learning_rate": 5.34765485141425e-06, + "loss": 0.0, + "step": 8055 + }, + { + "epoch": 0.5191725204614294, + "grad_norm": 0.005013169175907346, + "learning_rate": 5.3469387755102045e-06, + "loss": 0.0, + "step": 8056 + }, + { + "epoch": 0.5192369659083585, + "grad_norm": 0.0026910242392564347, + "learning_rate": 5.346222699606158e-06, + "loss": 0.0, + "step": 8057 + }, + { + "epoch": 0.5193014113552877, + "grad_norm": 0.001115195455531372, + "learning_rate": 5.345506623702112e-06, + "loss": 0.0, + "step": 8058 + }, + { + "epoch": 0.5193658568022169, + "grad_norm": 0.0036148613287218584, + "learning_rate": 5.3447905477980666e-06, + "loss": 0.0, + "step": 8059 + }, + { + "epoch": 0.5194303022491461, + "grad_norm": 0.012834000600551449, + "learning_rate": 5.344074471894021e-06, + "loss": 0.0, + "step": 8060 + }, + { + "epoch": 0.5194947476960753, + "grad_norm": 0.00019800514969051393, + "learning_rate": 5.343358395989975e-06, + "loss": 0.0015, + "step": 8061 + }, + { + "epoch": 0.5195591931430045, + "grad_norm": 7.261156807335496e-05, + "learning_rate": 5.34264232008593e-06, + "loss": 0.0, + "step": 8062 + }, + { + "epoch": 0.5196236385899337, + "grad_norm": 0.00010452706061990569, + "learning_rate": 5.341926244181885e-06, + "loss": 0.0, + "step": 8063 + }, + { + "epoch": 0.5196880840368628, + "grad_norm": 0.0015938797537969481, + "learning_rate": 5.341210168277838e-06, + "loss": 0.0, + "step": 8064 + }, + { + "epoch": 0.519752529483792, + "grad_norm": 0.0006986058601608112, + "learning_rate": 5.340494092373792e-06, + "loss": 0.0, + "step": 8065 + }, + { + "epoch": 0.5198169749307211, + "grad_norm": 0.00018862228638009165, + "learning_rate": 5.339778016469747e-06, + "loss": 0.0, + "step": 8066 + }, + { + "epoch": 0.5198814203776503, + "grad_norm": 0.04569480241493098, + "learning_rate": 5.339061940565701e-06, + "loss": 0.0004, + "step": 8067 + }, + { + "epoch": 0.5199458658245795, + "grad_norm": 0.00048435932877395973, + "learning_rate": 5.338345864661654e-06, + "loss": 0.0, + "step": 8068 + }, + { + "epoch": 0.5200103112715087, + "grad_norm": 4.264877117970066e-05, + "learning_rate": 5.337629788757609e-06, + "loss": 0.0, + "step": 8069 + }, + { + "epoch": 0.5200747567184378, + "grad_norm": 0.022674035570682753, + "learning_rate": 5.336913712853563e-06, + "loss": 0.0001, + "step": 8070 + }, + { + "epoch": 0.520139202165367, + "grad_norm": 7.506351649127817e-05, + "learning_rate": 5.336197636949517e-06, + "loss": 0.0, + "step": 8071 + }, + { + "epoch": 0.5202036476122962, + "grad_norm": 0.0002121658204572475, + "learning_rate": 5.335481561045472e-06, + "loss": 0.0, + "step": 8072 + }, + { + "epoch": 0.5202680930592254, + "grad_norm": 0.0005612859992496004, + "learning_rate": 5.334765485141425e-06, + "loss": 0.0, + "step": 8073 + }, + { + "epoch": 0.5203325385061546, + "grad_norm": 0.0005723835744671824, + "learning_rate": 5.334049409237379e-06, + "loss": 0.0, + "step": 8074 + }, + { + "epoch": 0.5203969839530838, + "grad_norm": 4.74043528609633e-05, + "learning_rate": 5.333333333333334e-06, + "loss": 0.0, + "step": 8075 + }, + { + "epoch": 0.5204614294000129, + "grad_norm": 2.987999870269243e-05, + "learning_rate": 5.332617257429288e-06, + "loss": 0.0, + "step": 8076 + }, + { + "epoch": 0.520525874846942, + "grad_norm": 0.00460482292988574, + "learning_rate": 5.331901181525242e-06, + "loss": 0.0, + "step": 8077 + }, + { + "epoch": 0.5205903202938712, + "grad_norm": 0.0010893569519421812, + "learning_rate": 5.331185105621196e-06, + "loss": 0.0015, + "step": 8078 + }, + { + "epoch": 0.5206547657408004, + "grad_norm": 0.0019560104342554994, + "learning_rate": 5.33046902971715e-06, + "loss": 0.0, + "step": 8079 + }, + { + "epoch": 0.5207192111877296, + "grad_norm": 0.00010963162534686023, + "learning_rate": 5.329752953813104e-06, + "loss": 0.0, + "step": 8080 + }, + { + "epoch": 0.5207836566346588, + "grad_norm": 0.0014649577549688212, + "learning_rate": 5.329036877909059e-06, + "loss": 0.0, + "step": 8081 + }, + { + "epoch": 0.5208481020815879, + "grad_norm": 0.0045492230178957975, + "learning_rate": 5.328320802005013e-06, + "loss": 0.0, + "step": 8082 + }, + { + "epoch": 0.5209125475285171, + "grad_norm": 0.0012380836922672056, + "learning_rate": 5.327604726100966e-06, + "loss": 0.0, + "step": 8083 + }, + { + "epoch": 0.5209769929754463, + "grad_norm": 0.006471011870845555, + "learning_rate": 5.326888650196921e-06, + "loss": 0.0, + "step": 8084 + }, + { + "epoch": 0.5210414384223755, + "grad_norm": 0.0273098650513487, + "learning_rate": 5.326172574292875e-06, + "loss": 0.0001, + "step": 8085 + }, + { + "epoch": 0.5211058838693047, + "grad_norm": 0.2248162519329055, + "learning_rate": 5.32545649838883e-06, + "loss": 0.0005, + "step": 8086 + }, + { + "epoch": 0.5211703293162339, + "grad_norm": 0.0010722583432270054, + "learning_rate": 5.324740422484784e-06, + "loss": 0.0, + "step": 8087 + }, + { + "epoch": 0.5212347747631629, + "grad_norm": 0.0011142265211501097, + "learning_rate": 5.324024346580739e-06, + "loss": 0.0, + "step": 8088 + }, + { + "epoch": 0.5212992202100921, + "grad_norm": 0.11535493960735047, + "learning_rate": 5.323308270676692e-06, + "loss": 0.0003, + "step": 8089 + }, + { + "epoch": 0.5213636656570213, + "grad_norm": 0.009380588396358299, + "learning_rate": 5.3225921947726464e-06, + "loss": 0.0, + "step": 8090 + }, + { + "epoch": 0.5214281111039505, + "grad_norm": 0.002236862676547502, + "learning_rate": 5.321876118868601e-06, + "loss": 0.0, + "step": 8091 + }, + { + "epoch": 0.5214925565508797, + "grad_norm": 0.0032438128252277815, + "learning_rate": 5.321160042964555e-06, + "loss": 0.0, + "step": 8092 + }, + { + "epoch": 0.5215570019978089, + "grad_norm": 0.0006600547852672644, + "learning_rate": 5.320443967060509e-06, + "loss": 0.0, + "step": 8093 + }, + { + "epoch": 0.521621447444738, + "grad_norm": 0.00015368027513390677, + "learning_rate": 5.319727891156463e-06, + "loss": 0.0, + "step": 8094 + }, + { + "epoch": 0.5216858928916672, + "grad_norm": 0.02774108911226277, + "learning_rate": 5.319011815252417e-06, + "loss": 0.0002, + "step": 8095 + }, + { + "epoch": 0.5217503383385964, + "grad_norm": 0.0006598481528583075, + "learning_rate": 5.318295739348371e-06, + "loss": 0.0, + "step": 8096 + }, + { + "epoch": 0.5218147837855256, + "grad_norm": 0.0009686613851743172, + "learning_rate": 5.317579663444326e-06, + "loss": 0.0, + "step": 8097 + }, + { + "epoch": 0.5218792292324548, + "grad_norm": 0.0003071446257228951, + "learning_rate": 5.31686358754028e-06, + "loss": 0.0, + "step": 8098 + }, + { + "epoch": 0.5219436746793839, + "grad_norm": 0.0007522031735325496, + "learning_rate": 5.3161475116362334e-06, + "loss": 0.0, + "step": 8099 + }, + { + "epoch": 0.522008120126313, + "grad_norm": 0.00565657191007196, + "learning_rate": 5.315431435732188e-06, + "loss": 0.0, + "step": 8100 + }, + { + "epoch": 0.5220725655732422, + "grad_norm": 0.011783286177244336, + "learning_rate": 5.314715359828142e-06, + "loss": 0.0002, + "step": 8101 + }, + { + "epoch": 0.5221370110201714, + "grad_norm": 0.0004662790406560697, + "learning_rate": 5.313999283924096e-06, + "loss": 0.0, + "step": 8102 + }, + { + "epoch": 0.5222014564671006, + "grad_norm": 0.001986001404947262, + "learning_rate": 5.313283208020051e-06, + "loss": 0.0, + "step": 8103 + }, + { + "epoch": 0.5222659019140298, + "grad_norm": 0.0005110061619616537, + "learning_rate": 5.312567132116004e-06, + "loss": 0.0, + "step": 8104 + }, + { + "epoch": 0.522330347360959, + "grad_norm": 6.456255465957766e-05, + "learning_rate": 5.311851056211958e-06, + "loss": 0.0, + "step": 8105 + }, + { + "epoch": 0.5223947928078881, + "grad_norm": 0.003485680539695944, + "learning_rate": 5.311134980307913e-06, + "loss": 0.0, + "step": 8106 + }, + { + "epoch": 0.5224592382548173, + "grad_norm": 0.027200001693703183, + "learning_rate": 5.310418904403867e-06, + "loss": 0.0003, + "step": 8107 + }, + { + "epoch": 0.5225236837017465, + "grad_norm": 0.00010678184588055318, + "learning_rate": 5.309702828499821e-06, + "loss": 0.0, + "step": 8108 + }, + { + "epoch": 0.5225881291486757, + "grad_norm": 0.009661019632250483, + "learning_rate": 5.3089867525957764e-06, + "loss": 0.0, + "step": 8109 + }, + { + "epoch": 0.5226525745956048, + "grad_norm": 0.16898934666960477, + "learning_rate": 5.30827067669173e-06, + "loss": 0.0021, + "step": 8110 + }, + { + "epoch": 0.522717020042534, + "grad_norm": 0.648830777139313, + "learning_rate": 5.307554600787684e-06, + "loss": 0.0027, + "step": 8111 + }, + { + "epoch": 0.5227814654894631, + "grad_norm": 2.3984561151492033e-05, + "learning_rate": 5.3068385248836385e-06, + "loss": 0.0, + "step": 8112 + }, + { + "epoch": 0.5228459109363923, + "grad_norm": 0.1974333531613506, + "learning_rate": 5.306122448979593e-06, + "loss": 0.0011, + "step": 8113 + }, + { + "epoch": 0.5229103563833215, + "grad_norm": 0.00015204341681974146, + "learning_rate": 5.305406373075547e-06, + "loss": 0.0, + "step": 8114 + }, + { + "epoch": 0.5229748018302507, + "grad_norm": 0.0036347013021713263, + "learning_rate": 5.3046902971715005e-06, + "loss": 0.0, + "step": 8115 + }, + { + "epoch": 0.5230392472771799, + "grad_norm": 0.0037800083078743346, + "learning_rate": 5.303974221267455e-06, + "loss": 0.0, + "step": 8116 + }, + { + "epoch": 0.5231036927241091, + "grad_norm": 0.0013677929468042499, + "learning_rate": 5.303258145363409e-06, + "loss": 0.0, + "step": 8117 + }, + { + "epoch": 0.5231681381710382, + "grad_norm": 0.024809124082378647, + "learning_rate": 5.302542069459363e-06, + "loss": 0.0002, + "step": 8118 + }, + { + "epoch": 0.5232325836179674, + "grad_norm": 0.017704296459074436, + "learning_rate": 5.301825993555318e-06, + "loss": 0.0001, + "step": 8119 + }, + { + "epoch": 0.5232970290648966, + "grad_norm": 0.008421566509266936, + "learning_rate": 5.301109917651271e-06, + "loss": 0.0, + "step": 8120 + }, + { + "epoch": 0.5233614745118257, + "grad_norm": 0.00030410408926729963, + "learning_rate": 5.3003938417472255e-06, + "loss": 0.0, + "step": 8121 + }, + { + "epoch": 0.5234259199587549, + "grad_norm": 0.031790182885038674, + "learning_rate": 5.29967776584318e-06, + "loss": 0.0, + "step": 8122 + }, + { + "epoch": 0.523490365405684, + "grad_norm": 0.0006871531393244011, + "learning_rate": 5.298961689939134e-06, + "loss": 0.0, + "step": 8123 + }, + { + "epoch": 0.5235548108526132, + "grad_norm": 0.004459484505413135, + "learning_rate": 5.298245614035088e-06, + "loss": 0.0, + "step": 8124 + }, + { + "epoch": 0.5236192562995424, + "grad_norm": 0.0018307072091210933, + "learning_rate": 5.297529538131042e-06, + "loss": 0.0, + "step": 8125 + }, + { + "epoch": 0.5236837017464716, + "grad_norm": 0.0038162593027634127, + "learning_rate": 5.296813462226996e-06, + "loss": 0.0, + "step": 8126 + }, + { + "epoch": 0.5237481471934008, + "grad_norm": 7.791748082360972e-05, + "learning_rate": 5.29609738632295e-06, + "loss": 0.0, + "step": 8127 + }, + { + "epoch": 0.52381259264033, + "grad_norm": 0.0016177164245403997, + "learning_rate": 5.295381310418905e-06, + "loss": 0.0, + "step": 8128 + }, + { + "epoch": 0.5238770380872592, + "grad_norm": 0.018440304189078296, + "learning_rate": 5.294665234514859e-06, + "loss": 0.0001, + "step": 8129 + }, + { + "epoch": 0.5239414835341883, + "grad_norm": 0.00045727140499551784, + "learning_rate": 5.2939491586108125e-06, + "loss": 0.0, + "step": 8130 + }, + { + "epoch": 0.5240059289811175, + "grad_norm": 0.001961379100123223, + "learning_rate": 5.293233082706767e-06, + "loss": 0.0, + "step": 8131 + }, + { + "epoch": 0.5240703744280466, + "grad_norm": 0.0017229937831158926, + "learning_rate": 5.292517006802722e-06, + "loss": 0.0, + "step": 8132 + }, + { + "epoch": 0.5241348198749758, + "grad_norm": 0.0035674278089422567, + "learning_rate": 5.291800930898676e-06, + "loss": 0.0, + "step": 8133 + }, + { + "epoch": 0.524199265321905, + "grad_norm": 0.05173934526094626, + "learning_rate": 5.2910848549946305e-06, + "loss": 0.0003, + "step": 8134 + }, + { + "epoch": 0.5242637107688342, + "grad_norm": 0.0022714186955937123, + "learning_rate": 5.290368779090585e-06, + "loss": 0.0, + "step": 8135 + }, + { + "epoch": 0.5243281562157633, + "grad_norm": 0.04544742654464294, + "learning_rate": 5.289652703186538e-06, + "loss": 0.0001, + "step": 8136 + }, + { + "epoch": 0.5243926016626925, + "grad_norm": 0.0011263308024880082, + "learning_rate": 5.2889366272824926e-06, + "loss": 0.0, + "step": 8137 + }, + { + "epoch": 0.5244570471096217, + "grad_norm": 0.0005061771864201109, + "learning_rate": 5.288220551378447e-06, + "loss": 0.0, + "step": 8138 + }, + { + "epoch": 0.5245214925565509, + "grad_norm": 0.0009123932009742532, + "learning_rate": 5.287504475474401e-06, + "loss": 0.0, + "step": 8139 + }, + { + "epoch": 0.5245859380034801, + "grad_norm": 0.001865122440905928, + "learning_rate": 5.2867883995703554e-06, + "loss": 0.0, + "step": 8140 + }, + { + "epoch": 0.5246503834504093, + "grad_norm": 0.005351867999890206, + "learning_rate": 5.286072323666309e-06, + "loss": 0.0, + "step": 8141 + }, + { + "epoch": 0.5247148288973384, + "grad_norm": 0.0012923710822036933, + "learning_rate": 5.285356247762263e-06, + "loss": 0.0, + "step": 8142 + }, + { + "epoch": 0.5247792743442676, + "grad_norm": 0.07148435050672101, + "learning_rate": 5.2846401718582175e-06, + "loss": 0.0002, + "step": 8143 + }, + { + "epoch": 0.5248437197911967, + "grad_norm": 0.0003850860287028189, + "learning_rate": 5.283924095954172e-06, + "loss": 0.0, + "step": 8144 + }, + { + "epoch": 0.5249081652381259, + "grad_norm": 0.29063587886178854, + "learning_rate": 5.283208020050126e-06, + "loss": 0.0009, + "step": 8145 + }, + { + "epoch": 0.5249726106850551, + "grad_norm": 0.0005251628566615367, + "learning_rate": 5.2824919441460795e-06, + "loss": 0.0, + "step": 8146 + }, + { + "epoch": 0.5250370561319843, + "grad_norm": 0.0034821841120536135, + "learning_rate": 5.281775868242034e-06, + "loss": 0.0001, + "step": 8147 + }, + { + "epoch": 0.5251015015789134, + "grad_norm": 0.030426072261242363, + "learning_rate": 5.281059792337988e-06, + "loss": 0.0, + "step": 8148 + }, + { + "epoch": 0.5251659470258426, + "grad_norm": 0.05582442107006137, + "learning_rate": 5.2803437164339424e-06, + "loss": 0.0001, + "step": 8149 + }, + { + "epoch": 0.5252303924727718, + "grad_norm": 0.009250913006427195, + "learning_rate": 5.279627640529896e-06, + "loss": 0.0, + "step": 8150 + }, + { + "epoch": 0.525294837919701, + "grad_norm": 0.0034705167538564173, + "learning_rate": 5.27891156462585e-06, + "loss": 0.0, + "step": 8151 + }, + { + "epoch": 0.5253592833666302, + "grad_norm": 0.0016301703975253937, + "learning_rate": 5.2781954887218045e-06, + "loss": 0.0, + "step": 8152 + }, + { + "epoch": 0.5254237288135594, + "grad_norm": 0.004285375626315951, + "learning_rate": 5.277479412817759e-06, + "loss": 0.0, + "step": 8153 + }, + { + "epoch": 0.5254881742604885, + "grad_norm": 0.0001443931223535041, + "learning_rate": 5.276763336913713e-06, + "loss": 0.0, + "step": 8154 + }, + { + "epoch": 0.5255526197074176, + "grad_norm": 0.0012174254517221168, + "learning_rate": 5.2760472610096665e-06, + "loss": 0.0, + "step": 8155 + }, + { + "epoch": 0.5256170651543468, + "grad_norm": 0.1638833555172645, + "learning_rate": 5.2753311851056225e-06, + "loss": 0.0018, + "step": 8156 + }, + { + "epoch": 0.525681510601276, + "grad_norm": 0.011755288426830504, + "learning_rate": 5.274615109201576e-06, + "loss": 0.0, + "step": 8157 + }, + { + "epoch": 0.5257459560482052, + "grad_norm": 0.0003608457318295168, + "learning_rate": 5.27389903329753e-06, + "loss": 0.0, + "step": 8158 + }, + { + "epoch": 0.5258104014951344, + "grad_norm": 0.0014071326382772225, + "learning_rate": 5.273182957393485e-06, + "loss": 0.0, + "step": 8159 + }, + { + "epoch": 0.5258748469420635, + "grad_norm": 0.0002052865912161992, + "learning_rate": 5.272466881489439e-06, + "loss": 0.0, + "step": 8160 + }, + { + "epoch": 0.5259392923889927, + "grad_norm": 0.11002314063391534, + "learning_rate": 5.271750805585393e-06, + "loss": 0.0002, + "step": 8161 + }, + { + "epoch": 0.5260037378359219, + "grad_norm": 0.1543719917652916, + "learning_rate": 5.271034729681347e-06, + "loss": 0.0016, + "step": 8162 + }, + { + "epoch": 0.5260681832828511, + "grad_norm": 0.0013602805663139417, + "learning_rate": 5.270318653777301e-06, + "loss": 0.0, + "step": 8163 + }, + { + "epoch": 0.5261326287297803, + "grad_norm": 0.028567334380291513, + "learning_rate": 5.269602577873255e-06, + "loss": 0.0, + "step": 8164 + }, + { + "epoch": 0.5261970741767095, + "grad_norm": 0.030342511622000475, + "learning_rate": 5.2688865019692095e-06, + "loss": 0.0, + "step": 8165 + }, + { + "epoch": 0.5262615196236385, + "grad_norm": 0.0024130707987652417, + "learning_rate": 5.268170426065163e-06, + "loss": 0.0, + "step": 8166 + }, + { + "epoch": 0.5263259650705677, + "grad_norm": 0.0004052014762314221, + "learning_rate": 5.267454350161117e-06, + "loss": 0.0, + "step": 8167 + }, + { + "epoch": 0.5263904105174969, + "grad_norm": 0.000613029688713524, + "learning_rate": 5.266738274257072e-06, + "loss": 0.0, + "step": 8168 + }, + { + "epoch": 0.5264548559644261, + "grad_norm": 0.007337527303468706, + "learning_rate": 5.266022198353026e-06, + "loss": 0.0, + "step": 8169 + }, + { + "epoch": 0.5265193014113553, + "grad_norm": 0.007629737369463775, + "learning_rate": 5.26530612244898e-06, + "loss": 0.0, + "step": 8170 + }, + { + "epoch": 0.5265837468582845, + "grad_norm": 0.019203614179758664, + "learning_rate": 5.264590046544934e-06, + "loss": 0.0, + "step": 8171 + }, + { + "epoch": 0.5266481923052136, + "grad_norm": 0.009774924112548477, + "learning_rate": 5.263873970640888e-06, + "loss": 0.0, + "step": 8172 + }, + { + "epoch": 0.5267126377521428, + "grad_norm": 0.13492138472105183, + "learning_rate": 5.263157894736842e-06, + "loss": 0.0001, + "step": 8173 + }, + { + "epoch": 0.526777083199072, + "grad_norm": 0.17494896310555966, + "learning_rate": 5.2624418188327965e-06, + "loss": 0.0026, + "step": 8174 + }, + { + "epoch": 0.5268415286460012, + "grad_norm": 0.0030930372243162135, + "learning_rate": 5.261725742928751e-06, + "loss": 0.0, + "step": 8175 + }, + { + "epoch": 0.5269059740929304, + "grad_norm": 0.00018265507632032822, + "learning_rate": 5.261009667024704e-06, + "loss": 0.0, + "step": 8176 + }, + { + "epoch": 0.5269704195398595, + "grad_norm": 0.005233237911499148, + "learning_rate": 5.2602935911206586e-06, + "loss": 0.0, + "step": 8177 + }, + { + "epoch": 0.5270348649867886, + "grad_norm": 0.005108504047076498, + "learning_rate": 5.259577515216613e-06, + "loss": 0.0, + "step": 8178 + }, + { + "epoch": 0.5270993104337178, + "grad_norm": 0.001656930633442291, + "learning_rate": 5.258861439312568e-06, + "loss": 0.0, + "step": 8179 + }, + { + "epoch": 0.527163755880647, + "grad_norm": 0.0015388414270932735, + "learning_rate": 5.258145363408522e-06, + "loss": 0.0, + "step": 8180 + }, + { + "epoch": 0.5272282013275762, + "grad_norm": 0.001493593354986821, + "learning_rate": 5.257429287504477e-06, + "loss": 0.0, + "step": 8181 + }, + { + "epoch": 0.5272926467745054, + "grad_norm": 0.0015624622198609356, + "learning_rate": 5.25671321160043e-06, + "loss": 0.0, + "step": 8182 + }, + { + "epoch": 0.5273570922214346, + "grad_norm": 0.00023563587226363962, + "learning_rate": 5.255997135696384e-06, + "loss": 0.0, + "step": 8183 + }, + { + "epoch": 0.5274215376683637, + "grad_norm": 0.0008716355655753914, + "learning_rate": 5.255281059792339e-06, + "loss": 0.0, + "step": 8184 + }, + { + "epoch": 0.5274859831152929, + "grad_norm": 0.00041412078705236374, + "learning_rate": 5.254564983888293e-06, + "loss": 0.0, + "step": 8185 + }, + { + "epoch": 0.5275504285622221, + "grad_norm": 0.02280689810042121, + "learning_rate": 5.253848907984247e-06, + "loss": 0.0001, + "step": 8186 + }, + { + "epoch": 0.5276148740091513, + "grad_norm": 0.0002442367796725076, + "learning_rate": 5.253132832080201e-06, + "loss": 0.0, + "step": 8187 + }, + { + "epoch": 0.5276793194560804, + "grad_norm": 0.0001824094471575746, + "learning_rate": 5.252416756176155e-06, + "loss": 0.0, + "step": 8188 + }, + { + "epoch": 0.5277437649030096, + "grad_norm": 0.10367689975011432, + "learning_rate": 5.251700680272109e-06, + "loss": 0.0006, + "step": 8189 + }, + { + "epoch": 0.5278082103499387, + "grad_norm": 0.04179084686455239, + "learning_rate": 5.250984604368064e-06, + "loss": 0.0003, + "step": 8190 + }, + { + "epoch": 0.5278726557968679, + "grad_norm": 0.054257698556740556, + "learning_rate": 5.250268528464018e-06, + "loss": 0.0002, + "step": 8191 + }, + { + "epoch": 0.5279371012437971, + "grad_norm": 0.00045334327345424616, + "learning_rate": 5.249552452559971e-06, + "loss": 0.0, + "step": 8192 + }, + { + "epoch": 0.5280015466907263, + "grad_norm": 0.002689039349698105, + "learning_rate": 5.248836376655926e-06, + "loss": 0.0, + "step": 8193 + }, + { + "epoch": 0.5280659921376555, + "grad_norm": 0.003820872434692222, + "learning_rate": 5.24812030075188e-06, + "loss": 0.0, + "step": 8194 + }, + { + "epoch": 0.5281304375845847, + "grad_norm": 0.0031079245168158607, + "learning_rate": 5.247404224847834e-06, + "loss": 0.0, + "step": 8195 + }, + { + "epoch": 0.5281948830315139, + "grad_norm": 0.008044136523780283, + "learning_rate": 5.2466881489437886e-06, + "loss": 0.0, + "step": 8196 + }, + { + "epoch": 0.528259328478443, + "grad_norm": 0.0013573262889266815, + "learning_rate": 5.245972073039742e-06, + "loss": 0.0, + "step": 8197 + }, + { + "epoch": 0.5283237739253722, + "grad_norm": 0.00013557121331184024, + "learning_rate": 5.245255997135696e-06, + "loss": 0.0, + "step": 8198 + }, + { + "epoch": 0.5283882193723013, + "grad_norm": 0.006549067277275383, + "learning_rate": 5.244539921231651e-06, + "loss": 0.0, + "step": 8199 + }, + { + "epoch": 0.5284526648192305, + "grad_norm": 0.00038068579052549697, + "learning_rate": 5.243823845327605e-06, + "loss": 0.0, + "step": 8200 + }, + { + "epoch": 0.5285171102661597, + "grad_norm": 0.001630613111818217, + "learning_rate": 5.243107769423559e-06, + "loss": 0.0, + "step": 8201 + }, + { + "epoch": 0.5285815557130888, + "grad_norm": 0.005277173181096403, + "learning_rate": 5.242391693519514e-06, + "loss": 0.0, + "step": 8202 + }, + { + "epoch": 0.528646001160018, + "grad_norm": 0.0022489153954235027, + "learning_rate": 5.241675617615468e-06, + "loss": 0.0, + "step": 8203 + }, + { + "epoch": 0.5287104466069472, + "grad_norm": 0.0007811213053585651, + "learning_rate": 5.240959541711422e-06, + "loss": 0.0, + "step": 8204 + }, + { + "epoch": 0.5287748920538764, + "grad_norm": 0.15758120777290088, + "learning_rate": 5.240243465807376e-06, + "loss": 0.0008, + "step": 8205 + }, + { + "epoch": 0.5288393375008056, + "grad_norm": 0.226937066591774, + "learning_rate": 5.239527389903331e-06, + "loss": 0.001, + "step": 8206 + }, + { + "epoch": 0.5289037829477348, + "grad_norm": 0.0004585199950160562, + "learning_rate": 5.238811313999285e-06, + "loss": 0.0, + "step": 8207 + }, + { + "epoch": 0.528968228394664, + "grad_norm": 0.007970610709894329, + "learning_rate": 5.2380952380952384e-06, + "loss": 0.0, + "step": 8208 + }, + { + "epoch": 0.5290326738415931, + "grad_norm": 0.0008556513303001802, + "learning_rate": 5.237379162191193e-06, + "loss": 0.0, + "step": 8209 + }, + { + "epoch": 0.5290971192885222, + "grad_norm": 0.0013647344791251014, + "learning_rate": 5.236663086287147e-06, + "loss": 0.0, + "step": 8210 + }, + { + "epoch": 0.5291615647354514, + "grad_norm": 0.0008613043011201751, + "learning_rate": 5.235947010383101e-06, + "loss": 0.0, + "step": 8211 + }, + { + "epoch": 0.5292260101823806, + "grad_norm": 0.0011533930435283846, + "learning_rate": 5.235230934479056e-06, + "loss": 0.0, + "step": 8212 + }, + { + "epoch": 0.5292904556293098, + "grad_norm": 0.004177880975011757, + "learning_rate": 5.234514858575009e-06, + "loss": 0.0, + "step": 8213 + }, + { + "epoch": 0.529354901076239, + "grad_norm": 8.555679965205073e-05, + "learning_rate": 5.233798782670963e-06, + "loss": 0.0, + "step": 8214 + }, + { + "epoch": 0.5294193465231681, + "grad_norm": 0.06403724198652394, + "learning_rate": 5.233082706766918e-06, + "loss": 0.0002, + "step": 8215 + }, + { + "epoch": 0.5294837919700973, + "grad_norm": 0.0004445335666127929, + "learning_rate": 5.232366630862872e-06, + "loss": 0.0, + "step": 8216 + }, + { + "epoch": 0.5295482374170265, + "grad_norm": 0.0018955943196318668, + "learning_rate": 5.231650554958826e-06, + "loss": 0.0, + "step": 8217 + }, + { + "epoch": 0.5296126828639557, + "grad_norm": 0.00044187092058860976, + "learning_rate": 5.23093447905478e-06, + "loss": 0.0, + "step": 8218 + }, + { + "epoch": 0.5296771283108849, + "grad_norm": 0.015893673273748708, + "learning_rate": 5.230218403150734e-06, + "loss": 0.0001, + "step": 8219 + }, + { + "epoch": 0.529741573757814, + "grad_norm": 0.0035433591799342505, + "learning_rate": 5.229502327246688e-06, + "loss": 0.0, + "step": 8220 + }, + { + "epoch": 0.5298060192047432, + "grad_norm": 0.0010208689897191162, + "learning_rate": 5.228786251342643e-06, + "loss": 0.0, + "step": 8221 + }, + { + "epoch": 0.5298704646516723, + "grad_norm": 7.476281600967891e-05, + "learning_rate": 5.228070175438597e-06, + "loss": 0.0, + "step": 8222 + }, + { + "epoch": 0.5299349100986015, + "grad_norm": 0.03769369368956137, + "learning_rate": 5.22735409953455e-06, + "loss": 0.0002, + "step": 8223 + }, + { + "epoch": 0.5299993555455307, + "grad_norm": 0.5087324466067144, + "learning_rate": 5.226638023630505e-06, + "loss": 0.001, + "step": 8224 + }, + { + "epoch": 0.5300638009924599, + "grad_norm": 0.0004116131397795408, + "learning_rate": 5.225921947726459e-06, + "loss": 0.0, + "step": 8225 + }, + { + "epoch": 0.530128246439389, + "grad_norm": 0.12127374146854736, + "learning_rate": 5.225205871822414e-06, + "loss": 0.0003, + "step": 8226 + }, + { + "epoch": 0.5301926918863182, + "grad_norm": 0.43693211006198096, + "learning_rate": 5.2244897959183684e-06, + "loss": 0.0006, + "step": 8227 + }, + { + "epoch": 0.5302571373332474, + "grad_norm": 0.0016617944626236237, + "learning_rate": 5.223773720014323e-06, + "loss": 0.0, + "step": 8228 + }, + { + "epoch": 0.5303215827801766, + "grad_norm": 0.274345728826985, + "learning_rate": 5.223057644110276e-06, + "loss": 0.0006, + "step": 8229 + }, + { + "epoch": 0.5303860282271058, + "grad_norm": 0.005449729470843885, + "learning_rate": 5.2223415682062305e-06, + "loss": 0.0, + "step": 8230 + }, + { + "epoch": 0.530450473674035, + "grad_norm": 0.0013428693602989992, + "learning_rate": 5.221625492302185e-06, + "loss": 0.0, + "step": 8231 + }, + { + "epoch": 0.5305149191209642, + "grad_norm": 0.025936283526849124, + "learning_rate": 5.220909416398139e-06, + "loss": 0.0, + "step": 8232 + }, + { + "epoch": 0.5305793645678932, + "grad_norm": 0.015376020230364216, + "learning_rate": 5.220193340494093e-06, + "loss": 0.0001, + "step": 8233 + }, + { + "epoch": 0.5306438100148224, + "grad_norm": 0.00035719175370790775, + "learning_rate": 5.219477264590047e-06, + "loss": 0.0, + "step": 8234 + }, + { + "epoch": 0.5307082554617516, + "grad_norm": 0.6565876046945677, + "learning_rate": 5.218761188686001e-06, + "loss": 0.005, + "step": 8235 + }, + { + "epoch": 0.5307727009086808, + "grad_norm": 0.011372042509931924, + "learning_rate": 5.218045112781955e-06, + "loss": 0.0, + "step": 8236 + }, + { + "epoch": 0.53083714635561, + "grad_norm": 0.002884196527175304, + "learning_rate": 5.21732903687791e-06, + "loss": 0.0, + "step": 8237 + }, + { + "epoch": 0.5309015918025392, + "grad_norm": 0.0003135085556914037, + "learning_rate": 5.216612960973864e-06, + "loss": 0.0, + "step": 8238 + }, + { + "epoch": 0.5309660372494683, + "grad_norm": 0.01590281361846434, + "learning_rate": 5.2158968850698175e-06, + "loss": 0.0002, + "step": 8239 + }, + { + "epoch": 0.5310304826963975, + "grad_norm": 0.011128835166307135, + "learning_rate": 5.215180809165772e-06, + "loss": 0.0, + "step": 8240 + }, + { + "epoch": 0.5310949281433267, + "grad_norm": 0.08628985929142845, + "learning_rate": 5.214464733261726e-06, + "loss": 0.0004, + "step": 8241 + }, + { + "epoch": 0.5311593735902559, + "grad_norm": 0.0002728034723986173, + "learning_rate": 5.21374865735768e-06, + "loss": 0.0, + "step": 8242 + }, + { + "epoch": 0.5312238190371851, + "grad_norm": 0.0019110246639428413, + "learning_rate": 5.213032581453634e-06, + "loss": 0.0, + "step": 8243 + }, + { + "epoch": 0.5312882644841141, + "grad_norm": 0.12502401836431717, + "learning_rate": 5.212316505549588e-06, + "loss": 0.0056, + "step": 8244 + }, + { + "epoch": 0.5313527099310433, + "grad_norm": 0.0008021568203203074, + "learning_rate": 5.211600429645542e-06, + "loss": 0.0, + "step": 8245 + }, + { + "epoch": 0.5314171553779725, + "grad_norm": 0.012489563526513548, + "learning_rate": 5.210884353741497e-06, + "loss": 0.0, + "step": 8246 + }, + { + "epoch": 0.5314816008249017, + "grad_norm": 0.566265930684311, + "learning_rate": 5.210168277837451e-06, + "loss": 0.002, + "step": 8247 + }, + { + "epoch": 0.5315460462718309, + "grad_norm": 0.16108968909072638, + "learning_rate": 5.2094522019334045e-06, + "loss": 0.0002, + "step": 8248 + }, + { + "epoch": 0.5316104917187601, + "grad_norm": 0.0007321626886337238, + "learning_rate": 5.2087361260293605e-06, + "loss": 0.0, + "step": 8249 + }, + { + "epoch": 0.5316749371656893, + "grad_norm": 0.10953813121065938, + "learning_rate": 5.208020050125314e-06, + "loss": 0.0001, + "step": 8250 + }, + { + "epoch": 0.5317393826126184, + "grad_norm": 0.007629119868102849, + "learning_rate": 5.207303974221268e-06, + "loss": 0.0, + "step": 8251 + }, + { + "epoch": 0.5318038280595476, + "grad_norm": 0.0022120117547238275, + "learning_rate": 5.2065878983172225e-06, + "loss": 0.0, + "step": 8252 + }, + { + "epoch": 0.5318682735064768, + "grad_norm": 9.754262784845069e-05, + "learning_rate": 5.205871822413177e-06, + "loss": 0.0, + "step": 8253 + }, + { + "epoch": 0.531932718953406, + "grad_norm": 0.03304436715002747, + "learning_rate": 5.205155746509131e-06, + "loss": 0.0, + "step": 8254 + }, + { + "epoch": 0.5319971644003351, + "grad_norm": 0.002980140074258487, + "learning_rate": 5.2044396706050846e-06, + "loss": 0.0, + "step": 8255 + }, + { + "epoch": 0.5320616098472642, + "grad_norm": 0.00030013782604176344, + "learning_rate": 5.203723594701039e-06, + "loss": 0.0, + "step": 8256 + }, + { + "epoch": 0.5321260552941934, + "grad_norm": 0.0005443334783101526, + "learning_rate": 5.203007518796993e-06, + "loss": 0.0, + "step": 8257 + }, + { + "epoch": 0.5321905007411226, + "grad_norm": 0.03163681132386296, + "learning_rate": 5.2022914428929474e-06, + "loss": 0.0001, + "step": 8258 + }, + { + "epoch": 0.5322549461880518, + "grad_norm": 0.04303355504829882, + "learning_rate": 5.201575366988901e-06, + "loss": 0.0001, + "step": 8259 + }, + { + "epoch": 0.532319391634981, + "grad_norm": 0.17966033896124106, + "learning_rate": 5.200859291084855e-06, + "loss": 0.0001, + "step": 8260 + }, + { + "epoch": 0.5323838370819102, + "grad_norm": 0.001579268255010264, + "learning_rate": 5.2001432151808095e-06, + "loss": 0.0, + "step": 8261 + }, + { + "epoch": 0.5324482825288394, + "grad_norm": 0.0009424939141334193, + "learning_rate": 5.199427139276764e-06, + "loss": 0.0, + "step": 8262 + }, + { + "epoch": 0.5325127279757685, + "grad_norm": 0.0009968826784912033, + "learning_rate": 5.198711063372718e-06, + "loss": 0.0, + "step": 8263 + }, + { + "epoch": 0.5325771734226977, + "grad_norm": 0.008419342363879062, + "learning_rate": 5.1979949874686715e-06, + "loss": 0.0001, + "step": 8264 + }, + { + "epoch": 0.5326416188696269, + "grad_norm": 0.003526839644485888, + "learning_rate": 5.197278911564626e-06, + "loss": 0.0, + "step": 8265 + }, + { + "epoch": 0.532706064316556, + "grad_norm": 0.0004590648824850025, + "learning_rate": 5.19656283566058e-06, + "loss": 0.0, + "step": 8266 + }, + { + "epoch": 0.5327705097634852, + "grad_norm": 0.3312528124276014, + "learning_rate": 5.1958467597565344e-06, + "loss": 0.0009, + "step": 8267 + }, + { + "epoch": 0.5328349552104144, + "grad_norm": 0.018709853089610762, + "learning_rate": 5.195130683852489e-06, + "loss": 0.0001, + "step": 8268 + }, + { + "epoch": 0.5328994006573435, + "grad_norm": 0.014009747720029147, + "learning_rate": 5.194414607948442e-06, + "loss": 0.0, + "step": 8269 + }, + { + "epoch": 0.5329638461042727, + "grad_norm": 0.0016731185805280674, + "learning_rate": 5.1936985320443965e-06, + "loss": 0.0, + "step": 8270 + }, + { + "epoch": 0.5330282915512019, + "grad_norm": 0.0074413456102401035, + "learning_rate": 5.192982456140351e-06, + "loss": 0.0, + "step": 8271 + }, + { + "epoch": 0.5330927369981311, + "grad_norm": 0.2735715265142258, + "learning_rate": 5.192266380236306e-06, + "loss": 0.0021, + "step": 8272 + }, + { + "epoch": 0.5331571824450603, + "grad_norm": 0.07160542783589822, + "learning_rate": 5.19155030433226e-06, + "loss": 0.0008, + "step": 8273 + }, + { + "epoch": 0.5332216278919895, + "grad_norm": 0.00013488290769220058, + "learning_rate": 5.1908342284282145e-06, + "loss": 0.0, + "step": 8274 + }, + { + "epoch": 0.5332860733389186, + "grad_norm": 2.876226943198003e-05, + "learning_rate": 5.190118152524168e-06, + "loss": 0.0, + "step": 8275 + }, + { + "epoch": 0.5333505187858478, + "grad_norm": 0.0012743140065523632, + "learning_rate": 5.189402076620122e-06, + "loss": 0.0, + "step": 8276 + }, + { + "epoch": 0.5334149642327769, + "grad_norm": 0.004809027917468691, + "learning_rate": 5.188686000716077e-06, + "loss": 0.0, + "step": 8277 + }, + { + "epoch": 0.5334794096797061, + "grad_norm": 0.003701628229133183, + "learning_rate": 5.187969924812031e-06, + "loss": 0.0, + "step": 8278 + }, + { + "epoch": 0.5335438551266353, + "grad_norm": 0.00039285142383473234, + "learning_rate": 5.187253848907985e-06, + "loss": 0.0, + "step": 8279 + }, + { + "epoch": 0.5336083005735645, + "grad_norm": 0.0047367452853752864, + "learning_rate": 5.186537773003939e-06, + "loss": 0.0, + "step": 8280 + }, + { + "epoch": 0.5336727460204936, + "grad_norm": 0.003703470161721052, + "learning_rate": 5.185821697099893e-06, + "loss": 0.0, + "step": 8281 + }, + { + "epoch": 0.5337371914674228, + "grad_norm": 4.95614077371797e-05, + "learning_rate": 5.185105621195847e-06, + "loss": 0.0, + "step": 8282 + }, + { + "epoch": 0.533801636914352, + "grad_norm": 3.902805141580079e-05, + "learning_rate": 5.1843895452918015e-06, + "loss": 0.0, + "step": 8283 + }, + { + "epoch": 0.5338660823612812, + "grad_norm": 0.2618037697781461, + "learning_rate": 5.183673469387756e-06, + "loss": 0.0004, + "step": 8284 + }, + { + "epoch": 0.5339305278082104, + "grad_norm": 0.10652270259820498, + "learning_rate": 5.182957393483709e-06, + "loss": 0.0002, + "step": 8285 + }, + { + "epoch": 0.5339949732551396, + "grad_norm": 0.05615630343200612, + "learning_rate": 5.182241317579664e-06, + "loss": 0.0001, + "step": 8286 + }, + { + "epoch": 0.5340594187020687, + "grad_norm": 8.943660066176195e-05, + "learning_rate": 5.181525241675618e-06, + "loss": 0.0, + "step": 8287 + }, + { + "epoch": 0.5341238641489978, + "grad_norm": 0.00048379422864961943, + "learning_rate": 5.180809165771572e-06, + "loss": 0.0, + "step": 8288 + }, + { + "epoch": 0.534188309595927, + "grad_norm": 0.0003209484811010145, + "learning_rate": 5.1800930898675265e-06, + "loss": 0.0, + "step": 8289 + }, + { + "epoch": 0.5342527550428562, + "grad_norm": 0.000557616408584314, + "learning_rate": 5.17937701396348e-06, + "loss": 0.0, + "step": 8290 + }, + { + "epoch": 0.5343172004897854, + "grad_norm": 0.06915313841843637, + "learning_rate": 5.178660938059434e-06, + "loss": 0.0001, + "step": 8291 + }, + { + "epoch": 0.5343816459367146, + "grad_norm": 7.130504738155417e-05, + "learning_rate": 5.1779448621553885e-06, + "loss": 0.0, + "step": 8292 + }, + { + "epoch": 0.5344460913836437, + "grad_norm": 0.026762604545379487, + "learning_rate": 5.177228786251343e-06, + "loss": 0.0002, + "step": 8293 + }, + { + "epoch": 0.5345105368305729, + "grad_norm": 0.1844287713085006, + "learning_rate": 5.176512710347297e-06, + "loss": 0.0007, + "step": 8294 + }, + { + "epoch": 0.5345749822775021, + "grad_norm": 0.006867867960209021, + "learning_rate": 5.1757966344432506e-06, + "loss": 0.0, + "step": 8295 + }, + { + "epoch": 0.5346394277244313, + "grad_norm": 0.03740969652855573, + "learning_rate": 5.175080558539206e-06, + "loss": 0.0002, + "step": 8296 + }, + { + "epoch": 0.5347038731713605, + "grad_norm": 0.0006198577460679115, + "learning_rate": 5.17436448263516e-06, + "loss": 0.0, + "step": 8297 + }, + { + "epoch": 0.5347683186182897, + "grad_norm": 0.0001554785987894876, + "learning_rate": 5.173648406731114e-06, + "loss": 0.0, + "step": 8298 + }, + { + "epoch": 0.5348327640652188, + "grad_norm": 0.0020449257013498948, + "learning_rate": 5.172932330827069e-06, + "loss": 0.0, + "step": 8299 + }, + { + "epoch": 0.5348972095121479, + "grad_norm": 9.068199803593524e-05, + "learning_rate": 5.172216254923023e-06, + "loss": 0.0, + "step": 8300 + }, + { + "epoch": 0.5349616549590771, + "grad_norm": 0.0018346562331147103, + "learning_rate": 5.171500179018976e-06, + "loss": 0.0, + "step": 8301 + }, + { + "epoch": 0.5350261004060063, + "grad_norm": 0.06669650386462536, + "learning_rate": 5.170784103114931e-06, + "loss": 0.0005, + "step": 8302 + }, + { + "epoch": 0.5350905458529355, + "grad_norm": 3.8839867272744435, + "learning_rate": 5.170068027210885e-06, + "loss": 0.0224, + "step": 8303 + }, + { + "epoch": 0.5351549912998647, + "grad_norm": 0.0004139125755270844, + "learning_rate": 5.169351951306839e-06, + "loss": 0.0, + "step": 8304 + }, + { + "epoch": 0.5352194367467938, + "grad_norm": 0.23567786488833725, + "learning_rate": 5.1686358754027936e-06, + "loss": 0.0005, + "step": 8305 + }, + { + "epoch": 0.535283882193723, + "grad_norm": 0.0032149324484725223, + "learning_rate": 5.167919799498747e-06, + "loss": 0.0, + "step": 8306 + }, + { + "epoch": 0.5353483276406522, + "grad_norm": 0.001745209537508723, + "learning_rate": 5.167203723594701e-06, + "loss": 0.0, + "step": 8307 + }, + { + "epoch": 0.5354127730875814, + "grad_norm": 0.0003493430009983679, + "learning_rate": 5.166487647690656e-06, + "loss": 0.0, + "step": 8308 + }, + { + "epoch": 0.5354772185345106, + "grad_norm": 0.0006592212059325943, + "learning_rate": 5.16577157178661e-06, + "loss": 0.0, + "step": 8309 + }, + { + "epoch": 0.5355416639814398, + "grad_norm": 0.009202076173033253, + "learning_rate": 5.165055495882564e-06, + "loss": 0.0001, + "step": 8310 + }, + { + "epoch": 0.5356061094283688, + "grad_norm": 0.002701766323453557, + "learning_rate": 5.164339419978518e-06, + "loss": 0.0, + "step": 8311 + }, + { + "epoch": 0.535670554875298, + "grad_norm": 0.0005170194706688815, + "learning_rate": 5.163623344074472e-06, + "loss": 0.0, + "step": 8312 + }, + { + "epoch": 0.5357350003222272, + "grad_norm": 0.12185486281755308, + "learning_rate": 5.162907268170426e-06, + "loss": 0.0001, + "step": 8313 + }, + { + "epoch": 0.5357994457691564, + "grad_norm": 0.00047911325761194646, + "learning_rate": 5.1621911922663806e-06, + "loss": 0.0, + "step": 8314 + }, + { + "epoch": 0.5358638912160856, + "grad_norm": 0.0002798710818400487, + "learning_rate": 5.161475116362335e-06, + "loss": 0.0, + "step": 8315 + }, + { + "epoch": 0.5359283366630148, + "grad_norm": 0.01226373081079961, + "learning_rate": 5.160759040458288e-06, + "loss": 0.0001, + "step": 8316 + }, + { + "epoch": 0.5359927821099439, + "grad_norm": 0.00038180829833394726, + "learning_rate": 5.160042964554243e-06, + "loss": 0.0, + "step": 8317 + }, + { + "epoch": 0.5360572275568731, + "grad_norm": 4.5729150975741886e-05, + "learning_rate": 5.159326888650197e-06, + "loss": 0.0, + "step": 8318 + }, + { + "epoch": 0.5361216730038023, + "grad_norm": 0.7364370331298571, + "learning_rate": 5.158610812746152e-06, + "loss": 0.0017, + "step": 8319 + }, + { + "epoch": 0.5361861184507315, + "grad_norm": 0.0003008430119444691, + "learning_rate": 5.157894736842106e-06, + "loss": 0.0, + "step": 8320 + }, + { + "epoch": 0.5362505638976607, + "grad_norm": 0.0008920872326398447, + "learning_rate": 5.157178660938061e-06, + "loss": 0.0, + "step": 8321 + }, + { + "epoch": 0.5363150093445898, + "grad_norm": 0.030279863544600275, + "learning_rate": 5.156462585034014e-06, + "loss": 0.0002, + "step": 8322 + }, + { + "epoch": 0.5363794547915189, + "grad_norm": 0.00022730484292783038, + "learning_rate": 5.155746509129968e-06, + "loss": 0.0, + "step": 8323 + }, + { + "epoch": 0.5364439002384481, + "grad_norm": 6.012959763087097e-05, + "learning_rate": 5.155030433225923e-06, + "loss": 0.0, + "step": 8324 + }, + { + "epoch": 0.5365083456853773, + "grad_norm": 0.004307225892368384, + "learning_rate": 5.154314357321877e-06, + "loss": 0.0, + "step": 8325 + }, + { + "epoch": 0.5365727911323065, + "grad_norm": 0.14500555619742644, + "learning_rate": 5.153598281417831e-06, + "loss": 0.0013, + "step": 8326 + }, + { + "epoch": 0.5366372365792357, + "grad_norm": 0.003633286377291637, + "learning_rate": 5.152882205513785e-06, + "loss": 0.0, + "step": 8327 + }, + { + "epoch": 0.5367016820261649, + "grad_norm": 0.0030499388817333785, + "learning_rate": 5.152166129609739e-06, + "loss": 0.0, + "step": 8328 + }, + { + "epoch": 0.536766127473094, + "grad_norm": 0.003460639557092233, + "learning_rate": 5.151450053705693e-06, + "loss": 0.0, + "step": 8329 + }, + { + "epoch": 0.5368305729200232, + "grad_norm": 0.0654827941475252, + "learning_rate": 5.150733977801648e-06, + "loss": 0.0002, + "step": 8330 + }, + { + "epoch": 0.5368950183669524, + "grad_norm": 0.0006291733494983527, + "learning_rate": 5.150017901897602e-06, + "loss": 0.0, + "step": 8331 + }, + { + "epoch": 0.5369594638138816, + "grad_norm": 0.004459620943301213, + "learning_rate": 5.149301825993555e-06, + "loss": 0.0001, + "step": 8332 + }, + { + "epoch": 0.5370239092608107, + "grad_norm": 0.00040403846333492225, + "learning_rate": 5.14858575008951e-06, + "loss": 0.0, + "step": 8333 + }, + { + "epoch": 0.5370883547077399, + "grad_norm": 0.0007446600423081939, + "learning_rate": 5.147869674185464e-06, + "loss": 0.0, + "step": 8334 + }, + { + "epoch": 0.537152800154669, + "grad_norm": 0.0001650185458354627, + "learning_rate": 5.147153598281418e-06, + "loss": 0.0, + "step": 8335 + }, + { + "epoch": 0.5372172456015982, + "grad_norm": 1.7263584494515336e-05, + "learning_rate": 5.146437522377372e-06, + "loss": 0.0, + "step": 8336 + }, + { + "epoch": 0.5372816910485274, + "grad_norm": 0.002230091483141834, + "learning_rate": 5.145721446473326e-06, + "loss": 0.0, + "step": 8337 + }, + { + "epoch": 0.5373461364954566, + "grad_norm": 0.0005651914951587413, + "learning_rate": 5.14500537056928e-06, + "loss": 0.0, + "step": 8338 + }, + { + "epoch": 0.5374105819423858, + "grad_norm": 0.00022583757860816512, + "learning_rate": 5.144289294665235e-06, + "loss": 0.0, + "step": 8339 + }, + { + "epoch": 0.537475027389315, + "grad_norm": 0.170381111795957, + "learning_rate": 5.143573218761189e-06, + "loss": 0.0004, + "step": 8340 + }, + { + "epoch": 0.5375394728362441, + "grad_norm": 0.002303433150644348, + "learning_rate": 5.142857142857142e-06, + "loss": 0.0, + "step": 8341 + }, + { + "epoch": 0.5376039182831733, + "grad_norm": 0.002125040189832157, + "learning_rate": 5.142141066953097e-06, + "loss": 0.0, + "step": 8342 + }, + { + "epoch": 0.5376683637301025, + "grad_norm": 0.00013921987677024776, + "learning_rate": 5.141424991049052e-06, + "loss": 0.0, + "step": 8343 + }, + { + "epoch": 0.5377328091770316, + "grad_norm": 0.00022435381485150807, + "learning_rate": 5.140708915145006e-06, + "loss": 0.0, + "step": 8344 + }, + { + "epoch": 0.5377972546239608, + "grad_norm": 0.003466804477732005, + "learning_rate": 5.1399928392409604e-06, + "loss": 0.0, + "step": 8345 + }, + { + "epoch": 0.53786170007089, + "grad_norm": 0.00026267158997037504, + "learning_rate": 5.139276763336915e-06, + "loss": 0.0, + "step": 8346 + }, + { + "epoch": 0.5379261455178191, + "grad_norm": 0.0022633420166437274, + "learning_rate": 5.138560687432869e-06, + "loss": 0.0, + "step": 8347 + }, + { + "epoch": 0.5379905909647483, + "grad_norm": 0.025317271810819834, + "learning_rate": 5.1378446115288225e-06, + "loss": 0.0001, + "step": 8348 + }, + { + "epoch": 0.5380550364116775, + "grad_norm": 0.000673680858811099, + "learning_rate": 5.137128535624777e-06, + "loss": 0.0, + "step": 8349 + }, + { + "epoch": 0.5381194818586067, + "grad_norm": 0.0018941198791382194, + "learning_rate": 5.136412459720731e-06, + "loss": 0.0, + "step": 8350 + }, + { + "epoch": 0.5381839273055359, + "grad_norm": 0.5314760288276836, + "learning_rate": 5.135696383816685e-06, + "loss": 0.0032, + "step": 8351 + }, + { + "epoch": 0.5382483727524651, + "grad_norm": 0.010573634830043211, + "learning_rate": 5.134980307912639e-06, + "loss": 0.0001, + "step": 8352 + }, + { + "epoch": 0.5383128181993942, + "grad_norm": 0.00018203131654043764, + "learning_rate": 5.134264232008593e-06, + "loss": 0.0, + "step": 8353 + }, + { + "epoch": 0.5383772636463234, + "grad_norm": 1.1062119213382617, + "learning_rate": 5.133548156104547e-06, + "loss": 0.0072, + "step": 8354 + }, + { + "epoch": 0.5384417090932525, + "grad_norm": 0.010292150074225268, + "learning_rate": 5.132832080200502e-06, + "loss": 0.0, + "step": 8355 + }, + { + "epoch": 0.5385061545401817, + "grad_norm": 0.011232534638379256, + "learning_rate": 5.132116004296456e-06, + "loss": 0.0001, + "step": 8356 + }, + { + "epoch": 0.5385705999871109, + "grad_norm": 0.006481248957277151, + "learning_rate": 5.1313999283924095e-06, + "loss": 0.0, + "step": 8357 + }, + { + "epoch": 0.5386350454340401, + "grad_norm": 0.0103300801049018, + "learning_rate": 5.130683852488364e-06, + "loss": 0.0, + "step": 8358 + }, + { + "epoch": 0.5386994908809692, + "grad_norm": 0.0007205325030744387, + "learning_rate": 5.129967776584318e-06, + "loss": 0.0, + "step": 8359 + }, + { + "epoch": 0.5387639363278984, + "grad_norm": 0.001971069018050957, + "learning_rate": 5.129251700680272e-06, + "loss": 0.0, + "step": 8360 + }, + { + "epoch": 0.5388283817748276, + "grad_norm": 0.00025740076583384244, + "learning_rate": 5.128535624776227e-06, + "loss": 0.0, + "step": 8361 + }, + { + "epoch": 0.5388928272217568, + "grad_norm": 0.004754505978913037, + "learning_rate": 5.12781954887218e-06, + "loss": 0.0001, + "step": 8362 + }, + { + "epoch": 0.538957272668686, + "grad_norm": 6.622904868150729e-05, + "learning_rate": 5.127103472968134e-06, + "loss": 0.0, + "step": 8363 + }, + { + "epoch": 0.5390217181156152, + "grad_norm": 0.3479271808147232, + "learning_rate": 5.126387397064089e-06, + "loss": 0.0217, + "step": 8364 + }, + { + "epoch": 0.5390861635625444, + "grad_norm": 8.444964625094108e-05, + "learning_rate": 5.125671321160043e-06, + "loss": 0.0, + "step": 8365 + }, + { + "epoch": 0.5391506090094735, + "grad_norm": 0.00021665471768633301, + "learning_rate": 5.124955245255998e-06, + "loss": 0.0, + "step": 8366 + }, + { + "epoch": 0.5392150544564026, + "grad_norm": 0.00021705941240832152, + "learning_rate": 5.1242391693519525e-06, + "loss": 0.0, + "step": 8367 + }, + { + "epoch": 0.5392794999033318, + "grad_norm": 0.05381334252700333, + "learning_rate": 5.123523093447906e-06, + "loss": 0.0002, + "step": 8368 + }, + { + "epoch": 0.539343945350261, + "grad_norm": 0.0011084411287508335, + "learning_rate": 5.12280701754386e-06, + "loss": 0.0, + "step": 8369 + }, + { + "epoch": 0.5394083907971902, + "grad_norm": 0.002692409573749917, + "learning_rate": 5.1220909416398145e-06, + "loss": 0.0, + "step": 8370 + }, + { + "epoch": 0.5394728362441193, + "grad_norm": 0.0019404386497965766, + "learning_rate": 5.121374865735769e-06, + "loss": 0.0, + "step": 8371 + }, + { + "epoch": 0.5395372816910485, + "grad_norm": 0.000830474063169443, + "learning_rate": 5.120658789831723e-06, + "loss": 0.0, + "step": 8372 + }, + { + "epoch": 0.5396017271379777, + "grad_norm": 0.0001285680201385665, + "learning_rate": 5.1199427139276766e-06, + "loss": 0.0, + "step": 8373 + }, + { + "epoch": 0.5396661725849069, + "grad_norm": 1.688643150650448e-05, + "learning_rate": 5.119226638023631e-06, + "loss": 0.0, + "step": 8374 + }, + { + "epoch": 0.5397306180318361, + "grad_norm": 0.010633813313598334, + "learning_rate": 5.118510562119585e-06, + "loss": 0.0002, + "step": 8375 + }, + { + "epoch": 0.5397950634787653, + "grad_norm": 0.2513697768569286, + "learning_rate": 5.1177944862155394e-06, + "loss": 0.0004, + "step": 8376 + }, + { + "epoch": 0.5398595089256945, + "grad_norm": 0.011243210437085592, + "learning_rate": 5.117078410311494e-06, + "loss": 0.0, + "step": 8377 + }, + { + "epoch": 0.5399239543726235, + "grad_norm": 0.0634902939249464, + "learning_rate": 5.116362334407447e-06, + "loss": 0.0001, + "step": 8378 + }, + { + "epoch": 0.5399883998195527, + "grad_norm": 0.009778168936793704, + "learning_rate": 5.1156462585034015e-06, + "loss": 0.0001, + "step": 8379 + }, + { + "epoch": 0.5400528452664819, + "grad_norm": 4.753302020719344e-05, + "learning_rate": 5.114930182599356e-06, + "loss": 0.0, + "step": 8380 + }, + { + "epoch": 0.5401172907134111, + "grad_norm": 0.0010928865291910395, + "learning_rate": 5.11421410669531e-06, + "loss": 0.0, + "step": 8381 + }, + { + "epoch": 0.5401817361603403, + "grad_norm": 6.587222646677347e-06, + "learning_rate": 5.113498030791264e-06, + "loss": 0.0, + "step": 8382 + }, + { + "epoch": 0.5402461816072694, + "grad_norm": 0.0003763542620900478, + "learning_rate": 5.112781954887218e-06, + "loss": 0.0, + "step": 8383 + }, + { + "epoch": 0.5403106270541986, + "grad_norm": 0.0005508190320432071, + "learning_rate": 5.112065878983172e-06, + "loss": 0.0, + "step": 8384 + }, + { + "epoch": 0.5403750725011278, + "grad_norm": 0.0001657109248957889, + "learning_rate": 5.1113498030791264e-06, + "loss": 0.0, + "step": 8385 + }, + { + "epoch": 0.540439517948057, + "grad_norm": 0.000585394327920387, + "learning_rate": 5.110633727175081e-06, + "loss": 0.0, + "step": 8386 + }, + { + "epoch": 0.5405039633949862, + "grad_norm": 0.12383700047162453, + "learning_rate": 5.109917651271035e-06, + "loss": 0.0003, + "step": 8387 + }, + { + "epoch": 0.5405684088419154, + "grad_norm": 0.0012521166326916959, + "learning_rate": 5.1092015753669885e-06, + "loss": 0.0, + "step": 8388 + }, + { + "epoch": 0.5406328542888444, + "grad_norm": 0.024399225972084052, + "learning_rate": 5.108485499462944e-06, + "loss": 0.0002, + "step": 8389 + }, + { + "epoch": 0.5406972997357736, + "grad_norm": 0.0009896365504723618, + "learning_rate": 5.107769423558898e-06, + "loss": 0.0, + "step": 8390 + }, + { + "epoch": 0.5407617451827028, + "grad_norm": 0.41062065354460814, + "learning_rate": 5.107053347654852e-06, + "loss": 0.003, + "step": 8391 + }, + { + "epoch": 0.540826190629632, + "grad_norm": 0.00020028325207377276, + "learning_rate": 5.1063372717508065e-06, + "loss": 0.0, + "step": 8392 + }, + { + "epoch": 0.5408906360765612, + "grad_norm": 0.0021295931855767553, + "learning_rate": 5.105621195846761e-06, + "loss": 0.0, + "step": 8393 + }, + { + "epoch": 0.5409550815234904, + "grad_norm": 0.003080648669282957, + "learning_rate": 5.104905119942714e-06, + "loss": 0.0, + "step": 8394 + }, + { + "epoch": 0.5410195269704196, + "grad_norm": 0.037808258822707816, + "learning_rate": 5.104189044038669e-06, + "loss": 0.0001, + "step": 8395 + }, + { + "epoch": 0.5410839724173487, + "grad_norm": 1.003356723819026, + "learning_rate": 5.103472968134623e-06, + "loss": 0.006, + "step": 8396 + }, + { + "epoch": 0.5411484178642779, + "grad_norm": 1.2780228888800633, + "learning_rate": 5.102756892230577e-06, + "loss": 0.0034, + "step": 8397 + }, + { + "epoch": 0.5412128633112071, + "grad_norm": 0.006901590997431245, + "learning_rate": 5.1020408163265315e-06, + "loss": 0.0, + "step": 8398 + }, + { + "epoch": 0.5412773087581363, + "grad_norm": 0.020858812785563983, + "learning_rate": 5.101324740422485e-06, + "loss": 0.0001, + "step": 8399 + }, + { + "epoch": 0.5413417542050654, + "grad_norm": 0.0006388117841880102, + "learning_rate": 5.100608664518439e-06, + "loss": 0.0, + "step": 8400 + }, + { + "epoch": 0.5414061996519945, + "grad_norm": 0.09992779997527523, + "learning_rate": 5.0998925886143935e-06, + "loss": 0.0002, + "step": 8401 + }, + { + "epoch": 0.5414706450989237, + "grad_norm": 0.01728350633489314, + "learning_rate": 5.099176512710348e-06, + "loss": 0.0, + "step": 8402 + }, + { + "epoch": 0.5415350905458529, + "grad_norm": 0.010191441552771597, + "learning_rate": 5.098460436806302e-06, + "loss": 0.0001, + "step": 8403 + }, + { + "epoch": 0.5415995359927821, + "grad_norm": 0.0006010007948885225, + "learning_rate": 5.097744360902256e-06, + "loss": 0.0, + "step": 8404 + }, + { + "epoch": 0.5416639814397113, + "grad_norm": 0.002163450281916386, + "learning_rate": 5.09702828499821e-06, + "loss": 0.0, + "step": 8405 + }, + { + "epoch": 0.5417284268866405, + "grad_norm": 0.2445328803815511, + "learning_rate": 5.096312209094164e-06, + "loss": 0.0004, + "step": 8406 + }, + { + "epoch": 0.5417928723335697, + "grad_norm": 0.00222522598216728, + "learning_rate": 5.0955961331901185e-06, + "loss": 0.0, + "step": 8407 + }, + { + "epoch": 0.5418573177804988, + "grad_norm": 0.016346940554171265, + "learning_rate": 5.094880057286073e-06, + "loss": 0.0, + "step": 8408 + }, + { + "epoch": 0.541921763227428, + "grad_norm": 0.0017265815881555501, + "learning_rate": 5.094163981382026e-06, + "loss": 0.0, + "step": 8409 + }, + { + "epoch": 0.5419862086743572, + "grad_norm": 0.002953750292921564, + "learning_rate": 5.0934479054779805e-06, + "loss": 0.0, + "step": 8410 + }, + { + "epoch": 0.5420506541212863, + "grad_norm": 0.0017401369012034575, + "learning_rate": 5.092731829573935e-06, + "loss": 0.0, + "step": 8411 + }, + { + "epoch": 0.5421150995682155, + "grad_norm": 0.17858228586602923, + "learning_rate": 5.092015753669889e-06, + "loss": 0.0002, + "step": 8412 + }, + { + "epoch": 0.5421795450151446, + "grad_norm": 0.07280302949534724, + "learning_rate": 5.091299677765844e-06, + "loss": 0.0003, + "step": 8413 + }, + { + "epoch": 0.5422439904620738, + "grad_norm": 0.02793595399615248, + "learning_rate": 5.0905836018617986e-06, + "loss": 0.0, + "step": 8414 + }, + { + "epoch": 0.542308435909003, + "grad_norm": 0.07168111059143023, + "learning_rate": 5.089867525957752e-06, + "loss": 0.0002, + "step": 8415 + }, + { + "epoch": 0.5423728813559322, + "grad_norm": 0.013141592770366813, + "learning_rate": 5.089151450053706e-06, + "loss": 0.0, + "step": 8416 + }, + { + "epoch": 0.5424373268028614, + "grad_norm": 0.008050688347397276, + "learning_rate": 5.088435374149661e-06, + "loss": 0.0, + "step": 8417 + }, + { + "epoch": 0.5425017722497906, + "grad_norm": 0.01882217632422738, + "learning_rate": 5.087719298245615e-06, + "loss": 0.0, + "step": 8418 + }, + { + "epoch": 0.5425662176967198, + "grad_norm": 0.0018837017851688694, + "learning_rate": 5.087003222341569e-06, + "loss": 0.0, + "step": 8419 + }, + { + "epoch": 0.5426306631436489, + "grad_norm": 0.0027659480170431224, + "learning_rate": 5.086287146437523e-06, + "loss": 0.0, + "step": 8420 + }, + { + "epoch": 0.5426951085905781, + "grad_norm": 0.02249851271354615, + "learning_rate": 5.085571070533477e-06, + "loss": 0.0, + "step": 8421 + }, + { + "epoch": 0.5427595540375072, + "grad_norm": 0.24873905024797605, + "learning_rate": 5.084854994629431e-06, + "loss": 0.0004, + "step": 8422 + }, + { + "epoch": 0.5428239994844364, + "grad_norm": 0.0633032486789857, + "learning_rate": 5.0841389187253856e-06, + "loss": 0.0003, + "step": 8423 + }, + { + "epoch": 0.5428884449313656, + "grad_norm": 0.00014077413343572848, + "learning_rate": 5.08342284282134e-06, + "loss": 0.0, + "step": 8424 + }, + { + "epoch": 0.5429528903782948, + "grad_norm": 0.16562388707632744, + "learning_rate": 5.082706766917293e-06, + "loss": 0.0004, + "step": 8425 + }, + { + "epoch": 0.5430173358252239, + "grad_norm": 0.020381830748196907, + "learning_rate": 5.081990691013248e-06, + "loss": 0.0001, + "step": 8426 + }, + { + "epoch": 0.5430817812721531, + "grad_norm": 0.0020281902919562383, + "learning_rate": 5.081274615109202e-06, + "loss": 0.0, + "step": 8427 + }, + { + "epoch": 0.5431462267190823, + "grad_norm": 0.0002616038828169522, + "learning_rate": 5.080558539205156e-06, + "loss": 0.0, + "step": 8428 + }, + { + "epoch": 0.5432106721660115, + "grad_norm": 0.043275989112033196, + "learning_rate": 5.0798424633011105e-06, + "loss": 0.0002, + "step": 8429 + }, + { + "epoch": 0.5432751176129407, + "grad_norm": 0.00038925996925701165, + "learning_rate": 5.079126387397064e-06, + "loss": 0.0, + "step": 8430 + }, + { + "epoch": 0.5433395630598699, + "grad_norm": 0.1549475627115091, + "learning_rate": 5.078410311493018e-06, + "loss": 0.0007, + "step": 8431 + }, + { + "epoch": 0.543404008506799, + "grad_norm": 0.22845921268417974, + "learning_rate": 5.0776942355889726e-06, + "loss": 0.0004, + "step": 8432 + }, + { + "epoch": 0.5434684539537281, + "grad_norm": 0.02679208840814703, + "learning_rate": 5.076978159684927e-06, + "loss": 0.0001, + "step": 8433 + }, + { + "epoch": 0.5435328994006573, + "grad_norm": 0.004714868407184482, + "learning_rate": 5.07626208378088e-06, + "loss": 0.0, + "step": 8434 + }, + { + "epoch": 0.5435973448475865, + "grad_norm": 0.00257031806453596, + "learning_rate": 5.075546007876835e-06, + "loss": 0.0, + "step": 8435 + }, + { + "epoch": 0.5436617902945157, + "grad_norm": 0.021800592707464768, + "learning_rate": 5.07482993197279e-06, + "loss": 0.0, + "step": 8436 + }, + { + "epoch": 0.5437262357414449, + "grad_norm": 0.006127748959277537, + "learning_rate": 5.074113856068744e-06, + "loss": 0.0, + "step": 8437 + }, + { + "epoch": 0.543790681188374, + "grad_norm": 0.6883689850433048, + "learning_rate": 5.073397780164698e-06, + "loss": 0.0046, + "step": 8438 + }, + { + "epoch": 0.5438551266353032, + "grad_norm": 0.001150318426266875, + "learning_rate": 5.072681704260653e-06, + "loss": 0.0, + "step": 8439 + }, + { + "epoch": 0.5439195720822324, + "grad_norm": 0.002072532651337089, + "learning_rate": 5.071965628356607e-06, + "loss": 0.0, + "step": 8440 + }, + { + "epoch": 0.5439840175291616, + "grad_norm": 0.0002500489370783692, + "learning_rate": 5.07124955245256e-06, + "loss": 0.0, + "step": 8441 + }, + { + "epoch": 0.5440484629760908, + "grad_norm": 0.3168147475990133, + "learning_rate": 5.070533476548515e-06, + "loss": 0.0007, + "step": 8442 + }, + { + "epoch": 0.54411290842302, + "grad_norm": 0.8963605674738248, + "learning_rate": 5.069817400644469e-06, + "loss": 0.0054, + "step": 8443 + }, + { + "epoch": 0.5441773538699491, + "grad_norm": 0.001800277014813923, + "learning_rate": 5.069101324740423e-06, + "loss": 0.0, + "step": 8444 + }, + { + "epoch": 0.5442417993168782, + "grad_norm": 0.010620852971074788, + "learning_rate": 5.068385248836378e-06, + "loss": 0.0, + "step": 8445 + }, + { + "epoch": 0.5443062447638074, + "grad_norm": 0.012435870960374875, + "learning_rate": 5.067669172932331e-06, + "loss": 0.0, + "step": 8446 + }, + { + "epoch": 0.5443706902107366, + "grad_norm": 0.016274537573299074, + "learning_rate": 5.066953097028285e-06, + "loss": 0.0002, + "step": 8447 + }, + { + "epoch": 0.5444351356576658, + "grad_norm": 0.046486270889920786, + "learning_rate": 5.06623702112424e-06, + "loss": 0.0, + "step": 8448 + }, + { + "epoch": 0.544499581104595, + "grad_norm": 0.2916972351517615, + "learning_rate": 5.065520945220194e-06, + "loss": 0.0006, + "step": 8449 + }, + { + "epoch": 0.5445640265515241, + "grad_norm": 0.006940782184560781, + "learning_rate": 5.064804869316147e-06, + "loss": 0.0, + "step": 8450 + }, + { + "epoch": 0.5446284719984533, + "grad_norm": 0.016796576003806782, + "learning_rate": 5.064088793412102e-06, + "loss": 0.0, + "step": 8451 + }, + { + "epoch": 0.5446929174453825, + "grad_norm": 0.1291454725284565, + "learning_rate": 5.063372717508056e-06, + "loss": 0.0004, + "step": 8452 + }, + { + "epoch": 0.5447573628923117, + "grad_norm": 0.21752034007255436, + "learning_rate": 5.06265664160401e-06, + "loss": 0.0037, + "step": 8453 + }, + { + "epoch": 0.5448218083392409, + "grad_norm": 0.21748016693135402, + "learning_rate": 5.061940565699965e-06, + "loss": 0.0007, + "step": 8454 + }, + { + "epoch": 0.5448862537861701, + "grad_norm": 0.013149752274337866, + "learning_rate": 5.061224489795918e-06, + "loss": 0.0, + "step": 8455 + }, + { + "epoch": 0.5449506992330991, + "grad_norm": 0.00546176216623649, + "learning_rate": 5.060508413891872e-06, + "loss": 0.0, + "step": 8456 + }, + { + "epoch": 0.5450151446800283, + "grad_norm": 0.1679995530738449, + "learning_rate": 5.059792337987827e-06, + "loss": 0.0003, + "step": 8457 + }, + { + "epoch": 0.5450795901269575, + "grad_norm": 0.005963703038636505, + "learning_rate": 5.059076262083781e-06, + "loss": 0.0, + "step": 8458 + }, + { + "epoch": 0.5451440355738867, + "grad_norm": 0.0013005704425181387, + "learning_rate": 5.058360186179736e-06, + "loss": 0.0, + "step": 8459 + }, + { + "epoch": 0.5452084810208159, + "grad_norm": 0.28935846435973755, + "learning_rate": 5.05764411027569e-06, + "loss": 0.0004, + "step": 8460 + }, + { + "epoch": 0.5452729264677451, + "grad_norm": 0.05144240661139117, + "learning_rate": 5.056928034371645e-06, + "loss": 0.0001, + "step": 8461 + }, + { + "epoch": 0.5453373719146742, + "grad_norm": 0.25995588863752056, + "learning_rate": 5.056211958467598e-06, + "loss": 0.0014, + "step": 8462 + }, + { + "epoch": 0.5454018173616034, + "grad_norm": 0.0034649733590266948, + "learning_rate": 5.0554958825635524e-06, + "loss": 0.0, + "step": 8463 + }, + { + "epoch": 0.5454662628085326, + "grad_norm": 0.00736018183046089, + "learning_rate": 5.054779806659507e-06, + "loss": 0.0, + "step": 8464 + }, + { + "epoch": 0.5455307082554618, + "grad_norm": 0.20396441547188474, + "learning_rate": 5.054063730755461e-06, + "loss": 0.0009, + "step": 8465 + }, + { + "epoch": 0.545595153702391, + "grad_norm": 0.0818718599215992, + "learning_rate": 5.0533476548514145e-06, + "loss": 0.0001, + "step": 8466 + }, + { + "epoch": 0.54565959914932, + "grad_norm": 0.571786751738957, + "learning_rate": 5.052631578947369e-06, + "loss": 0.0005, + "step": 8467 + }, + { + "epoch": 0.5457240445962492, + "grad_norm": 0.03189593553225645, + "learning_rate": 5.051915503043323e-06, + "loss": 0.0, + "step": 8468 + }, + { + "epoch": 0.5457884900431784, + "grad_norm": 0.2562977042803086, + "learning_rate": 5.051199427139277e-06, + "loss": 0.0055, + "step": 8469 + }, + { + "epoch": 0.5458529354901076, + "grad_norm": 0.1811014294086341, + "learning_rate": 5.050483351235232e-06, + "loss": 0.0001, + "step": 8470 + }, + { + "epoch": 0.5459173809370368, + "grad_norm": 0.00438817026625853, + "learning_rate": 5.049767275331185e-06, + "loss": 0.0, + "step": 8471 + }, + { + "epoch": 0.545981826383966, + "grad_norm": 0.05164839767589543, + "learning_rate": 5.049051199427139e-06, + "loss": 0.0001, + "step": 8472 + }, + { + "epoch": 0.5460462718308952, + "grad_norm": 0.0001833333554516947, + "learning_rate": 5.048335123523094e-06, + "loss": 0.0, + "step": 8473 + }, + { + "epoch": 0.5461107172778243, + "grad_norm": 0.014345555426445568, + "learning_rate": 5.047619047619048e-06, + "loss": 0.0, + "step": 8474 + }, + { + "epoch": 0.5461751627247535, + "grad_norm": 0.0011238233457995053, + "learning_rate": 5.046902971715002e-06, + "loss": 0.0, + "step": 8475 + }, + { + "epoch": 0.5462396081716827, + "grad_norm": 1.2545085185504081, + "learning_rate": 5.046186895810956e-06, + "loss": 0.0033, + "step": 8476 + }, + { + "epoch": 0.5463040536186119, + "grad_norm": 0.5972288261339238, + "learning_rate": 5.04547081990691e-06, + "loss": 0.0017, + "step": 8477 + }, + { + "epoch": 0.546368499065541, + "grad_norm": 0.00844234305207491, + "learning_rate": 5.044754744002864e-06, + "loss": 0.0, + "step": 8478 + }, + { + "epoch": 0.5464329445124702, + "grad_norm": 0.0011451975272744536, + "learning_rate": 5.044038668098819e-06, + "loss": 0.0, + "step": 8479 + }, + { + "epoch": 0.5464973899593993, + "grad_norm": 0.05387772975290664, + "learning_rate": 5.043322592194773e-06, + "loss": 0.0002, + "step": 8480 + }, + { + "epoch": 0.5465618354063285, + "grad_norm": 0.0005193757903388635, + "learning_rate": 5.042606516290726e-06, + "loss": 0.0, + "step": 8481 + }, + { + "epoch": 0.5466262808532577, + "grad_norm": 5.494419434045538e-05, + "learning_rate": 5.041890440386681e-06, + "loss": 0.0, + "step": 8482 + }, + { + "epoch": 0.5466907263001869, + "grad_norm": 0.027016486452575494, + "learning_rate": 5.041174364482636e-06, + "loss": 0.0, + "step": 8483 + }, + { + "epoch": 0.5467551717471161, + "grad_norm": 0.0043791513808389085, + "learning_rate": 5.04045828857859e-06, + "loss": 0.0, + "step": 8484 + }, + { + "epoch": 0.5468196171940453, + "grad_norm": 0.29107433725311255, + "learning_rate": 5.0397422126745445e-06, + "loss": 0.0023, + "step": 8485 + }, + { + "epoch": 0.5468840626409744, + "grad_norm": 0.1543428437047449, + "learning_rate": 5.039026136770499e-06, + "loss": 0.001, + "step": 8486 + }, + { + "epoch": 0.5469485080879036, + "grad_norm": 0.008881782261700416, + "learning_rate": 5.038310060866452e-06, + "loss": 0.0, + "step": 8487 + }, + { + "epoch": 0.5470129535348328, + "grad_norm": 0.06710262718182129, + "learning_rate": 5.0375939849624065e-06, + "loss": 0.0003, + "step": 8488 + }, + { + "epoch": 0.5470773989817619, + "grad_norm": 0.04351480667082749, + "learning_rate": 5.036877909058361e-06, + "loss": 0.0016, + "step": 8489 + }, + { + "epoch": 0.5471418444286911, + "grad_norm": 0.0027709001929434106, + "learning_rate": 5.036161833154315e-06, + "loss": 0.0, + "step": 8490 + }, + { + "epoch": 0.5472062898756203, + "grad_norm": 0.008246434111247947, + "learning_rate": 5.035445757250269e-06, + "loss": 0.0, + "step": 8491 + }, + { + "epoch": 0.5472707353225494, + "grad_norm": 0.001855429937951199, + "learning_rate": 5.034729681346223e-06, + "loss": 0.0, + "step": 8492 + }, + { + "epoch": 0.5473351807694786, + "grad_norm": 0.054930165588919964, + "learning_rate": 5.034013605442177e-06, + "loss": 0.0002, + "step": 8493 + }, + { + "epoch": 0.5473996262164078, + "grad_norm": 0.005980705986750074, + "learning_rate": 5.0332975295381314e-06, + "loss": 0.0, + "step": 8494 + }, + { + "epoch": 0.547464071663337, + "grad_norm": 0.0031414896183599274, + "learning_rate": 5.032581453634086e-06, + "loss": 0.0, + "step": 8495 + }, + { + "epoch": 0.5475285171102662, + "grad_norm": 0.0005091821035342204, + "learning_rate": 5.03186537773004e-06, + "loss": 0.0, + "step": 8496 + }, + { + "epoch": 0.5475929625571954, + "grad_norm": 0.0017150736231015301, + "learning_rate": 5.0311493018259935e-06, + "loss": 0.0, + "step": 8497 + }, + { + "epoch": 0.5476574080041245, + "grad_norm": 0.19840486551000913, + "learning_rate": 5.030433225921948e-06, + "loss": 0.0002, + "step": 8498 + }, + { + "epoch": 0.5477218534510537, + "grad_norm": 1.3321290190046273, + "learning_rate": 5.029717150017902e-06, + "loss": 0.0099, + "step": 8499 + }, + { + "epoch": 0.5477862988979828, + "grad_norm": 0.00012402391058209097, + "learning_rate": 5.029001074113856e-06, + "loss": 0.0, + "step": 8500 + }, + { + "epoch": 0.547850744344912, + "grad_norm": 0.1594086270292522, + "learning_rate": 5.028284998209811e-06, + "loss": 0.0033, + "step": 8501 + }, + { + "epoch": 0.5479151897918412, + "grad_norm": 0.005608211860773984, + "learning_rate": 5.027568922305764e-06, + "loss": 0.0, + "step": 8502 + }, + { + "epoch": 0.5479796352387704, + "grad_norm": 0.000615421787577941, + "learning_rate": 5.0268528464017184e-06, + "loss": 0.0, + "step": 8503 + }, + { + "epoch": 0.5480440806856995, + "grad_norm": 0.002077126853143437, + "learning_rate": 5.026136770497673e-06, + "loss": 0.0, + "step": 8504 + }, + { + "epoch": 0.5481085261326287, + "grad_norm": 0.06163211903730178, + "learning_rate": 5.025420694593627e-06, + "loss": 0.0036, + "step": 8505 + }, + { + "epoch": 0.5481729715795579, + "grad_norm": 0.016333306029010678, + "learning_rate": 5.024704618689582e-06, + "loss": 0.0, + "step": 8506 + }, + { + "epoch": 0.5482374170264871, + "grad_norm": 0.002153469672369918, + "learning_rate": 5.0239885427855365e-06, + "loss": 0.0, + "step": 8507 + }, + { + "epoch": 0.5483018624734163, + "grad_norm": 0.1651638620042547, + "learning_rate": 5.02327246688149e-06, + "loss": 0.0011, + "step": 8508 + }, + { + "epoch": 0.5483663079203455, + "grad_norm": 0.00636054384750868, + "learning_rate": 5.022556390977444e-06, + "loss": 0.0001, + "step": 8509 + }, + { + "epoch": 0.5484307533672746, + "grad_norm": 0.0033881972217375076, + "learning_rate": 5.0218403150733985e-06, + "loss": 0.0, + "step": 8510 + }, + { + "epoch": 0.5484951988142037, + "grad_norm": 0.0018993068083641498, + "learning_rate": 5.021124239169353e-06, + "loss": 0.0, + "step": 8511 + }, + { + "epoch": 0.5485596442611329, + "grad_norm": 0.005748847183468727, + "learning_rate": 5.020408163265307e-06, + "loss": 0.0, + "step": 8512 + }, + { + "epoch": 0.5486240897080621, + "grad_norm": 0.27499401671229495, + "learning_rate": 5.019692087361261e-06, + "loss": 0.0009, + "step": 8513 + }, + { + "epoch": 0.5486885351549913, + "grad_norm": 0.017018584289137592, + "learning_rate": 5.018976011457215e-06, + "loss": 0.0, + "step": 8514 + }, + { + "epoch": 0.5487529806019205, + "grad_norm": 0.0014632281008861163, + "learning_rate": 5.018259935553169e-06, + "loss": 0.0, + "step": 8515 + }, + { + "epoch": 0.5488174260488496, + "grad_norm": 0.012847125648462757, + "learning_rate": 5.0175438596491235e-06, + "loss": 0.0, + "step": 8516 + }, + { + "epoch": 0.5488818714957788, + "grad_norm": 0.007008510181476973, + "learning_rate": 5.016827783745078e-06, + "loss": 0.0, + "step": 8517 + }, + { + "epoch": 0.548946316942708, + "grad_norm": 0.11917408488960375, + "learning_rate": 5.016111707841031e-06, + "loss": 0.0003, + "step": 8518 + }, + { + "epoch": 0.5490107623896372, + "grad_norm": 0.007272636048923761, + "learning_rate": 5.0153956319369855e-06, + "loss": 0.0001, + "step": 8519 + }, + { + "epoch": 0.5490752078365664, + "grad_norm": 0.004351227458944417, + "learning_rate": 5.01467955603294e-06, + "loss": 0.0, + "step": 8520 + }, + { + "epoch": 0.5491396532834956, + "grad_norm": 0.19051117742905466, + "learning_rate": 5.013963480128894e-06, + "loss": 0.0034, + "step": 8521 + }, + { + "epoch": 0.5492040987304248, + "grad_norm": 0.00026021408293215944, + "learning_rate": 5.0132474042248484e-06, + "loss": 0.0, + "step": 8522 + }, + { + "epoch": 0.5492685441773538, + "grad_norm": 0.002791641585007276, + "learning_rate": 5.012531328320802e-06, + "loss": 0.0, + "step": 8523 + }, + { + "epoch": 0.549332989624283, + "grad_norm": 0.005355082463114203, + "learning_rate": 5.011815252416756e-06, + "loss": 0.0, + "step": 8524 + }, + { + "epoch": 0.5493974350712122, + "grad_norm": 0.005519173063785302, + "learning_rate": 5.0110991765127105e-06, + "loss": 0.0, + "step": 8525 + }, + { + "epoch": 0.5494618805181414, + "grad_norm": 0.0020975079614027133, + "learning_rate": 5.010383100608665e-06, + "loss": 0.0, + "step": 8526 + }, + { + "epoch": 0.5495263259650706, + "grad_norm": 0.015470825862644465, + "learning_rate": 5.009667024704618e-06, + "loss": 0.0001, + "step": 8527 + }, + { + "epoch": 0.5495907714119997, + "grad_norm": 0.0014627428871791936, + "learning_rate": 5.0089509488005725e-06, + "loss": 0.0, + "step": 8528 + }, + { + "epoch": 0.5496552168589289, + "grad_norm": 0.012332215954442869, + "learning_rate": 5.008234872896528e-06, + "loss": 0.0001, + "step": 8529 + }, + { + "epoch": 0.5497196623058581, + "grad_norm": 0.000407497928952683, + "learning_rate": 5.007518796992482e-06, + "loss": 0.0, + "step": 8530 + }, + { + "epoch": 0.5497841077527873, + "grad_norm": 0.0013019234173674507, + "learning_rate": 5.006802721088436e-06, + "loss": 0.0, + "step": 8531 + }, + { + "epoch": 0.5498485531997165, + "grad_norm": 0.0013845580062636382, + "learning_rate": 5.0060866451843906e-06, + "loss": 0.0, + "step": 8532 + }, + { + "epoch": 0.5499129986466457, + "grad_norm": 0.2245196370524226, + "learning_rate": 5.005370569280345e-06, + "loss": 0.0005, + "step": 8533 + }, + { + "epoch": 0.5499774440935747, + "grad_norm": 8.993623459536706e-05, + "learning_rate": 5.004654493376298e-06, + "loss": 0.0, + "step": 8534 + }, + { + "epoch": 0.5500418895405039, + "grad_norm": 0.3626066527828885, + "learning_rate": 5.003938417472253e-06, + "loss": 0.0022, + "step": 8535 + }, + { + "epoch": 0.5501063349874331, + "grad_norm": 0.020626980673355562, + "learning_rate": 5.003222341568207e-06, + "loss": 0.0, + "step": 8536 + }, + { + "epoch": 0.5501707804343623, + "grad_norm": 0.10646967652793571, + "learning_rate": 5.002506265664161e-06, + "loss": 0.0004, + "step": 8537 + }, + { + "epoch": 0.5502352258812915, + "grad_norm": 0.09317456412157121, + "learning_rate": 5.0017901897601155e-06, + "loss": 0.0002, + "step": 8538 + }, + { + "epoch": 0.5502996713282207, + "grad_norm": 0.036085198902033974, + "learning_rate": 5.001074113856069e-06, + "loss": 0.0002, + "step": 8539 + }, + { + "epoch": 0.5503641167751498, + "grad_norm": 0.0037343965925228746, + "learning_rate": 5.000358037952023e-06, + "loss": 0.0, + "step": 8540 + }, + { + "epoch": 0.550428562222079, + "grad_norm": 0.009326869675734522, + "learning_rate": 4.9996419620479776e-06, + "loss": 0.0, + "step": 8541 + }, + { + "epoch": 0.5504930076690082, + "grad_norm": 0.021823792039814445, + "learning_rate": 4.998925886143932e-06, + "loss": 0.0001, + "step": 8542 + }, + { + "epoch": 0.5505574531159374, + "grad_norm": 0.0001219656029408, + "learning_rate": 4.998209810239885e-06, + "loss": 0.0, + "step": 8543 + }, + { + "epoch": 0.5506218985628666, + "grad_norm": 0.03387806160686626, + "learning_rate": 4.99749373433584e-06, + "loss": 0.0, + "step": 8544 + }, + { + "epoch": 0.5506863440097957, + "grad_norm": 0.00012018551306236546, + "learning_rate": 4.996777658431794e-06, + "loss": 0.0, + "step": 8545 + }, + { + "epoch": 0.5507507894567248, + "grad_norm": 0.007050738607783566, + "learning_rate": 4.996061582527748e-06, + "loss": 0.0, + "step": 8546 + }, + { + "epoch": 0.550815234903654, + "grad_norm": 0.0021130126048716142, + "learning_rate": 4.9953455066237025e-06, + "loss": 0.0, + "step": 8547 + }, + { + "epoch": 0.5508796803505832, + "grad_norm": 0.0006315345436932797, + "learning_rate": 4.994629430719657e-06, + "loss": 0.0, + "step": 8548 + }, + { + "epoch": 0.5509441257975124, + "grad_norm": 0.0007102576706969377, + "learning_rate": 4.993913354815611e-06, + "loss": 0.0, + "step": 8549 + }, + { + "epoch": 0.5510085712444416, + "grad_norm": 0.001955949521665762, + "learning_rate": 4.993197278911565e-06, + "loss": 0.0, + "step": 8550 + }, + { + "epoch": 0.5510730166913708, + "grad_norm": 0.009190210993093906, + "learning_rate": 4.992481203007519e-06, + "loss": 0.0, + "step": 8551 + }, + { + "epoch": 0.5511374621383, + "grad_norm": 0.401591291768431, + "learning_rate": 4.991765127103473e-06, + "loss": 0.0007, + "step": 8552 + }, + { + "epoch": 0.5512019075852291, + "grad_norm": 0.0005988339569598564, + "learning_rate": 4.9910490511994274e-06, + "loss": 0.0, + "step": 8553 + }, + { + "epoch": 0.5512663530321583, + "grad_norm": 0.001051486590964813, + "learning_rate": 4.990332975295382e-06, + "loss": 0.0, + "step": 8554 + }, + { + "epoch": 0.5513307984790875, + "grad_norm": 0.006394037454805913, + "learning_rate": 4.989616899391336e-06, + "loss": 0.0, + "step": 8555 + }, + { + "epoch": 0.5513952439260166, + "grad_norm": 5.165007726001237e-05, + "learning_rate": 4.9889008234872895e-06, + "loss": 0.0, + "step": 8556 + }, + { + "epoch": 0.5514596893729458, + "grad_norm": 0.002047369514226012, + "learning_rate": 4.988184747583244e-06, + "loss": 0.0, + "step": 8557 + }, + { + "epoch": 0.551524134819875, + "grad_norm": 0.0012906349742674791, + "learning_rate": 4.987468671679198e-06, + "loss": 0.0, + "step": 8558 + }, + { + "epoch": 0.5515885802668041, + "grad_norm": 0.0029618502007701924, + "learning_rate": 4.986752595775152e-06, + "loss": 0.0, + "step": 8559 + }, + { + "epoch": 0.5516530257137333, + "grad_norm": 0.01238850996303165, + "learning_rate": 4.986036519871107e-06, + "loss": 0.0, + "step": 8560 + }, + { + "epoch": 0.5517174711606625, + "grad_norm": 0.0009714262530897783, + "learning_rate": 4.985320443967061e-06, + "loss": 0.0, + "step": 8561 + }, + { + "epoch": 0.5517819166075917, + "grad_norm": 0.0010781651410204844, + "learning_rate": 4.984604368063015e-06, + "loss": 0.0, + "step": 8562 + }, + { + "epoch": 0.5518463620545209, + "grad_norm": 0.15714035869658133, + "learning_rate": 4.98388829215897e-06, + "loss": 0.0004, + "step": 8563 + }, + { + "epoch": 0.55191080750145, + "grad_norm": 0.0034370084976843578, + "learning_rate": 4.983172216254923e-06, + "loss": 0.0, + "step": 8564 + }, + { + "epoch": 0.5519752529483792, + "grad_norm": 0.0002537463466807135, + "learning_rate": 4.982456140350877e-06, + "loss": 0.0, + "step": 8565 + }, + { + "epoch": 0.5520396983953084, + "grad_norm": 0.0035772505393363426, + "learning_rate": 4.981740064446832e-06, + "loss": 0.0, + "step": 8566 + }, + { + "epoch": 0.5521041438422375, + "grad_norm": 0.0007178845574760114, + "learning_rate": 4.981023988542786e-06, + "loss": 0.0, + "step": 8567 + }, + { + "epoch": 0.5521685892891667, + "grad_norm": 0.005066510748939021, + "learning_rate": 4.98030791263874e-06, + "loss": 0.0, + "step": 8568 + }, + { + "epoch": 0.5522330347360959, + "grad_norm": 0.0005125547267131619, + "learning_rate": 4.979591836734694e-06, + "loss": 0.0, + "step": 8569 + }, + { + "epoch": 0.552297480183025, + "grad_norm": 0.003153537373562369, + "learning_rate": 4.978875760830649e-06, + "loss": 0.0, + "step": 8570 + }, + { + "epoch": 0.5523619256299542, + "grad_norm": 0.006392792439787252, + "learning_rate": 4.978159684926603e-06, + "loss": 0.0, + "step": 8571 + }, + { + "epoch": 0.5524263710768834, + "grad_norm": 4.826716838034971, + "learning_rate": 4.977443609022557e-06, + "loss": 0.0123, + "step": 8572 + }, + { + "epoch": 0.5524908165238126, + "grad_norm": 9.228853949010394e-05, + "learning_rate": 4.976727533118511e-06, + "loss": 0.0, + "step": 8573 + }, + { + "epoch": 0.5525552619707418, + "grad_norm": 0.007953029032488415, + "learning_rate": 4.976011457214465e-06, + "loss": 0.0, + "step": 8574 + }, + { + "epoch": 0.552619707417671, + "grad_norm": 0.0009689809464016709, + "learning_rate": 4.9752953813104195e-06, + "loss": 0.0, + "step": 8575 + }, + { + "epoch": 0.5526841528646002, + "grad_norm": 0.0024152789138099672, + "learning_rate": 4.974579305406374e-06, + "loss": 0.0, + "step": 8576 + }, + { + "epoch": 0.5527485983115293, + "grad_norm": 9.91049513405085e-05, + "learning_rate": 4.973863229502327e-06, + "loss": 0.0, + "step": 8577 + }, + { + "epoch": 0.5528130437584584, + "grad_norm": 0.025710516224901427, + "learning_rate": 4.9731471535982815e-06, + "loss": 0.0001, + "step": 8578 + }, + { + "epoch": 0.5528774892053876, + "grad_norm": 0.0009076876494428708, + "learning_rate": 4.972431077694236e-06, + "loss": 0.0, + "step": 8579 + }, + { + "epoch": 0.5529419346523168, + "grad_norm": 0.3465730529980872, + "learning_rate": 4.97171500179019e-06, + "loss": 0.0018, + "step": 8580 + }, + { + "epoch": 0.553006380099246, + "grad_norm": 0.0037399123513236405, + "learning_rate": 4.9709989258861444e-06, + "loss": 0.0, + "step": 8581 + }, + { + "epoch": 0.5530708255461751, + "grad_norm": 0.0002229589558730772, + "learning_rate": 4.970282849982099e-06, + "loss": 0.0, + "step": 8582 + }, + { + "epoch": 0.5531352709931043, + "grad_norm": 0.004994821684799948, + "learning_rate": 4.969566774078053e-06, + "loss": 0.0, + "step": 8583 + }, + { + "epoch": 0.5531997164400335, + "grad_norm": 0.0001422551604455913, + "learning_rate": 4.968850698174007e-06, + "loss": 0.0, + "step": 8584 + }, + { + "epoch": 0.5532641618869627, + "grad_norm": 3.6293142231062747, + "learning_rate": 4.968134622269961e-06, + "loss": 0.0243, + "step": 8585 + }, + { + "epoch": 0.5533286073338919, + "grad_norm": 0.0006342448217312984, + "learning_rate": 4.967418546365915e-06, + "loss": 0.0, + "step": 8586 + }, + { + "epoch": 0.5533930527808211, + "grad_norm": 0.011555769561746025, + "learning_rate": 4.966702470461869e-06, + "loss": 0.0, + "step": 8587 + }, + { + "epoch": 0.5534574982277503, + "grad_norm": 0.01095507667123741, + "learning_rate": 4.965986394557824e-06, + "loss": 0.0001, + "step": 8588 + }, + { + "epoch": 0.5535219436746793, + "grad_norm": 1.259846859798293e-05, + "learning_rate": 4.965270318653778e-06, + "loss": 0.0, + "step": 8589 + }, + { + "epoch": 0.5535863891216085, + "grad_norm": 0.005156511472907512, + "learning_rate": 4.964554242749731e-06, + "loss": 0.0, + "step": 8590 + }, + { + "epoch": 0.5536508345685377, + "grad_norm": 0.2383249586696943, + "learning_rate": 4.963838166845686e-06, + "loss": 0.0009, + "step": 8591 + }, + { + "epoch": 0.5537152800154669, + "grad_norm": 0.012284478435519698, + "learning_rate": 4.96312209094164e-06, + "loss": 0.0, + "step": 8592 + }, + { + "epoch": 0.5537797254623961, + "grad_norm": 0.0055611878877656114, + "learning_rate": 4.962406015037594e-06, + "loss": 0.0, + "step": 8593 + }, + { + "epoch": 0.5538441709093253, + "grad_norm": 0.32462081338167953, + "learning_rate": 4.961689939133549e-06, + "loss": 0.0023, + "step": 8594 + }, + { + "epoch": 0.5539086163562544, + "grad_norm": 0.0036361403139943778, + "learning_rate": 4.960973863229503e-06, + "loss": 0.0, + "step": 8595 + }, + { + "epoch": 0.5539730618031836, + "grad_norm": 0.01589423451383375, + "learning_rate": 4.960257787325457e-06, + "loss": 0.0, + "step": 8596 + }, + { + "epoch": 0.5540375072501128, + "grad_norm": 0.0011489116491658889, + "learning_rate": 4.9595417114214115e-06, + "loss": 0.0, + "step": 8597 + }, + { + "epoch": 0.554101952697042, + "grad_norm": 0.0010378343495381565, + "learning_rate": 4.958825635517365e-06, + "loss": 0.0, + "step": 8598 + }, + { + "epoch": 0.5541663981439712, + "grad_norm": 0.00014715845418476452, + "learning_rate": 4.958109559613319e-06, + "loss": 0.0, + "step": 8599 + }, + { + "epoch": 0.5542308435909004, + "grad_norm": 0.0020139445294210146, + "learning_rate": 4.9573934837092736e-06, + "loss": 0.0, + "step": 8600 + }, + { + "epoch": 0.5542952890378294, + "grad_norm": 0.0007146691062408835, + "learning_rate": 4.956677407805228e-06, + "loss": 0.0, + "step": 8601 + }, + { + "epoch": 0.5543597344847586, + "grad_norm": 0.006505887901441304, + "learning_rate": 4.955961331901182e-06, + "loss": 0.0, + "step": 8602 + }, + { + "epoch": 0.5544241799316878, + "grad_norm": 0.009405139009455533, + "learning_rate": 4.955245255997136e-06, + "loss": 0.0, + "step": 8603 + }, + { + "epoch": 0.554488625378617, + "grad_norm": 0.003237377086330774, + "learning_rate": 4.95452918009309e-06, + "loss": 0.0, + "step": 8604 + }, + { + "epoch": 0.5545530708255462, + "grad_norm": 0.0031621215893174203, + "learning_rate": 4.953813104189045e-06, + "loss": 0.0, + "step": 8605 + }, + { + "epoch": 0.5546175162724754, + "grad_norm": 0.19976517593339282, + "learning_rate": 4.9530970282849985e-06, + "loss": 0.0004, + "step": 8606 + }, + { + "epoch": 0.5546819617194045, + "grad_norm": 0.0014662636392100076, + "learning_rate": 4.952380952380953e-06, + "loss": 0.0, + "step": 8607 + }, + { + "epoch": 0.5547464071663337, + "grad_norm": 0.3341552899371035, + "learning_rate": 4.951664876476907e-06, + "loss": 0.0034, + "step": 8608 + }, + { + "epoch": 0.5548108526132629, + "grad_norm": 0.004519190199484597, + "learning_rate": 4.950948800572861e-06, + "loss": 0.0, + "step": 8609 + }, + { + "epoch": 0.5548752980601921, + "grad_norm": 0.25093081164918085, + "learning_rate": 4.950232724668816e-06, + "loss": 0.0005, + "step": 8610 + }, + { + "epoch": 0.5549397435071213, + "grad_norm": 0.005922211232974903, + "learning_rate": 4.949516648764769e-06, + "loss": 0.0, + "step": 8611 + }, + { + "epoch": 0.5550041889540503, + "grad_norm": 0.0003468587612752949, + "learning_rate": 4.9488005728607234e-06, + "loss": 0.0, + "step": 8612 + }, + { + "epoch": 0.5550686344009795, + "grad_norm": 0.14501370633433025, + "learning_rate": 4.948084496956678e-06, + "loss": 0.0002, + "step": 8613 + }, + { + "epoch": 0.5551330798479087, + "grad_norm": 0.1744257960372159, + "learning_rate": 4.947368421052632e-06, + "loss": 0.0004, + "step": 8614 + }, + { + "epoch": 0.5551975252948379, + "grad_norm": 0.0036943899184547248, + "learning_rate": 4.946652345148586e-06, + "loss": 0.0, + "step": 8615 + }, + { + "epoch": 0.5552619707417671, + "grad_norm": 0.0005746585162564398, + "learning_rate": 4.94593626924454e-06, + "loss": 0.0, + "step": 8616 + }, + { + "epoch": 0.5553264161886963, + "grad_norm": 0.0054436829334644915, + "learning_rate": 4.945220193340495e-06, + "loss": 0.0, + "step": 8617 + }, + { + "epoch": 0.5553908616356255, + "grad_norm": 0.00019123731125635397, + "learning_rate": 4.944504117436449e-06, + "loss": 0.0, + "step": 8618 + }, + { + "epoch": 0.5554553070825546, + "grad_norm": 0.31377869303838934, + "learning_rate": 4.943788041532403e-06, + "loss": 0.0007, + "step": 8619 + }, + { + "epoch": 0.5555197525294838, + "grad_norm": 0.009279498678199789, + "learning_rate": 4.943071965628357e-06, + "loss": 0.0, + "step": 8620 + }, + { + "epoch": 0.555584197976413, + "grad_norm": 0.0013721914906434604, + "learning_rate": 4.942355889724311e-06, + "loss": 0.0, + "step": 8621 + }, + { + "epoch": 0.5556486434233422, + "grad_norm": 0.00026775923044403474, + "learning_rate": 4.941639813820266e-06, + "loss": 0.0, + "step": 8622 + }, + { + "epoch": 0.5557130888702713, + "grad_norm": 0.22156785673329588, + "learning_rate": 4.94092373791622e-06, + "loss": 0.0007, + "step": 8623 + }, + { + "epoch": 0.5557775343172005, + "grad_norm": 0.01439473978710645, + "learning_rate": 4.940207662012173e-06, + "loss": 0.0, + "step": 8624 + }, + { + "epoch": 0.5558419797641296, + "grad_norm": 0.0032194399396147095, + "learning_rate": 4.939491586108128e-06, + "loss": 0.0, + "step": 8625 + }, + { + "epoch": 0.5559064252110588, + "grad_norm": 0.00022568911306749794, + "learning_rate": 4.938775510204082e-06, + "loss": 0.0, + "step": 8626 + }, + { + "epoch": 0.555970870657988, + "grad_norm": 0.02743558809261413, + "learning_rate": 4.938059434300036e-06, + "loss": 0.0001, + "step": 8627 + }, + { + "epoch": 0.5560353161049172, + "grad_norm": 0.03291318148000441, + "learning_rate": 4.93734335839599e-06, + "loss": 0.0003, + "step": 8628 + }, + { + "epoch": 0.5560997615518464, + "grad_norm": 0.26339233556561864, + "learning_rate": 4.936627282491945e-06, + "loss": 0.0016, + "step": 8629 + }, + { + "epoch": 0.5561642069987756, + "grad_norm": 0.03465554861101963, + "learning_rate": 4.935911206587899e-06, + "loss": 0.0001, + "step": 8630 + }, + { + "epoch": 0.5562286524457047, + "grad_norm": 0.0010887964390989104, + "learning_rate": 4.9351951306838534e-06, + "loss": 0.0, + "step": 8631 + }, + { + "epoch": 0.5562930978926339, + "grad_norm": 0.0033196397485056733, + "learning_rate": 4.934479054779807e-06, + "loss": 0.0, + "step": 8632 + }, + { + "epoch": 0.5563575433395631, + "grad_norm": 0.021989216789134153, + "learning_rate": 4.933762978875761e-06, + "loss": 0.0002, + "step": 8633 + }, + { + "epoch": 0.5564219887864922, + "grad_norm": 1.53336188593831, + "learning_rate": 4.9330469029717155e-06, + "loss": 0.003, + "step": 8634 + }, + { + "epoch": 0.5564864342334214, + "grad_norm": 0.03507442853934759, + "learning_rate": 4.93233082706767e-06, + "loss": 0.0001, + "step": 8635 + }, + { + "epoch": 0.5565508796803506, + "grad_norm": 0.671429575159169, + "learning_rate": 4.931614751163623e-06, + "loss": 0.0026, + "step": 8636 + }, + { + "epoch": 0.5566153251272797, + "grad_norm": 0.0023686506544780763, + "learning_rate": 4.9308986752595775e-06, + "loss": 0.0, + "step": 8637 + }, + { + "epoch": 0.5566797705742089, + "grad_norm": 0.0010374830972492118, + "learning_rate": 4.930182599355532e-06, + "loss": 0.0, + "step": 8638 + }, + { + "epoch": 0.5567442160211381, + "grad_norm": 0.019772198207453226, + "learning_rate": 4.929466523451486e-06, + "loss": 0.0002, + "step": 8639 + }, + { + "epoch": 0.5568086614680673, + "grad_norm": 0.006079940266471607, + "learning_rate": 4.9287504475474404e-06, + "loss": 0.0, + "step": 8640 + }, + { + "epoch": 0.5568731069149965, + "grad_norm": 0.022085923738554603, + "learning_rate": 4.928034371643395e-06, + "loss": 0.0001, + "step": 8641 + }, + { + "epoch": 0.5569375523619257, + "grad_norm": 3.3427854198404874, + "learning_rate": 4.927318295739349e-06, + "loss": 0.0662, + "step": 8642 + }, + { + "epoch": 0.5570019978088548, + "grad_norm": 0.01589109570954559, + "learning_rate": 4.926602219835303e-06, + "loss": 0.0, + "step": 8643 + }, + { + "epoch": 0.557066443255784, + "grad_norm": 0.0008106748129718368, + "learning_rate": 4.925886143931257e-06, + "loss": 0.0, + "step": 8644 + }, + { + "epoch": 0.5571308887027131, + "grad_norm": 0.06000058737916828, + "learning_rate": 4.925170068027211e-06, + "loss": 0.0038, + "step": 8645 + }, + { + "epoch": 0.5571953341496423, + "grad_norm": 0.0015374185399924462, + "learning_rate": 4.924453992123165e-06, + "loss": 0.0, + "step": 8646 + }, + { + "epoch": 0.5572597795965715, + "grad_norm": 0.001674477621343504, + "learning_rate": 4.92373791621912e-06, + "loss": 0.0, + "step": 8647 + }, + { + "epoch": 0.5573242250435007, + "grad_norm": 0.0012146313544239115, + "learning_rate": 4.923021840315074e-06, + "loss": 0.0, + "step": 8648 + }, + { + "epoch": 0.5573886704904298, + "grad_norm": 0.012782928730319724, + "learning_rate": 4.922305764411027e-06, + "loss": 0.0001, + "step": 8649 + }, + { + "epoch": 0.557453115937359, + "grad_norm": 0.07260694958982807, + "learning_rate": 4.921589688506982e-06, + "loss": 0.001, + "step": 8650 + }, + { + "epoch": 0.5575175613842882, + "grad_norm": 0.004229980046474155, + "learning_rate": 4.920873612602936e-06, + "loss": 0.0, + "step": 8651 + }, + { + "epoch": 0.5575820068312174, + "grad_norm": 0.09238504624841795, + "learning_rate": 4.92015753669889e-06, + "loss": 0.0002, + "step": 8652 + }, + { + "epoch": 0.5576464522781466, + "grad_norm": 0.019205153148170562, + "learning_rate": 4.919441460794845e-06, + "loss": 0.0001, + "step": 8653 + }, + { + "epoch": 0.5577108977250758, + "grad_norm": 0.004936052066062885, + "learning_rate": 4.918725384890799e-06, + "loss": 0.0, + "step": 8654 + }, + { + "epoch": 0.557775343172005, + "grad_norm": 0.006712840825438057, + "learning_rate": 4.918009308986753e-06, + "loss": 0.0, + "step": 8655 + }, + { + "epoch": 0.557839788618934, + "grad_norm": 0.002762241064491735, + "learning_rate": 4.9172932330827075e-06, + "loss": 0.0, + "step": 8656 + }, + { + "epoch": 0.5579042340658632, + "grad_norm": 0.01698880984180648, + "learning_rate": 4.916577157178661e-06, + "loss": 0.0001, + "step": 8657 + }, + { + "epoch": 0.5579686795127924, + "grad_norm": 0.00016716416716050216, + "learning_rate": 4.915861081274615e-06, + "loss": 0.0, + "step": 8658 + }, + { + "epoch": 0.5580331249597216, + "grad_norm": 0.07974039068577432, + "learning_rate": 4.9151450053705696e-06, + "loss": 0.0002, + "step": 8659 + }, + { + "epoch": 0.5580975704066508, + "grad_norm": 0.013269370565432554, + "learning_rate": 4.914428929466524e-06, + "loss": 0.0, + "step": 8660 + }, + { + "epoch": 0.5581620158535799, + "grad_norm": 0.005166149279363144, + "learning_rate": 4.913712853562478e-06, + "loss": 0.0, + "step": 8661 + }, + { + "epoch": 0.5582264613005091, + "grad_norm": 0.003897579659758297, + "learning_rate": 4.912996777658432e-06, + "loss": 0.0, + "step": 8662 + }, + { + "epoch": 0.5582909067474383, + "grad_norm": 0.006220268919483848, + "learning_rate": 4.912280701754386e-06, + "loss": 0.0, + "step": 8663 + }, + { + "epoch": 0.5583553521943675, + "grad_norm": 0.005305744753739531, + "learning_rate": 4.911564625850341e-06, + "loss": 0.0, + "step": 8664 + }, + { + "epoch": 0.5584197976412967, + "grad_norm": 0.0005758029329325327, + "learning_rate": 4.9108485499462945e-06, + "loss": 0.0, + "step": 8665 + }, + { + "epoch": 0.5584842430882259, + "grad_norm": 0.000827279042110068, + "learning_rate": 4.910132474042249e-06, + "loss": 0.0, + "step": 8666 + }, + { + "epoch": 0.558548688535155, + "grad_norm": 0.004102911737880749, + "learning_rate": 4.909416398138203e-06, + "loss": 0.0, + "step": 8667 + }, + { + "epoch": 0.5586131339820841, + "grad_norm": 0.0004710363920028894, + "learning_rate": 4.908700322234157e-06, + "loss": 0.0, + "step": 8668 + }, + { + "epoch": 0.5586775794290133, + "grad_norm": 0.00015958688110151233, + "learning_rate": 4.907984246330112e-06, + "loss": 0.0, + "step": 8669 + }, + { + "epoch": 0.5587420248759425, + "grad_norm": 0.02376513779259615, + "learning_rate": 4.907268170426065e-06, + "loss": 0.0, + "step": 8670 + }, + { + "epoch": 0.5588064703228717, + "grad_norm": 0.002388357422968757, + "learning_rate": 4.9065520945220194e-06, + "loss": 0.0, + "step": 8671 + }, + { + "epoch": 0.5588709157698009, + "grad_norm": 0.0016862129693133454, + "learning_rate": 4.905836018617974e-06, + "loss": 0.0, + "step": 8672 + }, + { + "epoch": 0.55893536121673, + "grad_norm": 0.004936221946703141, + "learning_rate": 4.905119942713928e-06, + "loss": 0.0, + "step": 8673 + }, + { + "epoch": 0.5589998066636592, + "grad_norm": 0.0015385476593219008, + "learning_rate": 4.904403866809882e-06, + "loss": 0.0, + "step": 8674 + }, + { + "epoch": 0.5590642521105884, + "grad_norm": 0.09754154621517012, + "learning_rate": 4.903687790905837e-06, + "loss": 0.0013, + "step": 8675 + }, + { + "epoch": 0.5591286975575176, + "grad_norm": 0.0025453102097556697, + "learning_rate": 4.902971715001791e-06, + "loss": 0.0, + "step": 8676 + }, + { + "epoch": 0.5591931430044468, + "grad_norm": 0.000572278693487006, + "learning_rate": 4.902255639097745e-06, + "loss": 0.0, + "step": 8677 + }, + { + "epoch": 0.559257588451376, + "grad_norm": 3.171789459428063, + "learning_rate": 4.901539563193699e-06, + "loss": 0.0115, + "step": 8678 + }, + { + "epoch": 0.559322033898305, + "grad_norm": 0.005036475146354532, + "learning_rate": 4.900823487289653e-06, + "loss": 0.0, + "step": 8679 + }, + { + "epoch": 0.5593864793452342, + "grad_norm": 0.039560586102517295, + "learning_rate": 4.900107411385607e-06, + "loss": 0.0001, + "step": 8680 + }, + { + "epoch": 0.5594509247921634, + "grad_norm": 0.1343451450139708, + "learning_rate": 4.899391335481562e-06, + "loss": 0.0006, + "step": 8681 + }, + { + "epoch": 0.5595153702390926, + "grad_norm": 0.012012192767179632, + "learning_rate": 4.898675259577516e-06, + "loss": 0.0001, + "step": 8682 + }, + { + "epoch": 0.5595798156860218, + "grad_norm": 0.002232516437988932, + "learning_rate": 4.897959183673469e-06, + "loss": 0.0, + "step": 8683 + }, + { + "epoch": 0.559644261132951, + "grad_norm": 0.0005160028869192366, + "learning_rate": 4.897243107769424e-06, + "loss": 0.0, + "step": 8684 + }, + { + "epoch": 0.5597087065798801, + "grad_norm": 0.17146186593991028, + "learning_rate": 4.896527031865378e-06, + "loss": 0.001, + "step": 8685 + }, + { + "epoch": 0.5597731520268093, + "grad_norm": 0.24822877236891366, + "learning_rate": 4.895810955961332e-06, + "loss": 0.0006, + "step": 8686 + }, + { + "epoch": 0.5598375974737385, + "grad_norm": 0.039314318138136045, + "learning_rate": 4.8950948800572865e-06, + "loss": 0.0004, + "step": 8687 + }, + { + "epoch": 0.5599020429206677, + "grad_norm": 0.8952721272098672, + "learning_rate": 4.894378804153241e-06, + "loss": 0.0025, + "step": 8688 + }, + { + "epoch": 0.5599664883675969, + "grad_norm": 0.000244431402831098, + "learning_rate": 4.893662728249195e-06, + "loss": 0.0, + "step": 8689 + }, + { + "epoch": 0.560030933814526, + "grad_norm": 0.003176688002738185, + "learning_rate": 4.8929466523451494e-06, + "loss": 0.0, + "step": 8690 + }, + { + "epoch": 0.5600953792614551, + "grad_norm": 0.0003265274312593406, + "learning_rate": 4.892230576441103e-06, + "loss": 0.0, + "step": 8691 + }, + { + "epoch": 0.5601598247083843, + "grad_norm": 0.0016430742264626347, + "learning_rate": 4.891514500537057e-06, + "loss": 0.0, + "step": 8692 + }, + { + "epoch": 0.5602242701553135, + "grad_norm": 0.0009334472852909937, + "learning_rate": 4.8907984246330115e-06, + "loss": 0.0, + "step": 8693 + }, + { + "epoch": 0.5602887156022427, + "grad_norm": 7.442905227648027e-05, + "learning_rate": 4.890082348728966e-06, + "loss": 0.0, + "step": 8694 + }, + { + "epoch": 0.5603531610491719, + "grad_norm": 0.16791826420947753, + "learning_rate": 4.88936627282492e-06, + "loss": 0.0007, + "step": 8695 + }, + { + "epoch": 0.5604176064961011, + "grad_norm": 0.3082393107668559, + "learning_rate": 4.8886501969208735e-06, + "loss": 0.0007, + "step": 8696 + }, + { + "epoch": 0.5604820519430302, + "grad_norm": 0.00016087311292047117, + "learning_rate": 4.887934121016828e-06, + "loss": 0.0, + "step": 8697 + }, + { + "epoch": 0.5605464973899594, + "grad_norm": 0.16002245291919298, + "learning_rate": 4.887218045112782e-06, + "loss": 0.0008, + "step": 8698 + }, + { + "epoch": 0.5606109428368886, + "grad_norm": 0.004248675914345726, + "learning_rate": 4.8865019692087364e-06, + "loss": 0.0, + "step": 8699 + }, + { + "epoch": 0.5606753882838178, + "grad_norm": 0.006593991119702283, + "learning_rate": 4.885785893304691e-06, + "loss": 0.0, + "step": 8700 + }, + { + "epoch": 0.5607398337307469, + "grad_norm": 0.32584103920711627, + "learning_rate": 4.885069817400645e-06, + "loss": 0.0014, + "step": 8701 + }, + { + "epoch": 0.5608042791776761, + "grad_norm": 0.0002263789195140541, + "learning_rate": 4.884353741496599e-06, + "loss": 0.0, + "step": 8702 + }, + { + "epoch": 0.5608687246246052, + "grad_norm": 0.7640283245408404, + "learning_rate": 4.883637665592554e-06, + "loss": 0.0059, + "step": 8703 + }, + { + "epoch": 0.5609331700715344, + "grad_norm": 7.212601342396701e-05, + "learning_rate": 4.882921589688507e-06, + "loss": 0.0, + "step": 8704 + }, + { + "epoch": 0.5609976155184636, + "grad_norm": 0.010067607272151306, + "learning_rate": 4.882205513784461e-06, + "loss": 0.0, + "step": 8705 + }, + { + "epoch": 0.5610620609653928, + "grad_norm": 0.010785156772366373, + "learning_rate": 4.881489437880416e-06, + "loss": 0.0001, + "step": 8706 + }, + { + "epoch": 0.561126506412322, + "grad_norm": 0.020743954743965017, + "learning_rate": 4.88077336197637e-06, + "loss": 0.0001, + "step": 8707 + }, + { + "epoch": 0.5611909518592512, + "grad_norm": 0.01784634922520059, + "learning_rate": 4.880057286072324e-06, + "loss": 0.0, + "step": 8708 + }, + { + "epoch": 0.5612553973061803, + "grad_norm": 0.030889615161481417, + "learning_rate": 4.879341210168278e-06, + "loss": 0.0001, + "step": 8709 + }, + { + "epoch": 0.5613198427531095, + "grad_norm": 0.08717037831813229, + "learning_rate": 4.878625134264233e-06, + "loss": 0.0001, + "step": 8710 + }, + { + "epoch": 0.5613842882000387, + "grad_norm": 4.0069996870915824e-05, + "learning_rate": 4.877909058360187e-06, + "loss": 0.0, + "step": 8711 + }, + { + "epoch": 0.5614487336469678, + "grad_norm": 0.21334305206683088, + "learning_rate": 4.877192982456141e-06, + "loss": 0.0004, + "step": 8712 + }, + { + "epoch": 0.561513179093897, + "grad_norm": 0.0009662374374246946, + "learning_rate": 4.876476906552095e-06, + "loss": 0.0, + "step": 8713 + }, + { + "epoch": 0.5615776245408262, + "grad_norm": 0.001994360683181997, + "learning_rate": 4.875760830648049e-06, + "loss": 0.0, + "step": 8714 + }, + { + "epoch": 0.5616420699877553, + "grad_norm": 0.0007313893970778114, + "learning_rate": 4.8750447547440035e-06, + "loss": 0.0, + "step": 8715 + }, + { + "epoch": 0.5617065154346845, + "grad_norm": 0.12489726270253314, + "learning_rate": 4.874328678839958e-06, + "loss": 0.0003, + "step": 8716 + }, + { + "epoch": 0.5617709608816137, + "grad_norm": 0.023161239719397465, + "learning_rate": 4.873612602935911e-06, + "loss": 0.0, + "step": 8717 + }, + { + "epoch": 0.5618354063285429, + "grad_norm": 0.12134647530902526, + "learning_rate": 4.8728965270318656e-06, + "loss": 0.0004, + "step": 8718 + }, + { + "epoch": 0.5618998517754721, + "grad_norm": 0.0005077160448647168, + "learning_rate": 4.87218045112782e-06, + "loss": 0.0, + "step": 8719 + }, + { + "epoch": 0.5619642972224013, + "grad_norm": 0.4305684855102253, + "learning_rate": 4.871464375223774e-06, + "loss": 0.0031, + "step": 8720 + }, + { + "epoch": 0.5620287426693304, + "grad_norm": 0.03279392093936876, + "learning_rate": 4.8707482993197285e-06, + "loss": 0.0001, + "step": 8721 + }, + { + "epoch": 0.5620931881162596, + "grad_norm": 0.012182210483527876, + "learning_rate": 4.870032223415683e-06, + "loss": 0.0001, + "step": 8722 + }, + { + "epoch": 0.5621576335631887, + "grad_norm": 0.01315964680711356, + "learning_rate": 4.869316147511637e-06, + "loss": 0.0, + "step": 8723 + }, + { + "epoch": 0.5622220790101179, + "grad_norm": 0.011839825426168998, + "learning_rate": 4.868600071607591e-06, + "loss": 0.0, + "step": 8724 + }, + { + "epoch": 0.5622865244570471, + "grad_norm": 0.003285467399204521, + "learning_rate": 4.867883995703545e-06, + "loss": 0.0, + "step": 8725 + }, + { + "epoch": 0.5623509699039763, + "grad_norm": 0.008998222780521375, + "learning_rate": 4.867167919799499e-06, + "loss": 0.0, + "step": 8726 + }, + { + "epoch": 0.5624154153509054, + "grad_norm": 0.006015495470261791, + "learning_rate": 4.866451843895453e-06, + "loss": 0.0, + "step": 8727 + }, + { + "epoch": 0.5624798607978346, + "grad_norm": 0.0036835391157751737, + "learning_rate": 4.865735767991408e-06, + "loss": 0.0, + "step": 8728 + }, + { + "epoch": 0.5625443062447638, + "grad_norm": 0.000797614702290517, + "learning_rate": 4.865019692087362e-06, + "loss": 0.0, + "step": 8729 + }, + { + "epoch": 0.562608751691693, + "grad_norm": 0.002094505358365688, + "learning_rate": 4.8643036161833154e-06, + "loss": 0.0, + "step": 8730 + }, + { + "epoch": 0.5626731971386222, + "grad_norm": 0.0011268998344081452, + "learning_rate": 4.86358754027927e-06, + "loss": 0.0, + "step": 8731 + }, + { + "epoch": 0.5627376425855514, + "grad_norm": 0.0005851215754860948, + "learning_rate": 4.862871464375224e-06, + "loss": 0.0, + "step": 8732 + }, + { + "epoch": 0.5628020880324806, + "grad_norm": 0.002780093783662651, + "learning_rate": 4.862155388471178e-06, + "loss": 0.0, + "step": 8733 + }, + { + "epoch": 0.5628665334794096, + "grad_norm": 0.0036051815660962795, + "learning_rate": 4.861439312567133e-06, + "loss": 0.0, + "step": 8734 + }, + { + "epoch": 0.5629309789263388, + "grad_norm": 0.17071126224493394, + "learning_rate": 4.860723236663087e-06, + "loss": 0.0007, + "step": 8735 + }, + { + "epoch": 0.562995424373268, + "grad_norm": 0.0009409020897952011, + "learning_rate": 4.860007160759041e-06, + "loss": 0.0, + "step": 8736 + }, + { + "epoch": 0.5630598698201972, + "grad_norm": 0.017214624380895726, + "learning_rate": 4.8592910848549955e-06, + "loss": 0.0002, + "step": 8737 + }, + { + "epoch": 0.5631243152671264, + "grad_norm": 0.0001412999669650116, + "learning_rate": 4.858575008950949e-06, + "loss": 0.0, + "step": 8738 + }, + { + "epoch": 0.5631887607140555, + "grad_norm": 0.5667308534939416, + "learning_rate": 4.857858933046903e-06, + "loss": 0.0017, + "step": 8739 + }, + { + "epoch": 0.5632532061609847, + "grad_norm": 0.0006867199061076417, + "learning_rate": 4.857142857142858e-06, + "loss": 0.0, + "step": 8740 + }, + { + "epoch": 0.5633176516079139, + "grad_norm": 0.0001116138075197791, + "learning_rate": 4.856426781238812e-06, + "loss": 0.0, + "step": 8741 + }, + { + "epoch": 0.5633820970548431, + "grad_norm": 0.23073332358003387, + "learning_rate": 4.855710705334765e-06, + "loss": 0.0003, + "step": 8742 + }, + { + "epoch": 0.5634465425017723, + "grad_norm": 0.0007989920972693515, + "learning_rate": 4.85499462943072e-06, + "loss": 0.0, + "step": 8743 + }, + { + "epoch": 0.5635109879487015, + "grad_norm": 0.001161135735589687, + "learning_rate": 4.854278553526674e-06, + "loss": 0.0, + "step": 8744 + }, + { + "epoch": 0.5635754333956307, + "grad_norm": 0.019831510081458233, + "learning_rate": 4.853562477622629e-06, + "loss": 0.0001, + "step": 8745 + }, + { + "epoch": 0.5636398788425597, + "grad_norm": 0.0007333301662223194, + "learning_rate": 4.8528464017185825e-06, + "loss": 0.0, + "step": 8746 + }, + { + "epoch": 0.5637043242894889, + "grad_norm": 0.0004206232000712696, + "learning_rate": 4.852130325814537e-06, + "loss": 0.0, + "step": 8747 + }, + { + "epoch": 0.5637687697364181, + "grad_norm": 0.00020189099769625215, + "learning_rate": 4.851414249910491e-06, + "loss": 0.0, + "step": 8748 + }, + { + "epoch": 0.5638332151833473, + "grad_norm": 0.23797480623700412, + "learning_rate": 4.8506981740064454e-06, + "loss": 0.0018, + "step": 8749 + }, + { + "epoch": 0.5638976606302765, + "grad_norm": 0.002936163898924804, + "learning_rate": 4.849982098102399e-06, + "loss": 0.0, + "step": 8750 + }, + { + "epoch": 0.5639621060772056, + "grad_norm": 0.001416732870031649, + "learning_rate": 4.849266022198353e-06, + "loss": 0.0, + "step": 8751 + }, + { + "epoch": 0.5640265515241348, + "grad_norm": 0.0005018175610769106, + "learning_rate": 4.8485499462943075e-06, + "loss": 0.0, + "step": 8752 + }, + { + "epoch": 0.564090996971064, + "grad_norm": 0.20247256512586775, + "learning_rate": 4.847833870390262e-06, + "loss": 0.004, + "step": 8753 + }, + { + "epoch": 0.5641554424179932, + "grad_norm": 0.004722470804156485, + "learning_rate": 4.847117794486216e-06, + "loss": 0.0001, + "step": 8754 + }, + { + "epoch": 0.5642198878649224, + "grad_norm": 0.0189753211178013, + "learning_rate": 4.8464017185821695e-06, + "loss": 0.0, + "step": 8755 + }, + { + "epoch": 0.5642843333118516, + "grad_norm": 0.1526467964911563, + "learning_rate": 4.845685642678124e-06, + "loss": 0.0005, + "step": 8756 + }, + { + "epoch": 0.5643487787587806, + "grad_norm": 0.002193407182671908, + "learning_rate": 4.844969566774079e-06, + "loss": 0.0, + "step": 8757 + }, + { + "epoch": 0.5644132242057098, + "grad_norm": 0.0024595912356725554, + "learning_rate": 4.8442534908700324e-06, + "loss": 0.0, + "step": 8758 + }, + { + "epoch": 0.564477669652639, + "grad_norm": 0.16334430991807178, + "learning_rate": 4.843537414965987e-06, + "loss": 0.0001, + "step": 8759 + }, + { + "epoch": 0.5645421150995682, + "grad_norm": 6.383292916162537e-05, + "learning_rate": 4.842821339061941e-06, + "loss": 0.0, + "step": 8760 + }, + { + "epoch": 0.5646065605464974, + "grad_norm": 0.0006328202426825101, + "learning_rate": 4.842105263157895e-06, + "loss": 0.0, + "step": 8761 + }, + { + "epoch": 0.5646710059934266, + "grad_norm": 0.0036656038926473366, + "learning_rate": 4.84138918725385e-06, + "loss": 0.0, + "step": 8762 + }, + { + "epoch": 0.5647354514403558, + "grad_norm": 0.00020467357590308498, + "learning_rate": 4.840673111349803e-06, + "loss": 0.0, + "step": 8763 + }, + { + "epoch": 0.5647998968872849, + "grad_norm": 0.00020467357590308498, + "learning_rate": 4.840673111349803e-06, + "loss": 0.0068, + "step": 8764 + }, + { + "epoch": 0.5648643423342141, + "grad_norm": 0.3235758531595586, + "learning_rate": 4.839957035445757e-06, + "loss": 0.0026, + "step": 8765 + }, + { + "epoch": 0.5649287877811433, + "grad_norm": 0.004006022682251447, + "learning_rate": 4.839240959541712e-06, + "loss": 0.0, + "step": 8766 + }, + { + "epoch": 0.5649932332280725, + "grad_norm": 0.004125043265111191, + "learning_rate": 4.838524883637666e-06, + "loss": 0.0, + "step": 8767 + }, + { + "epoch": 0.5650576786750016, + "grad_norm": 0.5681165458293576, + "learning_rate": 4.83780880773362e-06, + "loss": 0.003, + "step": 8768 + }, + { + "epoch": 0.5651221241219307, + "grad_norm": 0.007982689949264836, + "learning_rate": 4.837092731829574e-06, + "loss": 0.0, + "step": 8769 + }, + { + "epoch": 0.5651865695688599, + "grad_norm": 0.006878737484773677, + "learning_rate": 4.836376655925529e-06, + "loss": 0.0001, + "step": 8770 + }, + { + "epoch": 0.5652510150157891, + "grad_norm": 0.0037331182480409924, + "learning_rate": 4.835660580021483e-06, + "loss": 0.0, + "step": 8771 + }, + { + "epoch": 0.5653154604627183, + "grad_norm": 0.2563085617045643, + "learning_rate": 4.834944504117437e-06, + "loss": 0.0004, + "step": 8772 + }, + { + "epoch": 0.5653799059096475, + "grad_norm": 0.009190612379956239, + "learning_rate": 4.834228428213391e-06, + "loss": 0.0001, + "step": 8773 + }, + { + "epoch": 0.5654443513565767, + "grad_norm": 0.006375923284434245, + "learning_rate": 4.833512352309345e-06, + "loss": 0.0, + "step": 8774 + }, + { + "epoch": 0.5655087968035059, + "grad_norm": 0.796118919865634, + "learning_rate": 4.8327962764052995e-06, + "loss": 0.0015, + "step": 8775 + }, + { + "epoch": 0.565573242250435, + "grad_norm": 0.00342534803484227, + "learning_rate": 4.832080200501254e-06, + "loss": 0.0, + "step": 8776 + }, + { + "epoch": 0.5656376876973642, + "grad_norm": 0.0312884269577183, + "learning_rate": 4.831364124597207e-06, + "loss": 0.0016, + "step": 8777 + }, + { + "epoch": 0.5657021331442934, + "grad_norm": 0.051568192290613435, + "learning_rate": 4.8306480486931616e-06, + "loss": 0.0005, + "step": 8778 + }, + { + "epoch": 0.5657665785912225, + "grad_norm": 0.0001771488648946319, + "learning_rate": 4.829931972789116e-06, + "loss": 0.0, + "step": 8779 + }, + { + "epoch": 0.5658310240381517, + "grad_norm": 0.005487712499418206, + "learning_rate": 4.82921589688507e-06, + "loss": 0.0001, + "step": 8780 + }, + { + "epoch": 0.5658954694850808, + "grad_norm": 0.001248246749537538, + "learning_rate": 4.8284998209810245e-06, + "loss": 0.0, + "step": 8781 + }, + { + "epoch": 0.56595991493201, + "grad_norm": 0.009656013818239738, + "learning_rate": 4.827783745076979e-06, + "loss": 0.0, + "step": 8782 + }, + { + "epoch": 0.5660243603789392, + "grad_norm": 0.001758210468587272, + "learning_rate": 4.827067669172933e-06, + "loss": 0.0, + "step": 8783 + }, + { + "epoch": 0.5660888058258684, + "grad_norm": 0.3298186687581501, + "learning_rate": 4.826351593268887e-06, + "loss": 0.0013, + "step": 8784 + }, + { + "epoch": 0.5661532512727976, + "grad_norm": 1.0338854780988707, + "learning_rate": 4.825635517364841e-06, + "loss": 0.0159, + "step": 8785 + }, + { + "epoch": 0.5662176967197268, + "grad_norm": 0.013471690213689116, + "learning_rate": 4.824919441460795e-06, + "loss": 0.0, + "step": 8786 + }, + { + "epoch": 0.566282142166656, + "grad_norm": 0.00011633145196054886, + "learning_rate": 4.824203365556749e-06, + "loss": 0.0, + "step": 8787 + }, + { + "epoch": 0.5663465876135851, + "grad_norm": 0.005370732892713071, + "learning_rate": 4.823487289652704e-06, + "loss": 0.0, + "step": 8788 + }, + { + "epoch": 0.5664110330605143, + "grad_norm": 1.9052808205184348, + "learning_rate": 4.822771213748658e-06, + "loss": 0.0155, + "step": 8789 + }, + { + "epoch": 0.5664754785074434, + "grad_norm": 0.0016993295556823635, + "learning_rate": 4.8220551378446114e-06, + "loss": 0.0, + "step": 8790 + }, + { + "epoch": 0.5665399239543726, + "grad_norm": 0.21898209656732134, + "learning_rate": 4.821339061940566e-06, + "loss": 0.0008, + "step": 8791 + }, + { + "epoch": 0.5666043694013018, + "grad_norm": 0.0029171804599055747, + "learning_rate": 4.82062298603652e-06, + "loss": 0.0, + "step": 8792 + }, + { + "epoch": 0.566668814848231, + "grad_norm": 0.0011682263683127836, + "learning_rate": 4.819906910132474e-06, + "loss": 0.0, + "step": 8793 + }, + { + "epoch": 0.5667332602951601, + "grad_norm": 0.02120118758689934, + "learning_rate": 4.819190834228429e-06, + "loss": 0.0002, + "step": 8794 + }, + { + "epoch": 0.5667977057420893, + "grad_norm": 0.016868041783098704, + "learning_rate": 4.818474758324383e-06, + "loss": 0.0001, + "step": 8795 + }, + { + "epoch": 0.5668621511890185, + "grad_norm": 0.0024044416183440355, + "learning_rate": 4.817758682420337e-06, + "loss": 0.0, + "step": 8796 + }, + { + "epoch": 0.5669265966359477, + "grad_norm": 1.069360351283807, + "learning_rate": 4.8170426065162915e-06, + "loss": 0.0072, + "step": 8797 + }, + { + "epoch": 0.5669910420828769, + "grad_norm": 0.012245140194462465, + "learning_rate": 4.816326530612245e-06, + "loss": 0.0, + "step": 8798 + }, + { + "epoch": 0.5670554875298061, + "grad_norm": 0.00043831334825429955, + "learning_rate": 4.815610454708199e-06, + "loss": 0.0, + "step": 8799 + }, + { + "epoch": 0.5671199329767352, + "grad_norm": 0.0032662984748576328, + "learning_rate": 4.814894378804154e-06, + "loss": 0.0, + "step": 8800 + }, + { + "epoch": 0.5671843784236643, + "grad_norm": 0.06767947089195707, + "learning_rate": 4.814178302900108e-06, + "loss": 0.0, + "step": 8801 + }, + { + "epoch": 0.5672488238705935, + "grad_norm": 0.040585778805621656, + "learning_rate": 4.813462226996062e-06, + "loss": 0.0001, + "step": 8802 + }, + { + "epoch": 0.5673132693175227, + "grad_norm": 0.0005241659708540031, + "learning_rate": 4.812746151092016e-06, + "loss": 0.0, + "step": 8803 + }, + { + "epoch": 0.5673777147644519, + "grad_norm": 0.06185363157920903, + "learning_rate": 4.81203007518797e-06, + "loss": 0.0002, + "step": 8804 + }, + { + "epoch": 0.567442160211381, + "grad_norm": 0.012243238039889377, + "learning_rate": 4.811313999283925e-06, + "loss": 0.0, + "step": 8805 + }, + { + "epoch": 0.5675066056583102, + "grad_norm": 0.04625774070281256, + "learning_rate": 4.8105979233798785e-06, + "loss": 0.0003, + "step": 8806 + }, + { + "epoch": 0.5675710511052394, + "grad_norm": 0.00486547121355555, + "learning_rate": 4.809881847475833e-06, + "loss": 0.0, + "step": 8807 + }, + { + "epoch": 0.5676354965521686, + "grad_norm": 0.0404367972194789, + "learning_rate": 4.809165771571787e-06, + "loss": 0.0001, + "step": 8808 + }, + { + "epoch": 0.5676999419990978, + "grad_norm": 0.21987166805627087, + "learning_rate": 4.8084496956677414e-06, + "loss": 0.0002, + "step": 8809 + }, + { + "epoch": 0.567764387446027, + "grad_norm": 0.00850806290664799, + "learning_rate": 4.807733619763696e-06, + "loss": 0.0, + "step": 8810 + }, + { + "epoch": 0.5678288328929562, + "grad_norm": 0.046005889230199014, + "learning_rate": 4.807017543859649e-06, + "loss": 0.0002, + "step": 8811 + }, + { + "epoch": 0.5678932783398852, + "grad_norm": 0.33100387723458397, + "learning_rate": 4.8063014679556035e-06, + "loss": 0.0003, + "step": 8812 + }, + { + "epoch": 0.5679577237868144, + "grad_norm": 0.11855455379698503, + "learning_rate": 4.805585392051558e-06, + "loss": 0.0001, + "step": 8813 + }, + { + "epoch": 0.5680221692337436, + "grad_norm": 0.016615890357752884, + "learning_rate": 4.804869316147512e-06, + "loss": 0.0, + "step": 8814 + }, + { + "epoch": 0.5680866146806728, + "grad_norm": 0.669933129930625, + "learning_rate": 4.804153240243466e-06, + "loss": 0.0017, + "step": 8815 + }, + { + "epoch": 0.568151060127602, + "grad_norm": 0.3022767576768994, + "learning_rate": 4.803437164339421e-06, + "loss": 0.0009, + "step": 8816 + }, + { + "epoch": 0.5682155055745312, + "grad_norm": 0.09943305098401042, + "learning_rate": 4.802721088435375e-06, + "loss": 0.0001, + "step": 8817 + }, + { + "epoch": 0.5682799510214603, + "grad_norm": 0.0142782273969829, + "learning_rate": 4.802005012531329e-06, + "loss": 0.0, + "step": 8818 + }, + { + "epoch": 0.5683443964683895, + "grad_norm": 0.23304219371967497, + "learning_rate": 4.801288936627283e-06, + "loss": 0.0008, + "step": 8819 + }, + { + "epoch": 0.5684088419153187, + "grad_norm": 0.02275632003333956, + "learning_rate": 4.800572860723237e-06, + "loss": 0.0, + "step": 8820 + }, + { + "epoch": 0.5684732873622479, + "grad_norm": 0.01564516688304527, + "learning_rate": 4.799856784819191e-06, + "loss": 0.0, + "step": 8821 + }, + { + "epoch": 0.5685377328091771, + "grad_norm": 0.001371723968120945, + "learning_rate": 4.799140708915146e-06, + "loss": 0.0, + "step": 8822 + }, + { + "epoch": 0.5686021782561063, + "grad_norm": 0.012505581892835328, + "learning_rate": 4.7984246330111e-06, + "loss": 0.0, + "step": 8823 + }, + { + "epoch": 0.5686666237030353, + "grad_norm": 0.008130933934212818, + "learning_rate": 4.797708557107053e-06, + "loss": 0.0, + "step": 8824 + }, + { + "epoch": 0.5687310691499645, + "grad_norm": 0.030915509771977535, + "learning_rate": 4.796992481203008e-06, + "loss": 0.0001, + "step": 8825 + }, + { + "epoch": 0.5687955145968937, + "grad_norm": 0.8509190779648602, + "learning_rate": 4.796276405298962e-06, + "loss": 0.0059, + "step": 8826 + }, + { + "epoch": 0.5688599600438229, + "grad_norm": 3.1909198740549973, + "learning_rate": 4.795560329394916e-06, + "loss": 0.0556, + "step": 8827 + }, + { + "epoch": 0.5689244054907521, + "grad_norm": 0.23147505557006737, + "learning_rate": 4.7948442534908706e-06, + "loss": 0.0005, + "step": 8828 + }, + { + "epoch": 0.5689888509376813, + "grad_norm": 0.027221213537766676, + "learning_rate": 4.794128177586825e-06, + "loss": 0.0002, + "step": 8829 + }, + { + "epoch": 0.5690532963846104, + "grad_norm": 0.8506112691652553, + "learning_rate": 4.793412101682779e-06, + "loss": 0.003, + "step": 8830 + }, + { + "epoch": 0.5691177418315396, + "grad_norm": 0.5891452973962943, + "learning_rate": 4.7926960257787335e-06, + "loss": 0.0032, + "step": 8831 + }, + { + "epoch": 0.5691821872784688, + "grad_norm": 0.006008513606025011, + "learning_rate": 4.791979949874687e-06, + "loss": 0.0001, + "step": 8832 + }, + { + "epoch": 0.569246632725398, + "grad_norm": 0.0008348159052347049, + "learning_rate": 4.791263873970641e-06, + "loss": 0.0, + "step": 8833 + }, + { + "epoch": 0.5693110781723272, + "grad_norm": 0.019444771138941552, + "learning_rate": 4.7905477980665955e-06, + "loss": 0.0, + "step": 8834 + }, + { + "epoch": 0.5693755236192563, + "grad_norm": 0.06374701691057286, + "learning_rate": 4.78983172216255e-06, + "loss": 0.0002, + "step": 8835 + }, + { + "epoch": 0.5694399690661854, + "grad_norm": 0.004703231092509179, + "learning_rate": 4.789115646258503e-06, + "loss": 0.0001, + "step": 8836 + }, + { + "epoch": 0.5695044145131146, + "grad_norm": 0.001044352791871368, + "learning_rate": 4.7883995703544576e-06, + "loss": 0.0, + "step": 8837 + }, + { + "epoch": 0.5695688599600438, + "grad_norm": 0.0020537983650882334, + "learning_rate": 4.787683494450412e-06, + "loss": 0.0, + "step": 8838 + }, + { + "epoch": 0.569633305406973, + "grad_norm": 0.00620996218740712, + "learning_rate": 4.786967418546366e-06, + "loss": 0.0, + "step": 8839 + }, + { + "epoch": 0.5696977508539022, + "grad_norm": 0.005034392248727269, + "learning_rate": 4.7862513426423205e-06, + "loss": 0.0, + "step": 8840 + }, + { + "epoch": 0.5697621963008314, + "grad_norm": 0.004523870500418789, + "learning_rate": 4.785535266738275e-06, + "loss": 0.0, + "step": 8841 + }, + { + "epoch": 0.5698266417477605, + "grad_norm": 0.04456405299394487, + "learning_rate": 4.784819190834229e-06, + "loss": 0.0005, + "step": 8842 + }, + { + "epoch": 0.5698910871946897, + "grad_norm": 5.05333046130126e-05, + "learning_rate": 4.784103114930183e-06, + "loss": 0.0, + "step": 8843 + }, + { + "epoch": 0.5699555326416189, + "grad_norm": 0.000751981987644549, + "learning_rate": 4.783387039026137e-06, + "loss": 0.0, + "step": 8844 + }, + { + "epoch": 0.5700199780885481, + "grad_norm": 0.00312416821242081, + "learning_rate": 4.782670963122091e-06, + "loss": 0.0, + "step": 8845 + }, + { + "epoch": 0.5700844235354772, + "grad_norm": 0.0016447265891001916, + "learning_rate": 4.781954887218045e-06, + "loss": 0.0, + "step": 8846 + }, + { + "epoch": 0.5701488689824064, + "grad_norm": 0.005598044842178874, + "learning_rate": 4.781238811314e-06, + "loss": 0.0001, + "step": 8847 + }, + { + "epoch": 0.5702133144293355, + "grad_norm": 0.01762638982733986, + "learning_rate": 4.780522735409954e-06, + "loss": 0.0, + "step": 8848 + }, + { + "epoch": 0.5702777598762647, + "grad_norm": 0.010449044572078174, + "learning_rate": 4.7798066595059074e-06, + "loss": 0.0, + "step": 8849 + }, + { + "epoch": 0.5703422053231939, + "grad_norm": 0.0002665962713856165, + "learning_rate": 4.779090583601862e-06, + "loss": 0.0, + "step": 8850 + }, + { + "epoch": 0.5704066507701231, + "grad_norm": 0.06352423931488883, + "learning_rate": 4.778374507697817e-06, + "loss": 0.0007, + "step": 8851 + }, + { + "epoch": 0.5704710962170523, + "grad_norm": 0.38984084577376893, + "learning_rate": 4.77765843179377e-06, + "loss": 0.0017, + "step": 8852 + }, + { + "epoch": 0.5705355416639815, + "grad_norm": 0.3330555543209313, + "learning_rate": 4.776942355889725e-06, + "loss": 0.0025, + "step": 8853 + }, + { + "epoch": 0.5705999871109106, + "grad_norm": 0.0028222350733292994, + "learning_rate": 4.776226279985679e-06, + "loss": 0.0, + "step": 8854 + }, + { + "epoch": 0.5706644325578398, + "grad_norm": 0.001275800740573039, + "learning_rate": 4.775510204081633e-06, + "loss": 0.0, + "step": 8855 + }, + { + "epoch": 0.570728878004769, + "grad_norm": 0.0010790753775213522, + "learning_rate": 4.7747941281775875e-06, + "loss": 0.0, + "step": 8856 + }, + { + "epoch": 0.5707933234516981, + "grad_norm": 0.04619489207028863, + "learning_rate": 4.774078052273541e-06, + "loss": 0.0, + "step": 8857 + }, + { + "epoch": 0.5708577688986273, + "grad_norm": 0.020743763233173337, + "learning_rate": 4.773361976369495e-06, + "loss": 0.0001, + "step": 8858 + }, + { + "epoch": 0.5709222143455565, + "grad_norm": 0.0006698125467998965, + "learning_rate": 4.77264590046545e-06, + "loss": 0.0, + "step": 8859 + }, + { + "epoch": 0.5709866597924856, + "grad_norm": 0.0014080113945894896, + "learning_rate": 4.771929824561404e-06, + "loss": 0.0, + "step": 8860 + }, + { + "epoch": 0.5710511052394148, + "grad_norm": 0.27785739883434873, + "learning_rate": 4.771213748657358e-06, + "loss": 0.0003, + "step": 8861 + }, + { + "epoch": 0.571115550686344, + "grad_norm": 0.0027483904368733616, + "learning_rate": 4.770497672753312e-06, + "loss": 0.0, + "step": 8862 + }, + { + "epoch": 0.5711799961332732, + "grad_norm": 0.5726181350659572, + "learning_rate": 4.769781596849267e-06, + "loss": 0.002, + "step": 8863 + }, + { + "epoch": 0.5712444415802024, + "grad_norm": 0.00208850930236962, + "learning_rate": 4.769065520945221e-06, + "loss": 0.0, + "step": 8864 + }, + { + "epoch": 0.5713088870271316, + "grad_norm": 0.0028141424681147045, + "learning_rate": 4.7683494450411745e-06, + "loss": 0.0, + "step": 8865 + }, + { + "epoch": 0.5713733324740607, + "grad_norm": 0.05520224991306442, + "learning_rate": 4.767633369137129e-06, + "loss": 0.0001, + "step": 8866 + }, + { + "epoch": 0.5714377779209899, + "grad_norm": 0.0017825190273745063, + "learning_rate": 4.766917293233083e-06, + "loss": 0.0, + "step": 8867 + }, + { + "epoch": 0.571502223367919, + "grad_norm": 0.2973295170699638, + "learning_rate": 4.7662012173290374e-06, + "loss": 0.0032, + "step": 8868 + }, + { + "epoch": 0.5715666688148482, + "grad_norm": 0.004041302913021656, + "learning_rate": 4.765485141424992e-06, + "loss": 0.0, + "step": 8869 + }, + { + "epoch": 0.5716311142617774, + "grad_norm": 0.003423076298219085, + "learning_rate": 4.764769065520945e-06, + "loss": 0.0, + "step": 8870 + }, + { + "epoch": 0.5716955597087066, + "grad_norm": 0.001335184445791643, + "learning_rate": 4.7640529896168995e-06, + "loss": 0.0, + "step": 8871 + }, + { + "epoch": 0.5717600051556357, + "grad_norm": 0.006451075758302448, + "learning_rate": 4.763336913712854e-06, + "loss": 0.0, + "step": 8872 + }, + { + "epoch": 0.5718244506025649, + "grad_norm": 0.2841419575082844, + "learning_rate": 4.762620837808808e-06, + "loss": 0.0005, + "step": 8873 + }, + { + "epoch": 0.5718888960494941, + "grad_norm": 0.0009039202676003914, + "learning_rate": 4.761904761904762e-06, + "loss": 0.0, + "step": 8874 + }, + { + "epoch": 0.5719533414964233, + "grad_norm": 0.0006223950228870037, + "learning_rate": 4.761188686000717e-06, + "loss": 0.0, + "step": 8875 + }, + { + "epoch": 0.5720177869433525, + "grad_norm": 0.0020699808052104343, + "learning_rate": 4.760472610096671e-06, + "loss": 0.0, + "step": 8876 + }, + { + "epoch": 0.5720822323902817, + "grad_norm": 0.33732607430765, + "learning_rate": 4.759756534192625e-06, + "loss": 0.0015, + "step": 8877 + }, + { + "epoch": 0.5721466778372108, + "grad_norm": 0.005303730736312988, + "learning_rate": 4.759040458288579e-06, + "loss": 0.0, + "step": 8878 + }, + { + "epoch": 0.5722111232841399, + "grad_norm": 0.012692532555947952, + "learning_rate": 4.758324382384533e-06, + "loss": 0.0001, + "step": 8879 + }, + { + "epoch": 0.5722755687310691, + "grad_norm": 0.00019626527068705832, + "learning_rate": 4.757608306480487e-06, + "loss": 0.0, + "step": 8880 + }, + { + "epoch": 0.5723400141779983, + "grad_norm": 0.002095381014958743, + "learning_rate": 4.756892230576442e-06, + "loss": 0.0, + "step": 8881 + }, + { + "epoch": 0.5724044596249275, + "grad_norm": 0.1159781279800337, + "learning_rate": 4.756176154672396e-06, + "loss": 0.0001, + "step": 8882 + }, + { + "epoch": 0.5724689050718567, + "grad_norm": 0.01648991749511099, + "learning_rate": 4.755460078768349e-06, + "loss": 0.0, + "step": 8883 + }, + { + "epoch": 0.5725333505187858, + "grad_norm": 0.20105396444792106, + "learning_rate": 4.754744002864304e-06, + "loss": 0.0002, + "step": 8884 + }, + { + "epoch": 0.572597795965715, + "grad_norm": 0.17178626370759914, + "learning_rate": 4.754027926960258e-06, + "loss": 0.0011, + "step": 8885 + }, + { + "epoch": 0.5726622414126442, + "grad_norm": 0.010614898100775799, + "learning_rate": 4.753311851056212e-06, + "loss": 0.0001, + "step": 8886 + }, + { + "epoch": 0.5727266868595734, + "grad_norm": 0.001217281913593414, + "learning_rate": 4.7525957751521666e-06, + "loss": 0.0, + "step": 8887 + }, + { + "epoch": 0.5727911323065026, + "grad_norm": 0.0057027341480303745, + "learning_rate": 4.751879699248121e-06, + "loss": 0.0, + "step": 8888 + }, + { + "epoch": 0.5728555777534318, + "grad_norm": 0.8818495232939141, + "learning_rate": 4.751163623344075e-06, + "loss": 0.001, + "step": 8889 + }, + { + "epoch": 0.5729200232003608, + "grad_norm": 0.00742720014691036, + "learning_rate": 4.7504475474400295e-06, + "loss": 0.0001, + "step": 8890 + }, + { + "epoch": 0.57298446864729, + "grad_norm": 0.027254013778446516, + "learning_rate": 4.749731471535983e-06, + "loss": 0.0, + "step": 8891 + }, + { + "epoch": 0.5730489140942192, + "grad_norm": 0.03194781425699038, + "learning_rate": 4.749015395631937e-06, + "loss": 0.0, + "step": 8892 + }, + { + "epoch": 0.5731133595411484, + "grad_norm": 0.0026582619085988384, + "learning_rate": 4.7482993197278915e-06, + "loss": 0.0, + "step": 8893 + }, + { + "epoch": 0.5731778049880776, + "grad_norm": 0.127752705813193, + "learning_rate": 4.747583243823846e-06, + "loss": 0.0001, + "step": 8894 + }, + { + "epoch": 0.5732422504350068, + "grad_norm": 0.002056805451891573, + "learning_rate": 4.7468671679198e-06, + "loss": 0.0, + "step": 8895 + }, + { + "epoch": 0.573306695881936, + "grad_norm": 0.004686356561184799, + "learning_rate": 4.7461510920157536e-06, + "loss": 0.0, + "step": 8896 + }, + { + "epoch": 0.5733711413288651, + "grad_norm": 0.0038974480489338153, + "learning_rate": 4.745435016111708e-06, + "loss": 0.0, + "step": 8897 + }, + { + "epoch": 0.5734355867757943, + "grad_norm": 0.1602997195389966, + "learning_rate": 4.744718940207663e-06, + "loss": 0.0011, + "step": 8898 + }, + { + "epoch": 0.5735000322227235, + "grad_norm": 0.0026400329446987615, + "learning_rate": 4.7440028643036165e-06, + "loss": 0.0, + "step": 8899 + }, + { + "epoch": 0.5735644776696527, + "grad_norm": 0.003769746465072043, + "learning_rate": 4.743286788399571e-06, + "loss": 0.0, + "step": 8900 + }, + { + "epoch": 0.5736289231165819, + "grad_norm": 0.010710992003722329, + "learning_rate": 4.742570712495525e-06, + "loss": 0.0, + "step": 8901 + }, + { + "epoch": 0.5736933685635109, + "grad_norm": 0.0006582057125751989, + "learning_rate": 4.741854636591479e-06, + "loss": 0.0, + "step": 8902 + }, + { + "epoch": 0.5737578140104401, + "grad_norm": 0.0024639063269701047, + "learning_rate": 4.741138560687434e-06, + "loss": 0.0, + "step": 8903 + }, + { + "epoch": 0.5738222594573693, + "grad_norm": 0.8917694769321234, + "learning_rate": 4.740422484783387e-06, + "loss": 0.0007, + "step": 8904 + }, + { + "epoch": 0.5738867049042985, + "grad_norm": 0.0032197677652261165, + "learning_rate": 4.739706408879341e-06, + "loss": 0.0, + "step": 8905 + }, + { + "epoch": 0.5739511503512277, + "grad_norm": 0.22933696610797213, + "learning_rate": 4.738990332975296e-06, + "loss": 0.001, + "step": 8906 + }, + { + "epoch": 0.5740155957981569, + "grad_norm": 0.06729571615328032, + "learning_rate": 4.73827425707125e-06, + "loss": 0.0002, + "step": 8907 + }, + { + "epoch": 0.574080041245086, + "grad_norm": 0.19443921359345376, + "learning_rate": 4.737558181167204e-06, + "loss": 0.0006, + "step": 8908 + }, + { + "epoch": 0.5741444866920152, + "grad_norm": 0.007514377513820292, + "learning_rate": 4.736842105263158e-06, + "loss": 0.0, + "step": 8909 + }, + { + "epoch": 0.5742089321389444, + "grad_norm": 0.00025992034944802154, + "learning_rate": 4.736126029359113e-06, + "loss": 0.0, + "step": 8910 + }, + { + "epoch": 0.5742733775858736, + "grad_norm": 0.0007463775014227858, + "learning_rate": 4.735409953455067e-06, + "loss": 0.0, + "step": 8911 + }, + { + "epoch": 0.5743378230328028, + "grad_norm": 0.0008458128520093252, + "learning_rate": 4.734693877551021e-06, + "loss": 0.0, + "step": 8912 + }, + { + "epoch": 0.5744022684797319, + "grad_norm": 0.02946843736749207, + "learning_rate": 4.733977801646975e-06, + "loss": 0.0001, + "step": 8913 + }, + { + "epoch": 0.574466713926661, + "grad_norm": 0.0015844954372983143, + "learning_rate": 4.733261725742929e-06, + "loss": 0.0, + "step": 8914 + }, + { + "epoch": 0.5745311593735902, + "grad_norm": 0.0006650693208472371, + "learning_rate": 4.7325456498388835e-06, + "loss": 0.0, + "step": 8915 + }, + { + "epoch": 0.5745956048205194, + "grad_norm": 0.00024925103154753225, + "learning_rate": 4.731829573934838e-06, + "loss": 0.0, + "step": 8916 + }, + { + "epoch": 0.5746600502674486, + "grad_norm": 0.0006270557073641862, + "learning_rate": 4.731113498030791e-06, + "loss": 0.0, + "step": 8917 + }, + { + "epoch": 0.5747244957143778, + "grad_norm": 0.000898050790659976, + "learning_rate": 4.730397422126746e-06, + "loss": 0.0015, + "step": 8918 + }, + { + "epoch": 0.574788941161307, + "grad_norm": 0.0010283628729795369, + "learning_rate": 4.7296813462227e-06, + "loss": 0.0, + "step": 8919 + }, + { + "epoch": 0.5748533866082361, + "grad_norm": 0.014245992428902015, + "learning_rate": 4.728965270318654e-06, + "loss": 0.0, + "step": 8920 + }, + { + "epoch": 0.5749178320551653, + "grad_norm": 0.004830281237210356, + "learning_rate": 4.7282491944146085e-06, + "loss": 0.0, + "step": 8921 + }, + { + "epoch": 0.5749822775020945, + "grad_norm": 0.02278564499924832, + "learning_rate": 4.727533118510563e-06, + "loss": 0.0, + "step": 8922 + }, + { + "epoch": 0.5750467229490237, + "grad_norm": 0.0002993158678736612, + "learning_rate": 4.726817042606517e-06, + "loss": 0.0, + "step": 8923 + }, + { + "epoch": 0.5751111683959528, + "grad_norm": 0.05430118317287579, + "learning_rate": 4.726100966702471e-06, + "loss": 0.0001, + "step": 8924 + }, + { + "epoch": 0.575175613842882, + "grad_norm": 0.0014384505586056613, + "learning_rate": 4.725384890798425e-06, + "loss": 0.0, + "step": 8925 + }, + { + "epoch": 0.5752400592898111, + "grad_norm": 0.5587276218293904, + "learning_rate": 4.724668814894379e-06, + "loss": 0.0008, + "step": 8926 + }, + { + "epoch": 0.5753045047367403, + "grad_norm": 0.009065701688840216, + "learning_rate": 4.7239527389903334e-06, + "loss": 0.0, + "step": 8927 + }, + { + "epoch": 0.5753689501836695, + "grad_norm": 0.04705428814279142, + "learning_rate": 4.723236663086288e-06, + "loss": 0.0005, + "step": 8928 + }, + { + "epoch": 0.5754333956305987, + "grad_norm": 0.009048597003513747, + "learning_rate": 4.722520587182241e-06, + "loss": 0.0, + "step": 8929 + }, + { + "epoch": 0.5754978410775279, + "grad_norm": 0.00033590933556921425, + "learning_rate": 4.7218045112781955e-06, + "loss": 0.0, + "step": 8930 + }, + { + "epoch": 0.5755622865244571, + "grad_norm": 0.002176598827963511, + "learning_rate": 4.72108843537415e-06, + "loss": 0.0, + "step": 8931 + }, + { + "epoch": 0.5756267319713863, + "grad_norm": 0.00026799353923762494, + "learning_rate": 4.720372359470104e-06, + "loss": 0.0, + "step": 8932 + }, + { + "epoch": 0.5756911774183154, + "grad_norm": 0.03386055598732095, + "learning_rate": 4.719656283566058e-06, + "loss": 0.0001, + "step": 8933 + }, + { + "epoch": 0.5757556228652446, + "grad_norm": 0.0008778612228956532, + "learning_rate": 4.718940207662013e-06, + "loss": 0.0, + "step": 8934 + }, + { + "epoch": 0.5758200683121737, + "grad_norm": 0.0007188580320088237, + "learning_rate": 4.718224131757967e-06, + "loss": 0.0, + "step": 8935 + }, + { + "epoch": 0.5758845137591029, + "grad_norm": 0.022633746857609334, + "learning_rate": 4.717508055853921e-06, + "loss": 0.0001, + "step": 8936 + }, + { + "epoch": 0.5759489592060321, + "grad_norm": 0.0006597787799743524, + "learning_rate": 4.716791979949875e-06, + "loss": 0.0, + "step": 8937 + }, + { + "epoch": 0.5760134046529612, + "grad_norm": 0.017968162787472524, + "learning_rate": 4.716075904045829e-06, + "loss": 0.0002, + "step": 8938 + }, + { + "epoch": 0.5760778500998904, + "grad_norm": 0.003721095668540121, + "learning_rate": 4.715359828141783e-06, + "loss": 0.0, + "step": 8939 + }, + { + "epoch": 0.5761422955468196, + "grad_norm": 0.1334403917347702, + "learning_rate": 4.714643752237738e-06, + "loss": 0.0002, + "step": 8940 + }, + { + "epoch": 0.5762067409937488, + "grad_norm": 0.049999945890129134, + "learning_rate": 4.713927676333692e-06, + "loss": 0.0005, + "step": 8941 + }, + { + "epoch": 0.576271186440678, + "grad_norm": 0.04438637225912091, + "learning_rate": 4.713211600429645e-06, + "loss": 0.0001, + "step": 8942 + }, + { + "epoch": 0.5763356318876072, + "grad_norm": 0.004939305130353833, + "learning_rate": 4.7124955245256e-06, + "loss": 0.0, + "step": 8943 + }, + { + "epoch": 0.5764000773345364, + "grad_norm": 0.010423595187445532, + "learning_rate": 4.711779448621554e-06, + "loss": 0.0001, + "step": 8944 + }, + { + "epoch": 0.5764645227814655, + "grad_norm": 0.30387026664487055, + "learning_rate": 4.711063372717508e-06, + "loss": 0.0003, + "step": 8945 + }, + { + "epoch": 0.5765289682283946, + "grad_norm": 0.002739366849377702, + "learning_rate": 4.7103472968134626e-06, + "loss": 0.0, + "step": 8946 + }, + { + "epoch": 0.5765934136753238, + "grad_norm": 0.21060028318841664, + "learning_rate": 4.709631220909417e-06, + "loss": 0.0042, + "step": 8947 + }, + { + "epoch": 0.576657859122253, + "grad_norm": 0.0018765208304858886, + "learning_rate": 4.708915145005371e-06, + "loss": 0.0, + "step": 8948 + }, + { + "epoch": 0.5767223045691822, + "grad_norm": 0.0007770944081688407, + "learning_rate": 4.7081990691013255e-06, + "loss": 0.0, + "step": 8949 + }, + { + "epoch": 0.5767867500161113, + "grad_norm": 0.002056512481472225, + "learning_rate": 4.707482993197279e-06, + "loss": 0.0, + "step": 8950 + }, + { + "epoch": 0.5768511954630405, + "grad_norm": 0.0034826728857168528, + "learning_rate": 4.706766917293233e-06, + "loss": 0.0, + "step": 8951 + }, + { + "epoch": 0.5769156409099697, + "grad_norm": 0.0003830165904485001, + "learning_rate": 4.7060508413891875e-06, + "loss": 0.0, + "step": 8952 + }, + { + "epoch": 0.5769800863568989, + "grad_norm": 0.043901532936228796, + "learning_rate": 4.705334765485142e-06, + "loss": 0.0001, + "step": 8953 + }, + { + "epoch": 0.5770445318038281, + "grad_norm": 0.003480280335613063, + "learning_rate": 4.704618689581096e-06, + "loss": 0.0, + "step": 8954 + }, + { + "epoch": 0.5771089772507573, + "grad_norm": 0.09117785697310841, + "learning_rate": 4.7039026136770496e-06, + "loss": 0.0001, + "step": 8955 + }, + { + "epoch": 0.5771734226976865, + "grad_norm": 0.0001285398936270668, + "learning_rate": 4.703186537773004e-06, + "loss": 0.0, + "step": 8956 + }, + { + "epoch": 0.5772378681446155, + "grad_norm": 0.0008863763737434807, + "learning_rate": 4.702470461868959e-06, + "loss": 0.0, + "step": 8957 + }, + { + "epoch": 0.5773023135915447, + "grad_norm": 0.38130637127350175, + "learning_rate": 4.7017543859649125e-06, + "loss": 0.0016, + "step": 8958 + }, + { + "epoch": 0.5773667590384739, + "grad_norm": 0.008510612682685685, + "learning_rate": 4.701038310060867e-06, + "loss": 0.0, + "step": 8959 + }, + { + "epoch": 0.5774312044854031, + "grad_norm": 0.009348145843828558, + "learning_rate": 4.700322234156821e-06, + "loss": 0.0001, + "step": 8960 + }, + { + "epoch": 0.5774956499323323, + "grad_norm": 0.006941331467167952, + "learning_rate": 4.699606158252775e-06, + "loss": 0.0, + "step": 8961 + }, + { + "epoch": 0.5775600953792615, + "grad_norm": 0.0013218582692286602, + "learning_rate": 4.69889008234873e-06, + "loss": 0.0, + "step": 8962 + }, + { + "epoch": 0.5776245408261906, + "grad_norm": 0.0005961525107431034, + "learning_rate": 4.698174006444683e-06, + "loss": 0.0, + "step": 8963 + }, + { + "epoch": 0.5776889862731198, + "grad_norm": 0.5694010257321127, + "learning_rate": 4.697457930540637e-06, + "loss": 0.0026, + "step": 8964 + }, + { + "epoch": 0.577753431720049, + "grad_norm": 0.0002869682815712005, + "learning_rate": 4.696741854636592e-06, + "loss": 0.0, + "step": 8965 + }, + { + "epoch": 0.5778178771669782, + "grad_norm": 0.006472222447300945, + "learning_rate": 4.696025778732546e-06, + "loss": 0.0, + "step": 8966 + }, + { + "epoch": 0.5778823226139074, + "grad_norm": 0.05890671080692376, + "learning_rate": 4.6953097028285e-06, + "loss": 0.0001, + "step": 8967 + }, + { + "epoch": 0.5779467680608364, + "grad_norm": 0.0015918262072770278, + "learning_rate": 4.694593626924455e-06, + "loss": 0.0, + "step": 8968 + }, + { + "epoch": 0.5780112135077656, + "grad_norm": 0.0012047950646157887, + "learning_rate": 4.693877551020409e-06, + "loss": 0.0, + "step": 8969 + }, + { + "epoch": 0.5780756589546948, + "grad_norm": 0.0001741613348393895, + "learning_rate": 4.693161475116363e-06, + "loss": 0.0, + "step": 8970 + }, + { + "epoch": 0.578140104401624, + "grad_norm": 9.804336294408929e-05, + "learning_rate": 4.692445399212317e-06, + "loss": 0.0, + "step": 8971 + }, + { + "epoch": 0.5782045498485532, + "grad_norm": 0.0002919895338205372, + "learning_rate": 4.691729323308271e-06, + "loss": 0.0, + "step": 8972 + }, + { + "epoch": 0.5782689952954824, + "grad_norm": 0.33817765277223377, + "learning_rate": 4.691013247404225e-06, + "loss": 0.001, + "step": 8973 + }, + { + "epoch": 0.5783334407424116, + "grad_norm": 0.0005003706800080808, + "learning_rate": 4.6902971715001795e-06, + "loss": 0.0, + "step": 8974 + }, + { + "epoch": 0.5783978861893407, + "grad_norm": 0.01697098235105734, + "learning_rate": 4.689581095596134e-06, + "loss": 0.0, + "step": 8975 + }, + { + "epoch": 0.5784623316362699, + "grad_norm": 0.01526346802499094, + "learning_rate": 4.688865019692087e-06, + "loss": 0.0001, + "step": 8976 + }, + { + "epoch": 0.5785267770831991, + "grad_norm": 0.00011173870781894097, + "learning_rate": 4.688148943788042e-06, + "loss": 0.0, + "step": 8977 + }, + { + "epoch": 0.5785912225301283, + "grad_norm": 0.19004203268711622, + "learning_rate": 4.687432867883996e-06, + "loss": 0.0005, + "step": 8978 + }, + { + "epoch": 0.5786556679770575, + "grad_norm": 6.244763941013428e-05, + "learning_rate": 4.68671679197995e-06, + "loss": 0.0, + "step": 8979 + }, + { + "epoch": 0.5787201134239865, + "grad_norm": 9.843372332926753e-05, + "learning_rate": 4.6860007160759045e-06, + "loss": 0.0, + "step": 8980 + }, + { + "epoch": 0.5787845588709157, + "grad_norm": 0.16569536958932962, + "learning_rate": 4.685284640171859e-06, + "loss": 0.0004, + "step": 8981 + }, + { + "epoch": 0.5788490043178449, + "grad_norm": 0.0005437621685816029, + "learning_rate": 4.684568564267813e-06, + "loss": 0.0, + "step": 8982 + }, + { + "epoch": 0.5789134497647741, + "grad_norm": 0.08751011709068818, + "learning_rate": 4.683852488363767e-06, + "loss": 0.0047, + "step": 8983 + }, + { + "epoch": 0.5789778952117033, + "grad_norm": 0.0361179598361197, + "learning_rate": 4.683136412459721e-06, + "loss": 0.0002, + "step": 8984 + }, + { + "epoch": 0.5790423406586325, + "grad_norm": 0.0009243908624880609, + "learning_rate": 4.682420336555675e-06, + "loss": 0.0, + "step": 8985 + }, + { + "epoch": 0.5791067861055617, + "grad_norm": 0.00028813802643748844, + "learning_rate": 4.6817042606516294e-06, + "loss": 0.0, + "step": 8986 + }, + { + "epoch": 0.5791712315524908, + "grad_norm": 0.0014871085523719119, + "learning_rate": 4.680988184747584e-06, + "loss": 0.0, + "step": 8987 + }, + { + "epoch": 0.57923567699942, + "grad_norm": 0.0011678639568634536, + "learning_rate": 4.680272108843538e-06, + "loss": 0.0, + "step": 8988 + }, + { + "epoch": 0.5793001224463492, + "grad_norm": 0.022646964211210575, + "learning_rate": 4.6795560329394915e-06, + "loss": 0.0, + "step": 8989 + }, + { + "epoch": 0.5793645678932784, + "grad_norm": 6.162092306559458e-05, + "learning_rate": 4.678839957035446e-06, + "loss": 0.0, + "step": 8990 + }, + { + "epoch": 0.5794290133402075, + "grad_norm": 0.02449790624310379, + "learning_rate": 4.6781238811314e-06, + "loss": 0.0, + "step": 8991 + }, + { + "epoch": 0.5794934587871367, + "grad_norm": 0.12489244930395811, + "learning_rate": 4.677407805227354e-06, + "loss": 0.0001, + "step": 8992 + }, + { + "epoch": 0.5795579042340658, + "grad_norm": 0.012767780350679178, + "learning_rate": 4.676691729323309e-06, + "loss": 0.0001, + "step": 8993 + }, + { + "epoch": 0.579622349680995, + "grad_norm": 0.009526083138262352, + "learning_rate": 4.675975653419263e-06, + "loss": 0.0, + "step": 8994 + }, + { + "epoch": 0.5796867951279242, + "grad_norm": 0.021985102148627596, + "learning_rate": 4.675259577515217e-06, + "loss": 0.0002, + "step": 8995 + }, + { + "epoch": 0.5797512405748534, + "grad_norm": 0.0004470049212301294, + "learning_rate": 4.6745435016111716e-06, + "loss": 0.0, + "step": 8996 + }, + { + "epoch": 0.5798156860217826, + "grad_norm": 0.0004562537429128238, + "learning_rate": 4.673827425707125e-06, + "loss": 0.0, + "step": 8997 + }, + { + "epoch": 0.5798801314687118, + "grad_norm": 0.0009392800325101437, + "learning_rate": 4.673111349803079e-06, + "loss": 0.0, + "step": 8998 + }, + { + "epoch": 0.5799445769156409, + "grad_norm": 0.008200601472724514, + "learning_rate": 4.672395273899034e-06, + "loss": 0.0, + "step": 8999 + }, + { + "epoch": 0.5800090223625701, + "grad_norm": 0.00019648518137536374, + "learning_rate": 4.671679197994988e-06, + "loss": 0.0, + "step": 9000 + }, + { + "epoch": 0.5800734678094993, + "grad_norm": 0.001726909860329572, + "learning_rate": 4.670963122090942e-06, + "loss": 0.0, + "step": 9001 + }, + { + "epoch": 0.5801379132564284, + "grad_norm": 7.409479499881678e-05, + "learning_rate": 4.670247046186896e-06, + "loss": 0.0, + "step": 9002 + }, + { + "epoch": 0.5802023587033576, + "grad_norm": 0.0003329872254956827, + "learning_rate": 4.669530970282851e-06, + "loss": 0.0, + "step": 9003 + }, + { + "epoch": 0.5802668041502868, + "grad_norm": 0.0010982760062732727, + "learning_rate": 4.668814894378805e-06, + "loss": 0.0, + "step": 9004 + }, + { + "epoch": 0.5803312495972159, + "grad_norm": 0.0035950464528479, + "learning_rate": 4.6680988184747586e-06, + "loss": 0.0, + "step": 9005 + }, + { + "epoch": 0.5803956950441451, + "grad_norm": 0.021047836132908507, + "learning_rate": 4.667382742570713e-06, + "loss": 0.0002, + "step": 9006 + }, + { + "epoch": 0.5804601404910743, + "grad_norm": 0.0011425220805333551, + "learning_rate": 4.666666666666667e-06, + "loss": 0.0, + "step": 9007 + }, + { + "epoch": 0.5805245859380035, + "grad_norm": 0.0008882349360519453, + "learning_rate": 4.6659505907626215e-06, + "loss": 0.0, + "step": 9008 + }, + { + "epoch": 0.5805890313849327, + "grad_norm": 0.0002802394128841394, + "learning_rate": 4.665234514858576e-06, + "loss": 0.0, + "step": 9009 + }, + { + "epoch": 0.5806534768318619, + "grad_norm": 0.0009635131074359399, + "learning_rate": 4.664518438954529e-06, + "loss": 0.0, + "step": 9010 + }, + { + "epoch": 0.580717922278791, + "grad_norm": 0.0009012143762942716, + "learning_rate": 4.6638023630504835e-06, + "loss": 0.0, + "step": 9011 + }, + { + "epoch": 0.5807823677257202, + "grad_norm": 0.006541857974311703, + "learning_rate": 4.663086287146438e-06, + "loss": 0.0, + "step": 9012 + }, + { + "epoch": 0.5808468131726493, + "grad_norm": 0.06217128394273282, + "learning_rate": 4.662370211242392e-06, + "loss": 0.0002, + "step": 9013 + }, + { + "epoch": 0.5809112586195785, + "grad_norm": 0.01966216794474465, + "learning_rate": 4.661654135338346e-06, + "loss": 0.0, + "step": 9014 + }, + { + "epoch": 0.5809757040665077, + "grad_norm": 0.0007292552460633633, + "learning_rate": 4.660938059434301e-06, + "loss": 0.0, + "step": 9015 + }, + { + "epoch": 0.5810401495134369, + "grad_norm": 0.047384355713321426, + "learning_rate": 4.660221983530255e-06, + "loss": 0.0001, + "step": 9016 + }, + { + "epoch": 0.581104594960366, + "grad_norm": 0.004729452570084791, + "learning_rate": 4.659505907626209e-06, + "loss": 0.0, + "step": 9017 + }, + { + "epoch": 0.5811690404072952, + "grad_norm": 0.0005329612298227006, + "learning_rate": 4.658789831722163e-06, + "loss": 0.0, + "step": 9018 + }, + { + "epoch": 0.5812334858542244, + "grad_norm": 0.09264701461117533, + "learning_rate": 4.658073755818117e-06, + "loss": 0.001, + "step": 9019 + }, + { + "epoch": 0.5812979313011536, + "grad_norm": 0.0002257829058000882, + "learning_rate": 4.657357679914071e-06, + "loss": 0.0, + "step": 9020 + }, + { + "epoch": 0.5813623767480828, + "grad_norm": 0.010905818191302687, + "learning_rate": 4.656641604010026e-06, + "loss": 0.0, + "step": 9021 + }, + { + "epoch": 0.581426822195012, + "grad_norm": 0.005600216922704845, + "learning_rate": 4.65592552810598e-06, + "loss": 0.0, + "step": 9022 + }, + { + "epoch": 0.5814912676419411, + "grad_norm": 0.031750284272755426, + "learning_rate": 4.655209452201933e-06, + "loss": 0.0001, + "step": 9023 + }, + { + "epoch": 0.5815557130888702, + "grad_norm": 0.0077528691803133536, + "learning_rate": 4.654493376297888e-06, + "loss": 0.0, + "step": 9024 + }, + { + "epoch": 0.5816201585357994, + "grad_norm": 0.04543225714500262, + "learning_rate": 4.653777300393842e-06, + "loss": 0.0001, + "step": 9025 + }, + { + "epoch": 0.5816846039827286, + "grad_norm": 0.013056369682744871, + "learning_rate": 4.653061224489796e-06, + "loss": 0.0001, + "step": 9026 + }, + { + "epoch": 0.5817490494296578, + "grad_norm": 0.17819283640927924, + "learning_rate": 4.652345148585751e-06, + "loss": 0.0003, + "step": 9027 + }, + { + "epoch": 0.581813494876587, + "grad_norm": 0.08917160086074113, + "learning_rate": 4.651629072681705e-06, + "loss": 0.0003, + "step": 9028 + }, + { + "epoch": 0.5818779403235161, + "grad_norm": 0.006331248638778778, + "learning_rate": 4.650912996777659e-06, + "loss": 0.0, + "step": 9029 + }, + { + "epoch": 0.5819423857704453, + "grad_norm": 0.002775633504862486, + "learning_rate": 4.6501969208736135e-06, + "loss": 0.0, + "step": 9030 + }, + { + "epoch": 0.5820068312173745, + "grad_norm": 6.992336507404066e-05, + "learning_rate": 4.649480844969567e-06, + "loss": 0.0, + "step": 9031 + }, + { + "epoch": 0.5820712766643037, + "grad_norm": 0.00017943615738269465, + "learning_rate": 4.648764769065521e-06, + "loss": 0.0, + "step": 9032 + }, + { + "epoch": 0.5821357221112329, + "grad_norm": 0.0058394424584071415, + "learning_rate": 4.6480486931614755e-06, + "loss": 0.0, + "step": 9033 + }, + { + "epoch": 0.5822001675581621, + "grad_norm": 0.050562310960973855, + "learning_rate": 4.64733261725743e-06, + "loss": 0.0002, + "step": 9034 + }, + { + "epoch": 0.5822646130050911, + "grad_norm": 0.8657970450295585, + "learning_rate": 4.646616541353383e-06, + "loss": 0.0037, + "step": 9035 + }, + { + "epoch": 0.5823290584520203, + "grad_norm": 0.00211204704726517, + "learning_rate": 4.645900465449338e-06, + "loss": 0.0, + "step": 9036 + }, + { + "epoch": 0.5823935038989495, + "grad_norm": 0.004920183719435545, + "learning_rate": 4.645184389545292e-06, + "loss": 0.0, + "step": 9037 + }, + { + "epoch": 0.5824579493458787, + "grad_norm": 0.00663491009308444, + "learning_rate": 4.644468313641247e-06, + "loss": 0.0, + "step": 9038 + }, + { + "epoch": 0.5825223947928079, + "grad_norm": 0.0022066924291144698, + "learning_rate": 4.6437522377372005e-06, + "loss": 0.0, + "step": 9039 + }, + { + "epoch": 0.5825868402397371, + "grad_norm": 0.0022171824438060417, + "learning_rate": 4.643036161833155e-06, + "loss": 0.0, + "step": 9040 + }, + { + "epoch": 0.5826512856866662, + "grad_norm": 0.00032441297467396377, + "learning_rate": 4.642320085929109e-06, + "loss": 0.0, + "step": 9041 + }, + { + "epoch": 0.5827157311335954, + "grad_norm": 0.030769737840866715, + "learning_rate": 4.641604010025063e-06, + "loss": 0.0, + "step": 9042 + }, + { + "epoch": 0.5827801765805246, + "grad_norm": 0.0019483906462393567, + "learning_rate": 4.640887934121017e-06, + "loss": 0.0, + "step": 9043 + }, + { + "epoch": 0.5828446220274538, + "grad_norm": 0.00029194905052751225, + "learning_rate": 4.640171858216971e-06, + "loss": 0.0, + "step": 9044 + }, + { + "epoch": 0.582909067474383, + "grad_norm": 0.0002021999263205449, + "learning_rate": 4.6394557823129254e-06, + "loss": 0.0, + "step": 9045 + }, + { + "epoch": 0.5829735129213122, + "grad_norm": 0.0014373682518094646, + "learning_rate": 4.63873970640888e-06, + "loss": 0.0, + "step": 9046 + }, + { + "epoch": 0.5830379583682412, + "grad_norm": 0.059762924731719465, + "learning_rate": 4.638023630504834e-06, + "loss": 0.0001, + "step": 9047 + }, + { + "epoch": 0.5831024038151704, + "grad_norm": 0.4903177897093207, + "learning_rate": 4.6373075546007875e-06, + "loss": 0.0041, + "step": 9048 + }, + { + "epoch": 0.5831668492620996, + "grad_norm": 0.0014177705494520779, + "learning_rate": 4.636591478696742e-06, + "loss": 0.0, + "step": 9049 + }, + { + "epoch": 0.5832312947090288, + "grad_norm": 0.2566934777913586, + "learning_rate": 4.635875402792697e-06, + "loss": 0.0021, + "step": 9050 + }, + { + "epoch": 0.583295740155958, + "grad_norm": 0.018140133354513432, + "learning_rate": 4.63515932688865e-06, + "loss": 0.0001, + "step": 9051 + }, + { + "epoch": 0.5833601856028872, + "grad_norm": 0.00788615014582874, + "learning_rate": 4.634443250984605e-06, + "loss": 0.0, + "step": 9052 + }, + { + "epoch": 0.5834246310498163, + "grad_norm": 0.01840864843113238, + "learning_rate": 4.633727175080559e-06, + "loss": 0.0001, + "step": 9053 + }, + { + "epoch": 0.5834890764967455, + "grad_norm": 0.001822052435323416, + "learning_rate": 4.633011099176513e-06, + "loss": 0.0, + "step": 9054 + }, + { + "epoch": 0.5835535219436747, + "grad_norm": 0.017397195278423098, + "learning_rate": 4.6322950232724676e-06, + "loss": 0.0, + "step": 9055 + }, + { + "epoch": 0.5836179673906039, + "grad_norm": 0.6295143409970986, + "learning_rate": 4.631578947368421e-06, + "loss": 0.0036, + "step": 9056 + }, + { + "epoch": 0.5836824128375331, + "grad_norm": 0.004145135076455736, + "learning_rate": 4.630862871464375e-06, + "loss": 0.0, + "step": 9057 + }, + { + "epoch": 0.5837468582844622, + "grad_norm": 0.01922043763141991, + "learning_rate": 4.63014679556033e-06, + "loss": 0.0001, + "step": 9058 + }, + { + "epoch": 0.5838113037313913, + "grad_norm": 0.21877145661980707, + "learning_rate": 4.629430719656284e-06, + "loss": 0.0003, + "step": 9059 + }, + { + "epoch": 0.5838757491783205, + "grad_norm": 0.06723857036500273, + "learning_rate": 4.628714643752238e-06, + "loss": 0.0001, + "step": 9060 + }, + { + "epoch": 0.5839401946252497, + "grad_norm": 0.037172463083185225, + "learning_rate": 4.627998567848192e-06, + "loss": 0.0, + "step": 9061 + }, + { + "epoch": 0.5840046400721789, + "grad_norm": 0.0024169997396676244, + "learning_rate": 4.627282491944147e-06, + "loss": 0.0, + "step": 9062 + }, + { + "epoch": 0.5840690855191081, + "grad_norm": 0.06419768070761167, + "learning_rate": 4.626566416040101e-06, + "loss": 0.0, + "step": 9063 + }, + { + "epoch": 0.5841335309660373, + "grad_norm": 0.01399367865517338, + "learning_rate": 4.6258503401360546e-06, + "loss": 0.0, + "step": 9064 + }, + { + "epoch": 0.5841979764129664, + "grad_norm": 0.000952353755274673, + "learning_rate": 4.625134264232009e-06, + "loss": 0.0, + "step": 9065 + }, + { + "epoch": 0.5842624218598956, + "grad_norm": 0.0015975798325585998, + "learning_rate": 4.624418188327963e-06, + "loss": 0.0, + "step": 9066 + }, + { + "epoch": 0.5843268673068248, + "grad_norm": 0.0026882272384257705, + "learning_rate": 4.6237021124239175e-06, + "loss": 0.0, + "step": 9067 + }, + { + "epoch": 0.584391312753754, + "grad_norm": 0.001748996446030553, + "learning_rate": 4.622986036519872e-06, + "loss": 0.0, + "step": 9068 + }, + { + "epoch": 0.5844557582006831, + "grad_norm": 0.18220017498674793, + "learning_rate": 4.622269960615825e-06, + "loss": 0.0017, + "step": 9069 + }, + { + "epoch": 0.5845202036476123, + "grad_norm": 0.009680095160558684, + "learning_rate": 4.6215538847117795e-06, + "loss": 0.0, + "step": 9070 + }, + { + "epoch": 0.5845846490945414, + "grad_norm": 1.2011762913568456, + "learning_rate": 4.620837808807734e-06, + "loss": 0.0104, + "step": 9071 + }, + { + "epoch": 0.5846490945414706, + "grad_norm": 0.009852353560086855, + "learning_rate": 4.620121732903688e-06, + "loss": 0.0, + "step": 9072 + }, + { + "epoch": 0.5847135399883998, + "grad_norm": 0.0034802751090491258, + "learning_rate": 4.619405656999642e-06, + "loss": 0.0, + "step": 9073 + }, + { + "epoch": 0.584777985435329, + "grad_norm": 0.02462778481389003, + "learning_rate": 4.618689581095597e-06, + "loss": 0.0, + "step": 9074 + }, + { + "epoch": 0.5848424308822582, + "grad_norm": 0.07292170024709481, + "learning_rate": 4.617973505191551e-06, + "loss": 0.0001, + "step": 9075 + }, + { + "epoch": 0.5849068763291874, + "grad_norm": 0.01276836844157095, + "learning_rate": 4.617257429287505e-06, + "loss": 0.0001, + "step": 9076 + }, + { + "epoch": 0.5849713217761165, + "grad_norm": 0.0059313510178414935, + "learning_rate": 4.616541353383459e-06, + "loss": 0.0, + "step": 9077 + }, + { + "epoch": 0.5850357672230457, + "grad_norm": 0.0050054168634305505, + "learning_rate": 4.615825277479413e-06, + "loss": 0.0, + "step": 9078 + }, + { + "epoch": 0.5851002126699749, + "grad_norm": 0.004330369079694027, + "learning_rate": 4.615109201575367e-06, + "loss": 0.0, + "step": 9079 + }, + { + "epoch": 0.585164658116904, + "grad_norm": 0.0014310698214295584, + "learning_rate": 4.614393125671322e-06, + "loss": 0.0, + "step": 9080 + }, + { + "epoch": 0.5852291035638332, + "grad_norm": 0.09305590243071263, + "learning_rate": 4.613677049767276e-06, + "loss": 0.0003, + "step": 9081 + }, + { + "epoch": 0.5852935490107624, + "grad_norm": 0.003215462836571615, + "learning_rate": 4.612960973863229e-06, + "loss": 0.0, + "step": 9082 + }, + { + "epoch": 0.5853579944576915, + "grad_norm": 1.087399822312692, + "learning_rate": 4.612244897959184e-06, + "loss": 0.0105, + "step": 9083 + }, + { + "epoch": 0.5854224399046207, + "grad_norm": 0.7281104794474664, + "learning_rate": 4.611528822055138e-06, + "loss": 0.0062, + "step": 9084 + }, + { + "epoch": 0.5854868853515499, + "grad_norm": 0.00043356516939963686, + "learning_rate": 4.610812746151092e-06, + "loss": 0.0, + "step": 9085 + }, + { + "epoch": 0.5855513307984791, + "grad_norm": 0.00033685958252393, + "learning_rate": 4.610096670247047e-06, + "loss": 0.0, + "step": 9086 + }, + { + "epoch": 0.5856157762454083, + "grad_norm": 0.002318961319503037, + "learning_rate": 4.609380594343001e-06, + "loss": 0.0, + "step": 9087 + }, + { + "epoch": 0.5856802216923375, + "grad_norm": 0.005592278210797931, + "learning_rate": 4.608664518438955e-06, + "loss": 0.0, + "step": 9088 + }, + { + "epoch": 0.5857446671392667, + "grad_norm": 0.0243394892930314, + "learning_rate": 4.6079484425349095e-06, + "loss": 0.0, + "step": 9089 + }, + { + "epoch": 0.5858091125861958, + "grad_norm": 0.11083153764968887, + "learning_rate": 4.607232366630863e-06, + "loss": 0.0001, + "step": 9090 + }, + { + "epoch": 0.5858735580331249, + "grad_norm": 0.0007002490856994532, + "learning_rate": 4.606516290726817e-06, + "loss": 0.0, + "step": 9091 + }, + { + "epoch": 0.5859380034800541, + "grad_norm": 0.0017745563231980528, + "learning_rate": 4.6058002148227715e-06, + "loss": 0.0, + "step": 9092 + }, + { + "epoch": 0.5860024489269833, + "grad_norm": 0.005978146684738015, + "learning_rate": 4.605084138918726e-06, + "loss": 0.0, + "step": 9093 + }, + { + "epoch": 0.5860668943739125, + "grad_norm": 0.05288187124635033, + "learning_rate": 4.60436806301468e-06, + "loss": 0.0001, + "step": 9094 + }, + { + "epoch": 0.5861313398208416, + "grad_norm": 0.016551244789661204, + "learning_rate": 4.603651987110634e-06, + "loss": 0.0002, + "step": 9095 + }, + { + "epoch": 0.5861957852677708, + "grad_norm": 0.044669416309915146, + "learning_rate": 4.602935911206588e-06, + "loss": 0.0001, + "step": 9096 + }, + { + "epoch": 0.5862602307147, + "grad_norm": 0.2125491982896501, + "learning_rate": 4.602219835302543e-06, + "loss": 0.0002, + "step": 9097 + }, + { + "epoch": 0.5863246761616292, + "grad_norm": 0.49376460609078615, + "learning_rate": 4.6015037593984965e-06, + "loss": 0.001, + "step": 9098 + }, + { + "epoch": 0.5863891216085584, + "grad_norm": 0.016966827455400634, + "learning_rate": 4.600787683494451e-06, + "loss": 0.0, + "step": 9099 + }, + { + "epoch": 0.5864535670554876, + "grad_norm": 0.02803030621426667, + "learning_rate": 4.600071607590405e-06, + "loss": 0.0, + "step": 9100 + }, + { + "epoch": 0.5865180125024168, + "grad_norm": 0.00504899862022767, + "learning_rate": 4.599355531686359e-06, + "loss": 0.0, + "step": 9101 + }, + { + "epoch": 0.5865824579493458, + "grad_norm": 5.1629753872988315, + "learning_rate": 4.598639455782314e-06, + "loss": 0.0183, + "step": 9102 + }, + { + "epoch": 0.586646903396275, + "grad_norm": 0.0011573463084333388, + "learning_rate": 4.597923379878267e-06, + "loss": 0.0, + "step": 9103 + }, + { + "epoch": 0.5867113488432042, + "grad_norm": 0.009361562436643706, + "learning_rate": 4.5972073039742214e-06, + "loss": 0.0, + "step": 9104 + }, + { + "epoch": 0.5867757942901334, + "grad_norm": 0.027741470990613967, + "learning_rate": 4.596491228070176e-06, + "loss": 0.0, + "step": 9105 + }, + { + "epoch": 0.5868402397370626, + "grad_norm": 0.00832458852908692, + "learning_rate": 4.59577515216613e-06, + "loss": 0.0, + "step": 9106 + }, + { + "epoch": 0.5869046851839917, + "grad_norm": 0.001290437778751822, + "learning_rate": 4.595059076262084e-06, + "loss": 0.0, + "step": 9107 + }, + { + "epoch": 0.5869691306309209, + "grad_norm": 0.015002341091636952, + "learning_rate": 4.594343000358039e-06, + "loss": 0.0, + "step": 9108 + }, + { + "epoch": 0.5870335760778501, + "grad_norm": 0.004408846988619795, + "learning_rate": 4.593626924453993e-06, + "loss": 0.0, + "step": 9109 + }, + { + "epoch": 0.5870980215247793, + "grad_norm": 0.039488889693771616, + "learning_rate": 4.592910848549947e-06, + "loss": 0.0, + "step": 9110 + }, + { + "epoch": 0.5871624669717085, + "grad_norm": 0.026583034234040636, + "learning_rate": 4.592194772645901e-06, + "loss": 0.0003, + "step": 9111 + }, + { + "epoch": 0.5872269124186377, + "grad_norm": 0.000849249254509577, + "learning_rate": 4.591478696741855e-06, + "loss": 0.0, + "step": 9112 + }, + { + "epoch": 0.5872913578655667, + "grad_norm": 0.004329321968183173, + "learning_rate": 4.590762620837809e-06, + "loss": 0.0, + "step": 9113 + }, + { + "epoch": 0.5873558033124959, + "grad_norm": 0.04470970489953232, + "learning_rate": 4.5900465449337636e-06, + "loss": 0.0, + "step": 9114 + }, + { + "epoch": 0.5874202487594251, + "grad_norm": 0.004520625298732878, + "learning_rate": 4.589330469029718e-06, + "loss": 0.0, + "step": 9115 + }, + { + "epoch": 0.5874846942063543, + "grad_norm": 0.00766393875770787, + "learning_rate": 4.588614393125671e-06, + "loss": 0.0, + "step": 9116 + }, + { + "epoch": 0.5875491396532835, + "grad_norm": 0.002323407439271178, + "learning_rate": 4.587898317221626e-06, + "loss": 0.0, + "step": 9117 + }, + { + "epoch": 0.5876135851002127, + "grad_norm": 0.06852094765522453, + "learning_rate": 4.58718224131758e-06, + "loss": 0.0001, + "step": 9118 + }, + { + "epoch": 0.5876780305471418, + "grad_norm": 0.004350183486519918, + "learning_rate": 4.586466165413534e-06, + "loss": 0.0001, + "step": 9119 + }, + { + "epoch": 0.587742475994071, + "grad_norm": 0.03648216598243733, + "learning_rate": 4.5857500895094885e-06, + "loss": 0.0001, + "step": 9120 + }, + { + "epoch": 0.5878069214410002, + "grad_norm": 0.31417986446623686, + "learning_rate": 4.585034013605443e-06, + "loss": 0.0014, + "step": 9121 + }, + { + "epoch": 0.5878713668879294, + "grad_norm": 0.0414216516748464, + "learning_rate": 4.584317937701397e-06, + "loss": 0.0001, + "step": 9122 + }, + { + "epoch": 0.5879358123348586, + "grad_norm": 0.05218019741684195, + "learning_rate": 4.583601861797351e-06, + "loss": 0.0001, + "step": 9123 + }, + { + "epoch": 0.5880002577817878, + "grad_norm": 0.00462391310842688, + "learning_rate": 4.582885785893305e-06, + "loss": 0.0, + "step": 9124 + }, + { + "epoch": 0.5880647032287168, + "grad_norm": 0.020700351989345004, + "learning_rate": 4.582169709989259e-06, + "loss": 0.0001, + "step": 9125 + }, + { + "epoch": 0.588129148675646, + "grad_norm": 0.0029324404378681776, + "learning_rate": 4.5814536340852135e-06, + "loss": 0.0, + "step": 9126 + }, + { + "epoch": 0.5881935941225752, + "grad_norm": 0.00026724816470433214, + "learning_rate": 4.580737558181168e-06, + "loss": 0.0, + "step": 9127 + }, + { + "epoch": 0.5882580395695044, + "grad_norm": 0.00047139101971960793, + "learning_rate": 4.580021482277121e-06, + "loss": 0.0, + "step": 9128 + }, + { + "epoch": 0.5883224850164336, + "grad_norm": 0.001048866333167167, + "learning_rate": 4.5793054063730755e-06, + "loss": 0.0, + "step": 9129 + }, + { + "epoch": 0.5883869304633628, + "grad_norm": 0.00039698803690931634, + "learning_rate": 4.57858933046903e-06, + "loss": 0.0, + "step": 9130 + }, + { + "epoch": 0.588451375910292, + "grad_norm": 0.006695436789118914, + "learning_rate": 4.577873254564984e-06, + "loss": 0.0001, + "step": 9131 + }, + { + "epoch": 0.5885158213572211, + "grad_norm": 0.0019064073027897633, + "learning_rate": 4.577157178660938e-06, + "loss": 0.0, + "step": 9132 + }, + { + "epoch": 0.5885802668041503, + "grad_norm": 0.0002214748274238106, + "learning_rate": 4.576441102756893e-06, + "loss": 0.0, + "step": 9133 + }, + { + "epoch": 0.5886447122510795, + "grad_norm": 0.1768014864419196, + "learning_rate": 4.575725026852847e-06, + "loss": 0.0032, + "step": 9134 + }, + { + "epoch": 0.5887091576980087, + "grad_norm": 0.00627439198323087, + "learning_rate": 4.575008950948801e-06, + "loss": 0.0, + "step": 9135 + }, + { + "epoch": 0.5887736031449378, + "grad_norm": 0.0019090848411585798, + "learning_rate": 4.574292875044755e-06, + "loss": 0.0, + "step": 9136 + }, + { + "epoch": 0.588838048591867, + "grad_norm": 0.023734394041223905, + "learning_rate": 4.573576799140709e-06, + "loss": 0.0001, + "step": 9137 + }, + { + "epoch": 0.5889024940387961, + "grad_norm": 0.0012559579821498448, + "learning_rate": 4.572860723236663e-06, + "loss": 0.0, + "step": 9138 + }, + { + "epoch": 0.5889669394857253, + "grad_norm": 0.005693433432522691, + "learning_rate": 4.572144647332618e-06, + "loss": 0.0001, + "step": 9139 + }, + { + "epoch": 0.5890313849326545, + "grad_norm": 0.009353012652150373, + "learning_rate": 4.571428571428572e-06, + "loss": 0.0, + "step": 9140 + }, + { + "epoch": 0.5890958303795837, + "grad_norm": 0.029658588021311515, + "learning_rate": 4.570712495524525e-06, + "loss": 0.0001, + "step": 9141 + }, + { + "epoch": 0.5891602758265129, + "grad_norm": 0.003302594123403248, + "learning_rate": 4.56999641962048e-06, + "loss": 0.0, + "step": 9142 + }, + { + "epoch": 0.589224721273442, + "grad_norm": 0.001705376414646427, + "learning_rate": 4.569280343716435e-06, + "loss": 0.0, + "step": 9143 + }, + { + "epoch": 0.5892891667203712, + "grad_norm": 0.18220126885195806, + "learning_rate": 4.568564267812388e-06, + "loss": 0.0004, + "step": 9144 + }, + { + "epoch": 0.5893536121673004, + "grad_norm": 0.0019109073460819275, + "learning_rate": 4.567848191908343e-06, + "loss": 0.0, + "step": 9145 + }, + { + "epoch": 0.5894180576142296, + "grad_norm": 0.005024864271140364, + "learning_rate": 4.567132116004297e-06, + "loss": 0.0, + "step": 9146 + }, + { + "epoch": 0.5894825030611587, + "grad_norm": 0.00905942092399999, + "learning_rate": 4.566416040100251e-06, + "loss": 0.0, + "step": 9147 + }, + { + "epoch": 0.5895469485080879, + "grad_norm": 0.005881856218996886, + "learning_rate": 4.5656999641962055e-06, + "loss": 0.0, + "step": 9148 + }, + { + "epoch": 0.589611393955017, + "grad_norm": 0.0008304460678939578, + "learning_rate": 4.564983888292159e-06, + "loss": 0.0, + "step": 9149 + }, + { + "epoch": 0.5896758394019462, + "grad_norm": 0.007501626420824486, + "learning_rate": 4.564267812388113e-06, + "loss": 0.0, + "step": 9150 + }, + { + "epoch": 0.5897402848488754, + "grad_norm": 0.0012510811332456175, + "learning_rate": 4.5635517364840675e-06, + "loss": 0.0, + "step": 9151 + }, + { + "epoch": 0.5898047302958046, + "grad_norm": 0.0746794568702863, + "learning_rate": 4.562835660580022e-06, + "loss": 0.001, + "step": 9152 + }, + { + "epoch": 0.5898691757427338, + "grad_norm": 0.26361614627359486, + "learning_rate": 4.562119584675976e-06, + "loss": 0.0021, + "step": 9153 + }, + { + "epoch": 0.589933621189663, + "grad_norm": 0.0002156212009346355, + "learning_rate": 4.56140350877193e-06, + "loss": 0.0, + "step": 9154 + }, + { + "epoch": 0.5899980666365922, + "grad_norm": 0.0015590836780957843, + "learning_rate": 4.560687432867885e-06, + "loss": 0.0, + "step": 9155 + }, + { + "epoch": 0.5900625120835213, + "grad_norm": 0.004308015541784625, + "learning_rate": 4.559971356963839e-06, + "loss": 0.0, + "step": 9156 + }, + { + "epoch": 0.5901269575304505, + "grad_norm": 0.0717703984591805, + "learning_rate": 4.5592552810597925e-06, + "loss": 0.0003, + "step": 9157 + }, + { + "epoch": 0.5901914029773796, + "grad_norm": 0.0016221557899336572, + "learning_rate": 4.558539205155747e-06, + "loss": 0.0, + "step": 9158 + }, + { + "epoch": 0.5902558484243088, + "grad_norm": 0.02843239715369182, + "learning_rate": 4.557823129251701e-06, + "loss": 0.0001, + "step": 9159 + }, + { + "epoch": 0.590320293871238, + "grad_norm": 0.08906306174586306, + "learning_rate": 4.557107053347655e-06, + "loss": 0.0002, + "step": 9160 + }, + { + "epoch": 0.5903847393181672, + "grad_norm": 0.041065458269552234, + "learning_rate": 4.55639097744361e-06, + "loss": 0.0001, + "step": 9161 + }, + { + "epoch": 0.5904491847650963, + "grad_norm": 0.0008033243988639701, + "learning_rate": 4.555674901539563e-06, + "loss": 0.0, + "step": 9162 + }, + { + "epoch": 0.5905136302120255, + "grad_norm": 0.05046193202663287, + "learning_rate": 4.5549588256355174e-06, + "loss": 0.0, + "step": 9163 + }, + { + "epoch": 0.5905780756589547, + "grad_norm": 0.00023855639736130923, + "learning_rate": 4.554242749731472e-06, + "loss": 0.0, + "step": 9164 + }, + { + "epoch": 0.5906425211058839, + "grad_norm": 0.0006409486529008727, + "learning_rate": 4.553526673827426e-06, + "loss": 0.0, + "step": 9165 + }, + { + "epoch": 0.5907069665528131, + "grad_norm": 0.00037044303511188413, + "learning_rate": 4.55281059792338e-06, + "loss": 0.0, + "step": 9166 + }, + { + "epoch": 0.5907714119997423, + "grad_norm": 0.012088132812897083, + "learning_rate": 4.552094522019335e-06, + "loss": 0.0, + "step": 9167 + }, + { + "epoch": 0.5908358574466714, + "grad_norm": 0.0016999619869035114, + "learning_rate": 4.551378446115289e-06, + "loss": 0.0, + "step": 9168 + }, + { + "epoch": 0.5909003028936005, + "grad_norm": 0.38509600634394847, + "learning_rate": 4.550662370211243e-06, + "loss": 0.0014, + "step": 9169 + }, + { + "epoch": 0.5909647483405297, + "grad_norm": 0.0015224179117437268, + "learning_rate": 4.549946294307197e-06, + "loss": 0.0, + "step": 9170 + }, + { + "epoch": 0.5910291937874589, + "grad_norm": 0.0011083900503956407, + "learning_rate": 4.549230218403151e-06, + "loss": 0.0, + "step": 9171 + }, + { + "epoch": 0.5910936392343881, + "grad_norm": 0.00030231327758239914, + "learning_rate": 4.548514142499105e-06, + "loss": 0.0, + "step": 9172 + }, + { + "epoch": 0.5911580846813173, + "grad_norm": 0.17115920743812696, + "learning_rate": 4.5477980665950596e-06, + "loss": 0.0002, + "step": 9173 + }, + { + "epoch": 0.5912225301282464, + "grad_norm": 0.0030166942177998085, + "learning_rate": 4.547081990691014e-06, + "loss": 0.0, + "step": 9174 + }, + { + "epoch": 0.5912869755751756, + "grad_norm": 0.013242115384337326, + "learning_rate": 4.546365914786967e-06, + "loss": 0.0001, + "step": 9175 + }, + { + "epoch": 0.5913514210221048, + "grad_norm": 0.23101641353282965, + "learning_rate": 4.545649838882922e-06, + "loss": 0.0006, + "step": 9176 + }, + { + "epoch": 0.591415866469034, + "grad_norm": 0.03984360700703188, + "learning_rate": 4.544933762978876e-06, + "loss": 0.0016, + "step": 9177 + }, + { + "epoch": 0.5914803119159632, + "grad_norm": 0.03024855675695247, + "learning_rate": 4.54421768707483e-06, + "loss": 0.0002, + "step": 9178 + }, + { + "epoch": 0.5915447573628924, + "grad_norm": 0.04043637977027526, + "learning_rate": 4.5435016111707845e-06, + "loss": 0.0004, + "step": 9179 + }, + { + "epoch": 0.5916092028098214, + "grad_norm": 0.10753774462695014, + "learning_rate": 4.542785535266739e-06, + "loss": 0.0001, + "step": 9180 + }, + { + "epoch": 0.5916736482567506, + "grad_norm": 0.0025644812183412474, + "learning_rate": 4.542069459362693e-06, + "loss": 0.0, + "step": 9181 + }, + { + "epoch": 0.5917380937036798, + "grad_norm": 0.04065201438857625, + "learning_rate": 4.541353383458647e-06, + "loss": 0.0001, + "step": 9182 + }, + { + "epoch": 0.591802539150609, + "grad_norm": 0.3754980038895867, + "learning_rate": 4.540637307554601e-06, + "loss": 0.0003, + "step": 9183 + }, + { + "epoch": 0.5918669845975382, + "grad_norm": 0.0013786835065639602, + "learning_rate": 4.539921231650555e-06, + "loss": 0.0, + "step": 9184 + }, + { + "epoch": 0.5919314300444674, + "grad_norm": 0.038676694668396946, + "learning_rate": 4.5392051557465095e-06, + "loss": 0.0018, + "step": 9185 + }, + { + "epoch": 0.5919958754913965, + "grad_norm": 0.123739661317091, + "learning_rate": 4.538489079842464e-06, + "loss": 0.0001, + "step": 9186 + }, + { + "epoch": 0.5920603209383257, + "grad_norm": 0.002075990619147738, + "learning_rate": 4.537773003938418e-06, + "loss": 0.0, + "step": 9187 + }, + { + "epoch": 0.5921247663852549, + "grad_norm": 0.0011510265777243546, + "learning_rate": 4.5370569280343715e-06, + "loss": 0.0, + "step": 9188 + }, + { + "epoch": 0.5921892118321841, + "grad_norm": 0.006128596688916639, + "learning_rate": 4.536340852130326e-06, + "loss": 0.0, + "step": 9189 + }, + { + "epoch": 0.5922536572791133, + "grad_norm": 0.3238451195135463, + "learning_rate": 4.535624776226281e-06, + "loss": 0.0008, + "step": 9190 + }, + { + "epoch": 0.5923181027260424, + "grad_norm": 0.033411018542313785, + "learning_rate": 4.534908700322234e-06, + "loss": 0.0, + "step": 9191 + }, + { + "epoch": 0.5923825481729715, + "grad_norm": 0.009916791814087991, + "learning_rate": 4.534192624418189e-06, + "loss": 0.0, + "step": 9192 + }, + { + "epoch": 0.5924469936199007, + "grad_norm": 0.0006763817433519578, + "learning_rate": 4.533476548514143e-06, + "loss": 0.0, + "step": 9193 + }, + { + "epoch": 0.5925114390668299, + "grad_norm": 0.2030172612492698, + "learning_rate": 4.532760472610097e-06, + "loss": 0.0042, + "step": 9194 + }, + { + "epoch": 0.5925758845137591, + "grad_norm": 0.0010908219317394157, + "learning_rate": 4.532044396706052e-06, + "loss": 0.0, + "step": 9195 + }, + { + "epoch": 0.5926403299606883, + "grad_norm": 0.0005182772170067717, + "learning_rate": 4.531328320802005e-06, + "loss": 0.0, + "step": 9196 + }, + { + "epoch": 0.5927047754076175, + "grad_norm": 0.0014441613798425383, + "learning_rate": 4.530612244897959e-06, + "loss": 0.0, + "step": 9197 + }, + { + "epoch": 0.5927692208545466, + "grad_norm": 0.0008751122361331298, + "learning_rate": 4.529896168993914e-06, + "loss": 0.0, + "step": 9198 + }, + { + "epoch": 0.5928336663014758, + "grad_norm": 0.15613020834851898, + "learning_rate": 4.529180093089868e-06, + "loss": 0.0012, + "step": 9199 + }, + { + "epoch": 0.592898111748405, + "grad_norm": 0.023299958553483276, + "learning_rate": 4.528464017185822e-06, + "loss": 0.0, + "step": 9200 + }, + { + "epoch": 0.5929625571953342, + "grad_norm": 0.0002505282700823623, + "learning_rate": 4.527747941281776e-06, + "loss": 0.0, + "step": 9201 + }, + { + "epoch": 0.5930270026422634, + "grad_norm": 0.06823508339202548, + "learning_rate": 4.527031865377731e-06, + "loss": 0.0002, + "step": 9202 + }, + { + "epoch": 0.5930914480891925, + "grad_norm": 0.0023710727571781822, + "learning_rate": 4.526315789473685e-06, + "loss": 0.0, + "step": 9203 + }, + { + "epoch": 0.5931558935361216, + "grad_norm": 0.005031136311208585, + "learning_rate": 4.525599713569639e-06, + "loss": 0.0, + "step": 9204 + }, + { + "epoch": 0.5932203389830508, + "grad_norm": 0.00313223262834699, + "learning_rate": 4.524883637665593e-06, + "loss": 0.0, + "step": 9205 + }, + { + "epoch": 0.59328478442998, + "grad_norm": 0.0023746098856013845, + "learning_rate": 4.524167561761547e-06, + "loss": 0.0, + "step": 9206 + }, + { + "epoch": 0.5933492298769092, + "grad_norm": 0.00026919858289471487, + "learning_rate": 4.5234514858575015e-06, + "loss": 0.0, + "step": 9207 + }, + { + "epoch": 0.5934136753238384, + "grad_norm": 0.004509877341564837, + "learning_rate": 4.522735409953456e-06, + "loss": 0.0, + "step": 9208 + }, + { + "epoch": 0.5934781207707676, + "grad_norm": 0.42020257838400926, + "learning_rate": 4.522019334049409e-06, + "loss": 0.0015, + "step": 9209 + }, + { + "epoch": 0.5935425662176967, + "grad_norm": 0.0028492342946140163, + "learning_rate": 4.5213032581453635e-06, + "loss": 0.0, + "step": 9210 + }, + { + "epoch": 0.5936070116646259, + "grad_norm": 0.09158406548041811, + "learning_rate": 4.520587182241318e-06, + "loss": 0.0007, + "step": 9211 + }, + { + "epoch": 0.5936714571115551, + "grad_norm": 0.005189893702205593, + "learning_rate": 4.519871106337272e-06, + "loss": 0.0, + "step": 9212 + }, + { + "epoch": 0.5937359025584843, + "grad_norm": 0.002655863031524807, + "learning_rate": 4.5191550304332264e-06, + "loss": 0.0, + "step": 9213 + }, + { + "epoch": 0.5938003480054134, + "grad_norm": 0.00017574247321199604, + "learning_rate": 4.518438954529181e-06, + "loss": 0.0, + "step": 9214 + }, + { + "epoch": 0.5938647934523426, + "grad_norm": 0.004720604057724035, + "learning_rate": 4.517722878625135e-06, + "loss": 0.0, + "step": 9215 + }, + { + "epoch": 0.5939292388992717, + "grad_norm": 0.000470269239913421, + "learning_rate": 4.517006802721089e-06, + "loss": 0.0, + "step": 9216 + }, + { + "epoch": 0.5939936843462009, + "grad_norm": 0.004527785334700599, + "learning_rate": 4.516290726817043e-06, + "loss": 0.0, + "step": 9217 + }, + { + "epoch": 0.5940581297931301, + "grad_norm": 0.22733169733458167, + "learning_rate": 4.515574650912997e-06, + "loss": 0.0008, + "step": 9218 + }, + { + "epoch": 0.5941225752400593, + "grad_norm": 0.0009303410068623072, + "learning_rate": 4.514858575008951e-06, + "loss": 0.0, + "step": 9219 + }, + { + "epoch": 0.5941870206869885, + "grad_norm": 0.006910788414918203, + "learning_rate": 4.514142499104906e-06, + "loss": 0.0001, + "step": 9220 + }, + { + "epoch": 0.5942514661339177, + "grad_norm": 0.29944842913354613, + "learning_rate": 4.513426423200859e-06, + "loss": 0.0009, + "step": 9221 + }, + { + "epoch": 0.5943159115808468, + "grad_norm": 0.015923637568431896, + "learning_rate": 4.5127103472968134e-06, + "loss": 0.0, + "step": 9222 + }, + { + "epoch": 0.594380357027776, + "grad_norm": 0.1276660123801875, + "learning_rate": 4.511994271392768e-06, + "loss": 0.0002, + "step": 9223 + }, + { + "epoch": 0.5944448024747052, + "grad_norm": 0.05243302043805129, + "learning_rate": 4.511278195488722e-06, + "loss": 0.0004, + "step": 9224 + }, + { + "epoch": 0.5945092479216343, + "grad_norm": 0.02153507462951738, + "learning_rate": 4.510562119584676e-06, + "loss": 0.0001, + "step": 9225 + }, + { + "epoch": 0.5945736933685635, + "grad_norm": 0.0004915653704900115, + "learning_rate": 4.509846043680631e-06, + "loss": 0.0, + "step": 9226 + }, + { + "epoch": 0.5946381388154927, + "grad_norm": 0.00825432485504778, + "learning_rate": 4.509129967776585e-06, + "loss": 0.0, + "step": 9227 + }, + { + "epoch": 0.5947025842624218, + "grad_norm": 0.08825993565435779, + "learning_rate": 4.508413891872539e-06, + "loss": 0.0006, + "step": 9228 + }, + { + "epoch": 0.594767029709351, + "grad_norm": 8.340152364850253e-05, + "learning_rate": 4.507697815968493e-06, + "loss": 0.0, + "step": 9229 + }, + { + "epoch": 0.5948314751562802, + "grad_norm": 0.013485439351184434, + "learning_rate": 4.506981740064447e-06, + "loss": 0.0001, + "step": 9230 + }, + { + "epoch": 0.5948959206032094, + "grad_norm": 0.005780882656582949, + "learning_rate": 4.506265664160401e-06, + "loss": 0.0001, + "step": 9231 + }, + { + "epoch": 0.5949603660501386, + "grad_norm": 0.01674859071076599, + "learning_rate": 4.5055495882563556e-06, + "loss": 0.0, + "step": 9232 + }, + { + "epoch": 0.5950248114970678, + "grad_norm": 0.0019716342943576176, + "learning_rate": 4.50483351235231e-06, + "loss": 0.0, + "step": 9233 + }, + { + "epoch": 0.595089256943997, + "grad_norm": 8.759305110141916e-05, + "learning_rate": 4.504117436448263e-06, + "loss": 0.0, + "step": 9234 + }, + { + "epoch": 0.5951537023909261, + "grad_norm": 0.002496013202718065, + "learning_rate": 4.503401360544218e-06, + "loss": 0.0, + "step": 9235 + }, + { + "epoch": 0.5952181478378552, + "grad_norm": 0.00025275087997574504, + "learning_rate": 4.502685284640172e-06, + "loss": 0.0, + "step": 9236 + }, + { + "epoch": 0.5952825932847844, + "grad_norm": 0.0024015228303083213, + "learning_rate": 4.501969208736126e-06, + "loss": 0.0, + "step": 9237 + }, + { + "epoch": 0.5953470387317136, + "grad_norm": 0.1258258953590908, + "learning_rate": 4.5012531328320805e-06, + "loss": 0.0003, + "step": 9238 + }, + { + "epoch": 0.5954114841786428, + "grad_norm": 0.0032277228050439774, + "learning_rate": 4.500537056928035e-06, + "loss": 0.0, + "step": 9239 + }, + { + "epoch": 0.5954759296255719, + "grad_norm": 0.0014017478054440399, + "learning_rate": 4.499820981023989e-06, + "loss": 0.0, + "step": 9240 + }, + { + "epoch": 0.5955403750725011, + "grad_norm": 0.0043084187561121135, + "learning_rate": 4.499104905119943e-06, + "loss": 0.0, + "step": 9241 + }, + { + "epoch": 0.5956048205194303, + "grad_norm": 0.014204112734319778, + "learning_rate": 4.498388829215897e-06, + "loss": 0.0, + "step": 9242 + }, + { + "epoch": 0.5956692659663595, + "grad_norm": 0.0012018417368002514, + "learning_rate": 4.497672753311851e-06, + "loss": 0.0, + "step": 9243 + }, + { + "epoch": 0.5957337114132887, + "grad_norm": 0.001397111472659201, + "learning_rate": 4.4969566774078055e-06, + "loss": 0.0, + "step": 9244 + }, + { + "epoch": 0.5957981568602179, + "grad_norm": 0.004663769686834882, + "learning_rate": 4.49624060150376e-06, + "loss": 0.0, + "step": 9245 + }, + { + "epoch": 0.595862602307147, + "grad_norm": 0.056992404426177555, + "learning_rate": 4.495524525599714e-06, + "loss": 0.0004, + "step": 9246 + }, + { + "epoch": 0.5959270477540761, + "grad_norm": 0.002981061286054127, + "learning_rate": 4.4948084496956675e-06, + "loss": 0.0, + "step": 9247 + }, + { + "epoch": 0.5959914932010053, + "grad_norm": 0.903715832698863, + "learning_rate": 4.494092373791623e-06, + "loss": 0.0048, + "step": 9248 + }, + { + "epoch": 0.5960559386479345, + "grad_norm": 0.04485061684727686, + "learning_rate": 4.493376297887577e-06, + "loss": 0.0001, + "step": 9249 + }, + { + "epoch": 0.5961203840948637, + "grad_norm": 0.014483704247325296, + "learning_rate": 4.49266022198353e-06, + "loss": 0.0, + "step": 9250 + }, + { + "epoch": 0.5961848295417929, + "grad_norm": 0.0025339210037765942, + "learning_rate": 4.491944146079485e-06, + "loss": 0.0, + "step": 9251 + }, + { + "epoch": 0.596249274988722, + "grad_norm": 0.02972256912942362, + "learning_rate": 4.491228070175439e-06, + "loss": 0.0, + "step": 9252 + }, + { + "epoch": 0.5963137204356512, + "grad_norm": 0.003333447946224778, + "learning_rate": 4.490511994271393e-06, + "loss": 0.0, + "step": 9253 + }, + { + "epoch": 0.5963781658825804, + "grad_norm": 0.002218025045587023, + "learning_rate": 4.489795918367348e-06, + "loss": 0.0, + "step": 9254 + }, + { + "epoch": 0.5964426113295096, + "grad_norm": 0.00025620211906760514, + "learning_rate": 4.489079842463301e-06, + "loss": 0.0, + "step": 9255 + }, + { + "epoch": 0.5965070567764388, + "grad_norm": 0.3525647500102783, + "learning_rate": 4.488363766559255e-06, + "loss": 0.0013, + "step": 9256 + }, + { + "epoch": 0.596571502223368, + "grad_norm": 0.0009345009565512394, + "learning_rate": 4.48764769065521e-06, + "loss": 0.0, + "step": 9257 + }, + { + "epoch": 0.596635947670297, + "grad_norm": 0.2165891494152304, + "learning_rate": 4.486931614751164e-06, + "loss": 0.0006, + "step": 9258 + }, + { + "epoch": 0.5967003931172262, + "grad_norm": 0.05442176698580331, + "learning_rate": 4.486215538847118e-06, + "loss": 0.0001, + "step": 9259 + }, + { + "epoch": 0.5967648385641554, + "grad_norm": 0.027872295910078077, + "learning_rate": 4.4854994629430725e-06, + "loss": 0.0, + "step": 9260 + }, + { + "epoch": 0.5968292840110846, + "grad_norm": 0.0002799742786045959, + "learning_rate": 4.484783387039027e-06, + "loss": 0.0, + "step": 9261 + }, + { + "epoch": 0.5968937294580138, + "grad_norm": 0.00018996156454566542, + "learning_rate": 4.484067311134981e-06, + "loss": 0.0, + "step": 9262 + }, + { + "epoch": 0.596958174904943, + "grad_norm": 0.14084470328682747, + "learning_rate": 4.483351235230935e-06, + "loss": 0.0012, + "step": 9263 + }, + { + "epoch": 0.5970226203518721, + "grad_norm": 0.001003408319839176, + "learning_rate": 4.482635159326889e-06, + "loss": 0.0, + "step": 9264 + }, + { + "epoch": 0.5970870657988013, + "grad_norm": 0.006807802621411491, + "learning_rate": 4.481919083422843e-06, + "loss": 0.0, + "step": 9265 + }, + { + "epoch": 0.5971515112457305, + "grad_norm": 0.020114278322576007, + "learning_rate": 4.4812030075187975e-06, + "loss": 0.0001, + "step": 9266 + }, + { + "epoch": 0.5972159566926597, + "grad_norm": 0.00017411804282206113, + "learning_rate": 4.480486931614752e-06, + "loss": 0.0, + "step": 9267 + }, + { + "epoch": 0.5972804021395889, + "grad_norm": 0.01873928089012201, + "learning_rate": 4.479770855710705e-06, + "loss": 0.0, + "step": 9268 + }, + { + "epoch": 0.597344847586518, + "grad_norm": 0.0011347830260341888, + "learning_rate": 4.4790547798066595e-06, + "loss": 0.0, + "step": 9269 + }, + { + "epoch": 0.5974092930334471, + "grad_norm": 0.153683499525407, + "learning_rate": 4.478338703902614e-06, + "loss": 0.0005, + "step": 9270 + }, + { + "epoch": 0.5974737384803763, + "grad_norm": 0.005827371173133551, + "learning_rate": 4.477622627998568e-06, + "loss": 0.0, + "step": 9271 + }, + { + "epoch": 0.5975381839273055, + "grad_norm": 0.0005971115847287093, + "learning_rate": 4.4769065520945224e-06, + "loss": 0.0, + "step": 9272 + }, + { + "epoch": 0.5976026293742347, + "grad_norm": 0.004836487626164628, + "learning_rate": 4.476190476190477e-06, + "loss": 0.0, + "step": 9273 + }, + { + "epoch": 0.5976670748211639, + "grad_norm": 0.0005959039040827599, + "learning_rate": 4.475474400286431e-06, + "loss": 0.0, + "step": 9274 + }, + { + "epoch": 0.5977315202680931, + "grad_norm": 0.3020565196447107, + "learning_rate": 4.474758324382385e-06, + "loss": 0.0003, + "step": 9275 + }, + { + "epoch": 0.5977959657150222, + "grad_norm": 0.001559705042993861, + "learning_rate": 4.474042248478339e-06, + "loss": 0.0, + "step": 9276 + }, + { + "epoch": 0.5978604111619514, + "grad_norm": 1.8227666892697204e-05, + "learning_rate": 4.473326172574293e-06, + "loss": 0.0, + "step": 9277 + }, + { + "epoch": 0.5979248566088806, + "grad_norm": 0.0013650415837087972, + "learning_rate": 4.472610096670247e-06, + "loss": 0.0, + "step": 9278 + }, + { + "epoch": 0.5979893020558098, + "grad_norm": 0.16759155859822403, + "learning_rate": 4.471894020766202e-06, + "loss": 0.0009, + "step": 9279 + }, + { + "epoch": 0.598053747502739, + "grad_norm": 0.0008573025217973782, + "learning_rate": 4.471177944862156e-06, + "loss": 0.0, + "step": 9280 + }, + { + "epoch": 0.5981181929496681, + "grad_norm": 4.933914884938549e-05, + "learning_rate": 4.4704618689581094e-06, + "loss": 0.0, + "step": 9281 + }, + { + "epoch": 0.5981826383965972, + "grad_norm": 0.0037086977972102943, + "learning_rate": 4.469745793054064e-06, + "loss": 0.0, + "step": 9282 + }, + { + "epoch": 0.5982470838435264, + "grad_norm": 0.002770381211495419, + "learning_rate": 4.469029717150019e-06, + "loss": 0.0, + "step": 9283 + }, + { + "epoch": 0.5983115292904556, + "grad_norm": 0.002774506415338846, + "learning_rate": 4.468313641245972e-06, + "loss": 0.0, + "step": 9284 + }, + { + "epoch": 0.5983759747373848, + "grad_norm": 0.0010348561652971477, + "learning_rate": 4.467597565341927e-06, + "loss": 0.0, + "step": 9285 + }, + { + "epoch": 0.598440420184314, + "grad_norm": 0.00225808661724457, + "learning_rate": 4.466881489437881e-06, + "loss": 0.0, + "step": 9286 + }, + { + "epoch": 0.5985048656312432, + "grad_norm": 0.0001941368847980672, + "learning_rate": 4.466165413533835e-06, + "loss": 0.0, + "step": 9287 + }, + { + "epoch": 0.5985693110781724, + "grad_norm": 6.751922171693977e-05, + "learning_rate": 4.4654493376297895e-06, + "loss": 0.0, + "step": 9288 + }, + { + "epoch": 0.5986337565251015, + "grad_norm": 0.0003746486823726859, + "learning_rate": 4.464733261725743e-06, + "loss": 0.0, + "step": 9289 + }, + { + "epoch": 0.5986982019720307, + "grad_norm": 0.0026928507882407063, + "learning_rate": 4.464017185821697e-06, + "loss": 0.0, + "step": 9290 + }, + { + "epoch": 0.5987626474189599, + "grad_norm": 0.018445121237624786, + "learning_rate": 4.4633011099176516e-06, + "loss": 0.0002, + "step": 9291 + }, + { + "epoch": 0.598827092865889, + "grad_norm": 0.00025725082956279275, + "learning_rate": 4.462585034013606e-06, + "loss": 0.0, + "step": 9292 + }, + { + "epoch": 0.5988915383128182, + "grad_norm": 0.0010799365281890174, + "learning_rate": 4.46186895810956e-06, + "loss": 0.0, + "step": 9293 + }, + { + "epoch": 0.5989559837597473, + "grad_norm": 0.007159703054942445, + "learning_rate": 4.461152882205514e-06, + "loss": 0.0, + "step": 9294 + }, + { + "epoch": 0.5990204292066765, + "grad_norm": 1.2268386049962541e-05, + "learning_rate": 4.460436806301469e-06, + "loss": 0.0, + "step": 9295 + }, + { + "epoch": 0.5990848746536057, + "grad_norm": 0.06045411872724877, + "learning_rate": 4.459720730397423e-06, + "loss": 0.0006, + "step": 9296 + }, + { + "epoch": 0.5991493201005349, + "grad_norm": 0.19484259344276214, + "learning_rate": 4.4590046544933765e-06, + "loss": 0.0031, + "step": 9297 + }, + { + "epoch": 0.5992137655474641, + "grad_norm": 0.00011573336964747802, + "learning_rate": 4.458288578589331e-06, + "loss": 0.0, + "step": 9298 + }, + { + "epoch": 0.5992782109943933, + "grad_norm": 0.00013179948953989136, + "learning_rate": 4.457572502685285e-06, + "loss": 0.0, + "step": 9299 + }, + { + "epoch": 0.5993426564413225, + "grad_norm": 0.08617986833949201, + "learning_rate": 4.456856426781239e-06, + "loss": 0.0001, + "step": 9300 + }, + { + "epoch": 0.5994071018882516, + "grad_norm": 2.1917965791246905, + "learning_rate": 4.456140350877194e-06, + "loss": 0.0069, + "step": 9301 + }, + { + "epoch": 0.5994715473351808, + "grad_norm": 0.005079315964603255, + "learning_rate": 4.455424274973147e-06, + "loss": 0.0, + "step": 9302 + }, + { + "epoch": 0.5995359927821099, + "grad_norm": 0.0005311949623312273, + "learning_rate": 4.4547081990691015e-06, + "loss": 0.0, + "step": 9303 + }, + { + "epoch": 0.5996004382290391, + "grad_norm": 0.00018466529970892427, + "learning_rate": 4.453992123165056e-06, + "loss": 0.0, + "step": 9304 + }, + { + "epoch": 0.5996648836759683, + "grad_norm": 0.0024224931746407233, + "learning_rate": 4.45327604726101e-06, + "loss": 0.0, + "step": 9305 + }, + { + "epoch": 0.5997293291228974, + "grad_norm": 9.214679526500649e-05, + "learning_rate": 4.452559971356964e-06, + "loss": 0.0, + "step": 9306 + }, + { + "epoch": 0.5997937745698266, + "grad_norm": 0.00013752331314721667, + "learning_rate": 4.451843895452919e-06, + "loss": 0.0, + "step": 9307 + }, + { + "epoch": 0.5998582200167558, + "grad_norm": 0.0012078464524044698, + "learning_rate": 4.451127819548873e-06, + "loss": 0.0, + "step": 9308 + }, + { + "epoch": 0.599922665463685, + "grad_norm": 0.02017807043125452, + "learning_rate": 4.450411743644827e-06, + "loss": 0.0001, + "step": 9309 + }, + { + "epoch": 0.5999871109106142, + "grad_norm": 0.12984587208589446, + "learning_rate": 4.449695667740781e-06, + "loss": 0.0004, + "step": 9310 + }, + { + "epoch": 0.6000515563575434, + "grad_norm": 0.04357864342243002, + "learning_rate": 4.448979591836735e-06, + "loss": 0.0001, + "step": 9311 + }, + { + "epoch": 0.6001160018044726, + "grad_norm": 0.009136213091560104, + "learning_rate": 4.448263515932689e-06, + "loss": 0.0, + "step": 9312 + }, + { + "epoch": 0.6001804472514017, + "grad_norm": 0.03765791702092579, + "learning_rate": 4.447547440028644e-06, + "loss": 0.0, + "step": 9313 + }, + { + "epoch": 0.6002448926983308, + "grad_norm": 0.13066622728000005, + "learning_rate": 4.446831364124598e-06, + "loss": 0.0004, + "step": 9314 + }, + { + "epoch": 0.60030933814526, + "grad_norm": 0.033811319537168136, + "learning_rate": 4.446115288220551e-06, + "loss": 0.0001, + "step": 9315 + }, + { + "epoch": 0.6003737835921892, + "grad_norm": 0.003377651363750598, + "learning_rate": 4.445399212316506e-06, + "loss": 0.0, + "step": 9316 + }, + { + "epoch": 0.6004382290391184, + "grad_norm": 0.0012151443025219713, + "learning_rate": 4.44468313641246e-06, + "loss": 0.0, + "step": 9317 + }, + { + "epoch": 0.6005026744860475, + "grad_norm": 0.00030076058428306606, + "learning_rate": 4.443967060508414e-06, + "loss": 0.0, + "step": 9318 + }, + { + "epoch": 0.6005671199329767, + "grad_norm": 0.0015305195960151965, + "learning_rate": 4.4432509846043685e-06, + "loss": 0.0, + "step": 9319 + }, + { + "epoch": 0.6006315653799059, + "grad_norm": 0.439756161792432, + "learning_rate": 4.442534908700323e-06, + "loss": 0.0012, + "step": 9320 + }, + { + "epoch": 0.6006960108268351, + "grad_norm": 0.00840660182838595, + "learning_rate": 4.441818832796277e-06, + "loss": 0.0, + "step": 9321 + }, + { + "epoch": 0.6007604562737643, + "grad_norm": 0.024425231047921048, + "learning_rate": 4.4411027568922314e-06, + "loss": 0.0002, + "step": 9322 + }, + { + "epoch": 0.6008249017206935, + "grad_norm": 0.00420921162090846, + "learning_rate": 4.440386680988185e-06, + "loss": 0.0, + "step": 9323 + }, + { + "epoch": 0.6008893471676227, + "grad_norm": 0.03988730496138384, + "learning_rate": 4.439670605084139e-06, + "loss": 0.0001, + "step": 9324 + }, + { + "epoch": 0.6009537926145517, + "grad_norm": 0.0012243449380229714, + "learning_rate": 4.4389545291800935e-06, + "loss": 0.0, + "step": 9325 + }, + { + "epoch": 0.6010182380614809, + "grad_norm": 0.0006668839996335907, + "learning_rate": 4.438238453276048e-06, + "loss": 0.0, + "step": 9326 + }, + { + "epoch": 0.6010826835084101, + "grad_norm": 0.0001269798514468688, + "learning_rate": 4.437522377372001e-06, + "loss": 0.0, + "step": 9327 + }, + { + "epoch": 0.6011471289553393, + "grad_norm": 0.006069018507372652, + "learning_rate": 4.4368063014679555e-06, + "loss": 0.0, + "step": 9328 + }, + { + "epoch": 0.6012115744022685, + "grad_norm": 0.009952276861537372, + "learning_rate": 4.43609022556391e-06, + "loss": 0.0, + "step": 9329 + }, + { + "epoch": 0.6012760198491977, + "grad_norm": 0.0001698301808685413, + "learning_rate": 4.435374149659865e-06, + "loss": 0.0, + "step": 9330 + }, + { + "epoch": 0.6013404652961268, + "grad_norm": 0.0007671255886284143, + "learning_rate": 4.4346580737558184e-06, + "loss": 0.0, + "step": 9331 + }, + { + "epoch": 0.601404910743056, + "grad_norm": 0.019085248753486946, + "learning_rate": 4.433941997851773e-06, + "loss": 0.0002, + "step": 9332 + }, + { + "epoch": 0.6014693561899852, + "grad_norm": 0.0005585553011050799, + "learning_rate": 4.433225921947727e-06, + "loss": 0.0, + "step": 9333 + }, + { + "epoch": 0.6015338016369144, + "grad_norm": 0.010856243724535, + "learning_rate": 4.432509846043681e-06, + "loss": 0.0001, + "step": 9334 + }, + { + "epoch": 0.6015982470838436, + "grad_norm": 0.0002317457563166157, + "learning_rate": 4.431793770139635e-06, + "loss": 0.0, + "step": 9335 + }, + { + "epoch": 0.6016626925307726, + "grad_norm": 0.0065507252221229315, + "learning_rate": 4.431077694235589e-06, + "loss": 0.0001, + "step": 9336 + }, + { + "epoch": 0.6017271379777018, + "grad_norm": 0.00010838378621555254, + "learning_rate": 4.430361618331543e-06, + "loss": 0.0, + "step": 9337 + }, + { + "epoch": 0.601791583424631, + "grad_norm": 0.0017090922054476857, + "learning_rate": 4.429645542427498e-06, + "loss": 0.0, + "step": 9338 + }, + { + "epoch": 0.6018560288715602, + "grad_norm": 0.009171633804983492, + "learning_rate": 4.428929466523452e-06, + "loss": 0.0, + "step": 9339 + }, + { + "epoch": 0.6019204743184894, + "grad_norm": 0.0022138650096616835, + "learning_rate": 4.4282133906194054e-06, + "loss": 0.0, + "step": 9340 + }, + { + "epoch": 0.6019849197654186, + "grad_norm": 0.00016321892076394534, + "learning_rate": 4.42749731471536e-06, + "loss": 0.0, + "step": 9341 + }, + { + "epoch": 0.6020493652123478, + "grad_norm": 0.7931170395375743, + "learning_rate": 4.426781238811315e-06, + "loss": 0.0087, + "step": 9342 + }, + { + "epoch": 0.6021138106592769, + "grad_norm": 0.0027087375142905547, + "learning_rate": 4.426065162907268e-06, + "loss": 0.0, + "step": 9343 + }, + { + "epoch": 0.6021782561062061, + "grad_norm": 0.00030620151578539744, + "learning_rate": 4.425349087003223e-06, + "loss": 0.0, + "step": 9344 + }, + { + "epoch": 0.6022427015531353, + "grad_norm": 0.35033655461765545, + "learning_rate": 4.424633011099177e-06, + "loss": 0.007, + "step": 9345 + }, + { + "epoch": 0.6023071470000645, + "grad_norm": 0.0008401919777756227, + "learning_rate": 4.423916935195131e-06, + "loss": 0.0, + "step": 9346 + }, + { + "epoch": 0.6023715924469936, + "grad_norm": 0.00012728167519971065, + "learning_rate": 4.4232008592910855e-06, + "loss": 0.0, + "step": 9347 + }, + { + "epoch": 0.6024360378939227, + "grad_norm": 0.012302640572469041, + "learning_rate": 4.422484783387039e-06, + "loss": 0.0, + "step": 9348 + }, + { + "epoch": 0.6025004833408519, + "grad_norm": 0.08447763946476083, + "learning_rate": 4.421768707482993e-06, + "loss": 0.001, + "step": 9349 + }, + { + "epoch": 0.6025649287877811, + "grad_norm": 0.0011682809611789449, + "learning_rate": 4.4210526315789476e-06, + "loss": 0.0, + "step": 9350 + }, + { + "epoch": 0.6026293742347103, + "grad_norm": 0.0011277098483875475, + "learning_rate": 4.420336555674902e-06, + "loss": 0.0, + "step": 9351 + }, + { + "epoch": 0.6026938196816395, + "grad_norm": 0.011400610017446352, + "learning_rate": 4.419620479770856e-06, + "loss": 0.0, + "step": 9352 + }, + { + "epoch": 0.6027582651285687, + "grad_norm": 0.0541042169718465, + "learning_rate": 4.41890440386681e-06, + "loss": 0.0001, + "step": 9353 + }, + { + "epoch": 0.6028227105754979, + "grad_norm": 0.0004733979353402136, + "learning_rate": 4.418188327962765e-06, + "loss": 0.0, + "step": 9354 + }, + { + "epoch": 0.602887156022427, + "grad_norm": 0.00089674663328231, + "learning_rate": 4.417472252058719e-06, + "loss": 0.0, + "step": 9355 + }, + { + "epoch": 0.6029516014693562, + "grad_norm": 0.0038075149394037884, + "learning_rate": 4.4167561761546725e-06, + "loss": 0.0, + "step": 9356 + }, + { + "epoch": 0.6030160469162854, + "grad_norm": 0.0006955252692623161, + "learning_rate": 4.416040100250627e-06, + "loss": 0.0, + "step": 9357 + }, + { + "epoch": 0.6030804923632146, + "grad_norm": 0.00020624013289641623, + "learning_rate": 4.415324024346581e-06, + "loss": 0.0, + "step": 9358 + }, + { + "epoch": 0.6031449378101437, + "grad_norm": 0.048323787565792246, + "learning_rate": 4.414607948442535e-06, + "loss": 0.0002, + "step": 9359 + }, + { + "epoch": 0.6032093832570729, + "grad_norm": 0.0012364503824618905, + "learning_rate": 4.41389187253849e-06, + "loss": 0.0, + "step": 9360 + }, + { + "epoch": 0.603273828704002, + "grad_norm": 0.6295609472432298, + "learning_rate": 4.413175796634443e-06, + "loss": 0.001, + "step": 9361 + }, + { + "epoch": 0.6033382741509312, + "grad_norm": 7.311658614915109e-05, + "learning_rate": 4.4124597207303975e-06, + "loss": 0.0, + "step": 9362 + }, + { + "epoch": 0.6034027195978604, + "grad_norm": 0.00035291927788144885, + "learning_rate": 4.411743644826352e-06, + "loss": 0.0, + "step": 9363 + }, + { + "epoch": 0.6034671650447896, + "grad_norm": 0.0010846684443867236, + "learning_rate": 4.411027568922306e-06, + "loss": 0.0, + "step": 9364 + }, + { + "epoch": 0.6035316104917188, + "grad_norm": 0.00017466946654724934, + "learning_rate": 4.41031149301826e-06, + "loss": 0.0, + "step": 9365 + }, + { + "epoch": 0.603596055938648, + "grad_norm": 0.001453067649005503, + "learning_rate": 4.409595417114215e-06, + "loss": 0.0, + "step": 9366 + }, + { + "epoch": 0.6036605013855771, + "grad_norm": 0.00018182376128211283, + "learning_rate": 4.408879341210169e-06, + "loss": 0.0, + "step": 9367 + }, + { + "epoch": 0.6037249468325063, + "grad_norm": 0.011400766377271708, + "learning_rate": 4.408163265306123e-06, + "loss": 0.0, + "step": 9368 + }, + { + "epoch": 0.6037893922794355, + "grad_norm": 0.4758228127251253, + "learning_rate": 4.407447189402077e-06, + "loss": 0.0008, + "step": 9369 + }, + { + "epoch": 0.6038538377263646, + "grad_norm": 7.137859350053377e-05, + "learning_rate": 4.406731113498031e-06, + "loss": 0.0, + "step": 9370 + }, + { + "epoch": 0.6039182831732938, + "grad_norm": 0.002106704556151511, + "learning_rate": 4.406015037593985e-06, + "loss": 0.0, + "step": 9371 + }, + { + "epoch": 0.603982728620223, + "grad_norm": 0.01561994489465668, + "learning_rate": 4.40529896168994e-06, + "loss": 0.0, + "step": 9372 + }, + { + "epoch": 0.6040471740671521, + "grad_norm": 0.009708016318482603, + "learning_rate": 4.404582885785894e-06, + "loss": 0.0, + "step": 9373 + }, + { + "epoch": 0.6041116195140813, + "grad_norm": 0.001466915021623313, + "learning_rate": 4.403866809881847e-06, + "loss": 0.0, + "step": 9374 + }, + { + "epoch": 0.6041760649610105, + "grad_norm": 0.0007412942422267652, + "learning_rate": 4.403150733977802e-06, + "loss": 0.0, + "step": 9375 + }, + { + "epoch": 0.6042405104079397, + "grad_norm": 0.005260947110871978, + "learning_rate": 4.402434658073756e-06, + "loss": 0.0, + "step": 9376 + }, + { + "epoch": 0.6043049558548689, + "grad_norm": 5.846787475238024e-05, + "learning_rate": 4.40171858216971e-06, + "loss": 0.0, + "step": 9377 + }, + { + "epoch": 0.6043694013017981, + "grad_norm": 0.0029260585270888422, + "learning_rate": 4.4010025062656645e-06, + "loss": 0.0, + "step": 9378 + }, + { + "epoch": 0.6044338467487272, + "grad_norm": 0.0002419209646407248, + "learning_rate": 4.400286430361619e-06, + "loss": 0.0, + "step": 9379 + }, + { + "epoch": 0.6044982921956564, + "grad_norm": 0.0003612351482923959, + "learning_rate": 4.399570354457573e-06, + "loss": 0.0, + "step": 9380 + }, + { + "epoch": 0.6045627376425855, + "grad_norm": 9.690475230580101e-05, + "learning_rate": 4.3988542785535274e-06, + "loss": 0.0, + "step": 9381 + }, + { + "epoch": 0.6046271830895147, + "grad_norm": 0.11312480103886216, + "learning_rate": 4.398138202649481e-06, + "loss": 0.0004, + "step": 9382 + }, + { + "epoch": 0.6046916285364439, + "grad_norm": 0.0003024319588343002, + "learning_rate": 4.397422126745435e-06, + "loss": 0.0, + "step": 9383 + }, + { + "epoch": 0.604756073983373, + "grad_norm": 0.0012353313973243075, + "learning_rate": 4.3967060508413895e-06, + "loss": 0.0, + "step": 9384 + }, + { + "epoch": 0.6048205194303022, + "grad_norm": 0.012779457318109045, + "learning_rate": 4.395989974937344e-06, + "loss": 0.0002, + "step": 9385 + }, + { + "epoch": 0.6048849648772314, + "grad_norm": 0.0009381798352755683, + "learning_rate": 4.395273899033298e-06, + "loss": 0.0, + "step": 9386 + }, + { + "epoch": 0.6049494103241606, + "grad_norm": 0.00010893321625617296, + "learning_rate": 4.3945578231292515e-06, + "loss": 0.0, + "step": 9387 + }, + { + "epoch": 0.6050138557710898, + "grad_norm": 0.003997170920714122, + "learning_rate": 4.393841747225206e-06, + "loss": 0.0, + "step": 9388 + }, + { + "epoch": 0.605078301218019, + "grad_norm": 8.38234992955942e-05, + "learning_rate": 4.393125671321161e-06, + "loss": 0.0, + "step": 9389 + }, + { + "epoch": 0.6051427466649482, + "grad_norm": 0.0005577757944864189, + "learning_rate": 4.3924095954171144e-06, + "loss": 0.0, + "step": 9390 + }, + { + "epoch": 0.6052071921118773, + "grad_norm": 0.0007627417636095448, + "learning_rate": 4.391693519513069e-06, + "loss": 0.0, + "step": 9391 + }, + { + "epoch": 0.6052716375588064, + "grad_norm": 0.00022876330997271556, + "learning_rate": 4.390977443609023e-06, + "loss": 0.0, + "step": 9392 + }, + { + "epoch": 0.6053360830057356, + "grad_norm": 0.3514795311405156, + "learning_rate": 4.390261367704977e-06, + "loss": 0.0034, + "step": 9393 + }, + { + "epoch": 0.6054005284526648, + "grad_norm": 0.06014872393965382, + "learning_rate": 4.389545291800932e-06, + "loss": 0.0, + "step": 9394 + }, + { + "epoch": 0.605464973899594, + "grad_norm": 0.0012982233820627276, + "learning_rate": 4.388829215896885e-06, + "loss": 0.0, + "step": 9395 + }, + { + "epoch": 0.6055294193465232, + "grad_norm": 0.0006104295092222888, + "learning_rate": 4.388113139992839e-06, + "loss": 0.0, + "step": 9396 + }, + { + "epoch": 0.6055938647934523, + "grad_norm": 0.0015226621546786093, + "learning_rate": 4.387397064088794e-06, + "loss": 0.0, + "step": 9397 + }, + { + "epoch": 0.6056583102403815, + "grad_norm": 0.004909857157835074, + "learning_rate": 4.386680988184748e-06, + "loss": 0.0, + "step": 9398 + }, + { + "epoch": 0.6057227556873107, + "grad_norm": 0.0016943522584871827, + "learning_rate": 4.385964912280702e-06, + "loss": 0.0, + "step": 9399 + }, + { + "epoch": 0.6057872011342399, + "grad_norm": 0.145636188277844, + "learning_rate": 4.3852488363766566e-06, + "loss": 0.0014, + "step": 9400 + }, + { + "epoch": 0.6058516465811691, + "grad_norm": 0.0037991052504417187, + "learning_rate": 4.384532760472611e-06, + "loss": 0.0, + "step": 9401 + }, + { + "epoch": 0.6059160920280983, + "grad_norm": 0.0005582835381395184, + "learning_rate": 4.383816684568565e-06, + "loss": 0.0, + "step": 9402 + }, + { + "epoch": 0.6059805374750273, + "grad_norm": 0.14714135028611341, + "learning_rate": 4.383100608664519e-06, + "loss": 0.004, + "step": 9403 + }, + { + "epoch": 0.6060449829219565, + "grad_norm": 0.00026380090583077884, + "learning_rate": 4.382384532760473e-06, + "loss": 0.0, + "step": 9404 + }, + { + "epoch": 0.6061094283688857, + "grad_norm": 0.007371777117651726, + "learning_rate": 4.381668456856427e-06, + "loss": 0.0, + "step": 9405 + }, + { + "epoch": 0.6061738738158149, + "grad_norm": 0.0010503819422914768, + "learning_rate": 4.3809523809523815e-06, + "loss": 0.0, + "step": 9406 + }, + { + "epoch": 0.6062383192627441, + "grad_norm": 0.06254363771771412, + "learning_rate": 4.380236305048336e-06, + "loss": 0.0001, + "step": 9407 + }, + { + "epoch": 0.6063027647096733, + "grad_norm": 0.0305562041932073, + "learning_rate": 4.379520229144289e-06, + "loss": 0.0001, + "step": 9408 + }, + { + "epoch": 0.6063672101566024, + "grad_norm": 0.00900166756321749, + "learning_rate": 4.3788041532402436e-06, + "loss": 0.0, + "step": 9409 + }, + { + "epoch": 0.6064316556035316, + "grad_norm": 0.30886882883342337, + "learning_rate": 4.378088077336198e-06, + "loss": 0.001, + "step": 9410 + }, + { + "epoch": 0.6064961010504608, + "grad_norm": 0.5044701548161394, + "learning_rate": 4.377372001432152e-06, + "loss": 0.0022, + "step": 9411 + }, + { + "epoch": 0.60656054649739, + "grad_norm": 0.0034124956418660653, + "learning_rate": 4.3766559255281065e-06, + "loss": 0.0, + "step": 9412 + }, + { + "epoch": 0.6066249919443192, + "grad_norm": 0.0024336553075388167, + "learning_rate": 4.375939849624061e-06, + "loss": 0.0, + "step": 9413 + }, + { + "epoch": 0.6066894373912483, + "grad_norm": 0.0006861778038973162, + "learning_rate": 4.375223773720015e-06, + "loss": 0.0, + "step": 9414 + }, + { + "epoch": 0.6067538828381774, + "grad_norm": 0.0007463060227097989, + "learning_rate": 4.374507697815969e-06, + "loss": 0.0, + "step": 9415 + }, + { + "epoch": 0.6068183282851066, + "grad_norm": 0.008138348403934246, + "learning_rate": 4.373791621911923e-06, + "loss": 0.0, + "step": 9416 + }, + { + "epoch": 0.6068827737320358, + "grad_norm": 0.0015037820156294577, + "learning_rate": 4.373075546007877e-06, + "loss": 0.0, + "step": 9417 + }, + { + "epoch": 0.606947219178965, + "grad_norm": 7.03840792272183e-05, + "learning_rate": 4.372359470103831e-06, + "loss": 0.0, + "step": 9418 + }, + { + "epoch": 0.6070116646258942, + "grad_norm": 0.724938618593745, + "learning_rate": 4.371643394199786e-06, + "loss": 0.006, + "step": 9419 + }, + { + "epoch": 0.6070761100728234, + "grad_norm": 0.00116128567512363, + "learning_rate": 4.370927318295739e-06, + "loss": 0.0, + "step": 9420 + }, + { + "epoch": 0.6071405555197525, + "grad_norm": 0.0850829120345377, + "learning_rate": 4.3702112423916935e-06, + "loss": 0.0002, + "step": 9421 + }, + { + "epoch": 0.6072050009666817, + "grad_norm": 0.0010536833509948063, + "learning_rate": 4.369495166487648e-06, + "loss": 0.0, + "step": 9422 + }, + { + "epoch": 0.6072694464136109, + "grad_norm": 0.00385163720268815, + "learning_rate": 4.368779090583602e-06, + "loss": 0.0, + "step": 9423 + }, + { + "epoch": 0.6073338918605401, + "grad_norm": 0.11442118970579021, + "learning_rate": 4.368063014679556e-06, + "loss": 0.0001, + "step": 9424 + }, + { + "epoch": 0.6073983373074693, + "grad_norm": 0.0007916495679545597, + "learning_rate": 4.367346938775511e-06, + "loss": 0.0, + "step": 9425 + }, + { + "epoch": 0.6074627827543984, + "grad_norm": 0.0011092425353548807, + "learning_rate": 4.366630862871465e-06, + "loss": 0.0, + "step": 9426 + }, + { + "epoch": 0.6075272282013275, + "grad_norm": 9.429646011118711e-05, + "learning_rate": 4.365914786967419e-06, + "loss": 0.0, + "step": 9427 + }, + { + "epoch": 0.6075916736482567, + "grad_norm": 0.002318340581239123, + "learning_rate": 4.365198711063373e-06, + "loss": 0.0, + "step": 9428 + }, + { + "epoch": 0.6076561190951859, + "grad_norm": 0.0031013551819890905, + "learning_rate": 4.364482635159327e-06, + "loss": 0.0, + "step": 9429 + }, + { + "epoch": 0.6077205645421151, + "grad_norm": 0.0011860604719637813, + "learning_rate": 4.363766559255281e-06, + "loss": 0.0, + "step": 9430 + }, + { + "epoch": 0.6077850099890443, + "grad_norm": 0.0868075929508591, + "learning_rate": 4.363050483351236e-06, + "loss": 0.0017, + "step": 9431 + }, + { + "epoch": 0.6078494554359735, + "grad_norm": 0.0016228980177938656, + "learning_rate": 4.36233440744719e-06, + "loss": 0.0, + "step": 9432 + }, + { + "epoch": 0.6079139008829026, + "grad_norm": 0.002918866658195965, + "learning_rate": 4.361618331543143e-06, + "loss": 0.0, + "step": 9433 + }, + { + "epoch": 0.6079783463298318, + "grad_norm": 0.40538073119100626, + "learning_rate": 4.360902255639098e-06, + "loss": 0.0006, + "step": 9434 + }, + { + "epoch": 0.608042791776761, + "grad_norm": 0.006536840513148788, + "learning_rate": 4.360186179735053e-06, + "loss": 0.0, + "step": 9435 + }, + { + "epoch": 0.6081072372236902, + "grad_norm": 0.02315314722042714, + "learning_rate": 4.359470103831006e-06, + "loss": 0.0001, + "step": 9436 + }, + { + "epoch": 0.6081716826706193, + "grad_norm": 0.0005369910501333005, + "learning_rate": 4.3587540279269605e-06, + "loss": 0.0, + "step": 9437 + }, + { + "epoch": 0.6082361281175485, + "grad_norm": 0.014507810777531585, + "learning_rate": 4.358037952022915e-06, + "loss": 0.0001, + "step": 9438 + }, + { + "epoch": 0.6083005735644776, + "grad_norm": 0.0014420113067096622, + "learning_rate": 4.357321876118869e-06, + "loss": 0.0, + "step": 9439 + }, + { + "epoch": 0.6083650190114068, + "grad_norm": 0.0003051545929258759, + "learning_rate": 4.3566058002148234e-06, + "loss": 0.0, + "step": 9440 + }, + { + "epoch": 0.608429464458336, + "grad_norm": 0.10313037041927729, + "learning_rate": 4.355889724310777e-06, + "loss": 0.0001, + "step": 9441 + }, + { + "epoch": 0.6084939099052652, + "grad_norm": 0.00023097012118933073, + "learning_rate": 4.355173648406731e-06, + "loss": 0.0, + "step": 9442 + }, + { + "epoch": 0.6085583553521944, + "grad_norm": 0.004176636239720323, + "learning_rate": 4.3544575725026855e-06, + "loss": 0.0, + "step": 9443 + }, + { + "epoch": 0.6086228007991236, + "grad_norm": 0.031410970579226795, + "learning_rate": 4.35374149659864e-06, + "loss": 0.0001, + "step": 9444 + }, + { + "epoch": 0.6086872462460527, + "grad_norm": 0.011897834685541422, + "learning_rate": 4.353025420694594e-06, + "loss": 0.0, + "step": 9445 + }, + { + "epoch": 0.6087516916929819, + "grad_norm": 0.00046606655128807525, + "learning_rate": 4.3523093447905475e-06, + "loss": 0.0, + "step": 9446 + }, + { + "epoch": 0.6088161371399111, + "grad_norm": 0.024800149341012708, + "learning_rate": 4.351593268886503e-06, + "loss": 0.0015, + "step": 9447 + }, + { + "epoch": 0.6088805825868402, + "grad_norm": 0.0221088822473336, + "learning_rate": 4.350877192982457e-06, + "loss": 0.0, + "step": 9448 + }, + { + "epoch": 0.6089450280337694, + "grad_norm": 6.0055351174664875e-05, + "learning_rate": 4.3501611170784104e-06, + "loss": 0.0, + "step": 9449 + }, + { + "epoch": 0.6090094734806986, + "grad_norm": 0.05522244514116825, + "learning_rate": 4.349445041174365e-06, + "loss": 0.0, + "step": 9450 + }, + { + "epoch": 0.6090739189276277, + "grad_norm": 0.0020661568106822036, + "learning_rate": 4.348728965270319e-06, + "loss": 0.0, + "step": 9451 + }, + { + "epoch": 0.6091383643745569, + "grad_norm": 0.00014003216050082992, + "learning_rate": 4.348012889366273e-06, + "loss": 0.0, + "step": 9452 + }, + { + "epoch": 0.6092028098214861, + "grad_norm": 5.3691884195080886e-05, + "learning_rate": 4.347296813462228e-06, + "loss": 0.0, + "step": 9453 + }, + { + "epoch": 0.6092672552684153, + "grad_norm": 2.2470153142569833e-05, + "learning_rate": 4.346580737558181e-06, + "loss": 0.0, + "step": 9454 + }, + { + "epoch": 0.6093317007153445, + "grad_norm": 0.005263171562567179, + "learning_rate": 4.345864661654135e-06, + "loss": 0.0, + "step": 9455 + }, + { + "epoch": 0.6093961461622737, + "grad_norm": 0.028391588065871397, + "learning_rate": 4.34514858575009e-06, + "loss": 0.0, + "step": 9456 + }, + { + "epoch": 0.6094605916092029, + "grad_norm": 0.0032687334314929254, + "learning_rate": 4.344432509846044e-06, + "loss": 0.0, + "step": 9457 + }, + { + "epoch": 0.609525037056132, + "grad_norm": 0.003059619076663178, + "learning_rate": 4.343716433941998e-06, + "loss": 0.0, + "step": 9458 + }, + { + "epoch": 0.6095894825030611, + "grad_norm": 0.00011531079670687498, + "learning_rate": 4.3430003580379526e-06, + "loss": 0.0, + "step": 9459 + }, + { + "epoch": 0.6096539279499903, + "grad_norm": 0.7228147590624995, + "learning_rate": 4.342284282133907e-06, + "loss": 0.0041, + "step": 9460 + }, + { + "epoch": 0.6097183733969195, + "grad_norm": 0.002931431348183814, + "learning_rate": 4.341568206229861e-06, + "loss": 0.0, + "step": 9461 + }, + { + "epoch": 0.6097828188438487, + "grad_norm": 2.102471073261476e-05, + "learning_rate": 4.340852130325815e-06, + "loss": 0.0, + "step": 9462 + }, + { + "epoch": 0.6098472642907778, + "grad_norm": 0.00016897046354369543, + "learning_rate": 4.340136054421769e-06, + "loss": 0.0, + "step": 9463 + }, + { + "epoch": 0.609911709737707, + "grad_norm": 0.0014111495206503312, + "learning_rate": 4.339419978517723e-06, + "loss": 0.0, + "step": 9464 + }, + { + "epoch": 0.6099761551846362, + "grad_norm": 0.00035216732375646924, + "learning_rate": 4.3387039026136775e-06, + "loss": 0.0, + "step": 9465 + }, + { + "epoch": 0.6100406006315654, + "grad_norm": 0.0001382897188749735, + "learning_rate": 4.337987826709632e-06, + "loss": 0.0, + "step": 9466 + }, + { + "epoch": 0.6101050460784946, + "grad_norm": 0.009698598664787292, + "learning_rate": 4.337271750805585e-06, + "loss": 0.0, + "step": 9467 + }, + { + "epoch": 0.6101694915254238, + "grad_norm": 0.06168647575651353, + "learning_rate": 4.3365556749015396e-06, + "loss": 0.0, + "step": 9468 + }, + { + "epoch": 0.610233936972353, + "grad_norm": 0.004649457971022612, + "learning_rate": 4.335839598997494e-06, + "loss": 0.0, + "step": 9469 + }, + { + "epoch": 0.610298382419282, + "grad_norm": 0.006819369683324906, + "learning_rate": 4.335123523093448e-06, + "loss": 0.0, + "step": 9470 + }, + { + "epoch": 0.6103628278662112, + "grad_norm": 0.0038879490160562474, + "learning_rate": 4.3344074471894025e-06, + "loss": 0.0, + "step": 9471 + }, + { + "epoch": 0.6104272733131404, + "grad_norm": 0.0001479780592880403, + "learning_rate": 4.333691371285357e-06, + "loss": 0.0, + "step": 9472 + }, + { + "epoch": 0.6104917187600696, + "grad_norm": 0.0005454773247210619, + "learning_rate": 4.332975295381311e-06, + "loss": 0.0, + "step": 9473 + }, + { + "epoch": 0.6105561642069988, + "grad_norm": 0.041149449013538716, + "learning_rate": 4.332259219477265e-06, + "loss": 0.0001, + "step": 9474 + }, + { + "epoch": 0.610620609653928, + "grad_norm": 0.0016966869856339295, + "learning_rate": 4.331543143573219e-06, + "loss": 0.0, + "step": 9475 + }, + { + "epoch": 0.6106850551008571, + "grad_norm": 0.10398651031290745, + "learning_rate": 4.330827067669173e-06, + "loss": 0.0004, + "step": 9476 + }, + { + "epoch": 0.6107495005477863, + "grad_norm": 0.0028450556875423104, + "learning_rate": 4.330110991765127e-06, + "loss": 0.0, + "step": 9477 + }, + { + "epoch": 0.6108139459947155, + "grad_norm": 0.0033837477527026193, + "learning_rate": 4.329394915861082e-06, + "loss": 0.0, + "step": 9478 + }, + { + "epoch": 0.6108783914416447, + "grad_norm": 0.006324049606286477, + "learning_rate": 4.328678839957036e-06, + "loss": 0.0, + "step": 9479 + }, + { + "epoch": 0.6109428368885739, + "grad_norm": 0.07765264767556082, + "learning_rate": 4.3279627640529895e-06, + "loss": 0.0001, + "step": 9480 + }, + { + "epoch": 0.6110072823355029, + "grad_norm": 0.01570204834010833, + "learning_rate": 4.327246688148944e-06, + "loss": 0.0001, + "step": 9481 + }, + { + "epoch": 0.6110717277824321, + "grad_norm": 0.0018498621459318526, + "learning_rate": 4.326530612244899e-06, + "loss": 0.0, + "step": 9482 + }, + { + "epoch": 0.6111361732293613, + "grad_norm": 0.037915579330810155, + "learning_rate": 4.325814536340852e-06, + "loss": 0.0, + "step": 9483 + }, + { + "epoch": 0.6112006186762905, + "grad_norm": 0.18641078003385522, + "learning_rate": 4.325098460436807e-06, + "loss": 0.0009, + "step": 9484 + }, + { + "epoch": 0.6112650641232197, + "grad_norm": 0.004106447632898952, + "learning_rate": 4.324382384532761e-06, + "loss": 0.0015, + "step": 9485 + }, + { + "epoch": 0.6113295095701489, + "grad_norm": 0.00024362199685292236, + "learning_rate": 4.323666308628715e-06, + "loss": 0.0, + "step": 9486 + }, + { + "epoch": 0.611393955017078, + "grad_norm": 0.3062199665445999, + "learning_rate": 4.3229502327246696e-06, + "loss": 0.0016, + "step": 9487 + }, + { + "epoch": 0.6114584004640072, + "grad_norm": 0.010514023663567535, + "learning_rate": 4.322234156820623e-06, + "loss": 0.0001, + "step": 9488 + }, + { + "epoch": 0.6115228459109364, + "grad_norm": 0.0024672399660492102, + "learning_rate": 4.321518080916577e-06, + "loss": 0.0, + "step": 9489 + }, + { + "epoch": 0.6115872913578656, + "grad_norm": 0.0035961915359396693, + "learning_rate": 4.320802005012532e-06, + "loss": 0.0, + "step": 9490 + }, + { + "epoch": 0.6116517368047948, + "grad_norm": 0.0008687014780787742, + "learning_rate": 4.320085929108486e-06, + "loss": 0.0, + "step": 9491 + }, + { + "epoch": 0.6117161822517239, + "grad_norm": 0.00013052206685491074, + "learning_rate": 4.31936985320444e-06, + "loss": 0.0, + "step": 9492 + }, + { + "epoch": 0.611780627698653, + "grad_norm": 0.0008957571000338346, + "learning_rate": 4.318653777300394e-06, + "loss": 0.0, + "step": 9493 + }, + { + "epoch": 0.6118450731455822, + "grad_norm": 0.0026848465137663054, + "learning_rate": 4.317937701396349e-06, + "loss": 0.0, + "step": 9494 + }, + { + "epoch": 0.6119095185925114, + "grad_norm": 0.32030492866685867, + "learning_rate": 4.317221625492303e-06, + "loss": 0.0006, + "step": 9495 + }, + { + "epoch": 0.6119739640394406, + "grad_norm": 0.06446050110427554, + "learning_rate": 4.3165055495882565e-06, + "loss": 0.0003, + "step": 9496 + }, + { + "epoch": 0.6120384094863698, + "grad_norm": 0.0011579000983677743, + "learning_rate": 4.315789473684211e-06, + "loss": 0.0, + "step": 9497 + }, + { + "epoch": 0.612102854933299, + "grad_norm": 0.009338821450512087, + "learning_rate": 4.315073397780165e-06, + "loss": 0.0, + "step": 9498 + }, + { + "epoch": 0.6121673003802282, + "grad_norm": 0.0014420975540785869, + "learning_rate": 4.3143573218761194e-06, + "loss": 0.0, + "step": 9499 + }, + { + "epoch": 0.6122317458271573, + "grad_norm": 2.6957402428930943, + "learning_rate": 4.313641245972074e-06, + "loss": 0.0235, + "step": 9500 + }, + { + "epoch": 0.6122961912740865, + "grad_norm": 0.042654696893015, + "learning_rate": 4.312925170068027e-06, + "loss": 0.0004, + "step": 9501 + }, + { + "epoch": 0.6123606367210157, + "grad_norm": 0.3342348687492541, + "learning_rate": 4.3122090941639815e-06, + "loss": 0.0006, + "step": 9502 + }, + { + "epoch": 0.6124250821679449, + "grad_norm": 0.01680525990444114, + "learning_rate": 4.311493018259936e-06, + "loss": 0.0, + "step": 9503 + }, + { + "epoch": 0.612489527614874, + "grad_norm": 0.0285541931701851, + "learning_rate": 4.31077694235589e-06, + "loss": 0.0001, + "step": 9504 + }, + { + "epoch": 0.6125539730618031, + "grad_norm": 0.0024596945855401516, + "learning_rate": 4.310060866451844e-06, + "loss": 0.0, + "step": 9505 + }, + { + "epoch": 0.6126184185087323, + "grad_norm": 0.16790719345938113, + "learning_rate": 4.309344790547799e-06, + "loss": 0.0014, + "step": 9506 + }, + { + "epoch": 0.6126828639556615, + "grad_norm": 0.0001499272645216901, + "learning_rate": 4.308628714643753e-06, + "loss": 0.0, + "step": 9507 + }, + { + "epoch": 0.6127473094025907, + "grad_norm": 0.000488662623177265, + "learning_rate": 4.307912638739707e-06, + "loss": 0.0, + "step": 9508 + }, + { + "epoch": 0.6128117548495199, + "grad_norm": 0.00029549820058340356, + "learning_rate": 4.307196562835661e-06, + "loss": 0.0, + "step": 9509 + }, + { + "epoch": 0.6128762002964491, + "grad_norm": 0.16494169692561012, + "learning_rate": 4.306480486931615e-06, + "loss": 0.0021, + "step": 9510 + }, + { + "epoch": 0.6129406457433783, + "grad_norm": 0.0022252980179280958, + "learning_rate": 4.305764411027569e-06, + "loss": 0.0, + "step": 9511 + }, + { + "epoch": 0.6130050911903074, + "grad_norm": 0.0031953051167575635, + "learning_rate": 4.305048335123524e-06, + "loss": 0.0, + "step": 9512 + }, + { + "epoch": 0.6130695366372366, + "grad_norm": 0.0798065214137776, + "learning_rate": 4.304332259219477e-06, + "loss": 0.0002, + "step": 9513 + }, + { + "epoch": 0.6131339820841658, + "grad_norm": 0.0032883889415415957, + "learning_rate": 4.303616183315431e-06, + "loss": 0.0, + "step": 9514 + }, + { + "epoch": 0.6131984275310949, + "grad_norm": 0.026226701485447677, + "learning_rate": 4.302900107411386e-06, + "loss": 0.0001, + "step": 9515 + }, + { + "epoch": 0.6132628729780241, + "grad_norm": 0.027247142216119388, + "learning_rate": 4.30218403150734e-06, + "loss": 0.0002, + "step": 9516 + }, + { + "epoch": 0.6133273184249532, + "grad_norm": 0.03939227437205754, + "learning_rate": 4.301467955603294e-06, + "loss": 0.0, + "step": 9517 + }, + { + "epoch": 0.6133917638718824, + "grad_norm": 0.00019537654302919405, + "learning_rate": 4.3007518796992486e-06, + "loss": 0.0, + "step": 9518 + }, + { + "epoch": 0.6134562093188116, + "grad_norm": 0.02171230850239767, + "learning_rate": 4.300035803795203e-06, + "loss": 0.0001, + "step": 9519 + }, + { + "epoch": 0.6135206547657408, + "grad_norm": 0.205370531067778, + "learning_rate": 4.299319727891157e-06, + "loss": 0.0008, + "step": 9520 + }, + { + "epoch": 0.61358510021267, + "grad_norm": 0.0006344435985339772, + "learning_rate": 4.2986036519871115e-06, + "loss": 0.0, + "step": 9521 + }, + { + "epoch": 0.6136495456595992, + "grad_norm": 0.031260613412393105, + "learning_rate": 4.297887576083065e-06, + "loss": 0.0001, + "step": 9522 + }, + { + "epoch": 0.6137139911065284, + "grad_norm": 0.048878088660678165, + "learning_rate": 4.297171500179019e-06, + "loss": 0.0001, + "step": 9523 + }, + { + "epoch": 0.6137784365534575, + "grad_norm": 0.5282191040348414, + "learning_rate": 4.2964554242749735e-06, + "loss": 0.0025, + "step": 9524 + }, + { + "epoch": 0.6138428820003867, + "grad_norm": 0.12579375317430874, + "learning_rate": 4.295739348370928e-06, + "loss": 0.0005, + "step": 9525 + }, + { + "epoch": 0.6139073274473158, + "grad_norm": 0.04641811663453107, + "learning_rate": 4.295023272466881e-06, + "loss": 0.0001, + "step": 9526 + }, + { + "epoch": 0.613971772894245, + "grad_norm": 0.0002580502539569169, + "learning_rate": 4.2943071965628356e-06, + "loss": 0.0, + "step": 9527 + }, + { + "epoch": 0.6140362183411742, + "grad_norm": 0.0033282476706218336, + "learning_rate": 4.29359112065879e-06, + "loss": 0.0, + "step": 9528 + }, + { + "epoch": 0.6141006637881034, + "grad_norm": 0.00047556915669403905, + "learning_rate": 4.292875044754745e-06, + "loss": 0.0, + "step": 9529 + }, + { + "epoch": 0.6141651092350325, + "grad_norm": 0.0013045353118663315, + "learning_rate": 4.2921589688506985e-06, + "loss": 0.0, + "step": 9530 + }, + { + "epoch": 0.6142295546819617, + "grad_norm": 1.2612551377705028, + "learning_rate": 4.291442892946653e-06, + "loss": 0.0021, + "step": 9531 + }, + { + "epoch": 0.6142940001288909, + "grad_norm": 0.00489277865661325, + "learning_rate": 4.290726817042607e-06, + "loss": 0.0, + "step": 9532 + }, + { + "epoch": 0.6143584455758201, + "grad_norm": 0.0012519150045497776, + "learning_rate": 4.290010741138561e-06, + "loss": 0.0, + "step": 9533 + }, + { + "epoch": 0.6144228910227493, + "grad_norm": 0.00015498001967760488, + "learning_rate": 4.289294665234515e-06, + "loss": 0.0, + "step": 9534 + }, + { + "epoch": 0.6144873364696785, + "grad_norm": 0.28547099784379487, + "learning_rate": 4.288578589330469e-06, + "loss": 0.0015, + "step": 9535 + }, + { + "epoch": 0.6145517819166076, + "grad_norm": 0.004607738427160797, + "learning_rate": 4.287862513426423e-06, + "loss": 0.0, + "step": 9536 + }, + { + "epoch": 0.6146162273635367, + "grad_norm": 0.03986350100567441, + "learning_rate": 4.287146437522378e-06, + "loss": 0.0, + "step": 9537 + }, + { + "epoch": 0.6146806728104659, + "grad_norm": 0.030827555896124095, + "learning_rate": 4.286430361618332e-06, + "loss": 0.0, + "step": 9538 + }, + { + "epoch": 0.6147451182573951, + "grad_norm": 0.00016944203631999406, + "learning_rate": 4.2857142857142855e-06, + "loss": 0.0, + "step": 9539 + }, + { + "epoch": 0.6148095637043243, + "grad_norm": 0.3045021862960025, + "learning_rate": 4.284998209810241e-06, + "loss": 0.0008, + "step": 9540 + }, + { + "epoch": 0.6148740091512535, + "grad_norm": 0.14254954456604813, + "learning_rate": 4.284282133906195e-06, + "loss": 0.0017, + "step": 9541 + }, + { + "epoch": 0.6149384545981826, + "grad_norm": 0.015514097077617816, + "learning_rate": 4.283566058002148e-06, + "loss": 0.0001, + "step": 9542 + }, + { + "epoch": 0.6150029000451118, + "grad_norm": 0.00337946789828538, + "learning_rate": 4.282849982098103e-06, + "loss": 0.0, + "step": 9543 + }, + { + "epoch": 0.615067345492041, + "grad_norm": 0.10997992786917365, + "learning_rate": 4.282133906194057e-06, + "loss": 0.0001, + "step": 9544 + }, + { + "epoch": 0.6151317909389702, + "grad_norm": 0.08706040598996456, + "learning_rate": 4.281417830290011e-06, + "loss": 0.0002, + "step": 9545 + }, + { + "epoch": 0.6151962363858994, + "grad_norm": 0.0017644996784956883, + "learning_rate": 4.2807017543859656e-06, + "loss": 0.0, + "step": 9546 + }, + { + "epoch": 0.6152606818328286, + "grad_norm": 0.06708993009425664, + "learning_rate": 4.279985678481919e-06, + "loss": 0.0001, + "step": 9547 + }, + { + "epoch": 0.6153251272797576, + "grad_norm": 0.24657193153772985, + "learning_rate": 4.279269602577873e-06, + "loss": 0.0002, + "step": 9548 + }, + { + "epoch": 0.6153895727266868, + "grad_norm": 0.0006839220314001171, + "learning_rate": 4.278553526673828e-06, + "loss": 0.0, + "step": 9549 + }, + { + "epoch": 0.615454018173616, + "grad_norm": 0.028784731907356236, + "learning_rate": 4.277837450769782e-06, + "loss": 0.0002, + "step": 9550 + }, + { + "epoch": 0.6155184636205452, + "grad_norm": 0.0002460372595801576, + "learning_rate": 4.277121374865736e-06, + "loss": 0.0, + "step": 9551 + }, + { + "epoch": 0.6155829090674744, + "grad_norm": 0.08505335527793859, + "learning_rate": 4.2764052989616905e-06, + "loss": 0.0001, + "step": 9552 + }, + { + "epoch": 0.6156473545144036, + "grad_norm": 0.0007359800036842759, + "learning_rate": 4.275689223057645e-06, + "loss": 0.0, + "step": 9553 + }, + { + "epoch": 0.6157117999613327, + "grad_norm": 0.0001408191174706121, + "learning_rate": 4.274973147153599e-06, + "loss": 0.0, + "step": 9554 + }, + { + "epoch": 0.6157762454082619, + "grad_norm": 0.0023287981858707055, + "learning_rate": 4.2742570712495525e-06, + "loss": 0.0, + "step": 9555 + }, + { + "epoch": 0.6158406908551911, + "grad_norm": 0.0019438432122889477, + "learning_rate": 4.273540995345507e-06, + "loss": 0.0, + "step": 9556 + }, + { + "epoch": 0.6159051363021203, + "grad_norm": 1.152815331124407, + "learning_rate": 4.272824919441461e-06, + "loss": 0.0041, + "step": 9557 + }, + { + "epoch": 0.6159695817490495, + "grad_norm": 0.009054800502372921, + "learning_rate": 4.2721088435374154e-06, + "loss": 0.0001, + "step": 9558 + }, + { + "epoch": 0.6160340271959786, + "grad_norm": 0.41673807088647546, + "learning_rate": 4.27139276763337e-06, + "loss": 0.0025, + "step": 9559 + }, + { + "epoch": 0.6160984726429077, + "grad_norm": 0.001291795997738671, + "learning_rate": 4.270676691729323e-06, + "loss": 0.0, + "step": 9560 + }, + { + "epoch": 0.6161629180898369, + "grad_norm": 0.00037450827288190545, + "learning_rate": 4.2699606158252775e-06, + "loss": 0.0, + "step": 9561 + }, + { + "epoch": 0.6162273635367661, + "grad_norm": 0.0005643279883164137, + "learning_rate": 4.269244539921232e-06, + "loss": 0.0, + "step": 9562 + }, + { + "epoch": 0.6162918089836953, + "grad_norm": 0.00013711708141211, + "learning_rate": 4.268528464017186e-06, + "loss": 0.0, + "step": 9563 + }, + { + "epoch": 0.6163562544306245, + "grad_norm": 0.00627109371547579, + "learning_rate": 4.26781238811314e-06, + "loss": 0.0015, + "step": 9564 + }, + { + "epoch": 0.6164206998775537, + "grad_norm": 0.025730302441083985, + "learning_rate": 4.267096312209095e-06, + "loss": 0.0001, + "step": 9565 + }, + { + "epoch": 0.6164851453244828, + "grad_norm": 0.0020360262097681384, + "learning_rate": 4.266380236305049e-06, + "loss": 0.0, + "step": 9566 + }, + { + "epoch": 0.616549590771412, + "grad_norm": 0.00888623677442725, + "learning_rate": 4.265664160401003e-06, + "loss": 0.0, + "step": 9567 + }, + { + "epoch": 0.6166140362183412, + "grad_norm": 0.00021110422840300865, + "learning_rate": 4.264948084496957e-06, + "loss": 0.0, + "step": 9568 + }, + { + "epoch": 0.6166784816652704, + "grad_norm": 0.5416255501278395, + "learning_rate": 4.264232008592911e-06, + "loss": 0.0025, + "step": 9569 + }, + { + "epoch": 0.6167429271121995, + "grad_norm": 0.19467164760361685, + "learning_rate": 4.263515932688865e-06, + "loss": 0.0003, + "step": 9570 + }, + { + "epoch": 0.6168073725591287, + "grad_norm": 0.13399001852000092, + "learning_rate": 4.26279985678482e-06, + "loss": 0.0003, + "step": 9571 + }, + { + "epoch": 0.6168718180060578, + "grad_norm": 0.00011533534958016236, + "learning_rate": 4.262083780880774e-06, + "loss": 0.0, + "step": 9572 + }, + { + "epoch": 0.616936263452987, + "grad_norm": 0.08406597431618373, + "learning_rate": 4.261367704976727e-06, + "loss": 0.0001, + "step": 9573 + }, + { + "epoch": 0.6170007088999162, + "grad_norm": 0.006471258951393888, + "learning_rate": 4.260651629072682e-06, + "loss": 0.0, + "step": 9574 + }, + { + "epoch": 0.6170651543468454, + "grad_norm": 0.11885665445735855, + "learning_rate": 4.259935553168637e-06, + "loss": 0.0016, + "step": 9575 + }, + { + "epoch": 0.6171295997937746, + "grad_norm": 0.005192039990253033, + "learning_rate": 4.25921947726459e-06, + "loss": 0.0001, + "step": 9576 + }, + { + "epoch": 0.6171940452407038, + "grad_norm": 0.10412234963006879, + "learning_rate": 4.2585034013605446e-06, + "loss": 0.0001, + "step": 9577 + }, + { + "epoch": 0.6172584906876329, + "grad_norm": 0.03084306802890673, + "learning_rate": 4.257787325456499e-06, + "loss": 0.0, + "step": 9578 + }, + { + "epoch": 0.6173229361345621, + "grad_norm": 0.1886445616070666, + "learning_rate": 4.257071249552453e-06, + "loss": 0.0003, + "step": 9579 + }, + { + "epoch": 0.6173873815814913, + "grad_norm": 0.007571403887036818, + "learning_rate": 4.2563551736484075e-06, + "loss": 0.0, + "step": 9580 + }, + { + "epoch": 0.6174518270284205, + "grad_norm": 0.11695276831761708, + "learning_rate": 4.255639097744361e-06, + "loss": 0.0015, + "step": 9581 + }, + { + "epoch": 0.6175162724753496, + "grad_norm": 0.03454928162524213, + "learning_rate": 4.254923021840315e-06, + "loss": 0.0001, + "step": 9582 + }, + { + "epoch": 0.6175807179222788, + "grad_norm": 0.08363969623079226, + "learning_rate": 4.2542069459362695e-06, + "loss": 0.0001, + "step": 9583 + }, + { + "epoch": 0.6176451633692079, + "grad_norm": 0.0032724227689246324, + "learning_rate": 4.253490870032224e-06, + "loss": 0.0, + "step": 9584 + }, + { + "epoch": 0.6177096088161371, + "grad_norm": 0.16838093020000738, + "learning_rate": 4.252774794128178e-06, + "loss": 0.002, + "step": 9585 + }, + { + "epoch": 0.6177740542630663, + "grad_norm": 0.015356828445240035, + "learning_rate": 4.2520587182241316e-06, + "loss": 0.0001, + "step": 9586 + }, + { + "epoch": 0.6178384997099955, + "grad_norm": 0.24950890436843448, + "learning_rate": 4.251342642320087e-06, + "loss": 0.0009, + "step": 9587 + }, + { + "epoch": 0.6179029451569247, + "grad_norm": 0.0004905781353135952, + "learning_rate": 4.250626566416041e-06, + "loss": 0.0, + "step": 9588 + }, + { + "epoch": 0.6179673906038539, + "grad_norm": 0.0017723076927522122, + "learning_rate": 4.2499104905119945e-06, + "loss": 0.0, + "step": 9589 + }, + { + "epoch": 0.618031836050783, + "grad_norm": 0.00027595294939807507, + "learning_rate": 4.249194414607949e-06, + "loss": 0.0, + "step": 9590 + }, + { + "epoch": 0.6180962814977122, + "grad_norm": 0.44881769604763494, + "learning_rate": 4.248478338703903e-06, + "loss": 0.0044, + "step": 9591 + }, + { + "epoch": 0.6181607269446414, + "grad_norm": 0.0011654496177690313, + "learning_rate": 4.247762262799857e-06, + "loss": 0.0, + "step": 9592 + }, + { + "epoch": 0.6182251723915705, + "grad_norm": 0.0002969354686474064, + "learning_rate": 4.247046186895812e-06, + "loss": 0.0, + "step": 9593 + }, + { + "epoch": 0.6182896178384997, + "grad_norm": 0.06758952912812587, + "learning_rate": 4.246330110991765e-06, + "loss": 0.0003, + "step": 9594 + }, + { + "epoch": 0.6183540632854289, + "grad_norm": 0.0023606559277491745, + "learning_rate": 4.245614035087719e-06, + "loss": 0.0, + "step": 9595 + }, + { + "epoch": 0.618418508732358, + "grad_norm": 0.0005240889689227749, + "learning_rate": 4.244897959183674e-06, + "loss": 0.0, + "step": 9596 + }, + { + "epoch": 0.6184829541792872, + "grad_norm": 0.0028731344088614675, + "learning_rate": 4.244181883279628e-06, + "loss": 0.0, + "step": 9597 + }, + { + "epoch": 0.6185473996262164, + "grad_norm": 0.003774219453522588, + "learning_rate": 4.243465807375582e-06, + "loss": 0.0, + "step": 9598 + }, + { + "epoch": 0.6186118450731456, + "grad_norm": 0.04812190750716624, + "learning_rate": 4.242749731471537e-06, + "loss": 0.0002, + "step": 9599 + }, + { + "epoch": 0.6186762905200748, + "grad_norm": 0.0009303746721064348, + "learning_rate": 4.242033655567491e-06, + "loss": 0.0, + "step": 9600 + }, + { + "epoch": 0.618740735967004, + "grad_norm": 0.000261596983827811, + "learning_rate": 4.241317579663445e-06, + "loss": 0.0, + "step": 9601 + }, + { + "epoch": 0.6188051814139331, + "grad_norm": 4.060820420659366e-05, + "learning_rate": 4.240601503759399e-06, + "loss": 0.0, + "step": 9602 + }, + { + "epoch": 0.6188696268608623, + "grad_norm": 0.05925821210830824, + "learning_rate": 4.239885427855353e-06, + "loss": 0.0016, + "step": 9603 + }, + { + "epoch": 0.6189340723077914, + "grad_norm": 0.0003562257420606521, + "learning_rate": 4.239169351951307e-06, + "loss": 0.0, + "step": 9604 + }, + { + "epoch": 0.6189985177547206, + "grad_norm": 0.031397710705029135, + "learning_rate": 4.2384532760472616e-06, + "loss": 0.0, + "step": 9605 + }, + { + "epoch": 0.6190629632016498, + "grad_norm": 2.4042094265960856e-05, + "learning_rate": 4.237737200143216e-06, + "loss": 0.0, + "step": 9606 + }, + { + "epoch": 0.619127408648579, + "grad_norm": 0.021245099475145683, + "learning_rate": 4.237021124239169e-06, + "loss": 0.0002, + "step": 9607 + }, + { + "epoch": 0.6191918540955081, + "grad_norm": 0.0018329740066415307, + "learning_rate": 4.236305048335124e-06, + "loss": 0.0, + "step": 9608 + }, + { + "epoch": 0.6192562995424373, + "grad_norm": 0.003174532843030457, + "learning_rate": 4.235588972431078e-06, + "loss": 0.0, + "step": 9609 + }, + { + "epoch": 0.6193207449893665, + "grad_norm": 0.12445039922168191, + "learning_rate": 4.234872896527032e-06, + "loss": 0.0004, + "step": 9610 + }, + { + "epoch": 0.6193851904362957, + "grad_norm": 0.6708809129849929, + "learning_rate": 4.2341568206229865e-06, + "loss": 0.0025, + "step": 9611 + }, + { + "epoch": 0.6194496358832249, + "grad_norm": 0.06722900209427916, + "learning_rate": 4.233440744718941e-06, + "loss": 0.0002, + "step": 9612 + }, + { + "epoch": 0.6195140813301541, + "grad_norm": 0.02487789501266384, + "learning_rate": 4.232724668814895e-06, + "loss": 0.0001, + "step": 9613 + }, + { + "epoch": 0.6195785267770832, + "grad_norm": 0.0070891124843938435, + "learning_rate": 4.232008592910849e-06, + "loss": 0.0, + "step": 9614 + }, + { + "epoch": 0.6196429722240123, + "grad_norm": 0.03626208442549633, + "learning_rate": 4.231292517006803e-06, + "loss": 0.0001, + "step": 9615 + }, + { + "epoch": 0.6197074176709415, + "grad_norm": 0.13685853487861604, + "learning_rate": 4.230576441102757e-06, + "loss": 0.0004, + "step": 9616 + }, + { + "epoch": 0.6197718631178707, + "grad_norm": 0.02757626052057049, + "learning_rate": 4.2298603651987114e-06, + "loss": 0.0001, + "step": 9617 + }, + { + "epoch": 0.6198363085647999, + "grad_norm": 0.0014798321427789366, + "learning_rate": 4.229144289294666e-06, + "loss": 0.0, + "step": 9618 + }, + { + "epoch": 0.6199007540117291, + "grad_norm": 0.01049815395489131, + "learning_rate": 4.228428213390619e-06, + "loss": 0.0001, + "step": 9619 + }, + { + "epoch": 0.6199651994586582, + "grad_norm": 0.6844524113320779, + "learning_rate": 4.2277121374865735e-06, + "loss": 0.0044, + "step": 9620 + }, + { + "epoch": 0.6200296449055874, + "grad_norm": 0.0002147181528283652, + "learning_rate": 4.226996061582528e-06, + "loss": 0.0, + "step": 9621 + }, + { + "epoch": 0.6200940903525166, + "grad_norm": 0.2500937256360808, + "learning_rate": 4.226279985678483e-06, + "loss": 0.001, + "step": 9622 + }, + { + "epoch": 0.6201585357994458, + "grad_norm": 0.04441692228177357, + "learning_rate": 4.225563909774436e-06, + "loss": 0.0001, + "step": 9623 + }, + { + "epoch": 0.620222981246375, + "grad_norm": 0.0028260496702314057, + "learning_rate": 4.224847833870391e-06, + "loss": 0.0, + "step": 9624 + }, + { + "epoch": 0.6202874266933042, + "grad_norm": 0.027488300872075832, + "learning_rate": 4.224131757966345e-06, + "loss": 0.0, + "step": 9625 + }, + { + "epoch": 0.6203518721402332, + "grad_norm": 7.346583784405198e-05, + "learning_rate": 4.223415682062299e-06, + "loss": 0.0, + "step": 9626 + }, + { + "epoch": 0.6204163175871624, + "grad_norm": 0.06306652635555225, + "learning_rate": 4.222699606158253e-06, + "loss": 0.0002, + "step": 9627 + }, + { + "epoch": 0.6204807630340916, + "grad_norm": 0.00592616535198368, + "learning_rate": 4.221983530254207e-06, + "loss": 0.0001, + "step": 9628 + }, + { + "epoch": 0.6205452084810208, + "grad_norm": 0.0001044583771192941, + "learning_rate": 4.221267454350161e-06, + "loss": 0.0, + "step": 9629 + }, + { + "epoch": 0.62060965392795, + "grad_norm": 5.9252248572389913e-05, + "learning_rate": 4.220551378446116e-06, + "loss": 0.0, + "step": 9630 + }, + { + "epoch": 0.6206740993748792, + "grad_norm": 0.04576163754311494, + "learning_rate": 4.21983530254207e-06, + "loss": 0.0017, + "step": 9631 + }, + { + "epoch": 0.6207385448218083, + "grad_norm": 0.011120442526384637, + "learning_rate": 4.219119226638023e-06, + "loss": 0.0001, + "step": 9632 + }, + { + "epoch": 0.6208029902687375, + "grad_norm": 0.0012373695941060873, + "learning_rate": 4.218403150733978e-06, + "loss": 0.0, + "step": 9633 + }, + { + "epoch": 0.6208674357156667, + "grad_norm": 0.0033271694642447464, + "learning_rate": 4.217687074829933e-06, + "loss": 0.0, + "step": 9634 + }, + { + "epoch": 0.6209318811625959, + "grad_norm": 0.0023266009334657824, + "learning_rate": 4.216970998925886e-06, + "loss": 0.0, + "step": 9635 + }, + { + "epoch": 0.6209963266095251, + "grad_norm": 0.0006776249456023286, + "learning_rate": 4.2162549230218406e-06, + "loss": 0.0, + "step": 9636 + }, + { + "epoch": 0.6210607720564542, + "grad_norm": 0.01654521411218407, + "learning_rate": 4.215538847117795e-06, + "loss": 0.0, + "step": 9637 + }, + { + "epoch": 0.6211252175033833, + "grad_norm": 4.852683199395689e-05, + "learning_rate": 4.214822771213749e-06, + "loss": 0.0, + "step": 9638 + }, + { + "epoch": 0.6211896629503125, + "grad_norm": 0.0021993879120563247, + "learning_rate": 4.2141066953097035e-06, + "loss": 0.0, + "step": 9639 + }, + { + "epoch": 0.6212541083972417, + "grad_norm": 0.0007637886447388776, + "learning_rate": 4.213390619405657e-06, + "loss": 0.0, + "step": 9640 + }, + { + "epoch": 0.6213185538441709, + "grad_norm": 0.007377042865803844, + "learning_rate": 4.212674543501611e-06, + "loss": 0.0, + "step": 9641 + }, + { + "epoch": 0.6213829992911001, + "grad_norm": 0.0005533541648899976, + "learning_rate": 4.2119584675975655e-06, + "loss": 0.0, + "step": 9642 + }, + { + "epoch": 0.6214474447380293, + "grad_norm": 0.42125994387586024, + "learning_rate": 4.21124239169352e-06, + "loss": 0.0025, + "step": 9643 + }, + { + "epoch": 0.6215118901849584, + "grad_norm": 0.03175686695537614, + "learning_rate": 4.210526315789474e-06, + "loss": 0.0001, + "step": 9644 + }, + { + "epoch": 0.6215763356318876, + "grad_norm": 0.299370056123468, + "learning_rate": 4.209810239885428e-06, + "loss": 0.0024, + "step": 9645 + }, + { + "epoch": 0.6216407810788168, + "grad_norm": 0.26325604179437306, + "learning_rate": 4.209094163981383e-06, + "loss": 0.0007, + "step": 9646 + }, + { + "epoch": 0.621705226525746, + "grad_norm": 0.01473776976325703, + "learning_rate": 4.208378088077337e-06, + "loss": 0.0, + "step": 9647 + }, + { + "epoch": 0.6217696719726751, + "grad_norm": 0.0005109300137223879, + "learning_rate": 4.2076620121732905e-06, + "loss": 0.0, + "step": 9648 + }, + { + "epoch": 0.6218341174196043, + "grad_norm": 0.0002493537642654104, + "learning_rate": 4.206945936269245e-06, + "loss": 0.0, + "step": 9649 + }, + { + "epoch": 0.6218985628665334, + "grad_norm": 0.07706493977662457, + "learning_rate": 4.206229860365199e-06, + "loss": 0.0001, + "step": 9650 + }, + { + "epoch": 0.6219630083134626, + "grad_norm": 0.0003456695922869574, + "learning_rate": 4.205513784461153e-06, + "loss": 0.0, + "step": 9651 + }, + { + "epoch": 0.6220274537603918, + "grad_norm": 0.0018471179024505506, + "learning_rate": 4.204797708557108e-06, + "loss": 0.0, + "step": 9652 + }, + { + "epoch": 0.622091899207321, + "grad_norm": 0.4382099794044541, + "learning_rate": 4.204081632653061e-06, + "loss": 0.0014, + "step": 9653 + }, + { + "epoch": 0.6221563446542502, + "grad_norm": 0.00014206190967321758, + "learning_rate": 4.203365556749015e-06, + "loss": 0.0, + "step": 9654 + }, + { + "epoch": 0.6222207901011794, + "grad_norm": 0.023678985918828478, + "learning_rate": 4.20264948084497e-06, + "loss": 0.0, + "step": 9655 + }, + { + "epoch": 0.6222852355481086, + "grad_norm": 0.0015190579278471688, + "learning_rate": 4.201933404940924e-06, + "loss": 0.0, + "step": 9656 + }, + { + "epoch": 0.6223496809950377, + "grad_norm": 2.9540092315116344, + "learning_rate": 4.201217329036878e-06, + "loss": 0.0244, + "step": 9657 + }, + { + "epoch": 0.6224141264419669, + "grad_norm": 0.00012206465167372003, + "learning_rate": 4.200501253132833e-06, + "loss": 0.0, + "step": 9658 + }, + { + "epoch": 0.6224785718888961, + "grad_norm": 0.0002953703826734249, + "learning_rate": 4.199785177228787e-06, + "loss": 0.0, + "step": 9659 + }, + { + "epoch": 0.6225430173358252, + "grad_norm": 0.0006869575541465469, + "learning_rate": 4.199069101324741e-06, + "loss": 0.0, + "step": 9660 + }, + { + "epoch": 0.6226074627827544, + "grad_norm": 0.005004019924537658, + "learning_rate": 4.198353025420695e-06, + "loss": 0.0, + "step": 9661 + }, + { + "epoch": 0.6226719082296835, + "grad_norm": 0.0036624435182955875, + "learning_rate": 4.197636949516649e-06, + "loss": 0.0, + "step": 9662 + }, + { + "epoch": 0.6227363536766127, + "grad_norm": 0.00772927658587039, + "learning_rate": 4.196920873612603e-06, + "loss": 0.0001, + "step": 9663 + }, + { + "epoch": 0.6228007991235419, + "grad_norm": 0.007641090487948875, + "learning_rate": 4.1962047977085576e-06, + "loss": 0.0, + "step": 9664 + }, + { + "epoch": 0.6228652445704711, + "grad_norm": 0.12874149139306848, + "learning_rate": 4.195488721804512e-06, + "loss": 0.0005, + "step": 9665 + }, + { + "epoch": 0.6229296900174003, + "grad_norm": 0.043186854746607224, + "learning_rate": 4.194772645900465e-06, + "loss": 0.0001, + "step": 9666 + }, + { + "epoch": 0.6229941354643295, + "grad_norm": 0.0034495539036033776, + "learning_rate": 4.19405656999642e-06, + "loss": 0.0, + "step": 9667 + }, + { + "epoch": 0.6230585809112587, + "grad_norm": 0.0015735462216765262, + "learning_rate": 4.193340494092374e-06, + "loss": 0.0, + "step": 9668 + }, + { + "epoch": 0.6231230263581878, + "grad_norm": 0.0028003798244138406, + "learning_rate": 4.192624418188328e-06, + "loss": 0.0, + "step": 9669 + }, + { + "epoch": 0.623187471805117, + "grad_norm": 0.0021314886590723505, + "learning_rate": 4.1919083422842825e-06, + "loss": 0.0, + "step": 9670 + }, + { + "epoch": 0.6232519172520461, + "grad_norm": 0.020338036201677363, + "learning_rate": 4.191192266380237e-06, + "loss": 0.0, + "step": 9671 + }, + { + "epoch": 0.6233163626989753, + "grad_norm": 0.15354739217184882, + "learning_rate": 4.190476190476191e-06, + "loss": 0.0016, + "step": 9672 + }, + { + "epoch": 0.6233808081459045, + "grad_norm": 0.015586767135734498, + "learning_rate": 4.189760114572145e-06, + "loss": 0.0, + "step": 9673 + }, + { + "epoch": 0.6234452535928336, + "grad_norm": 0.016917160072103598, + "learning_rate": 4.189044038668099e-06, + "loss": 0.0, + "step": 9674 + }, + { + "epoch": 0.6235096990397628, + "grad_norm": 0.04137864002051029, + "learning_rate": 4.188327962764053e-06, + "loss": 0.0, + "step": 9675 + }, + { + "epoch": 0.623574144486692, + "grad_norm": 0.005969930532603671, + "learning_rate": 4.1876118868600074e-06, + "loss": 0.0, + "step": 9676 + }, + { + "epoch": 0.6236385899336212, + "grad_norm": 0.01609130556531648, + "learning_rate": 4.186895810955962e-06, + "loss": 0.0, + "step": 9677 + }, + { + "epoch": 0.6237030353805504, + "grad_norm": 0.0025735075923348596, + "learning_rate": 4.186179735051916e-06, + "loss": 0.0, + "step": 9678 + }, + { + "epoch": 0.6237674808274796, + "grad_norm": 0.0016625419018875228, + "learning_rate": 4.1854636591478695e-06, + "loss": 0.0, + "step": 9679 + }, + { + "epoch": 0.6238319262744088, + "grad_norm": 0.0003346156449337851, + "learning_rate": 4.184747583243825e-06, + "loss": 0.0, + "step": 9680 + }, + { + "epoch": 0.6238963717213379, + "grad_norm": 0.007099106515354779, + "learning_rate": 4.184031507339779e-06, + "loss": 0.0, + "step": 9681 + }, + { + "epoch": 0.623960817168267, + "grad_norm": 0.003438129501815967, + "learning_rate": 4.183315431435732e-06, + "loss": 0.0, + "step": 9682 + }, + { + "epoch": 0.6240252626151962, + "grad_norm": 0.03816232143606552, + "learning_rate": 4.182599355531687e-06, + "loss": 0.0005, + "step": 9683 + }, + { + "epoch": 0.6240897080621254, + "grad_norm": 0.07756988779862854, + "learning_rate": 4.181883279627641e-06, + "loss": 0.0004, + "step": 9684 + }, + { + "epoch": 0.6241541535090546, + "grad_norm": 0.4092447233751244, + "learning_rate": 4.181167203723595e-06, + "loss": 0.001, + "step": 9685 + }, + { + "epoch": 0.6242185989559838, + "grad_norm": 0.0007207571886176135, + "learning_rate": 4.18045112781955e-06, + "loss": 0.0, + "step": 9686 + }, + { + "epoch": 0.6242830444029129, + "grad_norm": 0.0015483125744365732, + "learning_rate": 4.179735051915503e-06, + "loss": 0.0, + "step": 9687 + }, + { + "epoch": 0.6243474898498421, + "grad_norm": 0.0021180640674535764, + "learning_rate": 4.179018976011457e-06, + "loss": 0.0, + "step": 9688 + }, + { + "epoch": 0.6244119352967713, + "grad_norm": 0.0004169306514595993, + "learning_rate": 4.178302900107412e-06, + "loss": 0.0, + "step": 9689 + }, + { + "epoch": 0.6244763807437005, + "grad_norm": 0.2161596745728091, + "learning_rate": 4.177586824203366e-06, + "loss": 0.0003, + "step": 9690 + }, + { + "epoch": 0.6245408261906297, + "grad_norm": 0.0011705409204292648, + "learning_rate": 4.17687074829932e-06, + "loss": 0.0, + "step": 9691 + }, + { + "epoch": 0.6246052716375589, + "grad_norm": 0.0003713461406357066, + "learning_rate": 4.1761546723952745e-06, + "loss": 0.0, + "step": 9692 + }, + { + "epoch": 0.6246697170844879, + "grad_norm": 0.021227762380770367, + "learning_rate": 4.175438596491229e-06, + "loss": 0.0, + "step": 9693 + }, + { + "epoch": 0.6247341625314171, + "grad_norm": 0.002796829726804933, + "learning_rate": 4.174722520587183e-06, + "loss": 0.0, + "step": 9694 + }, + { + "epoch": 0.6247986079783463, + "grad_norm": 0.029885802088271313, + "learning_rate": 4.1740064446831366e-06, + "loss": 0.0, + "step": 9695 + }, + { + "epoch": 0.6248630534252755, + "grad_norm": 0.058992337485208504, + "learning_rate": 4.173290368779091e-06, + "loss": 0.0001, + "step": 9696 + }, + { + "epoch": 0.6249274988722047, + "grad_norm": 0.006196414162415305, + "learning_rate": 4.172574292875045e-06, + "loss": 0.0, + "step": 9697 + }, + { + "epoch": 0.6249919443191339, + "grad_norm": 0.0003641721267636872, + "learning_rate": 4.1718582169709995e-06, + "loss": 0.0, + "step": 9698 + }, + { + "epoch": 0.625056389766063, + "grad_norm": 0.000895571559838155, + "learning_rate": 4.171142141066954e-06, + "loss": 0.0, + "step": 9699 + }, + { + "epoch": 0.6251208352129922, + "grad_norm": 0.035813595296406875, + "learning_rate": 4.170426065162907e-06, + "loss": 0.0004, + "step": 9700 + }, + { + "epoch": 0.6251852806599214, + "grad_norm": 0.014866533526694621, + "learning_rate": 4.1697099892588615e-06, + "loss": 0.0, + "step": 9701 + }, + { + "epoch": 0.6252497261068506, + "grad_norm": 0.0004485504683336761, + "learning_rate": 4.168993913354816e-06, + "loss": 0.0, + "step": 9702 + }, + { + "epoch": 0.6253141715537798, + "grad_norm": 0.003314286639645609, + "learning_rate": 4.16827783745077e-06, + "loss": 0.0, + "step": 9703 + }, + { + "epoch": 0.6253786170007088, + "grad_norm": 0.0038918480210237234, + "learning_rate": 4.167561761546724e-06, + "loss": 0.0, + "step": 9704 + }, + { + "epoch": 0.625443062447638, + "grad_norm": 0.12356112889405628, + "learning_rate": 4.166845685642679e-06, + "loss": 0.0005, + "step": 9705 + }, + { + "epoch": 0.6255075078945672, + "grad_norm": 0.010094385634850529, + "learning_rate": 4.166129609738633e-06, + "loss": 0.0, + "step": 9706 + }, + { + "epoch": 0.6255719533414964, + "grad_norm": 0.0004073141393491479, + "learning_rate": 4.165413533834587e-06, + "loss": 0.0, + "step": 9707 + }, + { + "epoch": 0.6256363987884256, + "grad_norm": 0.017301657748644873, + "learning_rate": 4.164697457930541e-06, + "loss": 0.0001, + "step": 9708 + }, + { + "epoch": 0.6257008442353548, + "grad_norm": 0.008330549915594329, + "learning_rate": 4.163981382026495e-06, + "loss": 0.0, + "step": 9709 + }, + { + "epoch": 0.625765289682284, + "grad_norm": 0.0012220208405838166, + "learning_rate": 4.163265306122449e-06, + "loss": 0.0, + "step": 9710 + }, + { + "epoch": 0.6258297351292131, + "grad_norm": 0.0024680865602183513, + "learning_rate": 4.162549230218404e-06, + "loss": 0.0, + "step": 9711 + }, + { + "epoch": 0.6258941805761423, + "grad_norm": 0.0005113877056839675, + "learning_rate": 4.161833154314357e-06, + "loss": 0.0, + "step": 9712 + }, + { + "epoch": 0.6259586260230715, + "grad_norm": 0.22660793473691426, + "learning_rate": 4.161117078410311e-06, + "loss": 0.0002, + "step": 9713 + }, + { + "epoch": 0.6260230714700007, + "grad_norm": 0.0678046505039315, + "learning_rate": 4.160401002506266e-06, + "loss": 0.0, + "step": 9714 + }, + { + "epoch": 0.6260875169169298, + "grad_norm": 0.0009742323449192132, + "learning_rate": 4.159684926602221e-06, + "loss": 0.0, + "step": 9715 + }, + { + "epoch": 0.626151962363859, + "grad_norm": 0.006071902899983566, + "learning_rate": 4.158968850698174e-06, + "loss": 0.0, + "step": 9716 + }, + { + "epoch": 0.6262164078107881, + "grad_norm": 0.2007009679693991, + "learning_rate": 4.158252774794129e-06, + "loss": 0.0006, + "step": 9717 + }, + { + "epoch": 0.6262808532577173, + "grad_norm": 0.6364653615568687, + "learning_rate": 4.157536698890083e-06, + "loss": 0.0149, + "step": 9718 + }, + { + "epoch": 0.6263452987046465, + "grad_norm": 0.002847480991120004, + "learning_rate": 4.156820622986037e-06, + "loss": 0.0, + "step": 9719 + }, + { + "epoch": 0.6264097441515757, + "grad_norm": 0.0029042838819600516, + "learning_rate": 4.156104547081991e-06, + "loss": 0.0, + "step": 9720 + }, + { + "epoch": 0.6264741895985049, + "grad_norm": 0.0010571381934525565, + "learning_rate": 4.155388471177945e-06, + "loss": 0.0, + "step": 9721 + }, + { + "epoch": 0.6265386350454341, + "grad_norm": 0.0006312867694469445, + "learning_rate": 4.154672395273899e-06, + "loss": 0.0, + "step": 9722 + }, + { + "epoch": 0.6266030804923632, + "grad_norm": 0.501257536919454, + "learning_rate": 4.1539563193698536e-06, + "loss": 0.0035, + "step": 9723 + }, + { + "epoch": 0.6266675259392924, + "grad_norm": 0.0003598110398831839, + "learning_rate": 4.153240243465808e-06, + "loss": 0.0, + "step": 9724 + }, + { + "epoch": 0.6267319713862216, + "grad_norm": 0.4419211195223192, + "learning_rate": 4.152524167561761e-06, + "loss": 0.0013, + "step": 9725 + }, + { + "epoch": 0.6267964168331507, + "grad_norm": 0.005140032610661535, + "learning_rate": 4.151808091657716e-06, + "loss": 0.0, + "step": 9726 + }, + { + "epoch": 0.6268608622800799, + "grad_norm": 0.006610240234879396, + "learning_rate": 4.151092015753671e-06, + "loss": 0.0, + "step": 9727 + }, + { + "epoch": 0.626925307727009, + "grad_norm": 0.012762952650590107, + "learning_rate": 4.150375939849624e-06, + "loss": 0.0, + "step": 9728 + }, + { + "epoch": 0.6269897531739382, + "grad_norm": 0.010514889004076607, + "learning_rate": 4.1496598639455785e-06, + "loss": 0.0001, + "step": 9729 + }, + { + "epoch": 0.6270541986208674, + "grad_norm": 3.305536977108447, + "learning_rate": 4.148943788041533e-06, + "loss": 0.0243, + "step": 9730 + }, + { + "epoch": 0.6271186440677966, + "grad_norm": 0.009644390248062271, + "learning_rate": 4.148227712137487e-06, + "loss": 0.0001, + "step": 9731 + }, + { + "epoch": 0.6271830895147258, + "grad_norm": 0.004172765545080662, + "learning_rate": 4.147511636233441e-06, + "loss": 0.0, + "step": 9732 + }, + { + "epoch": 0.627247534961655, + "grad_norm": 0.6687569011795615, + "learning_rate": 4.146795560329395e-06, + "loss": 0.0072, + "step": 9733 + }, + { + "epoch": 0.6273119804085842, + "grad_norm": 0.005077958161566104, + "learning_rate": 4.146079484425349e-06, + "loss": 0.0, + "step": 9734 + }, + { + "epoch": 0.6273764258555133, + "grad_norm": 0.0011856766644601346, + "learning_rate": 4.1453634085213034e-06, + "loss": 0.0, + "step": 9735 + }, + { + "epoch": 0.6274408713024425, + "grad_norm": 0.0011318798438648293, + "learning_rate": 4.144647332617258e-06, + "loss": 0.0, + "step": 9736 + }, + { + "epoch": 0.6275053167493717, + "grad_norm": 0.0022622025217353646, + "learning_rate": 4.143931256713212e-06, + "loss": 0.0, + "step": 9737 + }, + { + "epoch": 0.6275697621963008, + "grad_norm": 0.0031208555963091454, + "learning_rate": 4.1432151808091655e-06, + "loss": 0.0, + "step": 9738 + }, + { + "epoch": 0.62763420764323, + "grad_norm": 0.11720895173505456, + "learning_rate": 4.142499104905121e-06, + "loss": 0.0001, + "step": 9739 + }, + { + "epoch": 0.6276986530901592, + "grad_norm": 0.005784035279351796, + "learning_rate": 4.141783029001075e-06, + "loss": 0.0, + "step": 9740 + }, + { + "epoch": 0.6277630985370883, + "grad_norm": 0.001448754885450368, + "learning_rate": 4.141066953097028e-06, + "loss": 0.0, + "step": 9741 + }, + { + "epoch": 0.6278275439840175, + "grad_norm": 0.0007223212627194857, + "learning_rate": 4.140350877192983e-06, + "loss": 0.0, + "step": 9742 + }, + { + "epoch": 0.6278919894309467, + "grad_norm": 0.14653846377902094, + "learning_rate": 4.139634801288937e-06, + "loss": 0.0009, + "step": 9743 + }, + { + "epoch": 0.6279564348778759, + "grad_norm": 0.06317755627566499, + "learning_rate": 4.138918725384891e-06, + "loss": 0.0, + "step": 9744 + }, + { + "epoch": 0.6280208803248051, + "grad_norm": 0.3560164288228587, + "learning_rate": 4.138202649480846e-06, + "loss": 0.0018, + "step": 9745 + }, + { + "epoch": 0.6280853257717343, + "grad_norm": 0.005363883013006638, + "learning_rate": 4.137486573576799e-06, + "loss": 0.0, + "step": 9746 + }, + { + "epoch": 0.6281497712186634, + "grad_norm": 0.033138692973302124, + "learning_rate": 4.136770497672753e-06, + "loss": 0.0, + "step": 9747 + }, + { + "epoch": 0.6282142166655926, + "grad_norm": 0.28489794855303097, + "learning_rate": 4.136054421768708e-06, + "loss": 0.0004, + "step": 9748 + }, + { + "epoch": 0.6282786621125217, + "grad_norm": 0.003932125381898977, + "learning_rate": 4.135338345864662e-06, + "loss": 0.0, + "step": 9749 + }, + { + "epoch": 0.6283431075594509, + "grad_norm": 0.019557865455868052, + "learning_rate": 4.134622269960616e-06, + "loss": 0.0001, + "step": 9750 + }, + { + "epoch": 0.6284075530063801, + "grad_norm": 0.000930857796089239, + "learning_rate": 4.1339061940565705e-06, + "loss": 0.0, + "step": 9751 + }, + { + "epoch": 0.6284719984533093, + "grad_norm": 0.004989099766665328, + "learning_rate": 4.133190118152525e-06, + "loss": 0.0, + "step": 9752 + }, + { + "epoch": 0.6285364439002384, + "grad_norm": 0.0035396782047819827, + "learning_rate": 4.132474042248479e-06, + "loss": 0.0, + "step": 9753 + }, + { + "epoch": 0.6286008893471676, + "grad_norm": 0.05278230405212935, + "learning_rate": 4.1317579663444326e-06, + "loss": 0.0001, + "step": 9754 + }, + { + "epoch": 0.6286653347940968, + "grad_norm": 0.003403609391087778, + "learning_rate": 4.131041890440387e-06, + "loss": 0.0, + "step": 9755 + }, + { + "epoch": 0.628729780241026, + "grad_norm": 0.07996074866573993, + "learning_rate": 4.130325814536341e-06, + "loss": 0.0002, + "step": 9756 + }, + { + "epoch": 0.6287942256879552, + "grad_norm": 0.2723902128574586, + "learning_rate": 4.1296097386322955e-06, + "loss": 0.0012, + "step": 9757 + }, + { + "epoch": 0.6288586711348844, + "grad_norm": 0.0025250918803746652, + "learning_rate": 4.12889366272825e-06, + "loss": 0.0, + "step": 9758 + }, + { + "epoch": 0.6289231165818135, + "grad_norm": 0.0360516493613523, + "learning_rate": 4.128177586824203e-06, + "loss": 0.0001, + "step": 9759 + }, + { + "epoch": 0.6289875620287426, + "grad_norm": 0.051373972553429056, + "learning_rate": 4.1274615109201575e-06, + "loss": 0.0002, + "step": 9760 + }, + { + "epoch": 0.6290520074756718, + "grad_norm": 0.007127486987450691, + "learning_rate": 4.126745435016112e-06, + "loss": 0.0, + "step": 9761 + }, + { + "epoch": 0.629116452922601, + "grad_norm": 0.04414521157648746, + "learning_rate": 4.126029359112066e-06, + "loss": 0.0001, + "step": 9762 + }, + { + "epoch": 0.6291808983695302, + "grad_norm": 0.015933055726976987, + "learning_rate": 4.12531328320802e-06, + "loss": 0.0, + "step": 9763 + }, + { + "epoch": 0.6292453438164594, + "grad_norm": 0.0004764234417019966, + "learning_rate": 4.124597207303975e-06, + "loss": 0.0, + "step": 9764 + }, + { + "epoch": 0.6293097892633885, + "grad_norm": 0.005602980997375922, + "learning_rate": 4.123881131399929e-06, + "loss": 0.0, + "step": 9765 + }, + { + "epoch": 0.6293742347103177, + "grad_norm": 0.0022225477643587023, + "learning_rate": 4.123165055495883e-06, + "loss": 0.0, + "step": 9766 + }, + { + "epoch": 0.6294386801572469, + "grad_norm": 0.3022448735374261, + "learning_rate": 4.122448979591837e-06, + "loss": 0.0012, + "step": 9767 + }, + { + "epoch": 0.6295031256041761, + "grad_norm": 0.02914737365516086, + "learning_rate": 4.121732903687791e-06, + "loss": 0.0001, + "step": 9768 + }, + { + "epoch": 0.6295675710511053, + "grad_norm": 0.05615636769679371, + "learning_rate": 4.121016827783745e-06, + "loss": 0.0001, + "step": 9769 + }, + { + "epoch": 0.6296320164980345, + "grad_norm": 0.0003076192540867642, + "learning_rate": 4.1203007518797e-06, + "loss": 0.0, + "step": 9770 + }, + { + "epoch": 0.6296964619449635, + "grad_norm": 0.06890702224480363, + "learning_rate": 4.119584675975654e-06, + "loss": 0.0003, + "step": 9771 + }, + { + "epoch": 0.6297609073918927, + "grad_norm": 0.006440353235390373, + "learning_rate": 4.118868600071607e-06, + "loss": 0.0, + "step": 9772 + }, + { + "epoch": 0.6298253528388219, + "grad_norm": 0.001049338266341266, + "learning_rate": 4.118152524167562e-06, + "loss": 0.0, + "step": 9773 + }, + { + "epoch": 0.6298897982857511, + "grad_norm": 0.09006338639124273, + "learning_rate": 4.117436448263517e-06, + "loss": 0.0017, + "step": 9774 + }, + { + "epoch": 0.6299542437326803, + "grad_norm": 0.32334044423878544, + "learning_rate": 4.11672037235947e-06, + "loss": 0.0022, + "step": 9775 + }, + { + "epoch": 0.6300186891796095, + "grad_norm": 0.0014797895047647244, + "learning_rate": 4.116004296455425e-06, + "loss": 0.0, + "step": 9776 + }, + { + "epoch": 0.6300831346265386, + "grad_norm": 0.002647025070808818, + "learning_rate": 4.115288220551379e-06, + "loss": 0.0, + "step": 9777 + }, + { + "epoch": 0.6301475800734678, + "grad_norm": 0.0002881831873300413, + "learning_rate": 4.114572144647333e-06, + "loss": 0.0, + "step": 9778 + }, + { + "epoch": 0.630212025520397, + "grad_norm": 0.0025381103557760654, + "learning_rate": 4.1138560687432875e-06, + "loss": 0.0, + "step": 9779 + }, + { + "epoch": 0.6302764709673262, + "grad_norm": 0.0006100557027432721, + "learning_rate": 4.113139992839241e-06, + "loss": 0.0, + "step": 9780 + }, + { + "epoch": 0.6303409164142554, + "grad_norm": 0.00111819279469302, + "learning_rate": 4.112423916935195e-06, + "loss": 0.0, + "step": 9781 + }, + { + "epoch": 0.6304053618611845, + "grad_norm": 0.009024577275247043, + "learning_rate": 4.1117078410311496e-06, + "loss": 0.0, + "step": 9782 + }, + { + "epoch": 0.6304698073081136, + "grad_norm": 0.040998943660520266, + "learning_rate": 4.110991765127104e-06, + "loss": 0.0005, + "step": 9783 + }, + { + "epoch": 0.6305342527550428, + "grad_norm": 0.2744994794192702, + "learning_rate": 4.110275689223058e-06, + "loss": 0.0017, + "step": 9784 + }, + { + "epoch": 0.630598698201972, + "grad_norm": 0.47197642973060894, + "learning_rate": 4.109559613319012e-06, + "loss": 0.0008, + "step": 9785 + }, + { + "epoch": 0.6306631436489012, + "grad_norm": 0.011569277487227457, + "learning_rate": 4.108843537414967e-06, + "loss": 0.0001, + "step": 9786 + }, + { + "epoch": 0.6307275890958304, + "grad_norm": 0.00016951053258177544, + "learning_rate": 4.108127461510921e-06, + "loss": 0.0, + "step": 9787 + }, + { + "epoch": 0.6307920345427596, + "grad_norm": 0.0002565186560096678, + "learning_rate": 4.1074113856068745e-06, + "loss": 0.0, + "step": 9788 + }, + { + "epoch": 0.6308564799896887, + "grad_norm": 0.015760302528762305, + "learning_rate": 4.106695309702829e-06, + "loss": 0.0001, + "step": 9789 + }, + { + "epoch": 0.6309209254366179, + "grad_norm": 0.0005307053243126115, + "learning_rate": 4.105979233798783e-06, + "loss": 0.0, + "step": 9790 + }, + { + "epoch": 0.6309853708835471, + "grad_norm": 0.009203236432359213, + "learning_rate": 4.105263157894737e-06, + "loss": 0.0, + "step": 9791 + }, + { + "epoch": 0.6310498163304763, + "grad_norm": 0.00010736600066678872, + "learning_rate": 4.104547081990692e-06, + "loss": 0.0, + "step": 9792 + }, + { + "epoch": 0.6311142617774054, + "grad_norm": 0.01727893981573304, + "learning_rate": 4.103831006086645e-06, + "loss": 0.0, + "step": 9793 + }, + { + "epoch": 0.6311787072243346, + "grad_norm": 0.005858238365772789, + "learning_rate": 4.1031149301825994e-06, + "loss": 0.0, + "step": 9794 + }, + { + "epoch": 0.6312431526712637, + "grad_norm": 0.03891247460398242, + "learning_rate": 4.102398854278554e-06, + "loss": 0.0001, + "step": 9795 + }, + { + "epoch": 0.6313075981181929, + "grad_norm": 0.03522325296595622, + "learning_rate": 4.101682778374508e-06, + "loss": 0.0001, + "step": 9796 + }, + { + "epoch": 0.6313720435651221, + "grad_norm": 0.0002808154914054595, + "learning_rate": 4.100966702470462e-06, + "loss": 0.0, + "step": 9797 + }, + { + "epoch": 0.6314364890120513, + "grad_norm": 0.0385147856123727, + "learning_rate": 4.100250626566417e-06, + "loss": 0.0004, + "step": 9798 + }, + { + "epoch": 0.6315009344589805, + "grad_norm": 0.005532733142487714, + "learning_rate": 4.099534550662371e-06, + "loss": 0.0, + "step": 9799 + }, + { + "epoch": 0.6315653799059097, + "grad_norm": 0.034152268583626674, + "learning_rate": 4.098818474758325e-06, + "loss": 0.0001, + "step": 9800 + }, + { + "epoch": 0.6316298253528388, + "grad_norm": 0.39788263618123626, + "learning_rate": 4.098102398854279e-06, + "loss": 0.0004, + "step": 9801 + }, + { + "epoch": 0.631694270799768, + "grad_norm": 0.008583379104093024, + "learning_rate": 4.097386322950233e-06, + "loss": 0.0, + "step": 9802 + }, + { + "epoch": 0.6317587162466972, + "grad_norm": 0.0025010910330443806, + "learning_rate": 4.096670247046187e-06, + "loss": 0.0, + "step": 9803 + }, + { + "epoch": 0.6318231616936264, + "grad_norm": 0.01292406597771796, + "learning_rate": 4.095954171142142e-06, + "loss": 0.0, + "step": 9804 + }, + { + "epoch": 0.6318876071405555, + "grad_norm": 0.011911608905516258, + "learning_rate": 4.095238095238096e-06, + "loss": 0.0, + "step": 9805 + }, + { + "epoch": 0.6319520525874847, + "grad_norm": 0.021996962163873125, + "learning_rate": 4.094522019334049e-06, + "loss": 0.0, + "step": 9806 + }, + { + "epoch": 0.6320164980344138, + "grad_norm": 0.0006577608885751697, + "learning_rate": 4.093805943430004e-06, + "loss": 0.0, + "step": 9807 + }, + { + "epoch": 0.632080943481343, + "grad_norm": 0.0022851470470816276, + "learning_rate": 4.093089867525958e-06, + "loss": 0.0, + "step": 9808 + }, + { + "epoch": 0.6321453889282722, + "grad_norm": 0.00044632028055951176, + "learning_rate": 4.092373791621912e-06, + "loss": 0.0, + "step": 9809 + }, + { + "epoch": 0.6322098343752014, + "grad_norm": 0.0005976803320070037, + "learning_rate": 4.0916577157178665e-06, + "loss": 0.0, + "step": 9810 + }, + { + "epoch": 0.6322742798221306, + "grad_norm": 0.0005299045073304586, + "learning_rate": 4.090941639813821e-06, + "loss": 0.0, + "step": 9811 + }, + { + "epoch": 0.6323387252690598, + "grad_norm": 0.00022852497761108778, + "learning_rate": 4.090225563909775e-06, + "loss": 0.0, + "step": 9812 + }, + { + "epoch": 0.632403170715989, + "grad_norm": 0.21186302379079988, + "learning_rate": 4.089509488005729e-06, + "loss": 0.0031, + "step": 9813 + }, + { + "epoch": 0.6324676161629181, + "grad_norm": 0.12596821572501107, + "learning_rate": 4.088793412101683e-06, + "loss": 0.0002, + "step": 9814 + }, + { + "epoch": 0.6325320616098473, + "grad_norm": 0.0013975578937536427, + "learning_rate": 4.088077336197637e-06, + "loss": 0.0, + "step": 9815 + }, + { + "epoch": 0.6325965070567764, + "grad_norm": 0.005904553442262322, + "learning_rate": 4.0873612602935915e-06, + "loss": 0.0, + "step": 9816 + }, + { + "epoch": 0.6326609525037056, + "grad_norm": 0.016000071829722146, + "learning_rate": 4.086645184389546e-06, + "loss": 0.0002, + "step": 9817 + }, + { + "epoch": 0.6327253979506348, + "grad_norm": 0.0028822450328563408, + "learning_rate": 4.085929108485499e-06, + "loss": 0.0, + "step": 9818 + }, + { + "epoch": 0.632789843397564, + "grad_norm": 0.05139031940953903, + "learning_rate": 4.0852130325814535e-06, + "loss": 0.0002, + "step": 9819 + }, + { + "epoch": 0.6328542888444931, + "grad_norm": 9.16404113373634e-05, + "learning_rate": 4.084496956677408e-06, + "loss": 0.0, + "step": 9820 + }, + { + "epoch": 0.6329187342914223, + "grad_norm": 0.0016909506227782403, + "learning_rate": 4.083780880773363e-06, + "loss": 0.0, + "step": 9821 + }, + { + "epoch": 0.6329831797383515, + "grad_norm": 0.025381268785659834, + "learning_rate": 4.083064804869316e-06, + "loss": 0.0, + "step": 9822 + }, + { + "epoch": 0.6330476251852807, + "grad_norm": 0.004422708829880086, + "learning_rate": 4.082348728965271e-06, + "loss": 0.0001, + "step": 9823 + }, + { + "epoch": 0.6331120706322099, + "grad_norm": 0.002109393715706444, + "learning_rate": 4.081632653061225e-06, + "loss": 0.0, + "step": 9824 + }, + { + "epoch": 0.633176516079139, + "grad_norm": 0.002699339732813906, + "learning_rate": 4.080916577157179e-06, + "loss": 0.0, + "step": 9825 + }, + { + "epoch": 0.6332409615260682, + "grad_norm": 0.0017968199788469807, + "learning_rate": 4.080200501253133e-06, + "loss": 0.0, + "step": 9826 + }, + { + "epoch": 0.6333054069729973, + "grad_norm": 0.004023536004873301, + "learning_rate": 4.079484425349087e-06, + "loss": 0.0, + "step": 9827 + }, + { + "epoch": 0.6333698524199265, + "grad_norm": 0.0003069514670206831, + "learning_rate": 4.078768349445041e-06, + "loss": 0.0, + "step": 9828 + }, + { + "epoch": 0.6334342978668557, + "grad_norm": 0.0028215493809561425, + "learning_rate": 4.078052273540996e-06, + "loss": 0.0, + "step": 9829 + }, + { + "epoch": 0.6334987433137849, + "grad_norm": 0.11994944466776976, + "learning_rate": 4.07733619763695e-06, + "loss": 0.0003, + "step": 9830 + }, + { + "epoch": 0.633563188760714, + "grad_norm": 0.10761587225219324, + "learning_rate": 4.076620121732903e-06, + "loss": 0.0003, + "step": 9831 + }, + { + "epoch": 0.6336276342076432, + "grad_norm": 0.011345525980090358, + "learning_rate": 4.0759040458288586e-06, + "loss": 0.0, + "step": 9832 + }, + { + "epoch": 0.6336920796545724, + "grad_norm": 0.02846167078207075, + "learning_rate": 4.075187969924813e-06, + "loss": 0.0, + "step": 9833 + }, + { + "epoch": 0.6337565251015016, + "grad_norm": 0.0062590929192869165, + "learning_rate": 4.074471894020766e-06, + "loss": 0.0, + "step": 9834 + }, + { + "epoch": 0.6338209705484308, + "grad_norm": 0.3454879802491421, + "learning_rate": 4.073755818116721e-06, + "loss": 0.0009, + "step": 9835 + }, + { + "epoch": 0.63388541599536, + "grad_norm": 0.00633899276324277, + "learning_rate": 4.073039742212675e-06, + "loss": 0.0, + "step": 9836 + }, + { + "epoch": 0.6339498614422892, + "grad_norm": 0.014193635549446313, + "learning_rate": 4.072323666308629e-06, + "loss": 0.0, + "step": 9837 + }, + { + "epoch": 0.6340143068892182, + "grad_norm": 0.11098855918860485, + "learning_rate": 4.0716075904045835e-06, + "loss": 0.0002, + "step": 9838 + }, + { + "epoch": 0.6340787523361474, + "grad_norm": 0.0023207273082203215, + "learning_rate": 4.070891514500537e-06, + "loss": 0.0, + "step": 9839 + }, + { + "epoch": 0.6341431977830766, + "grad_norm": 0.0006344213112119258, + "learning_rate": 4.070175438596491e-06, + "loss": 0.0, + "step": 9840 + }, + { + "epoch": 0.6342076432300058, + "grad_norm": 0.08611299195893168, + "learning_rate": 4.0694593626924456e-06, + "loss": 0.0004, + "step": 9841 + }, + { + "epoch": 0.634272088676935, + "grad_norm": 8.981659056621043e-05, + "learning_rate": 4.0687432867884e-06, + "loss": 0.0, + "step": 9842 + }, + { + "epoch": 0.6343365341238641, + "grad_norm": 0.00012564965903519854, + "learning_rate": 4.068027210884354e-06, + "loss": 0.0, + "step": 9843 + }, + { + "epoch": 0.6344009795707933, + "grad_norm": 0.0006654507491736668, + "learning_rate": 4.0673111349803084e-06, + "loss": 0.0, + "step": 9844 + }, + { + "epoch": 0.6344654250177225, + "grad_norm": 0.0034752734788544896, + "learning_rate": 4.066595059076263e-06, + "loss": 0.0, + "step": 9845 + }, + { + "epoch": 0.6345298704646517, + "grad_norm": 0.0011339178097354364, + "learning_rate": 4.065878983172217e-06, + "loss": 0.0, + "step": 9846 + }, + { + "epoch": 0.6345943159115809, + "grad_norm": 0.008264259238473211, + "learning_rate": 4.0651629072681705e-06, + "loss": 0.0, + "step": 9847 + }, + { + "epoch": 0.6346587613585101, + "grad_norm": 0.003021064877237617, + "learning_rate": 4.064446831364125e-06, + "loss": 0.0, + "step": 9848 + }, + { + "epoch": 0.6347232068054391, + "grad_norm": 0.0014914980012132165, + "learning_rate": 4.063730755460079e-06, + "loss": 0.0, + "step": 9849 + }, + { + "epoch": 0.6347876522523683, + "grad_norm": 0.12833182633070414, + "learning_rate": 4.063014679556033e-06, + "loss": 0.0003, + "step": 9850 + }, + { + "epoch": 0.6348520976992975, + "grad_norm": 0.010818723560201225, + "learning_rate": 4.062298603651988e-06, + "loss": 0.0, + "step": 9851 + }, + { + "epoch": 0.6349165431462267, + "grad_norm": 0.15318523214957946, + "learning_rate": 4.061582527747941e-06, + "loss": 0.0004, + "step": 9852 + }, + { + "epoch": 0.6349809885931559, + "grad_norm": 0.19271358384033405, + "learning_rate": 4.0608664518438954e-06, + "loss": 0.0023, + "step": 9853 + }, + { + "epoch": 0.6350454340400851, + "grad_norm": 0.10603754438942112, + "learning_rate": 4.06015037593985e-06, + "loss": 0.0002, + "step": 9854 + }, + { + "epoch": 0.6351098794870143, + "grad_norm": 0.00042468406024622174, + "learning_rate": 4.059434300035804e-06, + "loss": 0.0, + "step": 9855 + }, + { + "epoch": 0.6351743249339434, + "grad_norm": 0.0011900971873508306, + "learning_rate": 4.058718224131758e-06, + "loss": 0.0, + "step": 9856 + }, + { + "epoch": 0.6352387703808726, + "grad_norm": 0.2271719250462828, + "learning_rate": 4.058002148227713e-06, + "loss": 0.0003, + "step": 9857 + }, + { + "epoch": 0.6353032158278018, + "grad_norm": 0.0006767117946808032, + "learning_rate": 4.057286072323667e-06, + "loss": 0.0, + "step": 9858 + }, + { + "epoch": 0.635367661274731, + "grad_norm": 0.00020405993774540835, + "learning_rate": 4.056569996419621e-06, + "loss": 0.0, + "step": 9859 + }, + { + "epoch": 0.6354321067216601, + "grad_norm": 0.0038386513119053446, + "learning_rate": 4.055853920515575e-06, + "loss": 0.0, + "step": 9860 + }, + { + "epoch": 0.6354965521685892, + "grad_norm": 0.14175836497304545, + "learning_rate": 4.055137844611529e-06, + "loss": 0.0041, + "step": 9861 + }, + { + "epoch": 0.6355609976155184, + "grad_norm": 0.5609968286873434, + "learning_rate": 4.054421768707483e-06, + "loss": 0.0015, + "step": 9862 + }, + { + "epoch": 0.6356254430624476, + "grad_norm": 0.22833693274969805, + "learning_rate": 4.053705692803438e-06, + "loss": 0.0037, + "step": 9863 + }, + { + "epoch": 0.6356898885093768, + "grad_norm": 0.0004343785643513413, + "learning_rate": 4.052989616899392e-06, + "loss": 0.0, + "step": 9864 + }, + { + "epoch": 0.635754333956306, + "grad_norm": 0.00029451475877984956, + "learning_rate": 4.052273540995345e-06, + "loss": 0.0, + "step": 9865 + }, + { + "epoch": 0.6358187794032352, + "grad_norm": 0.1578427849877735, + "learning_rate": 4.0515574650913e-06, + "loss": 0.0021, + "step": 9866 + }, + { + "epoch": 0.6358832248501644, + "grad_norm": 0.002889111443432167, + "learning_rate": 4.050841389187255e-06, + "loss": 0.0, + "step": 9867 + }, + { + "epoch": 0.6359476702970935, + "grad_norm": 0.015472808998329517, + "learning_rate": 4.050125313283208e-06, + "loss": 0.0, + "step": 9868 + }, + { + "epoch": 0.6360121157440227, + "grad_norm": 0.0006547937423044578, + "learning_rate": 4.0494092373791625e-06, + "loss": 0.0, + "step": 9869 + }, + { + "epoch": 0.6360765611909519, + "grad_norm": 0.0016740075257970334, + "learning_rate": 4.048693161475117e-06, + "loss": 0.0, + "step": 9870 + }, + { + "epoch": 0.636141006637881, + "grad_norm": 0.0032675604364454484, + "learning_rate": 4.047977085571071e-06, + "loss": 0.0, + "step": 9871 + }, + { + "epoch": 0.6362054520848102, + "grad_norm": 0.07995895205773623, + "learning_rate": 4.047261009667025e-06, + "loss": 0.0001, + "step": 9872 + }, + { + "epoch": 0.6362698975317393, + "grad_norm": 0.01585555228427254, + "learning_rate": 4.046544933762979e-06, + "loss": 0.0002, + "step": 9873 + }, + { + "epoch": 0.6363343429786685, + "grad_norm": 0.0005467433965174832, + "learning_rate": 4.045828857858933e-06, + "loss": 0.0, + "step": 9874 + }, + { + "epoch": 0.6363987884255977, + "grad_norm": 0.002782702034882121, + "learning_rate": 4.0451127819548875e-06, + "loss": 0.0, + "step": 9875 + }, + { + "epoch": 0.6364632338725269, + "grad_norm": 0.0010571305041660374, + "learning_rate": 4.044396706050842e-06, + "loss": 0.0, + "step": 9876 + }, + { + "epoch": 0.6365276793194561, + "grad_norm": 0.671455673838229, + "learning_rate": 4.043680630146796e-06, + "loss": 0.0056, + "step": 9877 + }, + { + "epoch": 0.6365921247663853, + "grad_norm": 6.159026134501892e-05, + "learning_rate": 4.0429645542427495e-06, + "loss": 0.0, + "step": 9878 + }, + { + "epoch": 0.6366565702133145, + "grad_norm": 0.11269901540612999, + "learning_rate": 4.042248478338705e-06, + "loss": 0.0002, + "step": 9879 + }, + { + "epoch": 0.6367210156602436, + "grad_norm": 0.13829956983992775, + "learning_rate": 4.041532402434659e-06, + "loss": 0.0004, + "step": 9880 + }, + { + "epoch": 0.6367854611071728, + "grad_norm": 0.00028129003725998087, + "learning_rate": 4.040816326530612e-06, + "loss": 0.0, + "step": 9881 + }, + { + "epoch": 0.636849906554102, + "grad_norm": 0.18228105218956056, + "learning_rate": 4.040100250626567e-06, + "loss": 0.0002, + "step": 9882 + }, + { + "epoch": 0.6369143520010311, + "grad_norm": 0.00044808417187598156, + "learning_rate": 4.039384174722521e-06, + "loss": 0.0, + "step": 9883 + }, + { + "epoch": 0.6369787974479603, + "grad_norm": 0.004541619186065626, + "learning_rate": 4.038668098818475e-06, + "loss": 0.0, + "step": 9884 + }, + { + "epoch": 0.6370432428948894, + "grad_norm": 0.1287355882717353, + "learning_rate": 4.03795202291443e-06, + "loss": 0.0004, + "step": 9885 + }, + { + "epoch": 0.6371076883418186, + "grad_norm": 0.000665257157196762, + "learning_rate": 4.037235947010383e-06, + "loss": 0.0, + "step": 9886 + }, + { + "epoch": 0.6371721337887478, + "grad_norm": 0.00015430858028718633, + "learning_rate": 4.036519871106337e-06, + "loss": 0.0, + "step": 9887 + }, + { + "epoch": 0.637236579235677, + "grad_norm": 0.01693254525602601, + "learning_rate": 4.035803795202292e-06, + "loss": 0.0001, + "step": 9888 + }, + { + "epoch": 0.6373010246826062, + "grad_norm": 0.000977959590454271, + "learning_rate": 4.035087719298246e-06, + "loss": 0.0, + "step": 9889 + }, + { + "epoch": 0.6373654701295354, + "grad_norm": 0.0001807368223541276, + "learning_rate": 4.0343716433942e-06, + "loss": 0.0, + "step": 9890 + }, + { + "epoch": 0.6374299155764646, + "grad_norm": 0.13511406041563384, + "learning_rate": 4.0336555674901546e-06, + "loss": 0.0004, + "step": 9891 + }, + { + "epoch": 0.6374943610233937, + "grad_norm": 0.00462214646127079, + "learning_rate": 4.032939491586109e-06, + "loss": 0.0, + "step": 9892 + }, + { + "epoch": 0.6375588064703229, + "grad_norm": 8.380419679333197e-05, + "learning_rate": 4.032223415682063e-06, + "loss": 0.0, + "step": 9893 + }, + { + "epoch": 0.637623251917252, + "grad_norm": 0.001185555940977872, + "learning_rate": 4.031507339778017e-06, + "loss": 0.0, + "step": 9894 + }, + { + "epoch": 0.6376876973641812, + "grad_norm": 0.01787338779530966, + "learning_rate": 4.030791263873971e-06, + "loss": 0.0, + "step": 9895 + }, + { + "epoch": 0.6377521428111104, + "grad_norm": 0.05857672841469521, + "learning_rate": 4.030075187969925e-06, + "loss": 0.0003, + "step": 9896 + }, + { + "epoch": 0.6378165882580396, + "grad_norm": 0.042201623274486, + "learning_rate": 4.0293591120658795e-06, + "loss": 0.0002, + "step": 9897 + }, + { + "epoch": 0.6378810337049687, + "grad_norm": 0.014896175954414791, + "learning_rate": 4.028643036161834e-06, + "loss": 0.0, + "step": 9898 + }, + { + "epoch": 0.6379454791518979, + "grad_norm": 0.006496097356850603, + "learning_rate": 4.027926960257787e-06, + "loss": 0.0, + "step": 9899 + }, + { + "epoch": 0.6380099245988271, + "grad_norm": 0.0005663189838518361, + "learning_rate": 4.0272108843537416e-06, + "loss": 0.0, + "step": 9900 + }, + { + "epoch": 0.6380743700457563, + "grad_norm": 0.00168549572276779, + "learning_rate": 4.026494808449696e-06, + "loss": 0.0, + "step": 9901 + }, + { + "epoch": 0.6381388154926855, + "grad_norm": 0.021107483516845608, + "learning_rate": 4.02577873254565e-06, + "loss": 0.0001, + "step": 9902 + }, + { + "epoch": 0.6382032609396147, + "grad_norm": 7.06100544471114e-05, + "learning_rate": 4.0250626566416044e-06, + "loss": 0.0, + "step": 9903 + }, + { + "epoch": 0.6382677063865438, + "grad_norm": 0.004029435938904156, + "learning_rate": 4.024346580737559e-06, + "loss": 0.0, + "step": 9904 + }, + { + "epoch": 0.6383321518334729, + "grad_norm": 0.003942796891727747, + "learning_rate": 4.023630504833513e-06, + "loss": 0.0, + "step": 9905 + }, + { + "epoch": 0.6383965972804021, + "grad_norm": 0.10042532347163381, + "learning_rate": 4.022914428929467e-06, + "loss": 0.0055, + "step": 9906 + }, + { + "epoch": 0.6384610427273313, + "grad_norm": 0.013568788087903462, + "learning_rate": 4.022198353025421e-06, + "loss": 0.0, + "step": 9907 + }, + { + "epoch": 0.6385254881742605, + "grad_norm": 0.11221282353071427, + "learning_rate": 4.021482277121375e-06, + "loss": 0.0001, + "step": 9908 + }, + { + "epoch": 0.6385899336211897, + "grad_norm": 0.00044984605912318877, + "learning_rate": 4.020766201217329e-06, + "loss": 0.0, + "step": 9909 + }, + { + "epoch": 0.6386543790681188, + "grad_norm": 0.00013384249196529097, + "learning_rate": 4.020050125313284e-06, + "loss": 0.0, + "step": 9910 + }, + { + "epoch": 0.638718824515048, + "grad_norm": 0.003407667433976282, + "learning_rate": 4.019334049409237e-06, + "loss": 0.0, + "step": 9911 + }, + { + "epoch": 0.6387832699619772, + "grad_norm": 0.011748166466181148, + "learning_rate": 4.0186179735051914e-06, + "loss": 0.0, + "step": 9912 + }, + { + "epoch": 0.6388477154089064, + "grad_norm": 0.027792480039555506, + "learning_rate": 4.017901897601146e-06, + "loss": 0.0, + "step": 9913 + }, + { + "epoch": 0.6389121608558356, + "grad_norm": 0.008272129900484659, + "learning_rate": 4.017185821697101e-06, + "loss": 0.0001, + "step": 9914 + }, + { + "epoch": 0.6389766063027648, + "grad_norm": 0.0002833124323843118, + "learning_rate": 4.016469745793054e-06, + "loss": 0.0, + "step": 9915 + }, + { + "epoch": 0.6390410517496938, + "grad_norm": 0.025964498676430598, + "learning_rate": 4.015753669889009e-06, + "loss": 0.0, + "step": 9916 + }, + { + "epoch": 0.639105497196623, + "grad_norm": 0.17665134300402457, + "learning_rate": 4.015037593984963e-06, + "loss": 0.0008, + "step": 9917 + }, + { + "epoch": 0.6391699426435522, + "grad_norm": 0.010062429170806275, + "learning_rate": 4.014321518080917e-06, + "loss": 0.0, + "step": 9918 + }, + { + "epoch": 0.6392343880904814, + "grad_norm": 0.001940753301590691, + "learning_rate": 4.013605442176871e-06, + "loss": 0.0, + "step": 9919 + }, + { + "epoch": 0.6392988335374106, + "grad_norm": 0.13457927397190214, + "learning_rate": 4.012889366272825e-06, + "loss": 0.0004, + "step": 9920 + }, + { + "epoch": 0.6393632789843398, + "grad_norm": 0.00035575603334679986, + "learning_rate": 4.012173290368779e-06, + "loss": 0.0, + "step": 9921 + }, + { + "epoch": 0.6394277244312689, + "grad_norm": 0.19851455393941003, + "learning_rate": 4.011457214464734e-06, + "loss": 0.0019, + "step": 9922 + }, + { + "epoch": 0.6394921698781981, + "grad_norm": 0.0008800058176206106, + "learning_rate": 4.010741138560688e-06, + "loss": 0.0, + "step": 9923 + }, + { + "epoch": 0.6395566153251273, + "grad_norm": 0.015232629286257246, + "learning_rate": 4.010025062656641e-06, + "loss": 0.0, + "step": 9924 + }, + { + "epoch": 0.6396210607720565, + "grad_norm": 0.005831437638291429, + "learning_rate": 4.009308986752596e-06, + "loss": 0.0, + "step": 9925 + }, + { + "epoch": 0.6396855062189857, + "grad_norm": 0.00974646379763554, + "learning_rate": 4.008592910848551e-06, + "loss": 0.0, + "step": 9926 + }, + { + "epoch": 0.6397499516659148, + "grad_norm": 0.00011297209002473684, + "learning_rate": 4.007876834944504e-06, + "loss": 0.0, + "step": 9927 + }, + { + "epoch": 0.6398143971128439, + "grad_norm": 0.00044290672661680006, + "learning_rate": 4.0071607590404585e-06, + "loss": 0.0, + "step": 9928 + }, + { + "epoch": 0.6398788425597731, + "grad_norm": 0.002342356820183904, + "learning_rate": 4.006444683136413e-06, + "loss": 0.0, + "step": 9929 + }, + { + "epoch": 0.6399432880067023, + "grad_norm": 0.002250269272006791, + "learning_rate": 4.005728607232367e-06, + "loss": 0.0, + "step": 9930 + }, + { + "epoch": 0.6400077334536315, + "grad_norm": 0.007098118431674166, + "learning_rate": 4.005012531328321e-06, + "loss": 0.0001, + "step": 9931 + }, + { + "epoch": 0.6400721789005607, + "grad_norm": 0.0006225908267203256, + "learning_rate": 4.004296455424275e-06, + "loss": 0.0, + "step": 9932 + }, + { + "epoch": 0.6401366243474899, + "grad_norm": 0.002042717291839209, + "learning_rate": 4.003580379520229e-06, + "loss": 0.0, + "step": 9933 + }, + { + "epoch": 0.640201069794419, + "grad_norm": 0.8397476141266291, + "learning_rate": 4.0028643036161835e-06, + "loss": 0.0014, + "step": 9934 + }, + { + "epoch": 0.6402655152413482, + "grad_norm": 0.01909159525474567, + "learning_rate": 4.002148227712138e-06, + "loss": 0.0001, + "step": 9935 + }, + { + "epoch": 0.6403299606882774, + "grad_norm": 0.0063646263341570804, + "learning_rate": 4.001432151808092e-06, + "loss": 0.0, + "step": 9936 + }, + { + "epoch": 0.6403944061352066, + "grad_norm": 0.00024152938378510026, + "learning_rate": 4.000716075904046e-06, + "loss": 0.0, + "step": 9937 + }, + { + "epoch": 0.6404588515821357, + "grad_norm": 0.00029932230104452766, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0, + "step": 9938 + }, + { + "epoch": 0.6405232970290649, + "grad_norm": 0.01575336589201022, + "learning_rate": 3.999283924095955e-06, + "loss": 0.0001, + "step": 9939 + }, + { + "epoch": 0.640587742475994, + "grad_norm": 0.4669428800544325, + "learning_rate": 3.998567848191908e-06, + "loss": 0.0021, + "step": 9940 + }, + { + "epoch": 0.6406521879229232, + "grad_norm": 0.0012947301126589104, + "learning_rate": 3.997851772287863e-06, + "loss": 0.0, + "step": 9941 + }, + { + "epoch": 0.6407166333698524, + "grad_norm": 0.013855897877270457, + "learning_rate": 3.997135696383817e-06, + "loss": 0.0, + "step": 9942 + }, + { + "epoch": 0.6407810788167816, + "grad_norm": 0.00033208404617710225, + "learning_rate": 3.996419620479771e-06, + "loss": 0.0, + "step": 9943 + }, + { + "epoch": 0.6408455242637108, + "grad_norm": 0.10435821286848203, + "learning_rate": 3.995703544575726e-06, + "loss": 0.0006, + "step": 9944 + }, + { + "epoch": 0.64090996971064, + "grad_norm": 0.015503957950853523, + "learning_rate": 3.994987468671679e-06, + "loss": 0.0, + "step": 9945 + }, + { + "epoch": 0.6409744151575691, + "grad_norm": 0.0005417660503162809, + "learning_rate": 3.994271392767633e-06, + "loss": 0.0, + "step": 9946 + }, + { + "epoch": 0.6410388606044983, + "grad_norm": 2.6578900723737707e-05, + "learning_rate": 3.993555316863588e-06, + "loss": 0.0, + "step": 9947 + }, + { + "epoch": 0.6411033060514275, + "grad_norm": 0.00017938237437263428, + "learning_rate": 3.992839240959542e-06, + "loss": 0.0, + "step": 9948 + }, + { + "epoch": 0.6411677514983566, + "grad_norm": 0.34429821818284284, + "learning_rate": 3.992123165055496e-06, + "loss": 0.0011, + "step": 9949 + }, + { + "epoch": 0.6412321969452858, + "grad_norm": 0.0034726296503280736, + "learning_rate": 3.9914070891514506e-06, + "loss": 0.0, + "step": 9950 + }, + { + "epoch": 0.641296642392215, + "grad_norm": 0.00018911348694327, + "learning_rate": 3.990691013247405e-06, + "loss": 0.0, + "step": 9951 + }, + { + "epoch": 0.6413610878391441, + "grad_norm": 0.0021938632494621483, + "learning_rate": 3.989974937343359e-06, + "loss": 0.0, + "step": 9952 + }, + { + "epoch": 0.6414255332860733, + "grad_norm": 0.0012114879226396045, + "learning_rate": 3.989258861439313e-06, + "loss": 0.0, + "step": 9953 + }, + { + "epoch": 0.6414899787330025, + "grad_norm": 0.05394574339195268, + "learning_rate": 3.988542785535267e-06, + "loss": 0.0005, + "step": 9954 + }, + { + "epoch": 0.6415544241799317, + "grad_norm": 0.9579358243969295, + "learning_rate": 3.987826709631221e-06, + "loss": 0.0048, + "step": 9955 + }, + { + "epoch": 0.6416188696268609, + "grad_norm": 0.34143033673758666, + "learning_rate": 3.9871106337271755e-06, + "loss": 0.0042, + "step": 9956 + }, + { + "epoch": 0.6416833150737901, + "grad_norm": 0.17423421059847782, + "learning_rate": 3.98639455782313e-06, + "loss": 0.0005, + "step": 9957 + }, + { + "epoch": 0.6417477605207192, + "grad_norm": 0.0009932734330962791, + "learning_rate": 3.985678481919083e-06, + "loss": 0.0, + "step": 9958 + }, + { + "epoch": 0.6418122059676484, + "grad_norm": 0.000888577600146786, + "learning_rate": 3.9849624060150376e-06, + "loss": 0.0, + "step": 9959 + }, + { + "epoch": 0.6418766514145776, + "grad_norm": 0.012914636043180742, + "learning_rate": 3.984246330110992e-06, + "loss": 0.0002, + "step": 9960 + }, + { + "epoch": 0.6419410968615067, + "grad_norm": 0.04609532181840023, + "learning_rate": 3.983530254206946e-06, + "loss": 0.0, + "step": 9961 + }, + { + "epoch": 0.6420055423084359, + "grad_norm": 0.16728946370704814, + "learning_rate": 3.9828141783029004e-06, + "loss": 0.0015, + "step": 9962 + }, + { + "epoch": 0.6420699877553651, + "grad_norm": 0.0006231815176938104, + "learning_rate": 3.982098102398855e-06, + "loss": 0.0, + "step": 9963 + }, + { + "epoch": 0.6421344332022942, + "grad_norm": 0.04196601012932234, + "learning_rate": 3.981382026494809e-06, + "loss": 0.0001, + "step": 9964 + }, + { + "epoch": 0.6421988786492234, + "grad_norm": 0.6129773352744001, + "learning_rate": 3.980665950590763e-06, + "loss": 0.0047, + "step": 9965 + }, + { + "epoch": 0.6422633240961526, + "grad_norm": 0.006540673913466794, + "learning_rate": 3.979949874686717e-06, + "loss": 0.0, + "step": 9966 + }, + { + "epoch": 0.6423277695430818, + "grad_norm": 0.010330284011252865, + "learning_rate": 3.979233798782671e-06, + "loss": 0.0001, + "step": 9967 + }, + { + "epoch": 0.642392214990011, + "grad_norm": 0.0012068267576809393, + "learning_rate": 3.978517722878625e-06, + "loss": 0.0, + "step": 9968 + }, + { + "epoch": 0.6424566604369402, + "grad_norm": 0.004279481705147905, + "learning_rate": 3.97780164697458e-06, + "loss": 0.0, + "step": 9969 + }, + { + "epoch": 0.6425211058838693, + "grad_norm": 0.015038512520982627, + "learning_rate": 3.977085571070534e-06, + "loss": 0.0, + "step": 9970 + }, + { + "epoch": 0.6425855513307985, + "grad_norm": 0.0005527755708677373, + "learning_rate": 3.9763694951664874e-06, + "loss": 0.0, + "step": 9971 + }, + { + "epoch": 0.6426499967777276, + "grad_norm": 0.002589969194584389, + "learning_rate": 3.975653419262443e-06, + "loss": 0.0, + "step": 9972 + }, + { + "epoch": 0.6427144422246568, + "grad_norm": 0.4182773591616916, + "learning_rate": 3.974937343358397e-06, + "loss": 0.0002, + "step": 9973 + }, + { + "epoch": 0.642778887671586, + "grad_norm": 0.09478272415081301, + "learning_rate": 3.97422126745435e-06, + "loss": 0.0001, + "step": 9974 + }, + { + "epoch": 0.6428433331185152, + "grad_norm": 0.06265709550030224, + "learning_rate": 3.973505191550305e-06, + "loss": 0.0, + "step": 9975 + }, + { + "epoch": 0.6429077785654443, + "grad_norm": 0.004482413553391734, + "learning_rate": 3.972789115646259e-06, + "loss": 0.0, + "step": 9976 + }, + { + "epoch": 0.6429722240123735, + "grad_norm": 0.012129023568842196, + "learning_rate": 3.972073039742213e-06, + "loss": 0.0001, + "step": 9977 + }, + { + "epoch": 0.6430366694593027, + "grad_norm": 0.02426212837729917, + "learning_rate": 3.9713569638381675e-06, + "loss": 0.0001, + "step": 9978 + }, + { + "epoch": 0.6431011149062319, + "grad_norm": 0.388016344424232, + "learning_rate": 3.970640887934121e-06, + "loss": 0.0015, + "step": 9979 + }, + { + "epoch": 0.6431655603531611, + "grad_norm": 0.009141367046433759, + "learning_rate": 3.969924812030075e-06, + "loss": 0.0, + "step": 9980 + }, + { + "epoch": 0.6432300058000903, + "grad_norm": 0.0032954234977543386, + "learning_rate": 3.96920873612603e-06, + "loss": 0.0, + "step": 9981 + }, + { + "epoch": 0.6432944512470194, + "grad_norm": 0.0022175645149016825, + "learning_rate": 3.968492660221984e-06, + "loss": 0.0, + "step": 9982 + }, + { + "epoch": 0.6433588966939485, + "grad_norm": 0.0007280509024169878, + "learning_rate": 3.967776584317938e-06, + "loss": 0.0, + "step": 9983 + }, + { + "epoch": 0.6434233421408777, + "grad_norm": 0.056778146835200966, + "learning_rate": 3.9670605084138925e-06, + "loss": 0.0001, + "step": 9984 + }, + { + "epoch": 0.6434877875878069, + "grad_norm": 0.26815644299933966, + "learning_rate": 3.966344432509847e-06, + "loss": 0.0022, + "step": 9985 + }, + { + "epoch": 0.6435522330347361, + "grad_norm": 0.007594321936676571, + "learning_rate": 3.965628356605801e-06, + "loss": 0.0, + "step": 9986 + }, + { + "epoch": 0.6436166784816653, + "grad_norm": 0.001696701726712196, + "learning_rate": 3.9649122807017545e-06, + "loss": 0.0, + "step": 9987 + }, + { + "epoch": 0.6436811239285944, + "grad_norm": 0.001928534918932584, + "learning_rate": 3.964196204797709e-06, + "loss": 0.0, + "step": 9988 + }, + { + "epoch": 0.6437455693755236, + "grad_norm": 0.08768668886361478, + "learning_rate": 3.963480128893663e-06, + "loss": 0.0008, + "step": 9989 + }, + { + "epoch": 0.6438100148224528, + "grad_norm": 0.001866720703754344, + "learning_rate": 3.962764052989617e-06, + "loss": 0.0, + "step": 9990 + }, + { + "epoch": 0.643874460269382, + "grad_norm": 0.11344296324003514, + "learning_rate": 3.962047977085572e-06, + "loss": 0.0017, + "step": 9991 + }, + { + "epoch": 0.6439389057163112, + "grad_norm": 0.0041534937274514475, + "learning_rate": 3.961331901181525e-06, + "loss": 0.0, + "step": 9992 + }, + { + "epoch": 0.6440033511632404, + "grad_norm": 0.0043058563308134915, + "learning_rate": 3.9606158252774795e-06, + "loss": 0.0, + "step": 9993 + }, + { + "epoch": 0.6440677966101694, + "grad_norm": 0.0028722903588658793, + "learning_rate": 3.959899749373434e-06, + "loss": 0.0, + "step": 9994 + }, + { + "epoch": 0.6441322420570986, + "grad_norm": 0.0002852931476689053, + "learning_rate": 3.959183673469388e-06, + "loss": 0.0, + "step": 9995 + }, + { + "epoch": 0.6441966875040278, + "grad_norm": 1.4214037439131955, + "learning_rate": 3.958467597565342e-06, + "loss": 0.0013, + "step": 9996 + }, + { + "epoch": 0.644261132950957, + "grad_norm": 0.002912964128315929, + "learning_rate": 3.957751521661297e-06, + "loss": 0.0, + "step": 9997 + }, + { + "epoch": 0.6443255783978862, + "grad_norm": 0.008286350517975941, + "learning_rate": 3.957035445757251e-06, + "loss": 0.0, + "step": 9998 + }, + { + "epoch": 0.6443900238448154, + "grad_norm": 0.0009665192786302733, + "learning_rate": 3.956319369853205e-06, + "loss": 0.0, + "step": 9999 + }, + { + "epoch": 0.6444544692917445, + "grad_norm": 0.0026490413554310444, + "learning_rate": 3.955603293949159e-06, + "loss": 0.0, + "step": 10000 + }, + { + "epoch": 0.6445189147386737, + "grad_norm": 0.01759335401256255, + "learning_rate": 3.954887218045113e-06, + "loss": 0.0, + "step": 10001 + }, + { + "epoch": 0.6445833601856029, + "grad_norm": 0.009520969315805258, + "learning_rate": 3.954171142141067e-06, + "loss": 0.0, + "step": 10002 + }, + { + "epoch": 0.6446478056325321, + "grad_norm": 0.001728311984742966, + "learning_rate": 3.953455066237022e-06, + "loss": 0.0, + "step": 10003 + }, + { + "epoch": 0.6447122510794613, + "grad_norm": 0.0009198832699478046, + "learning_rate": 3.952738990332975e-06, + "loss": 0.0, + "step": 10004 + }, + { + "epoch": 0.6447766965263904, + "grad_norm": 0.0011534950561722203, + "learning_rate": 3.952022914428929e-06, + "loss": 0.0, + "step": 10005 + }, + { + "epoch": 0.6448411419733195, + "grad_norm": 0.0003031917733232299, + "learning_rate": 3.951306838524884e-06, + "loss": 0.0, + "step": 10006 + }, + { + "epoch": 0.6449055874202487, + "grad_norm": 0.001510916530904056, + "learning_rate": 3.950590762620839e-06, + "loss": 0.0, + "step": 10007 + }, + { + "epoch": 0.6449700328671779, + "grad_norm": 0.0451465203765521, + "learning_rate": 3.949874686716792e-06, + "loss": 0.0004, + "step": 10008 + }, + { + "epoch": 0.6450344783141071, + "grad_norm": 0.0009382365449063471, + "learning_rate": 3.9491586108127466e-06, + "loss": 0.0, + "step": 10009 + }, + { + "epoch": 0.6450989237610363, + "grad_norm": 0.00043472888646857314, + "learning_rate": 3.948442534908701e-06, + "loss": 0.0, + "step": 10010 + }, + { + "epoch": 0.6451633692079655, + "grad_norm": 0.0012430078323916194, + "learning_rate": 3.947726459004655e-06, + "loss": 0.0, + "step": 10011 + }, + { + "epoch": 0.6452278146548946, + "grad_norm": 0.0014639720055084765, + "learning_rate": 3.947010383100609e-06, + "loss": 0.0, + "step": 10012 + }, + { + "epoch": 0.6452922601018238, + "grad_norm": 0.002401018043739887, + "learning_rate": 3.946294307196563e-06, + "loss": 0.0, + "step": 10013 + }, + { + "epoch": 0.645356705548753, + "grad_norm": 0.015478704234006003, + "learning_rate": 3.945578231292517e-06, + "loss": 0.0, + "step": 10014 + }, + { + "epoch": 0.6454211509956822, + "grad_norm": 0.001973197331748077, + "learning_rate": 3.9448621553884715e-06, + "loss": 0.0, + "step": 10015 + }, + { + "epoch": 0.6454855964426113, + "grad_norm": 0.05995384634129306, + "learning_rate": 3.944146079484426e-06, + "loss": 0.0016, + "step": 10016 + }, + { + "epoch": 0.6455500418895405, + "grad_norm": 0.04199333799249915, + "learning_rate": 3.943430003580379e-06, + "loss": 0.0, + "step": 10017 + }, + { + "epoch": 0.6456144873364696, + "grad_norm": 0.05694073473606429, + "learning_rate": 3.9427139276763336e-06, + "loss": 0.0001, + "step": 10018 + }, + { + "epoch": 0.6456789327833988, + "grad_norm": 0.0034993164557464897, + "learning_rate": 3.941997851772289e-06, + "loss": 0.0, + "step": 10019 + }, + { + "epoch": 0.645743378230328, + "grad_norm": 0.009195842661245979, + "learning_rate": 3.941281775868242e-06, + "loss": 0.0001, + "step": 10020 + }, + { + "epoch": 0.6458078236772572, + "grad_norm": 0.00044558898441164694, + "learning_rate": 3.9405656999641964e-06, + "loss": 0.0, + "step": 10021 + }, + { + "epoch": 0.6458722691241864, + "grad_norm": 0.0009647395321825095, + "learning_rate": 3.939849624060151e-06, + "loss": 0.0, + "step": 10022 + }, + { + "epoch": 0.6459367145711156, + "grad_norm": 0.05492106017896077, + "learning_rate": 3.939133548156105e-06, + "loss": 0.0001, + "step": 10023 + }, + { + "epoch": 0.6460011600180448, + "grad_norm": 0.11704149687743283, + "learning_rate": 3.938417472252059e-06, + "loss": 0.0001, + "step": 10024 + }, + { + "epoch": 0.6460656054649739, + "grad_norm": 0.005956494344295038, + "learning_rate": 3.937701396348013e-06, + "loss": 0.0, + "step": 10025 + }, + { + "epoch": 0.6461300509119031, + "grad_norm": 0.00031369930774159714, + "learning_rate": 3.936985320443967e-06, + "loss": 0.0, + "step": 10026 + }, + { + "epoch": 0.6461944963588322, + "grad_norm": 0.06431663756990698, + "learning_rate": 3.936269244539921e-06, + "loss": 0.0007, + "step": 10027 + }, + { + "epoch": 0.6462589418057614, + "grad_norm": 0.0011454047402656453, + "learning_rate": 3.935553168635876e-06, + "loss": 0.0, + "step": 10028 + }, + { + "epoch": 0.6463233872526906, + "grad_norm": 0.005924979209156877, + "learning_rate": 3.93483709273183e-06, + "loss": 0.0, + "step": 10029 + }, + { + "epoch": 0.6463878326996197, + "grad_norm": 0.021247387098242476, + "learning_rate": 3.9341210168277834e-06, + "loss": 0.0003, + "step": 10030 + }, + { + "epoch": 0.6464522781465489, + "grad_norm": 0.009782796901761426, + "learning_rate": 3.933404940923739e-06, + "loss": 0.0, + "step": 10031 + }, + { + "epoch": 0.6465167235934781, + "grad_norm": 0.034589421586336565, + "learning_rate": 3.932688865019693e-06, + "loss": 0.0, + "step": 10032 + }, + { + "epoch": 0.6465811690404073, + "grad_norm": 0.18486351863565736, + "learning_rate": 3.931972789115646e-06, + "loss": 0.002, + "step": 10033 + }, + { + "epoch": 0.6466456144873365, + "grad_norm": 7.558595765490377e-05, + "learning_rate": 3.931256713211601e-06, + "loss": 0.0, + "step": 10034 + }, + { + "epoch": 0.6467100599342657, + "grad_norm": 0.001811319970900262, + "learning_rate": 3.930540637307555e-06, + "loss": 0.0, + "step": 10035 + }, + { + "epoch": 0.6467745053811949, + "grad_norm": 0.00010906105212224324, + "learning_rate": 3.929824561403509e-06, + "loss": 0.0, + "step": 10036 + }, + { + "epoch": 0.646838950828124, + "grad_norm": 3.836100339832203, + "learning_rate": 3.9291084854994635e-06, + "loss": 0.0449, + "step": 10037 + }, + { + "epoch": 0.6469033962750532, + "grad_norm": 0.0005711962089788333, + "learning_rate": 3.928392409595417e-06, + "loss": 0.0, + "step": 10038 + }, + { + "epoch": 0.6469678417219823, + "grad_norm": 0.0012421915486166405, + "learning_rate": 3.927676333691371e-06, + "loss": 0.0, + "step": 10039 + }, + { + "epoch": 0.6470322871689115, + "grad_norm": 0.10516009377572318, + "learning_rate": 3.926960257787326e-06, + "loss": 0.0004, + "step": 10040 + }, + { + "epoch": 0.6470967326158407, + "grad_norm": 0.011991362360826062, + "learning_rate": 3.92624418188328e-06, + "loss": 0.0, + "step": 10041 + }, + { + "epoch": 0.6471611780627698, + "grad_norm": 0.0014761180281003693, + "learning_rate": 3.925528105979234e-06, + "loss": 0.0, + "step": 10042 + }, + { + "epoch": 0.647225623509699, + "grad_norm": 0.0014376881504091783, + "learning_rate": 3.9248120300751885e-06, + "loss": 0.0, + "step": 10043 + }, + { + "epoch": 0.6472900689566282, + "grad_norm": 0.32301565801310034, + "learning_rate": 3.924095954171143e-06, + "loss": 0.0005, + "step": 10044 + }, + { + "epoch": 0.6473545144035574, + "grad_norm": 0.0014243909713996896, + "learning_rate": 3.923379878267097e-06, + "loss": 0.0, + "step": 10045 + }, + { + "epoch": 0.6474189598504866, + "grad_norm": 0.000623841882694804, + "learning_rate": 3.9226638023630505e-06, + "loss": 0.0, + "step": 10046 + }, + { + "epoch": 0.6474834052974158, + "grad_norm": 0.021735964259168947, + "learning_rate": 3.921947726459005e-06, + "loss": 0.0, + "step": 10047 + }, + { + "epoch": 0.647547850744345, + "grad_norm": 0.003005139298277695, + "learning_rate": 3.921231650554959e-06, + "loss": 0.0, + "step": 10048 + }, + { + "epoch": 0.6476122961912741, + "grad_norm": 0.009583776841016588, + "learning_rate": 3.920515574650913e-06, + "loss": 0.0, + "step": 10049 + }, + { + "epoch": 0.6476767416382032, + "grad_norm": 0.0008914135898324495, + "learning_rate": 3.919799498746868e-06, + "loss": 0.0, + "step": 10050 + }, + { + "epoch": 0.6477411870851324, + "grad_norm": 0.0013635385532760897, + "learning_rate": 3.919083422842821e-06, + "loss": 0.0, + "step": 10051 + }, + { + "epoch": 0.6478056325320616, + "grad_norm": 0.09818460111478787, + "learning_rate": 3.9183673469387755e-06, + "loss": 0.0012, + "step": 10052 + }, + { + "epoch": 0.6478700779789908, + "grad_norm": 0.0008677872964371634, + "learning_rate": 3.91765127103473e-06, + "loss": 0.0, + "step": 10053 + }, + { + "epoch": 0.64793452342592, + "grad_norm": 0.013500065410627189, + "learning_rate": 3.916935195130684e-06, + "loss": 0.0001, + "step": 10054 + }, + { + "epoch": 0.6479989688728491, + "grad_norm": 0.00023195192506884497, + "learning_rate": 3.916219119226638e-06, + "loss": 0.0, + "step": 10055 + }, + { + "epoch": 0.6480634143197783, + "grad_norm": 0.00956348066167725, + "learning_rate": 3.915503043322593e-06, + "loss": 0.0, + "step": 10056 + }, + { + "epoch": 0.6481278597667075, + "grad_norm": 0.0011233392884167606, + "learning_rate": 3.914786967418547e-06, + "loss": 0.0, + "step": 10057 + }, + { + "epoch": 0.6481923052136367, + "grad_norm": 0.00217687030993054, + "learning_rate": 3.914070891514501e-06, + "loss": 0.0, + "step": 10058 + }, + { + "epoch": 0.6482567506605659, + "grad_norm": 0.0007690180828168407, + "learning_rate": 3.913354815610455e-06, + "loss": 0.0, + "step": 10059 + }, + { + "epoch": 0.6483211961074951, + "grad_norm": 0.00018957142718229443, + "learning_rate": 3.912638739706409e-06, + "loss": 0.0, + "step": 10060 + }, + { + "epoch": 0.6483856415544241, + "grad_norm": 0.0033156219554045124, + "learning_rate": 3.911922663802363e-06, + "loss": 0.0, + "step": 10061 + }, + { + "epoch": 0.6484500870013533, + "grad_norm": 0.0193806849948235, + "learning_rate": 3.911206587898318e-06, + "loss": 0.0, + "step": 10062 + }, + { + "epoch": 0.6485145324482825, + "grad_norm": 0.0018474096180259869, + "learning_rate": 3.910490511994272e-06, + "loss": 0.0, + "step": 10063 + }, + { + "epoch": 0.6485789778952117, + "grad_norm": 0.0027109400656019204, + "learning_rate": 3.909774436090225e-06, + "loss": 0.0, + "step": 10064 + }, + { + "epoch": 0.6486434233421409, + "grad_norm": 0.17048972712294916, + "learning_rate": 3.90905836018618e-06, + "loss": 0.0036, + "step": 10065 + }, + { + "epoch": 0.64870786878907, + "grad_norm": 0.001639427937846142, + "learning_rate": 3.908342284282135e-06, + "loss": 0.0, + "step": 10066 + }, + { + "epoch": 0.6487723142359992, + "grad_norm": 0.0003054587604726002, + "learning_rate": 3.907626208378088e-06, + "loss": 0.0, + "step": 10067 + }, + { + "epoch": 0.6488367596829284, + "grad_norm": 0.0027535541277126873, + "learning_rate": 3.9069101324740426e-06, + "loss": 0.0, + "step": 10068 + }, + { + "epoch": 0.6489012051298576, + "grad_norm": 0.0006657856217193417, + "learning_rate": 3.906194056569997e-06, + "loss": 0.0, + "step": 10069 + }, + { + "epoch": 0.6489656505767868, + "grad_norm": 0.0060369187223777215, + "learning_rate": 3.905477980665951e-06, + "loss": 0.0, + "step": 10070 + }, + { + "epoch": 0.649030096023716, + "grad_norm": 0.0019431457633511454, + "learning_rate": 3.9047619047619055e-06, + "loss": 0.0, + "step": 10071 + }, + { + "epoch": 0.649094541470645, + "grad_norm": 0.3679831994007836, + "learning_rate": 3.904045828857859e-06, + "loss": 0.0011, + "step": 10072 + }, + { + "epoch": 0.6491589869175742, + "grad_norm": 0.007803615252833467, + "learning_rate": 3.903329752953813e-06, + "loss": 0.0001, + "step": 10073 + }, + { + "epoch": 0.6492234323645034, + "grad_norm": 0.11488098575653381, + "learning_rate": 3.9026136770497675e-06, + "loss": 0.0004, + "step": 10074 + }, + { + "epoch": 0.6492878778114326, + "grad_norm": 0.00033450819125250343, + "learning_rate": 3.901897601145722e-06, + "loss": 0.0, + "step": 10075 + }, + { + "epoch": 0.6493523232583618, + "grad_norm": 0.15049065941398837, + "learning_rate": 3.901181525241676e-06, + "loss": 0.0002, + "step": 10076 + }, + { + "epoch": 0.649416768705291, + "grad_norm": 0.001570508333555772, + "learning_rate": 3.90046544933763e-06, + "loss": 0.0, + "step": 10077 + }, + { + "epoch": 0.6494812141522202, + "grad_norm": 0.030068070105821104, + "learning_rate": 3.899749373433585e-06, + "loss": 0.0, + "step": 10078 + }, + { + "epoch": 0.6495456595991493, + "grad_norm": 0.07123143989605957, + "learning_rate": 3.899033297529539e-06, + "loss": 0.0001, + "step": 10079 + }, + { + "epoch": 0.6496101050460785, + "grad_norm": 0.007017937085895609, + "learning_rate": 3.8983172216254924e-06, + "loss": 0.0, + "step": 10080 + }, + { + "epoch": 0.6496745504930077, + "grad_norm": 0.0017217782930513232, + "learning_rate": 3.897601145721447e-06, + "loss": 0.0, + "step": 10081 + }, + { + "epoch": 0.6497389959399369, + "grad_norm": 0.46246827248698735, + "learning_rate": 3.896885069817401e-06, + "loss": 0.0006, + "step": 10082 + }, + { + "epoch": 0.649803441386866, + "grad_norm": 0.020012365780105686, + "learning_rate": 3.896168993913355e-06, + "loss": 0.0, + "step": 10083 + }, + { + "epoch": 0.6498678868337951, + "grad_norm": 0.0037748346140321957, + "learning_rate": 3.89545291800931e-06, + "loss": 0.0, + "step": 10084 + }, + { + "epoch": 0.6499323322807243, + "grad_norm": 7.76592282656403e-05, + "learning_rate": 3.894736842105263e-06, + "loss": 0.0, + "step": 10085 + }, + { + "epoch": 0.6499967777276535, + "grad_norm": 0.00016100952285215606, + "learning_rate": 3.894020766201217e-06, + "loss": 0.0, + "step": 10086 + }, + { + "epoch": 0.6500612231745827, + "grad_norm": 0.20804745112879044, + "learning_rate": 3.893304690297172e-06, + "loss": 0.0007, + "step": 10087 + }, + { + "epoch": 0.6501256686215119, + "grad_norm": 0.0005138267208881703, + "learning_rate": 3.892588614393126e-06, + "loss": 0.0, + "step": 10088 + }, + { + "epoch": 0.6501901140684411, + "grad_norm": 0.00140696667912771, + "learning_rate": 3.89187253848908e-06, + "loss": 0.0, + "step": 10089 + }, + { + "epoch": 0.6502545595153703, + "grad_norm": 0.0014816001929669846, + "learning_rate": 3.891156462585035e-06, + "loss": 0.0, + "step": 10090 + }, + { + "epoch": 0.6503190049622994, + "grad_norm": 0.011444484703948571, + "learning_rate": 3.890440386680989e-06, + "loss": 0.0, + "step": 10091 + }, + { + "epoch": 0.6503834504092286, + "grad_norm": 0.002593092375976146, + "learning_rate": 3.889724310776943e-06, + "loss": 0.0, + "step": 10092 + }, + { + "epoch": 0.6504478958561578, + "grad_norm": 0.0008823114322447084, + "learning_rate": 3.889008234872897e-06, + "loss": 0.0, + "step": 10093 + }, + { + "epoch": 0.6505123413030869, + "grad_norm": 0.45643913189772434, + "learning_rate": 3.888292158968851e-06, + "loss": 0.0005, + "step": 10094 + }, + { + "epoch": 0.6505767867500161, + "grad_norm": 0.001553883683300925, + "learning_rate": 3.887576083064805e-06, + "loss": 0.0, + "step": 10095 + }, + { + "epoch": 0.6506412321969453, + "grad_norm": 5.411795877500031e-05, + "learning_rate": 3.8868600071607595e-06, + "loss": 0.0, + "step": 10096 + }, + { + "epoch": 0.6507056776438744, + "grad_norm": 0.02975246910597393, + "learning_rate": 3.886143931256714e-06, + "loss": 0.0003, + "step": 10097 + }, + { + "epoch": 0.6507701230908036, + "grad_norm": 0.0008351681676013702, + "learning_rate": 3.885427855352667e-06, + "loss": 0.0, + "step": 10098 + }, + { + "epoch": 0.6508345685377328, + "grad_norm": 0.3116308164738484, + "learning_rate": 3.884711779448622e-06, + "loss": 0.0023, + "step": 10099 + }, + { + "epoch": 0.650899013984662, + "grad_norm": 0.003933262974662413, + "learning_rate": 3.883995703544576e-06, + "loss": 0.0, + "step": 10100 + }, + { + "epoch": 0.6509634594315912, + "grad_norm": 0.07911621652664459, + "learning_rate": 3.88327962764053e-06, + "loss": 0.0001, + "step": 10101 + }, + { + "epoch": 0.6510279048785204, + "grad_norm": 0.0004710866212675768, + "learning_rate": 3.8825635517364845e-06, + "loss": 0.0, + "step": 10102 + }, + { + "epoch": 0.6510923503254495, + "grad_norm": 0.0062199568895057135, + "learning_rate": 3.881847475832439e-06, + "loss": 0.0, + "step": 10103 + }, + { + "epoch": 0.6511567957723787, + "grad_norm": 0.47167855019758076, + "learning_rate": 3.881131399928393e-06, + "loss": 0.0011, + "step": 10104 + }, + { + "epoch": 0.6512212412193079, + "grad_norm": 0.32272571896552665, + "learning_rate": 3.880415324024347e-06, + "loss": 0.0016, + "step": 10105 + }, + { + "epoch": 0.651285686666237, + "grad_norm": 0.07696105436920875, + "learning_rate": 3.879699248120301e-06, + "loss": 0.0001, + "step": 10106 + }, + { + "epoch": 0.6513501321131662, + "grad_norm": 0.0015881369909475057, + "learning_rate": 3.878983172216255e-06, + "loss": 0.0, + "step": 10107 + }, + { + "epoch": 0.6514145775600954, + "grad_norm": 0.00023232975344055165, + "learning_rate": 3.878267096312209e-06, + "loss": 0.0, + "step": 10108 + }, + { + "epoch": 0.6514790230070245, + "grad_norm": 0.0025092237827565697, + "learning_rate": 3.877551020408164e-06, + "loss": 0.0, + "step": 10109 + }, + { + "epoch": 0.6515434684539537, + "grad_norm": 0.000561346000613679, + "learning_rate": 3.876834944504117e-06, + "loss": 0.0, + "step": 10110 + }, + { + "epoch": 0.6516079139008829, + "grad_norm": 0.08851294074908397, + "learning_rate": 3.8761188686000715e-06, + "loss": 0.0008, + "step": 10111 + }, + { + "epoch": 0.6516723593478121, + "grad_norm": 9.094776729926713e-05, + "learning_rate": 3.875402792696027e-06, + "loss": 0.0, + "step": 10112 + }, + { + "epoch": 0.6517368047947413, + "grad_norm": 0.009651514658336328, + "learning_rate": 3.874686716791981e-06, + "loss": 0.0001, + "step": 10113 + }, + { + "epoch": 0.6518012502416705, + "grad_norm": 8.080717123989275e-05, + "learning_rate": 3.873970640887934e-06, + "loss": 0.0, + "step": 10114 + }, + { + "epoch": 0.6518656956885996, + "grad_norm": 0.005040657665753197, + "learning_rate": 3.873254564983889e-06, + "loss": 0.0, + "step": 10115 + }, + { + "epoch": 0.6519301411355288, + "grad_norm": 3.942586078710562e-05, + "learning_rate": 3.872538489079843e-06, + "loss": 0.0, + "step": 10116 + }, + { + "epoch": 0.6519945865824579, + "grad_norm": 0.0007002757109208436, + "learning_rate": 3.871822413175797e-06, + "loss": 0.0, + "step": 10117 + }, + { + "epoch": 0.6520590320293871, + "grad_norm": 0.0019366055380994323, + "learning_rate": 3.871106337271751e-06, + "loss": 0.0, + "step": 10118 + }, + { + "epoch": 0.6521234774763163, + "grad_norm": 0.0035408849877017687, + "learning_rate": 3.870390261367705e-06, + "loss": 0.0, + "step": 10119 + }, + { + "epoch": 0.6521879229232455, + "grad_norm": 0.6270142760119335, + "learning_rate": 3.869674185463659e-06, + "loss": 0.0046, + "step": 10120 + }, + { + "epoch": 0.6522523683701746, + "grad_norm": 0.00039780686488753573, + "learning_rate": 3.868958109559614e-06, + "loss": 0.0, + "step": 10121 + }, + { + "epoch": 0.6523168138171038, + "grad_norm": 0.003640746475764863, + "learning_rate": 3.868242033655568e-06, + "loss": 0.0, + "step": 10122 + }, + { + "epoch": 0.652381259264033, + "grad_norm": 0.4130364426705445, + "learning_rate": 3.867525957751521e-06, + "loss": 0.0016, + "step": 10123 + }, + { + "epoch": 0.6524457047109622, + "grad_norm": 0.059154666829826646, + "learning_rate": 3.8668098818474765e-06, + "loss": 0.0004, + "step": 10124 + }, + { + "epoch": 0.6525101501578914, + "grad_norm": 0.0013061928478204214, + "learning_rate": 3.866093805943431e-06, + "loss": 0.0, + "step": 10125 + }, + { + "epoch": 0.6525745956048206, + "grad_norm": 0.28045806386242605, + "learning_rate": 3.865377730039384e-06, + "loss": 0.0004, + "step": 10126 + }, + { + "epoch": 0.6526390410517497, + "grad_norm": 0.01899712727240623, + "learning_rate": 3.8646616541353386e-06, + "loss": 0.0001, + "step": 10127 + }, + { + "epoch": 0.6527034864986788, + "grad_norm": 0.051157784159917466, + "learning_rate": 3.863945578231293e-06, + "loss": 0.0001, + "step": 10128 + }, + { + "epoch": 0.652767931945608, + "grad_norm": 0.025598438954331462, + "learning_rate": 3.863229502327247e-06, + "loss": 0.0001, + "step": 10129 + }, + { + "epoch": 0.6528323773925372, + "grad_norm": 0.0016328159009993447, + "learning_rate": 3.8625134264232015e-06, + "loss": 0.0, + "step": 10130 + }, + { + "epoch": 0.6528968228394664, + "grad_norm": 2.6339330807109222, + "learning_rate": 3.861797350519155e-06, + "loss": 0.0112, + "step": 10131 + }, + { + "epoch": 0.6529612682863956, + "grad_norm": 0.05387536153884999, + "learning_rate": 3.861081274615109e-06, + "loss": 0.0007, + "step": 10132 + }, + { + "epoch": 0.6530257137333247, + "grad_norm": 0.4103096084899589, + "learning_rate": 3.8603651987110635e-06, + "loss": 0.002, + "step": 10133 + }, + { + "epoch": 0.6530901591802539, + "grad_norm": 0.0023990892949300193, + "learning_rate": 3.859649122807018e-06, + "loss": 0.0, + "step": 10134 + }, + { + "epoch": 0.6531546046271831, + "grad_norm": 0.0025769823262489213, + "learning_rate": 3.858933046902972e-06, + "loss": 0.0, + "step": 10135 + }, + { + "epoch": 0.6532190500741123, + "grad_norm": 0.008294945393946193, + "learning_rate": 3.858216970998926e-06, + "loss": 0.0, + "step": 10136 + }, + { + "epoch": 0.6532834955210415, + "grad_norm": 0.0038449482692927794, + "learning_rate": 3.857500895094881e-06, + "loss": 0.0, + "step": 10137 + }, + { + "epoch": 0.6533479409679707, + "grad_norm": 0.016722334512031662, + "learning_rate": 3.856784819190835e-06, + "loss": 0.0002, + "step": 10138 + }, + { + "epoch": 0.6534123864148997, + "grad_norm": 0.45878159933893453, + "learning_rate": 3.8560687432867884e-06, + "loss": 0.0022, + "step": 10139 + }, + { + "epoch": 0.6534768318618289, + "grad_norm": 0.003303008831653256, + "learning_rate": 3.855352667382743e-06, + "loss": 0.0, + "step": 10140 + }, + { + "epoch": 0.6535412773087581, + "grad_norm": 0.01095606656160586, + "learning_rate": 3.854636591478697e-06, + "loss": 0.0, + "step": 10141 + }, + { + "epoch": 0.6536057227556873, + "grad_norm": 0.0007942663676417572, + "learning_rate": 3.853920515574651e-06, + "loss": 0.0, + "step": 10142 + }, + { + "epoch": 0.6536701682026165, + "grad_norm": 0.011422861115300881, + "learning_rate": 3.853204439670606e-06, + "loss": 0.0, + "step": 10143 + }, + { + "epoch": 0.6537346136495457, + "grad_norm": 0.13214908426043642, + "learning_rate": 3.852488363766559e-06, + "loss": 0.0003, + "step": 10144 + }, + { + "epoch": 0.6537990590964748, + "grad_norm": 0.00021781995305225694, + "learning_rate": 3.851772287862513e-06, + "loss": 0.0, + "step": 10145 + }, + { + "epoch": 0.653863504543404, + "grad_norm": 0.002687471652329774, + "learning_rate": 3.851056211958468e-06, + "loss": 0.0, + "step": 10146 + }, + { + "epoch": 0.6539279499903332, + "grad_norm": 0.0005095891078883199, + "learning_rate": 3.850340136054422e-06, + "loss": 0.0, + "step": 10147 + }, + { + "epoch": 0.6539923954372624, + "grad_norm": 0.00152943308971141, + "learning_rate": 3.849624060150376e-06, + "loss": 0.0, + "step": 10148 + }, + { + "epoch": 0.6540568408841916, + "grad_norm": 0.0010116890607374909, + "learning_rate": 3.848907984246331e-06, + "loss": 0.0, + "step": 10149 + }, + { + "epoch": 0.6541212863311207, + "grad_norm": 0.008247315056338593, + "learning_rate": 3.848191908342285e-06, + "loss": 0.0, + "step": 10150 + }, + { + "epoch": 0.6541857317780498, + "grad_norm": 0.03783836195338165, + "learning_rate": 3.847475832438239e-06, + "loss": 0.0001, + "step": 10151 + }, + { + "epoch": 0.654250177224979, + "grad_norm": 0.03685747469146827, + "learning_rate": 3.846759756534193e-06, + "loss": 0.0, + "step": 10152 + }, + { + "epoch": 0.6543146226719082, + "grad_norm": 0.01833364381008545, + "learning_rate": 3.846043680630147e-06, + "loss": 0.0, + "step": 10153 + }, + { + "epoch": 0.6543790681188374, + "grad_norm": 0.5712872105755907, + "learning_rate": 3.845327604726101e-06, + "loss": 0.0033, + "step": 10154 + }, + { + "epoch": 0.6544435135657666, + "grad_norm": 0.004128894377295956, + "learning_rate": 3.8446115288220555e-06, + "loss": 0.0, + "step": 10155 + }, + { + "epoch": 0.6545079590126958, + "grad_norm": 0.026285459872411314, + "learning_rate": 3.84389545291801e-06, + "loss": 0.0, + "step": 10156 + }, + { + "epoch": 0.654572404459625, + "grad_norm": 0.010629958355985208, + "learning_rate": 3.843179377013963e-06, + "loss": 0.0001, + "step": 10157 + }, + { + "epoch": 0.6546368499065541, + "grad_norm": 0.0003458473109674389, + "learning_rate": 3.842463301109918e-06, + "loss": 0.0, + "step": 10158 + }, + { + "epoch": 0.6547012953534833, + "grad_norm": 0.019025562351784445, + "learning_rate": 3.841747225205873e-06, + "loss": 0.0, + "step": 10159 + }, + { + "epoch": 0.6547657408004125, + "grad_norm": 0.0031882815792937374, + "learning_rate": 3.841031149301826e-06, + "loss": 0.0, + "step": 10160 + }, + { + "epoch": 0.6548301862473416, + "grad_norm": 0.01813268104120978, + "learning_rate": 3.8403150733977805e-06, + "loss": 0.0, + "step": 10161 + }, + { + "epoch": 0.6548946316942708, + "grad_norm": 0.02995992804919363, + "learning_rate": 3.839598997493735e-06, + "loss": 0.0, + "step": 10162 + }, + { + "epoch": 0.6549590771411999, + "grad_norm": 0.006443501348050104, + "learning_rate": 3.838882921589689e-06, + "loss": 0.0, + "step": 10163 + }, + { + "epoch": 0.6550235225881291, + "grad_norm": 0.00042109950495271373, + "learning_rate": 3.838166845685643e-06, + "loss": 0.0, + "step": 10164 + }, + { + "epoch": 0.6550879680350583, + "grad_norm": 0.0008626208900784901, + "learning_rate": 3.837450769781597e-06, + "loss": 0.0, + "step": 10165 + }, + { + "epoch": 0.6551524134819875, + "grad_norm": 0.003391952493412297, + "learning_rate": 3.836734693877551e-06, + "loss": 0.0, + "step": 10166 + }, + { + "epoch": 0.6552168589289167, + "grad_norm": 0.005285101991014211, + "learning_rate": 3.836018617973505e-06, + "loss": 0.0, + "step": 10167 + }, + { + "epoch": 0.6552813043758459, + "grad_norm": 0.01946311256116589, + "learning_rate": 3.83530254206946e-06, + "loss": 0.0, + "step": 10168 + }, + { + "epoch": 0.655345749822775, + "grad_norm": 0.0026740134172567116, + "learning_rate": 3.834586466165414e-06, + "loss": 0.0, + "step": 10169 + }, + { + "epoch": 0.6554101952697042, + "grad_norm": 0.0033936852631130097, + "learning_rate": 3.8338703902613675e-06, + "loss": 0.0, + "step": 10170 + }, + { + "epoch": 0.6554746407166334, + "grad_norm": 0.0009185211832299836, + "learning_rate": 3.833154314357323e-06, + "loss": 0.0, + "step": 10171 + }, + { + "epoch": 0.6555390861635625, + "grad_norm": 0.06459203475290538, + "learning_rate": 3.832438238453277e-06, + "loss": 0.0005, + "step": 10172 + }, + { + "epoch": 0.6556035316104917, + "grad_norm": 0.0026117542544247074, + "learning_rate": 3.83172216254923e-06, + "loss": 0.0, + "step": 10173 + }, + { + "epoch": 0.6556679770574209, + "grad_norm": 0.1658535446637217, + "learning_rate": 3.831006086645185e-06, + "loss": 0.0001, + "step": 10174 + }, + { + "epoch": 0.65573242250435, + "grad_norm": 0.12451728359577288, + "learning_rate": 3.830290010741139e-06, + "loss": 0.002, + "step": 10175 + }, + { + "epoch": 0.6557968679512792, + "grad_norm": 0.30647530928600114, + "learning_rate": 3.829573934837093e-06, + "loss": 0.0006, + "step": 10176 + }, + { + "epoch": 0.6558613133982084, + "grad_norm": 0.05586064285560092, + "learning_rate": 3.8288578589330476e-06, + "loss": 0.0002, + "step": 10177 + }, + { + "epoch": 0.6559257588451376, + "grad_norm": 0.0025995801811718247, + "learning_rate": 3.828141783029001e-06, + "loss": 0.0, + "step": 10178 + }, + { + "epoch": 0.6559902042920668, + "grad_norm": 0.0038249163839321164, + "learning_rate": 3.827425707124955e-06, + "loss": 0.0, + "step": 10179 + }, + { + "epoch": 0.656054649738996, + "grad_norm": 0.4108950183217354, + "learning_rate": 3.82670963122091e-06, + "loss": 0.0029, + "step": 10180 + }, + { + "epoch": 0.6561190951859251, + "grad_norm": 0.004374683406390487, + "learning_rate": 3.825993555316864e-06, + "loss": 0.0, + "step": 10181 + }, + { + "epoch": 0.6561835406328543, + "grad_norm": 0.0014969097910061068, + "learning_rate": 3.825277479412818e-06, + "loss": 0.0, + "step": 10182 + }, + { + "epoch": 0.6562479860797835, + "grad_norm": 0.0012723266838122523, + "learning_rate": 3.8245614035087725e-06, + "loss": 0.0, + "step": 10183 + }, + { + "epoch": 0.6563124315267126, + "grad_norm": 0.008525120834268957, + "learning_rate": 3.823845327604727e-06, + "loss": 0.0, + "step": 10184 + }, + { + "epoch": 0.6563768769736418, + "grad_norm": 0.0030668060505652244, + "learning_rate": 3.823129251700681e-06, + "loss": 0.0, + "step": 10185 + }, + { + "epoch": 0.656441322420571, + "grad_norm": 0.00534143900713158, + "learning_rate": 3.8224131757966346e-06, + "loss": 0.0, + "step": 10186 + }, + { + "epoch": 0.6565057678675001, + "grad_norm": 0.0037717083756212658, + "learning_rate": 3.821697099892589e-06, + "loss": 0.0, + "step": 10187 + }, + { + "epoch": 0.6565702133144293, + "grad_norm": 0.0012179376897736268, + "learning_rate": 3.820981023988543e-06, + "loss": 0.0, + "step": 10188 + }, + { + "epoch": 0.6566346587613585, + "grad_norm": 0.00037249200636945535, + "learning_rate": 3.8202649480844975e-06, + "loss": 0.0, + "step": 10189 + }, + { + "epoch": 0.6566991042082877, + "grad_norm": 0.017734283475125033, + "learning_rate": 3.819548872180452e-06, + "loss": 0.0002, + "step": 10190 + }, + { + "epoch": 0.6567635496552169, + "grad_norm": 0.020177854077664724, + "learning_rate": 3.818832796276405e-06, + "loss": 0.0, + "step": 10191 + }, + { + "epoch": 0.6568279951021461, + "grad_norm": 0.007316823046694329, + "learning_rate": 3.8181167203723595e-06, + "loss": 0.0, + "step": 10192 + }, + { + "epoch": 0.6568924405490753, + "grad_norm": 0.0005207608387712419, + "learning_rate": 3.817400644468314e-06, + "loss": 0.0, + "step": 10193 + }, + { + "epoch": 0.6569568859960044, + "grad_norm": 0.0009445133087726743, + "learning_rate": 3.816684568564268e-06, + "loss": 0.0, + "step": 10194 + }, + { + "epoch": 0.6570213314429335, + "grad_norm": 0.02219222355146514, + "learning_rate": 3.815968492660222e-06, + "loss": 0.0, + "step": 10195 + }, + { + "epoch": 0.6570857768898627, + "grad_norm": 0.0012817471509581164, + "learning_rate": 3.815252416756177e-06, + "loss": 0.0, + "step": 10196 + }, + { + "epoch": 0.6571502223367919, + "grad_norm": 0.010393691367853066, + "learning_rate": 3.8145363408521306e-06, + "loss": 0.0, + "step": 10197 + }, + { + "epoch": 0.6572146677837211, + "grad_norm": 0.016148501614400432, + "learning_rate": 3.813820264948085e-06, + "loss": 0.0, + "step": 10198 + }, + { + "epoch": 0.6572791132306502, + "grad_norm": 0.028812178897839727, + "learning_rate": 3.813104189044039e-06, + "loss": 0.0, + "step": 10199 + }, + { + "epoch": 0.6573435586775794, + "grad_norm": 0.0002261249635128685, + "learning_rate": 3.812388113139993e-06, + "loss": 0.0, + "step": 10200 + }, + { + "epoch": 0.6574080041245086, + "grad_norm": 0.001779168386697235, + "learning_rate": 3.8116720372359473e-06, + "loss": 0.0, + "step": 10201 + }, + { + "epoch": 0.6574724495714378, + "grad_norm": 0.0011019397221192367, + "learning_rate": 3.8109559613319012e-06, + "loss": 0.0, + "step": 10202 + }, + { + "epoch": 0.657536895018367, + "grad_norm": 0.00014219508472730657, + "learning_rate": 3.8102398854278555e-06, + "loss": 0.0, + "step": 10203 + }, + { + "epoch": 0.6576013404652962, + "grad_norm": 0.0265215081898783, + "learning_rate": 3.80952380952381e-06, + "loss": 0.0002, + "step": 10204 + }, + { + "epoch": 0.6576657859122254, + "grad_norm": 0.03401318937206382, + "learning_rate": 3.8088077336197637e-06, + "loss": 0.0002, + "step": 10205 + }, + { + "epoch": 0.6577302313591544, + "grad_norm": 0.07009893009647121, + "learning_rate": 3.8080916577157184e-06, + "loss": 0.0, + "step": 10206 + }, + { + "epoch": 0.6577946768060836, + "grad_norm": 0.008663384096209157, + "learning_rate": 3.8073755818116727e-06, + "loss": 0.0, + "step": 10207 + }, + { + "epoch": 0.6578591222530128, + "grad_norm": 9.288414808061254e-05, + "learning_rate": 3.8066595059076266e-06, + "loss": 0.0, + "step": 10208 + }, + { + "epoch": 0.657923567699942, + "grad_norm": 0.007428766665651455, + "learning_rate": 3.805943430003581e-06, + "loss": 0.0, + "step": 10209 + }, + { + "epoch": 0.6579880131468712, + "grad_norm": 0.026942313183641934, + "learning_rate": 3.8052273540995348e-06, + "loss": 0.0, + "step": 10210 + }, + { + "epoch": 0.6580524585938003, + "grad_norm": 0.0005140052428280983, + "learning_rate": 3.804511278195489e-06, + "loss": 0.0, + "step": 10211 + }, + { + "epoch": 0.6581169040407295, + "grad_norm": 0.07336983683314319, + "learning_rate": 3.8037952022914434e-06, + "loss": 0.0001, + "step": 10212 + }, + { + "epoch": 0.6581813494876587, + "grad_norm": 0.03837160424420366, + "learning_rate": 3.8030791263873972e-06, + "loss": 0.0001, + "step": 10213 + }, + { + "epoch": 0.6582457949345879, + "grad_norm": 0.03194277067175515, + "learning_rate": 3.8023630504833515e-06, + "loss": 0.0, + "step": 10214 + }, + { + "epoch": 0.6583102403815171, + "grad_norm": 0.008725165434776942, + "learning_rate": 3.8016469745793054e-06, + "loss": 0.0, + "step": 10215 + }, + { + "epoch": 0.6583746858284463, + "grad_norm": 0.011985939675556128, + "learning_rate": 3.8009308986752597e-06, + "loss": 0.0, + "step": 10216 + }, + { + "epoch": 0.6584391312753753, + "grad_norm": 0.09138096272130088, + "learning_rate": 3.8002148227712136e-06, + "loss": 0.0001, + "step": 10217 + }, + { + "epoch": 0.6585035767223045, + "grad_norm": 0.1891860258878625, + "learning_rate": 3.7994987468671683e-06, + "loss": 0.0002, + "step": 10218 + }, + { + "epoch": 0.6585680221692337, + "grad_norm": 0.08866577064407546, + "learning_rate": 3.7987826709631226e-06, + "loss": 0.0001, + "step": 10219 + }, + { + "epoch": 0.6586324676161629, + "grad_norm": 0.006611086351549965, + "learning_rate": 3.798066595059077e-06, + "loss": 0.0, + "step": 10220 + }, + { + "epoch": 0.6586969130630921, + "grad_norm": 0.00027910876529814774, + "learning_rate": 3.7973505191550308e-06, + "loss": 0.0, + "step": 10221 + }, + { + "epoch": 0.6587613585100213, + "grad_norm": 0.007999838552962334, + "learning_rate": 3.796634443250985e-06, + "loss": 0.0001, + "step": 10222 + }, + { + "epoch": 0.6588258039569505, + "grad_norm": 0.2032794548559423, + "learning_rate": 3.795918367346939e-06, + "loss": 0.0005, + "step": 10223 + }, + { + "epoch": 0.6588902494038796, + "grad_norm": 0.0003595132206011157, + "learning_rate": 3.7952022914428932e-06, + "loss": 0.0, + "step": 10224 + }, + { + "epoch": 0.6589546948508088, + "grad_norm": 0.0008375620930174066, + "learning_rate": 3.794486215538847e-06, + "loss": 0.0, + "step": 10225 + }, + { + "epoch": 0.659019140297738, + "grad_norm": 0.020827546998719356, + "learning_rate": 3.7937701396348014e-06, + "loss": 0.0, + "step": 10226 + }, + { + "epoch": 0.6590835857446672, + "grad_norm": 0.03600446840366598, + "learning_rate": 3.7930540637307557e-06, + "loss": 0.0001, + "step": 10227 + }, + { + "epoch": 0.6591480311915963, + "grad_norm": 0.004241069782651026, + "learning_rate": 3.7923379878267096e-06, + "loss": 0.0, + "step": 10228 + }, + { + "epoch": 0.6592124766385254, + "grad_norm": 0.019451562174052747, + "learning_rate": 3.7916219119226643e-06, + "loss": 0.0001, + "step": 10229 + }, + { + "epoch": 0.6592769220854546, + "grad_norm": 0.002471784102794206, + "learning_rate": 3.7909058360186186e-06, + "loss": 0.0, + "step": 10230 + }, + { + "epoch": 0.6593413675323838, + "grad_norm": 0.006852716105307031, + "learning_rate": 3.7901897601145725e-06, + "loss": 0.0, + "step": 10231 + }, + { + "epoch": 0.659405812979313, + "grad_norm": 0.06718536487225223, + "learning_rate": 3.789473684210527e-06, + "loss": 0.0003, + "step": 10232 + }, + { + "epoch": 0.6594702584262422, + "grad_norm": 0.15153008586304992, + "learning_rate": 3.7887576083064807e-06, + "loss": 0.0007, + "step": 10233 + }, + { + "epoch": 0.6595347038731714, + "grad_norm": 0.10961949745578928, + "learning_rate": 3.788041532402435e-06, + "loss": 0.0001, + "step": 10234 + }, + { + "epoch": 0.6595991493201006, + "grad_norm": 0.00020195634307442343, + "learning_rate": 3.7873254564983893e-06, + "loss": 0.0, + "step": 10235 + }, + { + "epoch": 0.6596635947670297, + "grad_norm": 0.008753787719257302, + "learning_rate": 3.786609380594343e-06, + "loss": 0.0001, + "step": 10236 + }, + { + "epoch": 0.6597280402139589, + "grad_norm": 0.00017843003587698393, + "learning_rate": 3.7858933046902974e-06, + "loss": 0.0, + "step": 10237 + }, + { + "epoch": 0.6597924856608881, + "grad_norm": 0.003163697938415987, + "learning_rate": 3.7851772287862513e-06, + "loss": 0.0, + "step": 10238 + }, + { + "epoch": 0.6598569311078172, + "grad_norm": 0.0016409395954385996, + "learning_rate": 3.7844611528822056e-06, + "loss": 0.0, + "step": 10239 + }, + { + "epoch": 0.6599213765547464, + "grad_norm": 0.0016194677414049581, + "learning_rate": 3.78374507697816e-06, + "loss": 0.0, + "step": 10240 + }, + { + "epoch": 0.6599858220016755, + "grad_norm": 0.2341164513889306, + "learning_rate": 3.783029001074114e-06, + "loss": 0.002, + "step": 10241 + }, + { + "epoch": 0.6600502674486047, + "grad_norm": 0.00014059234136107755, + "learning_rate": 3.7823129251700685e-06, + "loss": 0.0, + "step": 10242 + }, + { + "epoch": 0.6601147128955339, + "grad_norm": 0.012938931855012437, + "learning_rate": 3.781596849266023e-06, + "loss": 0.0001, + "step": 10243 + }, + { + "epoch": 0.6601791583424631, + "grad_norm": 0.00014948623886025877, + "learning_rate": 3.7808807733619767e-06, + "loss": 0.0, + "step": 10244 + }, + { + "epoch": 0.6602436037893923, + "grad_norm": 0.0002305776851317747, + "learning_rate": 3.780164697457931e-06, + "loss": 0.0, + "step": 10245 + }, + { + "epoch": 0.6603080492363215, + "grad_norm": 0.0003601501449476667, + "learning_rate": 3.779448621553885e-06, + "loss": 0.0, + "step": 10246 + }, + { + "epoch": 0.6603724946832507, + "grad_norm": 0.006198069005480283, + "learning_rate": 3.778732545649839e-06, + "loss": 0.0, + "step": 10247 + }, + { + "epoch": 0.6604369401301798, + "grad_norm": 0.003078498213774524, + "learning_rate": 3.7780164697457935e-06, + "loss": 0.0, + "step": 10248 + }, + { + "epoch": 0.660501385577109, + "grad_norm": 0.0014853955072896012, + "learning_rate": 3.7773003938417473e-06, + "loss": 0.0, + "step": 10249 + }, + { + "epoch": 0.6605658310240381, + "grad_norm": 0.0002482578605181744, + "learning_rate": 3.7765843179377016e-06, + "loss": 0.0, + "step": 10250 + }, + { + "epoch": 0.6606302764709673, + "grad_norm": 0.8442832180262457, + "learning_rate": 3.7758682420336555e-06, + "loss": 0.0057, + "step": 10251 + }, + { + "epoch": 0.6606947219178965, + "grad_norm": 9.143114696421962e-06, + "learning_rate": 3.77515216612961e-06, + "loss": 0.0, + "step": 10252 + }, + { + "epoch": 0.6607591673648257, + "grad_norm": 0.05428738982897947, + "learning_rate": 3.7744360902255645e-06, + "loss": 0.0001, + "step": 10253 + }, + { + "epoch": 0.6608236128117548, + "grad_norm": 0.0005961785885248, + "learning_rate": 3.7737200143215184e-06, + "loss": 0.0, + "step": 10254 + }, + { + "epoch": 0.660888058258684, + "grad_norm": 0.003677904195814865, + "learning_rate": 3.7730039384174727e-06, + "loss": 0.0, + "step": 10255 + }, + { + "epoch": 0.6609525037056132, + "grad_norm": 0.03861703337575527, + "learning_rate": 3.772287862513427e-06, + "loss": 0.0001, + "step": 10256 + }, + { + "epoch": 0.6610169491525424, + "grad_norm": 0.000588057262521846, + "learning_rate": 3.771571786609381e-06, + "loss": 0.0, + "step": 10257 + }, + { + "epoch": 0.6610813945994716, + "grad_norm": 0.0016087935946511298, + "learning_rate": 3.770855710705335e-06, + "loss": 0.0, + "step": 10258 + }, + { + "epoch": 0.6611458400464008, + "grad_norm": 0.0012067817277527145, + "learning_rate": 3.770139634801289e-06, + "loss": 0.0, + "step": 10259 + }, + { + "epoch": 0.6612102854933299, + "grad_norm": 0.00023797434159941603, + "learning_rate": 3.7694235588972433e-06, + "loss": 0.0, + "step": 10260 + }, + { + "epoch": 0.6612747309402591, + "grad_norm": 0.0037044470691454294, + "learning_rate": 3.7687074829931976e-06, + "loss": 0.0, + "step": 10261 + }, + { + "epoch": 0.6613391763871882, + "grad_norm": 8.325739223354846e-05, + "learning_rate": 3.7679914070891515e-06, + "loss": 0.0, + "step": 10262 + }, + { + "epoch": 0.6614036218341174, + "grad_norm": 0.45104876403275584, + "learning_rate": 3.767275331185106e-06, + "loss": 0.0009, + "step": 10263 + }, + { + "epoch": 0.6614680672810466, + "grad_norm": 0.03306756149105825, + "learning_rate": 3.7665592552810605e-06, + "loss": 0.0002, + "step": 10264 + }, + { + "epoch": 0.6615325127279758, + "grad_norm": 0.0009416338385984487, + "learning_rate": 3.7658431793770144e-06, + "loss": 0.0, + "step": 10265 + }, + { + "epoch": 0.6615969581749049, + "grad_norm": 0.000314439940892325, + "learning_rate": 3.7651271034729687e-06, + "loss": 0.0, + "step": 10266 + }, + { + "epoch": 0.6616614036218341, + "grad_norm": 0.000485502264331188, + "learning_rate": 3.7644110275689226e-06, + "loss": 0.0, + "step": 10267 + }, + { + "epoch": 0.6617258490687633, + "grad_norm": 0.0014131418138240244, + "learning_rate": 3.763694951664877e-06, + "loss": 0.0, + "step": 10268 + }, + { + "epoch": 0.6617902945156925, + "grad_norm": 0.0012902404209281864, + "learning_rate": 3.762978875760831e-06, + "loss": 0.0, + "step": 10269 + }, + { + "epoch": 0.6618547399626217, + "grad_norm": 0.5734556928030273, + "learning_rate": 3.762262799856785e-06, + "loss": 0.0018, + "step": 10270 + }, + { + "epoch": 0.6619191854095509, + "grad_norm": 0.016855807832785976, + "learning_rate": 3.7615467239527394e-06, + "loss": 0.0, + "step": 10271 + }, + { + "epoch": 0.66198363085648, + "grad_norm": 0.06585408753475606, + "learning_rate": 3.7608306480486932e-06, + "loss": 0.0001, + "step": 10272 + }, + { + "epoch": 0.6620480763034091, + "grad_norm": 0.0023837119585305598, + "learning_rate": 3.7601145721446475e-06, + "loss": 0.0, + "step": 10273 + }, + { + "epoch": 0.6621125217503383, + "grad_norm": 0.03470923425898855, + "learning_rate": 3.7593984962406014e-06, + "loss": 0.0, + "step": 10274 + }, + { + "epoch": 0.6621769671972675, + "grad_norm": 0.004313669130485635, + "learning_rate": 3.7586824203365557e-06, + "loss": 0.0, + "step": 10275 + }, + { + "epoch": 0.6622414126441967, + "grad_norm": 0.21375927750213114, + "learning_rate": 3.7579663444325104e-06, + "loss": 0.0007, + "step": 10276 + }, + { + "epoch": 0.6623058580911259, + "grad_norm": 0.00922187225142663, + "learning_rate": 3.7572502685284647e-06, + "loss": 0.0, + "step": 10277 + }, + { + "epoch": 0.662370303538055, + "grad_norm": 0.0019563550661400313, + "learning_rate": 3.7565341926244186e-06, + "loss": 0.0, + "step": 10278 + }, + { + "epoch": 0.6624347489849842, + "grad_norm": 0.012985174580154232, + "learning_rate": 3.755818116720373e-06, + "loss": 0.0, + "step": 10279 + }, + { + "epoch": 0.6624991944319134, + "grad_norm": 0.1419093634249794, + "learning_rate": 3.7551020408163268e-06, + "loss": 0.0015, + "step": 10280 + }, + { + "epoch": 0.6625636398788426, + "grad_norm": 7.844805718374012e-05, + "learning_rate": 3.754385964912281e-06, + "loss": 0.0, + "step": 10281 + }, + { + "epoch": 0.6626280853257718, + "grad_norm": 0.000882768606512407, + "learning_rate": 3.753669889008235e-06, + "loss": 0.0, + "step": 10282 + }, + { + "epoch": 0.662692530772701, + "grad_norm": 0.0002696189077644942, + "learning_rate": 3.7529538131041892e-06, + "loss": 0.0, + "step": 10283 + }, + { + "epoch": 0.66275697621963, + "grad_norm": 0.064502581492965, + "learning_rate": 3.7522377372001435e-06, + "loss": 0.0011, + "step": 10284 + }, + { + "epoch": 0.6628214216665592, + "grad_norm": 0.00019324913286995657, + "learning_rate": 3.7515216612960974e-06, + "loss": 0.0, + "step": 10285 + }, + { + "epoch": 0.6628858671134884, + "grad_norm": 0.005003145435338595, + "learning_rate": 3.7508055853920517e-06, + "loss": 0.0, + "step": 10286 + }, + { + "epoch": 0.6629503125604176, + "grad_norm": 0.005832136003543727, + "learning_rate": 3.7500895094880056e-06, + "loss": 0.0, + "step": 10287 + }, + { + "epoch": 0.6630147580073468, + "grad_norm": 0.008914760200775532, + "learning_rate": 3.7493734335839603e-06, + "loss": 0.0, + "step": 10288 + }, + { + "epoch": 0.663079203454276, + "grad_norm": 0.06776215709505574, + "learning_rate": 3.7486573576799146e-06, + "loss": 0.0001, + "step": 10289 + }, + { + "epoch": 0.6631436489012051, + "grad_norm": 0.0010801679433969562, + "learning_rate": 3.7479412817758685e-06, + "loss": 0.0, + "step": 10290 + }, + { + "epoch": 0.6632080943481343, + "grad_norm": 0.3396181804918623, + "learning_rate": 3.747225205871823e-06, + "loss": 0.0009, + "step": 10291 + }, + { + "epoch": 0.6632725397950635, + "grad_norm": 0.1640217764584769, + "learning_rate": 3.746509129967777e-06, + "loss": 0.0024, + "step": 10292 + }, + { + "epoch": 0.6633369852419927, + "grad_norm": 0.0013012118961681762, + "learning_rate": 3.745793054063731e-06, + "loss": 0.0, + "step": 10293 + }, + { + "epoch": 0.6634014306889219, + "grad_norm": 0.05860680792265722, + "learning_rate": 3.7450769781596853e-06, + "loss": 0.0002, + "step": 10294 + }, + { + "epoch": 0.663465876135851, + "grad_norm": 0.8549390572584554, + "learning_rate": 3.744360902255639e-06, + "loss": 0.0212, + "step": 10295 + }, + { + "epoch": 0.6635303215827801, + "grad_norm": 0.0017172884277614458, + "learning_rate": 3.7436448263515934e-06, + "loss": 0.0, + "step": 10296 + }, + { + "epoch": 0.6635947670297093, + "grad_norm": 0.08668761005700598, + "learning_rate": 3.7429287504475477e-06, + "loss": 0.0001, + "step": 10297 + }, + { + "epoch": 0.6636592124766385, + "grad_norm": 0.0023396848876374708, + "learning_rate": 3.7422126745435016e-06, + "loss": 0.0, + "step": 10298 + }, + { + "epoch": 0.6637236579235677, + "grad_norm": 0.0006625158127470572, + "learning_rate": 3.7414965986394563e-06, + "loss": 0.0, + "step": 10299 + }, + { + "epoch": 0.6637881033704969, + "grad_norm": 0.006708177138559468, + "learning_rate": 3.7407805227354106e-06, + "loss": 0.0, + "step": 10300 + }, + { + "epoch": 0.6638525488174261, + "grad_norm": 0.0007932890888525091, + "learning_rate": 3.7400644468313645e-06, + "loss": 0.0, + "step": 10301 + }, + { + "epoch": 0.6639169942643552, + "grad_norm": 0.0015757591637932753, + "learning_rate": 3.739348370927319e-06, + "loss": 0.0, + "step": 10302 + }, + { + "epoch": 0.6639814397112844, + "grad_norm": 0.002359474580749016, + "learning_rate": 3.7386322950232727e-06, + "loss": 0.0, + "step": 10303 + }, + { + "epoch": 0.6640458851582136, + "grad_norm": 0.047649399272796086, + "learning_rate": 3.737916219119227e-06, + "loss": 0.0002, + "step": 10304 + }, + { + "epoch": 0.6641103306051428, + "grad_norm": 0.02729586683217372, + "learning_rate": 3.7372001432151813e-06, + "loss": 0.0001, + "step": 10305 + }, + { + "epoch": 0.6641747760520719, + "grad_norm": 0.0031866841413431637, + "learning_rate": 3.736484067311135e-06, + "loss": 0.0, + "step": 10306 + }, + { + "epoch": 0.664239221499001, + "grad_norm": 0.0036964935657331686, + "learning_rate": 3.7357679914070895e-06, + "loss": 0.0, + "step": 10307 + }, + { + "epoch": 0.6643036669459302, + "grad_norm": 0.003219235261201563, + "learning_rate": 3.7350519155030433e-06, + "loss": 0.0, + "step": 10308 + }, + { + "epoch": 0.6643681123928594, + "grad_norm": 0.22426532434435242, + "learning_rate": 3.7343358395989976e-06, + "loss": 0.0003, + "step": 10309 + }, + { + "epoch": 0.6644325578397886, + "grad_norm": 0.0018211682093739476, + "learning_rate": 3.733619763694952e-06, + "loss": 0.0, + "step": 10310 + }, + { + "epoch": 0.6644970032867178, + "grad_norm": 0.2506031122662469, + "learning_rate": 3.7329036877909062e-06, + "loss": 0.0008, + "step": 10311 + }, + { + "epoch": 0.664561448733647, + "grad_norm": 0.0004966368882332178, + "learning_rate": 3.7321876118868605e-06, + "loss": 0.0, + "step": 10312 + }, + { + "epoch": 0.6646258941805762, + "grad_norm": 0.03001260390392036, + "learning_rate": 3.731471535982815e-06, + "loss": 0.0, + "step": 10313 + }, + { + "epoch": 0.6646903396275053, + "grad_norm": 0.0038538890073602046, + "learning_rate": 3.7307554600787687e-06, + "loss": 0.0, + "step": 10314 + }, + { + "epoch": 0.6647547850744345, + "grad_norm": 0.02009810895984538, + "learning_rate": 3.730039384174723e-06, + "loss": 0.0, + "step": 10315 + }, + { + "epoch": 0.6648192305213637, + "grad_norm": 0.0017787021194924147, + "learning_rate": 3.729323308270677e-06, + "loss": 0.0, + "step": 10316 + }, + { + "epoch": 0.6648836759682928, + "grad_norm": 0.2367472360801086, + "learning_rate": 3.728607232366631e-06, + "loss": 0.0008, + "step": 10317 + }, + { + "epoch": 0.664948121415222, + "grad_norm": 0.001101869151755424, + "learning_rate": 3.7278911564625855e-06, + "loss": 0.0, + "step": 10318 + }, + { + "epoch": 0.6650125668621512, + "grad_norm": 6.84407851724217e-05, + "learning_rate": 3.7271750805585393e-06, + "loss": 0.0, + "step": 10319 + }, + { + "epoch": 0.6650770123090803, + "grad_norm": 0.04718631936326828, + "learning_rate": 3.7264590046544936e-06, + "loss": 0.0001, + "step": 10320 + }, + { + "epoch": 0.6651414577560095, + "grad_norm": 0.002101132604809257, + "learning_rate": 3.7257429287504475e-06, + "loss": 0.0, + "step": 10321 + }, + { + "epoch": 0.6652059032029387, + "grad_norm": 0.054981715372835395, + "learning_rate": 3.725026852846402e-06, + "loss": 0.0, + "step": 10322 + }, + { + "epoch": 0.6652703486498679, + "grad_norm": 0.0002458432285043592, + "learning_rate": 3.7243107769423565e-06, + "loss": 0.0, + "step": 10323 + }, + { + "epoch": 0.6653347940967971, + "grad_norm": 0.14842282398353007, + "learning_rate": 3.7235947010383104e-06, + "loss": 0.0018, + "step": 10324 + }, + { + "epoch": 0.6653992395437263, + "grad_norm": 0.06344344302113403, + "learning_rate": 3.7228786251342647e-06, + "loss": 0.0, + "step": 10325 + }, + { + "epoch": 0.6654636849906554, + "grad_norm": 0.0023448721186625793, + "learning_rate": 3.722162549230219e-06, + "loss": 0.0, + "step": 10326 + }, + { + "epoch": 0.6655281304375846, + "grad_norm": 0.019546412006886026, + "learning_rate": 3.721446473326173e-06, + "loss": 0.0, + "step": 10327 + }, + { + "epoch": 0.6655925758845137, + "grad_norm": 0.16962522064579758, + "learning_rate": 3.720730397422127e-06, + "loss": 0.0005, + "step": 10328 + }, + { + "epoch": 0.6656570213314429, + "grad_norm": 0.01771669440975255, + "learning_rate": 3.720014321518081e-06, + "loss": 0.0, + "step": 10329 + }, + { + "epoch": 0.6657214667783721, + "grad_norm": 0.00024993857282373024, + "learning_rate": 3.7192982456140354e-06, + "loss": 0.0, + "step": 10330 + }, + { + "epoch": 0.6657859122253013, + "grad_norm": 0.3408004951727263, + "learning_rate": 3.7185821697099892e-06, + "loss": 0.0013, + "step": 10331 + }, + { + "epoch": 0.6658503576722304, + "grad_norm": 0.0005357757778321938, + "learning_rate": 3.7178660938059435e-06, + "loss": 0.0, + "step": 10332 + }, + { + "epoch": 0.6659148031191596, + "grad_norm": 0.03956123790735388, + "learning_rate": 3.717150017901898e-06, + "loss": 0.0001, + "step": 10333 + }, + { + "epoch": 0.6659792485660888, + "grad_norm": 0.0036982926831180154, + "learning_rate": 3.7164339419978526e-06, + "loss": 0.0, + "step": 10334 + }, + { + "epoch": 0.666043694013018, + "grad_norm": 0.0012340388580778937, + "learning_rate": 3.7157178660938064e-06, + "loss": 0.0, + "step": 10335 + }, + { + "epoch": 0.6661081394599472, + "grad_norm": 0.007880068164499425, + "learning_rate": 3.7150017901897607e-06, + "loss": 0.0, + "step": 10336 + }, + { + "epoch": 0.6661725849068764, + "grad_norm": 0.03773377443955219, + "learning_rate": 3.7142857142857146e-06, + "loss": 0.0004, + "step": 10337 + }, + { + "epoch": 0.6662370303538055, + "grad_norm": 0.0001938463646608074, + "learning_rate": 3.713569638381669e-06, + "loss": 0.0, + "step": 10338 + }, + { + "epoch": 0.6663014758007347, + "grad_norm": 0.0012649033419837533, + "learning_rate": 3.7128535624776228e-06, + "loss": 0.0, + "step": 10339 + }, + { + "epoch": 0.6663659212476638, + "grad_norm": 0.0038339946928142945, + "learning_rate": 3.712137486573577e-06, + "loss": 0.0, + "step": 10340 + }, + { + "epoch": 0.666430366694593, + "grad_norm": 0.7119580583622275, + "learning_rate": 3.7114214106695314e-06, + "loss": 0.0041, + "step": 10341 + }, + { + "epoch": 0.6664948121415222, + "grad_norm": 0.06270795364927652, + "learning_rate": 3.7107053347654852e-06, + "loss": 0.0, + "step": 10342 + }, + { + "epoch": 0.6665592575884514, + "grad_norm": 0.0682359125476423, + "learning_rate": 3.7099892588614395e-06, + "loss": 0.0017, + "step": 10343 + }, + { + "epoch": 0.6666237030353805, + "grad_norm": 0.027771674137431628, + "learning_rate": 3.7092731829573934e-06, + "loss": 0.0003, + "step": 10344 + }, + { + "epoch": 0.6666881484823097, + "grad_norm": 0.06264411062499124, + "learning_rate": 3.7085571070533477e-06, + "loss": 0.001, + "step": 10345 + }, + { + "epoch": 0.6667525939292389, + "grad_norm": 0.25018068579057157, + "learning_rate": 3.7078410311493024e-06, + "loss": 0.003, + "step": 10346 + }, + { + "epoch": 0.6668170393761681, + "grad_norm": 0.0006350143128432878, + "learning_rate": 3.7071249552452563e-06, + "loss": 0.0, + "step": 10347 + }, + { + "epoch": 0.6668814848230973, + "grad_norm": 0.004212846550878931, + "learning_rate": 3.7064088793412106e-06, + "loss": 0.0, + "step": 10348 + }, + { + "epoch": 0.6669459302700265, + "grad_norm": 0.00048795961950414513, + "learning_rate": 3.705692803437165e-06, + "loss": 0.0, + "step": 10349 + }, + { + "epoch": 0.6670103757169557, + "grad_norm": 0.2391944801432455, + "learning_rate": 3.704976727533119e-06, + "loss": 0.0008, + "step": 10350 + }, + { + "epoch": 0.6670748211638847, + "grad_norm": 0.000245488891439777, + "learning_rate": 3.704260651629073e-06, + "loss": 0.0, + "step": 10351 + }, + { + "epoch": 0.6671392666108139, + "grad_norm": 2.1831801820844412e-05, + "learning_rate": 3.703544575725027e-06, + "loss": 0.0, + "step": 10352 + }, + { + "epoch": 0.6672037120577431, + "grad_norm": 0.06861953276863456, + "learning_rate": 3.7028284998209813e-06, + "loss": 0.0001, + "step": 10353 + }, + { + "epoch": 0.6672681575046723, + "grad_norm": 0.21130783632373465, + "learning_rate": 3.7021124239169356e-06, + "loss": 0.0009, + "step": 10354 + }, + { + "epoch": 0.6673326029516015, + "grad_norm": 0.0006989204759455222, + "learning_rate": 3.7013963480128894e-06, + "loss": 0.0, + "step": 10355 + }, + { + "epoch": 0.6673970483985306, + "grad_norm": 0.00032966569814352545, + "learning_rate": 3.7006802721088437e-06, + "loss": 0.0, + "step": 10356 + }, + { + "epoch": 0.6674614938454598, + "grad_norm": 0.29515427786156617, + "learning_rate": 3.6999641962047976e-06, + "loss": 0.0017, + "step": 10357 + }, + { + "epoch": 0.667525939292389, + "grad_norm": 0.003538618157151033, + "learning_rate": 3.6992481203007523e-06, + "loss": 0.0, + "step": 10358 + }, + { + "epoch": 0.6675903847393182, + "grad_norm": 0.02578721018192703, + "learning_rate": 3.6985320443967066e-06, + "loss": 0.0, + "step": 10359 + }, + { + "epoch": 0.6676548301862474, + "grad_norm": 0.000487793444415082, + "learning_rate": 3.6978159684926605e-06, + "loss": 0.0, + "step": 10360 + }, + { + "epoch": 0.6677192756331766, + "grad_norm": 0.0018810635425694684, + "learning_rate": 3.697099892588615e-06, + "loss": 0.0, + "step": 10361 + }, + { + "epoch": 0.6677837210801056, + "grad_norm": 0.0018085405660967183, + "learning_rate": 3.696383816684569e-06, + "loss": 0.0, + "step": 10362 + }, + { + "epoch": 0.6678481665270348, + "grad_norm": 0.1033503724846543, + "learning_rate": 3.695667740780523e-06, + "loss": 0.0001, + "step": 10363 + }, + { + "epoch": 0.667912611973964, + "grad_norm": 0.03163602936890037, + "learning_rate": 3.6949516648764773e-06, + "loss": 0.0001, + "step": 10364 + }, + { + "epoch": 0.6679770574208932, + "grad_norm": 0.0005767042099623167, + "learning_rate": 3.694235588972431e-06, + "loss": 0.0, + "step": 10365 + }, + { + "epoch": 0.6680415028678224, + "grad_norm": 0.17640823233059116, + "learning_rate": 3.6935195130683855e-06, + "loss": 0.0018, + "step": 10366 + }, + { + "epoch": 0.6681059483147516, + "grad_norm": 0.0022090416866323596, + "learning_rate": 3.6928034371643393e-06, + "loss": 0.0, + "step": 10367 + }, + { + "epoch": 0.6681703937616807, + "grad_norm": 0.044256572962756785, + "learning_rate": 3.6920873612602936e-06, + "loss": 0.0, + "step": 10368 + }, + { + "epoch": 0.6682348392086099, + "grad_norm": 0.011968673310753146, + "learning_rate": 3.6913712853562483e-06, + "loss": 0.0001, + "step": 10369 + }, + { + "epoch": 0.6682992846555391, + "grad_norm": 0.002222004775121942, + "learning_rate": 3.6906552094522026e-06, + "loss": 0.0, + "step": 10370 + }, + { + "epoch": 0.6683637301024683, + "grad_norm": 0.003974169480094497, + "learning_rate": 3.6899391335481565e-06, + "loss": 0.0, + "step": 10371 + }, + { + "epoch": 0.6684281755493975, + "grad_norm": 0.011160728574854355, + "learning_rate": 3.689223057644111e-06, + "loss": 0.0001, + "step": 10372 + }, + { + "epoch": 0.6684926209963266, + "grad_norm": 0.03456428294752379, + "learning_rate": 3.6885069817400647e-06, + "loss": 0.0004, + "step": 10373 + }, + { + "epoch": 0.6685570664432557, + "grad_norm": 0.004038779991600407, + "learning_rate": 3.687790905836019e-06, + "loss": 0.0, + "step": 10374 + }, + { + "epoch": 0.6686215118901849, + "grad_norm": 0.000989251636449544, + "learning_rate": 3.687074829931973e-06, + "loss": 0.0, + "step": 10375 + }, + { + "epoch": 0.6686859573371141, + "grad_norm": 0.004165436098205323, + "learning_rate": 3.686358754027927e-06, + "loss": 0.0, + "step": 10376 + }, + { + "epoch": 0.6687504027840433, + "grad_norm": 0.14913321884813732, + "learning_rate": 3.6856426781238815e-06, + "loss": 0.0003, + "step": 10377 + }, + { + "epoch": 0.6688148482309725, + "grad_norm": 0.00021340746164696231, + "learning_rate": 3.6849266022198353e-06, + "loss": 0.0, + "step": 10378 + }, + { + "epoch": 0.6688792936779017, + "grad_norm": 0.001224805831090876, + "learning_rate": 3.6842105263157896e-06, + "loss": 0.0, + "step": 10379 + }, + { + "epoch": 0.6689437391248308, + "grad_norm": 0.059466480614118154, + "learning_rate": 3.6834944504117435e-06, + "loss": 0.0004, + "step": 10380 + }, + { + "epoch": 0.66900818457176, + "grad_norm": 0.022219569955405607, + "learning_rate": 3.6827783745076982e-06, + "loss": 0.0, + "step": 10381 + }, + { + "epoch": 0.6690726300186892, + "grad_norm": 0.010246315911304519, + "learning_rate": 3.6820622986036525e-06, + "loss": 0.0001, + "step": 10382 + }, + { + "epoch": 0.6691370754656184, + "grad_norm": 0.18016104909322808, + "learning_rate": 3.6813462226996064e-06, + "loss": 0.0015, + "step": 10383 + }, + { + "epoch": 0.6692015209125475, + "grad_norm": 0.0039480985634492635, + "learning_rate": 3.6806301467955607e-06, + "loss": 0.0, + "step": 10384 + }, + { + "epoch": 0.6692659663594767, + "grad_norm": 0.00021953914315241395, + "learning_rate": 3.679914070891515e-06, + "loss": 0.0, + "step": 10385 + }, + { + "epoch": 0.6693304118064058, + "grad_norm": 0.00140818974422586, + "learning_rate": 3.679197994987469e-06, + "loss": 0.0, + "step": 10386 + }, + { + "epoch": 0.669394857253335, + "grad_norm": 0.06190358518524449, + "learning_rate": 3.678481919083423e-06, + "loss": 0.0002, + "step": 10387 + }, + { + "epoch": 0.6694593027002642, + "grad_norm": 0.00048093340199941554, + "learning_rate": 3.677765843179377e-06, + "loss": 0.0, + "step": 10388 + }, + { + "epoch": 0.6695237481471934, + "grad_norm": 0.010102305479308519, + "learning_rate": 3.6770497672753314e-06, + "loss": 0.0, + "step": 10389 + }, + { + "epoch": 0.6695881935941226, + "grad_norm": 0.012027537219855382, + "learning_rate": 3.6763336913712857e-06, + "loss": 0.0, + "step": 10390 + }, + { + "epoch": 0.6696526390410518, + "grad_norm": 0.00474196388602497, + "learning_rate": 3.6756176154672395e-06, + "loss": 0.0, + "step": 10391 + }, + { + "epoch": 0.669717084487981, + "grad_norm": 0.00038679097703264, + "learning_rate": 3.674901539563194e-06, + "loss": 0.0, + "step": 10392 + }, + { + "epoch": 0.6697815299349101, + "grad_norm": 7.549534460174918e-05, + "learning_rate": 3.6741854636591486e-06, + "loss": 0.0, + "step": 10393 + }, + { + "epoch": 0.6698459753818393, + "grad_norm": 0.0027491068659299105, + "learning_rate": 3.6734693877551024e-06, + "loss": 0.0, + "step": 10394 + }, + { + "epoch": 0.6699104208287684, + "grad_norm": 0.05399843993640478, + "learning_rate": 3.6727533118510567e-06, + "loss": 0.0001, + "step": 10395 + }, + { + "epoch": 0.6699748662756976, + "grad_norm": 0.004283962775883192, + "learning_rate": 3.6720372359470106e-06, + "loss": 0.0, + "step": 10396 + }, + { + "epoch": 0.6700393117226268, + "grad_norm": 0.0001430948322371208, + "learning_rate": 3.671321160042965e-06, + "loss": 0.0, + "step": 10397 + }, + { + "epoch": 0.670103757169556, + "grad_norm": 0.00017731739681702292, + "learning_rate": 3.670605084138919e-06, + "loss": 0.0, + "step": 10398 + }, + { + "epoch": 0.6701682026164851, + "grad_norm": 0.000991244561069628, + "learning_rate": 3.669889008234873e-06, + "loss": 0.0, + "step": 10399 + }, + { + "epoch": 0.6702326480634143, + "grad_norm": 0.0001893566769277467, + "learning_rate": 3.6691729323308274e-06, + "loss": 0.0, + "step": 10400 + }, + { + "epoch": 0.6702970935103435, + "grad_norm": 0.004589059406335244, + "learning_rate": 3.6684568564267812e-06, + "loss": 0.0, + "step": 10401 + }, + { + "epoch": 0.6703615389572727, + "grad_norm": 3.647468846965012e-05, + "learning_rate": 3.6677407805227355e-06, + "loss": 0.0, + "step": 10402 + }, + { + "epoch": 0.6704259844042019, + "grad_norm": 6.37729973034846e-05, + "learning_rate": 3.66702470461869e-06, + "loss": 0.0, + "step": 10403 + }, + { + "epoch": 0.670490429851131, + "grad_norm": 0.2196042122873555, + "learning_rate": 3.666308628714644e-06, + "loss": 0.0008, + "step": 10404 + }, + { + "epoch": 0.6705548752980602, + "grad_norm": 0.007662267436804598, + "learning_rate": 3.6655925528105984e-06, + "loss": 0.0, + "step": 10405 + }, + { + "epoch": 0.6706193207449893, + "grad_norm": 0.0004067778012657678, + "learning_rate": 3.6648764769065527e-06, + "loss": 0.0, + "step": 10406 + }, + { + "epoch": 0.6706837661919185, + "grad_norm": 0.010651062407556951, + "learning_rate": 3.6641604010025066e-06, + "loss": 0.0, + "step": 10407 + }, + { + "epoch": 0.6707482116388477, + "grad_norm": 0.0031969810332983287, + "learning_rate": 3.663444325098461e-06, + "loss": 0.0, + "step": 10408 + }, + { + "epoch": 0.6708126570857769, + "grad_norm": 0.00044501171077525185, + "learning_rate": 3.662728249194415e-06, + "loss": 0.0, + "step": 10409 + }, + { + "epoch": 0.670877102532706, + "grad_norm": 0.00013680105479665556, + "learning_rate": 3.662012173290369e-06, + "loss": 0.0, + "step": 10410 + }, + { + "epoch": 0.6709415479796352, + "grad_norm": 0.025411179136745145, + "learning_rate": 3.6612960973863234e-06, + "loss": 0.0, + "step": 10411 + }, + { + "epoch": 0.6710059934265644, + "grad_norm": 0.05550540515503469, + "learning_rate": 3.6605800214822773e-06, + "loss": 0.0001, + "step": 10412 + }, + { + "epoch": 0.6710704388734936, + "grad_norm": 3.929251421293004e-05, + "learning_rate": 3.6598639455782316e-06, + "loss": 0.0, + "step": 10413 + }, + { + "epoch": 0.6711348843204228, + "grad_norm": 0.09189680423013248, + "learning_rate": 3.6591478696741854e-06, + "loss": 0.0002, + "step": 10414 + }, + { + "epoch": 0.671199329767352, + "grad_norm": 0.0006463515367147947, + "learning_rate": 3.6584317937701397e-06, + "loss": 0.0, + "step": 10415 + }, + { + "epoch": 0.6712637752142812, + "grad_norm": 0.00046602857314811133, + "learning_rate": 3.6577157178660945e-06, + "loss": 0.0, + "step": 10416 + }, + { + "epoch": 0.6713282206612103, + "grad_norm": 8.019380777964712e-05, + "learning_rate": 3.6569996419620483e-06, + "loss": 0.0, + "step": 10417 + }, + { + "epoch": 0.6713926661081394, + "grad_norm": 0.035754243974769166, + "learning_rate": 3.6562835660580026e-06, + "loss": 0.0, + "step": 10418 + }, + { + "epoch": 0.6714571115550686, + "grad_norm": 0.03264983678887585, + "learning_rate": 3.655567490153957e-06, + "loss": 0.0001, + "step": 10419 + }, + { + "epoch": 0.6715215570019978, + "grad_norm": 0.00046047314390313494, + "learning_rate": 3.654851414249911e-06, + "loss": 0.0, + "step": 10420 + }, + { + "epoch": 0.671586002448927, + "grad_norm": 0.14819997681575406, + "learning_rate": 3.654135338345865e-06, + "loss": 0.0023, + "step": 10421 + }, + { + "epoch": 0.6716504478958562, + "grad_norm": 0.12865062133924046, + "learning_rate": 3.653419262441819e-06, + "loss": 0.0002, + "step": 10422 + }, + { + "epoch": 0.6717148933427853, + "grad_norm": 0.000465925691879833, + "learning_rate": 3.6527031865377733e-06, + "loss": 0.0, + "step": 10423 + }, + { + "epoch": 0.6717793387897145, + "grad_norm": 0.10362138383915132, + "learning_rate": 3.651987110633727e-06, + "loss": 0.0008, + "step": 10424 + }, + { + "epoch": 0.6718437842366437, + "grad_norm": 0.11788996914326605, + "learning_rate": 3.6512710347296815e-06, + "loss": 0.0001, + "step": 10425 + }, + { + "epoch": 0.6719082296835729, + "grad_norm": 0.0004748098370773154, + "learning_rate": 3.6505549588256358e-06, + "loss": 0.0, + "step": 10426 + }, + { + "epoch": 0.6719726751305021, + "grad_norm": 0.06221575286604827, + "learning_rate": 3.6498388829215896e-06, + "loss": 0.0002, + "step": 10427 + }, + { + "epoch": 0.6720371205774313, + "grad_norm": 0.0006763647039382567, + "learning_rate": 3.6491228070175443e-06, + "loss": 0.0, + "step": 10428 + }, + { + "epoch": 0.6721015660243603, + "grad_norm": 0.013047591917105377, + "learning_rate": 3.6484067311134986e-06, + "loss": 0.0, + "step": 10429 + }, + { + "epoch": 0.6721660114712895, + "grad_norm": 0.0018885810922089606, + "learning_rate": 3.6476906552094525e-06, + "loss": 0.0, + "step": 10430 + }, + { + "epoch": 0.6722304569182187, + "grad_norm": 0.00018979485411064437, + "learning_rate": 3.646974579305407e-06, + "loss": 0.0, + "step": 10431 + }, + { + "epoch": 0.6722949023651479, + "grad_norm": 0.25609584975065497, + "learning_rate": 3.6462585034013607e-06, + "loss": 0.0009, + "step": 10432 + }, + { + "epoch": 0.6723593478120771, + "grad_norm": 0.024535905068322705, + "learning_rate": 3.645542427497315e-06, + "loss": 0.0001, + "step": 10433 + }, + { + "epoch": 0.6724237932590063, + "grad_norm": 0.0027585886411952447, + "learning_rate": 3.6448263515932693e-06, + "loss": 0.0, + "step": 10434 + }, + { + "epoch": 0.6724882387059354, + "grad_norm": 0.028292844046307754, + "learning_rate": 3.644110275689223e-06, + "loss": 0.0001, + "step": 10435 + }, + { + "epoch": 0.6725526841528646, + "grad_norm": 0.027540698239601234, + "learning_rate": 3.6433941997851775e-06, + "loss": 0.0001, + "step": 10436 + }, + { + "epoch": 0.6726171295997938, + "grad_norm": 0.00963730360489511, + "learning_rate": 3.6426781238811313e-06, + "loss": 0.0001, + "step": 10437 + }, + { + "epoch": 0.672681575046723, + "grad_norm": 0.0018280268136798214, + "learning_rate": 3.6419620479770856e-06, + "loss": 0.0, + "step": 10438 + }, + { + "epoch": 0.6727460204936522, + "grad_norm": 0.0001900797635635939, + "learning_rate": 3.6412459720730404e-06, + "loss": 0.0, + "step": 10439 + }, + { + "epoch": 0.6728104659405812, + "grad_norm": 0.00023532304022595498, + "learning_rate": 3.6405298961689942e-06, + "loss": 0.0, + "step": 10440 + }, + { + "epoch": 0.6728749113875104, + "grad_norm": 0.0019142251629258287, + "learning_rate": 3.6398138202649485e-06, + "loss": 0.0, + "step": 10441 + }, + { + "epoch": 0.6729393568344396, + "grad_norm": 0.00029465788696560115, + "learning_rate": 3.639097744360903e-06, + "loss": 0.0, + "step": 10442 + }, + { + "epoch": 0.6730038022813688, + "grad_norm": 0.00023300991071189333, + "learning_rate": 3.6383816684568567e-06, + "loss": 0.0, + "step": 10443 + }, + { + "epoch": 0.673068247728298, + "grad_norm": 0.22997037699398992, + "learning_rate": 3.637665592552811e-06, + "loss": 0.0006, + "step": 10444 + }, + { + "epoch": 0.6731326931752272, + "grad_norm": 0.41474730166085766, + "learning_rate": 3.636949516648765e-06, + "loss": 0.0018, + "step": 10445 + }, + { + "epoch": 0.6731971386221564, + "grad_norm": 0.0018304284832990753, + "learning_rate": 3.636233440744719e-06, + "loss": 0.0, + "step": 10446 + }, + { + "epoch": 0.6732615840690855, + "grad_norm": 0.2089151285548803, + "learning_rate": 3.6355173648406735e-06, + "loss": 0.0019, + "step": 10447 + }, + { + "epoch": 0.6733260295160147, + "grad_norm": 0.0003634330347498069, + "learning_rate": 3.6348012889366274e-06, + "loss": 0.0, + "step": 10448 + }, + { + "epoch": 0.6733904749629439, + "grad_norm": 0.001954738083069696, + "learning_rate": 3.6340852130325817e-06, + "loss": 0.0, + "step": 10449 + }, + { + "epoch": 0.6734549204098731, + "grad_norm": 0.001967566134604329, + "learning_rate": 3.6333691371285355e-06, + "loss": 0.0, + "step": 10450 + }, + { + "epoch": 0.6735193658568022, + "grad_norm": 0.01803615169688362, + "learning_rate": 3.6326530612244903e-06, + "loss": 0.0002, + "step": 10451 + }, + { + "epoch": 0.6735838113037314, + "grad_norm": 0.18593643853341382, + "learning_rate": 3.6319369853204446e-06, + "loss": 0.0004, + "step": 10452 + }, + { + "epoch": 0.6736482567506605, + "grad_norm": 0.0021236017655750163, + "learning_rate": 3.6312209094163984e-06, + "loss": 0.0, + "step": 10453 + }, + { + "epoch": 0.6737127021975897, + "grad_norm": 0.0017699274242623294, + "learning_rate": 3.6305048335123527e-06, + "loss": 0.0, + "step": 10454 + }, + { + "epoch": 0.6737771476445189, + "grad_norm": 0.0001289320386872569, + "learning_rate": 3.629788757608307e-06, + "loss": 0.0, + "step": 10455 + }, + { + "epoch": 0.6738415930914481, + "grad_norm": 0.017909558811695953, + "learning_rate": 3.629072681704261e-06, + "loss": 0.0, + "step": 10456 + }, + { + "epoch": 0.6739060385383773, + "grad_norm": 0.8763559597886815, + "learning_rate": 3.628356605800215e-06, + "loss": 0.0046, + "step": 10457 + }, + { + "epoch": 0.6739704839853065, + "grad_norm": 0.00027276141796046864, + "learning_rate": 3.627640529896169e-06, + "loss": 0.0, + "step": 10458 + }, + { + "epoch": 0.6740349294322356, + "grad_norm": 2.0164551910408486, + "learning_rate": 3.6269244539921234e-06, + "loss": 0.0096, + "step": 10459 + }, + { + "epoch": 0.6740993748791648, + "grad_norm": 0.00017352627869547739, + "learning_rate": 3.6262083780880777e-06, + "loss": 0.0, + "step": 10460 + }, + { + "epoch": 0.674163820326094, + "grad_norm": 0.0023188071801069775, + "learning_rate": 3.6254923021840315e-06, + "loss": 0.0, + "step": 10461 + }, + { + "epoch": 0.6742282657730231, + "grad_norm": 0.004480081186474678, + "learning_rate": 3.624776226279986e-06, + "loss": 0.0, + "step": 10462 + }, + { + "epoch": 0.6742927112199523, + "grad_norm": 0.003887006294521679, + "learning_rate": 3.6240601503759406e-06, + "loss": 0.0, + "step": 10463 + }, + { + "epoch": 0.6743571566668815, + "grad_norm": 0.00026653596353573024, + "learning_rate": 3.6233440744718944e-06, + "loss": 0.0, + "step": 10464 + }, + { + "epoch": 0.6744216021138106, + "grad_norm": 0.00012657650139262516, + "learning_rate": 3.6226279985678487e-06, + "loss": 0.0, + "step": 10465 + }, + { + "epoch": 0.6744860475607398, + "grad_norm": 0.0031173247361041124, + "learning_rate": 3.6219119226638026e-06, + "loss": 0.0, + "step": 10466 + }, + { + "epoch": 0.674550493007669, + "grad_norm": 0.23030574335774806, + "learning_rate": 3.621195846759757e-06, + "loss": 0.0005, + "step": 10467 + }, + { + "epoch": 0.6746149384545982, + "grad_norm": 0.05862473026472335, + "learning_rate": 3.6204797708557112e-06, + "loss": 0.0006, + "step": 10468 + }, + { + "epoch": 0.6746793839015274, + "grad_norm": 0.0003351622123052417, + "learning_rate": 3.619763694951665e-06, + "loss": 0.0, + "step": 10469 + }, + { + "epoch": 0.6747438293484566, + "grad_norm": 0.00023233519721705282, + "learning_rate": 3.6190476190476194e-06, + "loss": 0.0, + "step": 10470 + }, + { + "epoch": 0.6748082747953857, + "grad_norm": 0.0035822470136483175, + "learning_rate": 3.6183315431435733e-06, + "loss": 0.0, + "step": 10471 + }, + { + "epoch": 0.6748727202423149, + "grad_norm": 0.0004719819756121723, + "learning_rate": 3.6176154672395276e-06, + "loss": 0.0, + "step": 10472 + }, + { + "epoch": 0.674937165689244, + "grad_norm": 0.004377407497624656, + "learning_rate": 3.6168993913354814e-06, + "loss": 0.0, + "step": 10473 + }, + { + "epoch": 0.6750016111361732, + "grad_norm": 0.00190075138693745, + "learning_rate": 3.616183315431436e-06, + "loss": 0.0, + "step": 10474 + }, + { + "epoch": 0.6750660565831024, + "grad_norm": 0.004317345459707869, + "learning_rate": 3.6154672395273905e-06, + "loss": 0.0, + "step": 10475 + }, + { + "epoch": 0.6751305020300316, + "grad_norm": 0.0014907583759944283, + "learning_rate": 3.6147511636233448e-06, + "loss": 0.0, + "step": 10476 + }, + { + "epoch": 0.6751949474769607, + "grad_norm": 0.0018248634672141091, + "learning_rate": 3.6140350877192986e-06, + "loss": 0.0, + "step": 10477 + }, + { + "epoch": 0.6752593929238899, + "grad_norm": 0.000946823727749897, + "learning_rate": 3.613319011815253e-06, + "loss": 0.0, + "step": 10478 + }, + { + "epoch": 0.6753238383708191, + "grad_norm": 0.014769051121375172, + "learning_rate": 3.612602935911207e-06, + "loss": 0.0001, + "step": 10479 + }, + { + "epoch": 0.6753882838177483, + "grad_norm": 0.0037277822552123324, + "learning_rate": 3.611886860007161e-06, + "loss": 0.0, + "step": 10480 + }, + { + "epoch": 0.6754527292646775, + "grad_norm": 0.003775872304534592, + "learning_rate": 3.611170784103115e-06, + "loss": 0.0, + "step": 10481 + }, + { + "epoch": 0.6755171747116067, + "grad_norm": 0.0004943616280413878, + "learning_rate": 3.6104547081990693e-06, + "loss": 0.0, + "step": 10482 + }, + { + "epoch": 0.6755816201585358, + "grad_norm": 1.1965923065115471, + "learning_rate": 3.6097386322950236e-06, + "loss": 0.0035, + "step": 10483 + }, + { + "epoch": 0.675646065605465, + "grad_norm": 0.04179766004707495, + "learning_rate": 3.6090225563909775e-06, + "loss": 0.0001, + "step": 10484 + }, + { + "epoch": 0.6757105110523941, + "grad_norm": 0.006248109641524533, + "learning_rate": 3.6083064804869318e-06, + "loss": 0.0, + "step": 10485 + }, + { + "epoch": 0.6757749564993233, + "grad_norm": 0.0003839978250398899, + "learning_rate": 3.6075904045828865e-06, + "loss": 0.0, + "step": 10486 + }, + { + "epoch": 0.6758394019462525, + "grad_norm": 0.006934538377296952, + "learning_rate": 3.6068743286788403e-06, + "loss": 0.0, + "step": 10487 + }, + { + "epoch": 0.6759038473931817, + "grad_norm": 0.00016873140626715315, + "learning_rate": 3.6061582527747946e-06, + "loss": 0.0, + "step": 10488 + }, + { + "epoch": 0.6759682928401108, + "grad_norm": 0.01762084273842604, + "learning_rate": 3.6054421768707485e-06, + "loss": 0.0, + "step": 10489 + }, + { + "epoch": 0.67603273828704, + "grad_norm": 0.46920061704573907, + "learning_rate": 3.604726100966703e-06, + "loss": 0.0009, + "step": 10490 + }, + { + "epoch": 0.6760971837339692, + "grad_norm": 0.05753273553305687, + "learning_rate": 3.604010025062657e-06, + "loss": 0.0004, + "step": 10491 + }, + { + "epoch": 0.6761616291808984, + "grad_norm": 0.0003436129141898001, + "learning_rate": 3.603293949158611e-06, + "loss": 0.0, + "step": 10492 + }, + { + "epoch": 0.6762260746278276, + "grad_norm": 0.0037809435500818145, + "learning_rate": 3.6025778732545653e-06, + "loss": 0.0, + "step": 10493 + }, + { + "epoch": 0.6762905200747568, + "grad_norm": 0.0003521812249406442, + "learning_rate": 3.601861797350519e-06, + "loss": 0.0, + "step": 10494 + }, + { + "epoch": 0.676354965521686, + "grad_norm": 0.00025709992147468773, + "learning_rate": 3.6011457214464735e-06, + "loss": 0.0, + "step": 10495 + }, + { + "epoch": 0.676419410968615, + "grad_norm": 0.005573763881191372, + "learning_rate": 3.6004296455424278e-06, + "loss": 0.0, + "step": 10496 + }, + { + "epoch": 0.6764838564155442, + "grad_norm": 0.02430756228068615, + "learning_rate": 3.5997135696383816e-06, + "loss": 0.0, + "step": 10497 + }, + { + "epoch": 0.6765483018624734, + "grad_norm": 0.017898788840930383, + "learning_rate": 3.5989974937343364e-06, + "loss": 0.0, + "step": 10498 + }, + { + "epoch": 0.6766127473094026, + "grad_norm": 0.008393354787299115, + "learning_rate": 3.5982814178302907e-06, + "loss": 0.0, + "step": 10499 + }, + { + "epoch": 0.6766771927563318, + "grad_norm": 0.003671404471434068, + "learning_rate": 3.5975653419262445e-06, + "loss": 0.0, + "step": 10500 + }, + { + "epoch": 0.6767416382032609, + "grad_norm": 0.0050457432757222654, + "learning_rate": 3.596849266022199e-06, + "loss": 0.0, + "step": 10501 + }, + { + "epoch": 0.6768060836501901, + "grad_norm": 0.00114763842394845, + "learning_rate": 3.5961331901181527e-06, + "loss": 0.0, + "step": 10502 + }, + { + "epoch": 0.6768705290971193, + "grad_norm": 0.0008545279761466147, + "learning_rate": 3.595417114214107e-06, + "loss": 0.0, + "step": 10503 + }, + { + "epoch": 0.6769349745440485, + "grad_norm": 0.00034751624162334137, + "learning_rate": 3.5947010383100613e-06, + "loss": 0.0, + "step": 10504 + }, + { + "epoch": 0.6769994199909777, + "grad_norm": 0.015582994439986578, + "learning_rate": 3.593984962406015e-06, + "loss": 0.0, + "step": 10505 + }, + { + "epoch": 0.6770638654379069, + "grad_norm": 0.040964320145442906, + "learning_rate": 3.5932688865019695e-06, + "loss": 0.0001, + "step": 10506 + }, + { + "epoch": 0.6771283108848359, + "grad_norm": 0.010853452613408077, + "learning_rate": 3.5925528105979234e-06, + "loss": 0.0001, + "step": 10507 + }, + { + "epoch": 0.6771927563317651, + "grad_norm": 0.02525209498935422, + "learning_rate": 3.5918367346938777e-06, + "loss": 0.0001, + "step": 10508 + }, + { + "epoch": 0.6772572017786943, + "grad_norm": 0.022178140745299144, + "learning_rate": 3.5911206587898324e-06, + "loss": 0.0, + "step": 10509 + }, + { + "epoch": 0.6773216472256235, + "grad_norm": 0.008376311699932986, + "learning_rate": 3.5904045828857863e-06, + "loss": 0.0001, + "step": 10510 + }, + { + "epoch": 0.6773860926725527, + "grad_norm": 0.001207447161221422, + "learning_rate": 3.5896885069817406e-06, + "loss": 0.0, + "step": 10511 + }, + { + "epoch": 0.6774505381194819, + "grad_norm": 0.0029356591079545027, + "learning_rate": 3.588972431077695e-06, + "loss": 0.0, + "step": 10512 + }, + { + "epoch": 0.677514983566411, + "grad_norm": 0.017201078670219305, + "learning_rate": 3.5882563551736487e-06, + "loss": 0.0, + "step": 10513 + }, + { + "epoch": 0.6775794290133402, + "grad_norm": 0.00014996079096439788, + "learning_rate": 3.587540279269603e-06, + "loss": 0.0, + "step": 10514 + }, + { + "epoch": 0.6776438744602694, + "grad_norm": 0.01063396315857339, + "learning_rate": 3.586824203365557e-06, + "loss": 0.0001, + "step": 10515 + }, + { + "epoch": 0.6777083199071986, + "grad_norm": 0.000691680774477271, + "learning_rate": 3.586108127461511e-06, + "loss": 0.0, + "step": 10516 + }, + { + "epoch": 0.6777727653541278, + "grad_norm": 0.057157545980472034, + "learning_rate": 3.585392051557465e-06, + "loss": 0.0002, + "step": 10517 + }, + { + "epoch": 0.6778372108010569, + "grad_norm": 0.00029040852215511486, + "learning_rate": 3.5846759756534194e-06, + "loss": 0.0, + "step": 10518 + }, + { + "epoch": 0.677901656247986, + "grad_norm": 0.003804059215826307, + "learning_rate": 3.5839598997493737e-06, + "loss": 0.0, + "step": 10519 + }, + { + "epoch": 0.6779661016949152, + "grad_norm": 0.009584564853263446, + "learning_rate": 3.5832438238453275e-06, + "loss": 0.0001, + "step": 10520 + }, + { + "epoch": 0.6780305471418444, + "grad_norm": 0.0015142631223244963, + "learning_rate": 3.5825277479412823e-06, + "loss": 0.0, + "step": 10521 + }, + { + "epoch": 0.6780949925887736, + "grad_norm": 0.0005718388729823543, + "learning_rate": 3.5818116720372366e-06, + "loss": 0.0, + "step": 10522 + }, + { + "epoch": 0.6781594380357028, + "grad_norm": 0.012310340151570972, + "learning_rate": 3.5810955961331904e-06, + "loss": 0.0, + "step": 10523 + }, + { + "epoch": 0.678223883482632, + "grad_norm": 0.00043170374826357976, + "learning_rate": 3.5803795202291447e-06, + "loss": 0.0, + "step": 10524 + }, + { + "epoch": 0.6782883289295611, + "grad_norm": 0.027632992934753346, + "learning_rate": 3.5796634443250986e-06, + "loss": 0.0, + "step": 10525 + }, + { + "epoch": 0.6783527743764903, + "grad_norm": 0.001345522934734624, + "learning_rate": 3.578947368421053e-06, + "loss": 0.0, + "step": 10526 + }, + { + "epoch": 0.6784172198234195, + "grad_norm": 0.0008807635063323086, + "learning_rate": 3.5782312925170072e-06, + "loss": 0.0, + "step": 10527 + }, + { + "epoch": 0.6784816652703487, + "grad_norm": 0.002128459962488065, + "learning_rate": 3.577515216612961e-06, + "loss": 0.0, + "step": 10528 + }, + { + "epoch": 0.6785461107172778, + "grad_norm": 0.0006520468088653372, + "learning_rate": 3.5767991407089154e-06, + "loss": 0.0, + "step": 10529 + }, + { + "epoch": 0.678610556164207, + "grad_norm": 0.02735517573911248, + "learning_rate": 3.5760830648048693e-06, + "loss": 0.0, + "step": 10530 + }, + { + "epoch": 0.6786750016111361, + "grad_norm": 0.4512681734435724, + "learning_rate": 3.5753669889008236e-06, + "loss": 0.0032, + "step": 10531 + }, + { + "epoch": 0.6787394470580653, + "grad_norm": 0.0036808522723295995, + "learning_rate": 3.574650912996778e-06, + "loss": 0.0, + "step": 10532 + }, + { + "epoch": 0.6788038925049945, + "grad_norm": 7.517311356908132e-05, + "learning_rate": 3.573934837092732e-06, + "loss": 0.0, + "step": 10533 + }, + { + "epoch": 0.6788683379519237, + "grad_norm": 0.11774557663446128, + "learning_rate": 3.5732187611886865e-06, + "loss": 0.0018, + "step": 10534 + }, + { + "epoch": 0.6789327833988529, + "grad_norm": 0.02647121095954798, + "learning_rate": 3.5725026852846408e-06, + "loss": 0.0002, + "step": 10535 + }, + { + "epoch": 0.6789972288457821, + "grad_norm": 0.035277858321916856, + "learning_rate": 3.5717866093805946e-06, + "loss": 0.0, + "step": 10536 + }, + { + "epoch": 0.6790616742927112, + "grad_norm": 0.41236802431897623, + "learning_rate": 3.571070533476549e-06, + "loss": 0.0012, + "step": 10537 + }, + { + "epoch": 0.6791261197396404, + "grad_norm": 0.0002817103551778652, + "learning_rate": 3.570354457572503e-06, + "loss": 0.0, + "step": 10538 + }, + { + "epoch": 0.6791905651865696, + "grad_norm": 0.0067230817027691785, + "learning_rate": 3.569638381668457e-06, + "loss": 0.0, + "step": 10539 + }, + { + "epoch": 0.6792550106334987, + "grad_norm": 0.0019842109186937367, + "learning_rate": 3.5689223057644114e-06, + "loss": 0.0, + "step": 10540 + }, + { + "epoch": 0.6793194560804279, + "grad_norm": 0.0008768804890963424, + "learning_rate": 3.5682062298603653e-06, + "loss": 0.0, + "step": 10541 + }, + { + "epoch": 0.6793839015273571, + "grad_norm": 0.052039909390406425, + "learning_rate": 3.5674901539563196e-06, + "loss": 0.0, + "step": 10542 + }, + { + "epoch": 0.6794483469742862, + "grad_norm": 0.021989190979891722, + "learning_rate": 3.5667740780522735e-06, + "loss": 0.0, + "step": 10543 + }, + { + "epoch": 0.6795127924212154, + "grad_norm": 0.0007638219482915713, + "learning_rate": 3.5660580021482278e-06, + "loss": 0.0, + "step": 10544 + }, + { + "epoch": 0.6795772378681446, + "grad_norm": 1.0232287477942066, + "learning_rate": 3.5653419262441825e-06, + "loss": 0.0047, + "step": 10545 + }, + { + "epoch": 0.6796416833150738, + "grad_norm": 0.001587062494048603, + "learning_rate": 3.5646258503401363e-06, + "loss": 0.0, + "step": 10546 + }, + { + "epoch": 0.679706128762003, + "grad_norm": 0.018462534965671778, + "learning_rate": 3.5639097744360906e-06, + "loss": 0.0, + "step": 10547 + }, + { + "epoch": 0.6797705742089322, + "grad_norm": 0.013377734314796297, + "learning_rate": 3.563193698532045e-06, + "loss": 0.0, + "step": 10548 + }, + { + "epoch": 0.6798350196558614, + "grad_norm": 0.9623252028734142, + "learning_rate": 3.562477622627999e-06, + "loss": 0.0018, + "step": 10549 + }, + { + "epoch": 0.6798994651027905, + "grad_norm": 0.0017705625053728948, + "learning_rate": 3.561761546723953e-06, + "loss": 0.0, + "step": 10550 + }, + { + "epoch": 0.6799639105497196, + "grad_norm": 0.00017713197781208631, + "learning_rate": 3.561045470819907e-06, + "loss": 0.0, + "step": 10551 + }, + { + "epoch": 0.6800283559966488, + "grad_norm": 0.020427811779944585, + "learning_rate": 3.5603293949158613e-06, + "loss": 0.0, + "step": 10552 + }, + { + "epoch": 0.680092801443578, + "grad_norm": 2.4890916304191784, + "learning_rate": 3.5596133190118156e-06, + "loss": 0.0203, + "step": 10553 + }, + { + "epoch": 0.6801572468905072, + "grad_norm": 0.001329291279453643, + "learning_rate": 3.5588972431077695e-06, + "loss": 0.0, + "step": 10554 + }, + { + "epoch": 0.6802216923374363, + "grad_norm": 0.1980014711359068, + "learning_rate": 3.5581811672037238e-06, + "loss": 0.0014, + "step": 10555 + }, + { + "epoch": 0.6802861377843655, + "grad_norm": 0.0051990412329180015, + "learning_rate": 3.5574650912996785e-06, + "loss": 0.0, + "step": 10556 + }, + { + "epoch": 0.6803505832312947, + "grad_norm": 0.017665047112924433, + "learning_rate": 3.5567490153956324e-06, + "loss": 0.0, + "step": 10557 + }, + { + "epoch": 0.6804150286782239, + "grad_norm": 0.001657024012985856, + "learning_rate": 3.5560329394915867e-06, + "loss": 0.0, + "step": 10558 + }, + { + "epoch": 0.6804794741251531, + "grad_norm": 0.0008793134489039452, + "learning_rate": 3.5553168635875405e-06, + "loss": 0.0, + "step": 10559 + }, + { + "epoch": 0.6805439195720823, + "grad_norm": 0.0007335920395671671, + "learning_rate": 3.554600787683495e-06, + "loss": 0.0, + "step": 10560 + }, + { + "epoch": 0.6806083650190115, + "grad_norm": 0.0045903698338804495, + "learning_rate": 3.553884711779449e-06, + "loss": 0.0, + "step": 10561 + }, + { + "epoch": 0.6806728104659406, + "grad_norm": 0.000662578100139206, + "learning_rate": 3.553168635875403e-06, + "loss": 0.0, + "step": 10562 + }, + { + "epoch": 0.6807372559128697, + "grad_norm": 0.0009688112778837717, + "learning_rate": 3.5524525599713573e-06, + "loss": 0.0, + "step": 10563 + }, + { + "epoch": 0.6808017013597989, + "grad_norm": 0.0014027335978677121, + "learning_rate": 3.551736484067311e-06, + "loss": 0.0, + "step": 10564 + }, + { + "epoch": 0.6808661468067281, + "grad_norm": 6.014109765455335e-05, + "learning_rate": 3.5510204081632655e-06, + "loss": 0.0, + "step": 10565 + }, + { + "epoch": 0.6809305922536573, + "grad_norm": 0.0012696360857282496, + "learning_rate": 3.5503043322592194e-06, + "loss": 0.0, + "step": 10566 + }, + { + "epoch": 0.6809950377005864, + "grad_norm": 0.0020264019625621287, + "learning_rate": 3.5495882563551737e-06, + "loss": 0.0, + "step": 10567 + }, + { + "epoch": 0.6810594831475156, + "grad_norm": 0.0001300260332313125, + "learning_rate": 3.5488721804511284e-06, + "loss": 0.0, + "step": 10568 + }, + { + "epoch": 0.6811239285944448, + "grad_norm": 0.11144132046079638, + "learning_rate": 3.5481561045470827e-06, + "loss": 0.0001, + "step": 10569 + }, + { + "epoch": 0.681188374041374, + "grad_norm": 0.0040325540813843305, + "learning_rate": 3.5474400286430366e-06, + "loss": 0.0, + "step": 10570 + }, + { + "epoch": 0.6812528194883032, + "grad_norm": 0.00011549701540412366, + "learning_rate": 3.546723952738991e-06, + "loss": 0.0, + "step": 10571 + }, + { + "epoch": 0.6813172649352324, + "grad_norm": 0.011268333555497053, + "learning_rate": 3.5460078768349447e-06, + "loss": 0.0001, + "step": 10572 + }, + { + "epoch": 0.6813817103821616, + "grad_norm": 0.0006847428079941398, + "learning_rate": 3.545291800930899e-06, + "loss": 0.0, + "step": 10573 + }, + { + "epoch": 0.6814461558290906, + "grad_norm": 0.009468708355515205, + "learning_rate": 3.544575725026853e-06, + "loss": 0.0, + "step": 10574 + }, + { + "epoch": 0.6815106012760198, + "grad_norm": 0.006315914786455601, + "learning_rate": 3.543859649122807e-06, + "loss": 0.0, + "step": 10575 + }, + { + "epoch": 0.681575046722949, + "grad_norm": 0.006827846703311107, + "learning_rate": 3.5431435732187615e-06, + "loss": 0.0, + "step": 10576 + }, + { + "epoch": 0.6816394921698782, + "grad_norm": 0.029604628539176416, + "learning_rate": 3.5424274973147154e-06, + "loss": 0.0002, + "step": 10577 + }, + { + "epoch": 0.6817039376168074, + "grad_norm": 0.051215081592810865, + "learning_rate": 3.5417114214106697e-06, + "loss": 0.0002, + "step": 10578 + }, + { + "epoch": 0.6817683830637365, + "grad_norm": 0.016480723087823855, + "learning_rate": 3.5409953455066235e-06, + "loss": 0.0001, + "step": 10579 + }, + { + "epoch": 0.6818328285106657, + "grad_norm": 0.0006766804201587516, + "learning_rate": 3.5402792696025783e-06, + "loss": 0.0, + "step": 10580 + }, + { + "epoch": 0.6818972739575949, + "grad_norm": 0.0019866411763065327, + "learning_rate": 3.5395631936985326e-06, + "loss": 0.0, + "step": 10581 + }, + { + "epoch": 0.6819617194045241, + "grad_norm": 0.0004252701808851787, + "learning_rate": 3.5388471177944864e-06, + "loss": 0.0, + "step": 10582 + }, + { + "epoch": 0.6820261648514533, + "grad_norm": 0.00031296475467953647, + "learning_rate": 3.5381310418904407e-06, + "loss": 0.0, + "step": 10583 + }, + { + "epoch": 0.6820906102983825, + "grad_norm": 0.00832683055652773, + "learning_rate": 3.537414965986395e-06, + "loss": 0.0, + "step": 10584 + }, + { + "epoch": 0.6821550557453115, + "grad_norm": 0.021867903048846355, + "learning_rate": 3.536698890082349e-06, + "loss": 0.0002, + "step": 10585 + }, + { + "epoch": 0.6822195011922407, + "grad_norm": 0.48420196949728816, + "learning_rate": 3.5359828141783032e-06, + "loss": 0.0034, + "step": 10586 + }, + { + "epoch": 0.6822839466391699, + "grad_norm": 0.0024605339805774456, + "learning_rate": 3.535266738274257e-06, + "loss": 0.0, + "step": 10587 + }, + { + "epoch": 0.6823483920860991, + "grad_norm": 0.011360759604440345, + "learning_rate": 3.5345506623702114e-06, + "loss": 0.0, + "step": 10588 + }, + { + "epoch": 0.6824128375330283, + "grad_norm": 0.0009775462378554197, + "learning_rate": 3.5338345864661657e-06, + "loss": 0.0, + "step": 10589 + }, + { + "epoch": 0.6824772829799575, + "grad_norm": 0.001092895735309418, + "learning_rate": 3.5331185105621196e-06, + "loss": 0.0, + "step": 10590 + }, + { + "epoch": 0.6825417284268867, + "grad_norm": 0.0027246687183617134, + "learning_rate": 3.5324024346580743e-06, + "loss": 0.0, + "step": 10591 + }, + { + "epoch": 0.6826061738738158, + "grad_norm": 0.0007618257843385489, + "learning_rate": 3.5316863587540286e-06, + "loss": 0.0, + "step": 10592 + }, + { + "epoch": 0.682670619320745, + "grad_norm": 0.18731919153917567, + "learning_rate": 3.5309702828499825e-06, + "loss": 0.0006, + "step": 10593 + }, + { + "epoch": 0.6827350647676742, + "grad_norm": 0.00033679473596064693, + "learning_rate": 3.5302542069459368e-06, + "loss": 0.0, + "step": 10594 + }, + { + "epoch": 0.6827995102146034, + "grad_norm": 0.3369809402210824, + "learning_rate": 3.5295381310418906e-06, + "loss": 0.0029, + "step": 10595 + }, + { + "epoch": 0.6828639556615325, + "grad_norm": 0.007815817597920173, + "learning_rate": 3.528822055137845e-06, + "loss": 0.0, + "step": 10596 + }, + { + "epoch": 0.6829284011084616, + "grad_norm": 0.01382235643620594, + "learning_rate": 3.5281059792337992e-06, + "loss": 0.0, + "step": 10597 + }, + { + "epoch": 0.6829928465553908, + "grad_norm": 0.0020800628314902774, + "learning_rate": 3.527389903329753e-06, + "loss": 0.0, + "step": 10598 + }, + { + "epoch": 0.68305729200232, + "grad_norm": 0.014260892033320325, + "learning_rate": 3.5266738274257074e-06, + "loss": 0.0, + "step": 10599 + }, + { + "epoch": 0.6831217374492492, + "grad_norm": 0.0028445670215190093, + "learning_rate": 3.5259577515216613e-06, + "loss": 0.0, + "step": 10600 + }, + { + "epoch": 0.6831861828961784, + "grad_norm": 0.0008771312919833636, + "learning_rate": 3.5252416756176156e-06, + "loss": 0.0, + "step": 10601 + }, + { + "epoch": 0.6832506283431076, + "grad_norm": 0.00018122247011554844, + "learning_rate": 3.52452559971357e-06, + "loss": 0.0, + "step": 10602 + }, + { + "epoch": 0.6833150737900368, + "grad_norm": 0.0008672090977392164, + "learning_rate": 3.523809523809524e-06, + "loss": 0.0, + "step": 10603 + }, + { + "epoch": 0.6833795192369659, + "grad_norm": 0.0035352795664838213, + "learning_rate": 3.5230934479054785e-06, + "loss": 0.0, + "step": 10604 + }, + { + "epoch": 0.6834439646838951, + "grad_norm": 0.0002724304476376662, + "learning_rate": 3.5223773720014328e-06, + "loss": 0.0, + "step": 10605 + }, + { + "epoch": 0.6835084101308243, + "grad_norm": 0.5370559665829012, + "learning_rate": 3.5216612960973866e-06, + "loss": 0.0063, + "step": 10606 + }, + { + "epoch": 0.6835728555777534, + "grad_norm": 0.0007439298862113308, + "learning_rate": 3.520945220193341e-06, + "loss": 0.0, + "step": 10607 + }, + { + "epoch": 0.6836373010246826, + "grad_norm": 0.009702558574781294, + "learning_rate": 3.520229144289295e-06, + "loss": 0.0, + "step": 10608 + }, + { + "epoch": 0.6837017464716117, + "grad_norm": 0.0011016534404801282, + "learning_rate": 3.519513068385249e-06, + "loss": 0.0, + "step": 10609 + }, + { + "epoch": 0.6837661919185409, + "grad_norm": 0.017491655293344472, + "learning_rate": 3.5187969924812034e-06, + "loss": 0.0015, + "step": 10610 + }, + { + "epoch": 0.6838306373654701, + "grad_norm": 0.173947823491574, + "learning_rate": 3.5180809165771573e-06, + "loss": 0.0015, + "step": 10611 + }, + { + "epoch": 0.6838950828123993, + "grad_norm": 0.0004411864460499935, + "learning_rate": 3.5173648406731116e-06, + "loss": 0.0, + "step": 10612 + }, + { + "epoch": 0.6839595282593285, + "grad_norm": 0.05571853331626954, + "learning_rate": 3.5166487647690655e-06, + "loss": 0.0004, + "step": 10613 + }, + { + "epoch": 0.6840239737062577, + "grad_norm": 0.0010041793745665642, + "learning_rate": 3.5159326888650198e-06, + "loss": 0.0, + "step": 10614 + }, + { + "epoch": 0.6840884191531869, + "grad_norm": 0.0983773975207399, + "learning_rate": 3.5152166129609745e-06, + "loss": 0.0002, + "step": 10615 + }, + { + "epoch": 0.684152864600116, + "grad_norm": 0.028895436137705906, + "learning_rate": 3.5145005370569284e-06, + "loss": 0.0, + "step": 10616 + }, + { + "epoch": 0.6842173100470452, + "grad_norm": 0.03385962597373734, + "learning_rate": 3.5137844611528827e-06, + "loss": 0.0, + "step": 10617 + }, + { + "epoch": 0.6842817554939743, + "grad_norm": 0.023507120372402705, + "learning_rate": 3.513068385248837e-06, + "loss": 0.0002, + "step": 10618 + }, + { + "epoch": 0.6843462009409035, + "grad_norm": 0.024889008607317492, + "learning_rate": 3.512352309344791e-06, + "loss": 0.0, + "step": 10619 + }, + { + "epoch": 0.6844106463878327, + "grad_norm": 0.0014179847128868335, + "learning_rate": 3.511636233440745e-06, + "loss": 0.0, + "step": 10620 + }, + { + "epoch": 0.6844750918347619, + "grad_norm": 0.013406477162943287, + "learning_rate": 3.510920157536699e-06, + "loss": 0.0, + "step": 10621 + }, + { + "epoch": 0.684539537281691, + "grad_norm": 0.28027808953122846, + "learning_rate": 3.5102040816326533e-06, + "loss": 0.0023, + "step": 10622 + }, + { + "epoch": 0.6846039827286202, + "grad_norm": 0.00861103756734106, + "learning_rate": 3.509488005728607e-06, + "loss": 0.0, + "step": 10623 + }, + { + "epoch": 0.6846684281755494, + "grad_norm": 0.09979447762360293, + "learning_rate": 3.5087719298245615e-06, + "loss": 0.0017, + "step": 10624 + }, + { + "epoch": 0.6847328736224786, + "grad_norm": 0.08866655842139125, + "learning_rate": 3.5080558539205158e-06, + "loss": 0.0017, + "step": 10625 + }, + { + "epoch": 0.6847973190694078, + "grad_norm": 0.09441540926634437, + "learning_rate": 3.5073397780164705e-06, + "loss": 0.0024, + "step": 10626 + }, + { + "epoch": 0.684861764516337, + "grad_norm": 0.10564041736905677, + "learning_rate": 3.5066237021124244e-06, + "loss": 0.0009, + "step": 10627 + }, + { + "epoch": 0.6849262099632661, + "grad_norm": 0.00668683870026609, + "learning_rate": 3.5059076262083787e-06, + "loss": 0.0, + "step": 10628 + }, + { + "epoch": 0.6849906554101952, + "grad_norm": 0.007881264256661852, + "learning_rate": 3.5051915503043326e-06, + "loss": 0.0, + "step": 10629 + }, + { + "epoch": 0.6850551008571244, + "grad_norm": 0.009405162217880085, + "learning_rate": 3.504475474400287e-06, + "loss": 0.0, + "step": 10630 + }, + { + "epoch": 0.6851195463040536, + "grad_norm": 0.002327160651186275, + "learning_rate": 3.5037593984962407e-06, + "loss": 0.0, + "step": 10631 + }, + { + "epoch": 0.6851839917509828, + "grad_norm": 0.02192513837913387, + "learning_rate": 3.503043322592195e-06, + "loss": 0.0, + "step": 10632 + }, + { + "epoch": 0.685248437197912, + "grad_norm": 4.1137151297167716e-05, + "learning_rate": 3.5023272466881493e-06, + "loss": 0.0, + "step": 10633 + }, + { + "epoch": 0.6853128826448411, + "grad_norm": 0.26734327883653797, + "learning_rate": 3.501611170784103e-06, + "loss": 0.0002, + "step": 10634 + }, + { + "epoch": 0.6853773280917703, + "grad_norm": 0.03286824451174531, + "learning_rate": 3.5008950948800575e-06, + "loss": 0.0001, + "step": 10635 + }, + { + "epoch": 0.6854417735386995, + "grad_norm": 0.03350731756160981, + "learning_rate": 3.5001790189760114e-06, + "loss": 0.0001, + "step": 10636 + }, + { + "epoch": 0.6855062189856287, + "grad_norm": 0.003075251300806112, + "learning_rate": 3.4994629430719657e-06, + "loss": 0.0, + "step": 10637 + }, + { + "epoch": 0.6855706644325579, + "grad_norm": 0.010674466356599537, + "learning_rate": 3.4987468671679204e-06, + "loss": 0.0, + "step": 10638 + }, + { + "epoch": 0.6856351098794871, + "grad_norm": 0.013242521425976893, + "learning_rate": 3.4980307912638743e-06, + "loss": 0.0, + "step": 10639 + }, + { + "epoch": 0.6856995553264162, + "grad_norm": 0.23402005179956256, + "learning_rate": 3.4973147153598286e-06, + "loss": 0.0005, + "step": 10640 + }, + { + "epoch": 0.6857640007733453, + "grad_norm": 0.006823108867421351, + "learning_rate": 3.496598639455783e-06, + "loss": 0.0, + "step": 10641 + }, + { + "epoch": 0.6858284462202745, + "grad_norm": 0.005193235394536426, + "learning_rate": 3.4958825635517367e-06, + "loss": 0.0, + "step": 10642 + }, + { + "epoch": 0.6858928916672037, + "grad_norm": 0.05758601897356446, + "learning_rate": 3.495166487647691e-06, + "loss": 0.0001, + "step": 10643 + }, + { + "epoch": 0.6859573371141329, + "grad_norm": 0.0002420785467362924, + "learning_rate": 3.494450411743645e-06, + "loss": 0.0, + "step": 10644 + }, + { + "epoch": 0.686021782561062, + "grad_norm": 0.013903106800684462, + "learning_rate": 3.4937343358395992e-06, + "loss": 0.0, + "step": 10645 + }, + { + "epoch": 0.6860862280079912, + "grad_norm": 0.0013727656492007864, + "learning_rate": 3.4930182599355535e-06, + "loss": 0.0, + "step": 10646 + }, + { + "epoch": 0.6861506734549204, + "grad_norm": 0.005639060991519206, + "learning_rate": 3.4923021840315074e-06, + "loss": 0.0, + "step": 10647 + }, + { + "epoch": 0.6862151189018496, + "grad_norm": 0.00012059895211766672, + "learning_rate": 3.4915861081274617e-06, + "loss": 0.0, + "step": 10648 + }, + { + "epoch": 0.6862795643487788, + "grad_norm": 0.000295021304383494, + "learning_rate": 3.4908700322234156e-06, + "loss": 0.0, + "step": 10649 + }, + { + "epoch": 0.686344009795708, + "grad_norm": 0.006692717847118855, + "learning_rate": 3.4901539563193703e-06, + "loss": 0.0, + "step": 10650 + }, + { + "epoch": 0.6864084552426372, + "grad_norm": 0.0008308969576633938, + "learning_rate": 3.4894378804153246e-06, + "loss": 0.0, + "step": 10651 + }, + { + "epoch": 0.6864729006895662, + "grad_norm": 0.02864746925349982, + "learning_rate": 3.4887218045112785e-06, + "loss": 0.0001, + "step": 10652 + }, + { + "epoch": 0.6865373461364954, + "grad_norm": 0.0004295749878789247, + "learning_rate": 3.4880057286072328e-06, + "loss": 0.0, + "step": 10653 + }, + { + "epoch": 0.6866017915834246, + "grad_norm": 0.0003032108960250809, + "learning_rate": 3.487289652703187e-06, + "loss": 0.0, + "step": 10654 + }, + { + "epoch": 0.6866662370303538, + "grad_norm": 0.0032508764152708682, + "learning_rate": 3.486573576799141e-06, + "loss": 0.0, + "step": 10655 + }, + { + "epoch": 0.686730682477283, + "grad_norm": 0.3218022338317263, + "learning_rate": 3.4858575008950952e-06, + "loss": 0.0021, + "step": 10656 + }, + { + "epoch": 0.6867951279242122, + "grad_norm": 0.013413374439972325, + "learning_rate": 3.485141424991049e-06, + "loss": 0.0001, + "step": 10657 + }, + { + "epoch": 0.6868595733711413, + "grad_norm": 0.0022459060523140166, + "learning_rate": 3.4844253490870034e-06, + "loss": 0.0, + "step": 10658 + }, + { + "epoch": 0.6869240188180705, + "grad_norm": 0.0025463195329138914, + "learning_rate": 3.4837092731829573e-06, + "loss": 0.0, + "step": 10659 + }, + { + "epoch": 0.6869884642649997, + "grad_norm": 0.07051437260233849, + "learning_rate": 3.4829931972789116e-06, + "loss": 0.0017, + "step": 10660 + }, + { + "epoch": 0.6870529097119289, + "grad_norm": 0.002290644215445257, + "learning_rate": 3.4822771213748663e-06, + "loss": 0.0, + "step": 10661 + }, + { + "epoch": 0.6871173551588581, + "grad_norm": 0.00039243013710876817, + "learning_rate": 3.4815610454708206e-06, + "loss": 0.0, + "step": 10662 + }, + { + "epoch": 0.6871818006057872, + "grad_norm": 0.001251349809574215, + "learning_rate": 3.4808449695667745e-06, + "loss": 0.0, + "step": 10663 + }, + { + "epoch": 0.6872462460527163, + "grad_norm": 0.02047947219666481, + "learning_rate": 3.4801288936627288e-06, + "loss": 0.0001, + "step": 10664 + }, + { + "epoch": 0.6873106914996455, + "grad_norm": 0.030119726822941218, + "learning_rate": 3.4794128177586826e-06, + "loss": 0.0001, + "step": 10665 + }, + { + "epoch": 0.6873751369465747, + "grad_norm": 0.005128161573191081, + "learning_rate": 3.478696741854637e-06, + "loss": 0.0, + "step": 10666 + }, + { + "epoch": 0.6874395823935039, + "grad_norm": 0.06837941625274693, + "learning_rate": 3.477980665950591e-06, + "loss": 0.0001, + "step": 10667 + }, + { + "epoch": 0.6875040278404331, + "grad_norm": 0.03753905404742778, + "learning_rate": 3.477264590046545e-06, + "loss": 0.0, + "step": 10668 + }, + { + "epoch": 0.6875684732873623, + "grad_norm": 0.0034440145309386637, + "learning_rate": 3.4765485141424994e-06, + "loss": 0.0, + "step": 10669 + }, + { + "epoch": 0.6876329187342914, + "grad_norm": 0.14563323382401655, + "learning_rate": 3.4758324382384533e-06, + "loss": 0.0002, + "step": 10670 + }, + { + "epoch": 0.6876973641812206, + "grad_norm": 0.007722652524045524, + "learning_rate": 3.4751163623344076e-06, + "loss": 0.0, + "step": 10671 + }, + { + "epoch": 0.6877618096281498, + "grad_norm": 0.0032592152884733613, + "learning_rate": 3.4744002864303615e-06, + "loss": 0.0, + "step": 10672 + }, + { + "epoch": 0.687826255075079, + "grad_norm": 0.008455749548395619, + "learning_rate": 3.473684210526316e-06, + "loss": 0.0, + "step": 10673 + }, + { + "epoch": 0.6878907005220081, + "grad_norm": 0.09676174091931598, + "learning_rate": 3.4729681346222705e-06, + "loss": 0.0003, + "step": 10674 + }, + { + "epoch": 0.6879551459689373, + "grad_norm": 0.0010924824758405294, + "learning_rate": 3.4722520587182244e-06, + "loss": 0.0, + "step": 10675 + }, + { + "epoch": 0.6880195914158664, + "grad_norm": 0.010774355308097124, + "learning_rate": 3.4715359828141787e-06, + "loss": 0.0016, + "step": 10676 + }, + { + "epoch": 0.6880840368627956, + "grad_norm": 0.01795689798363324, + "learning_rate": 3.470819906910133e-06, + "loss": 0.0, + "step": 10677 + }, + { + "epoch": 0.6881484823097248, + "grad_norm": 0.0027896545235474085, + "learning_rate": 3.470103831006087e-06, + "loss": 0.0, + "step": 10678 + }, + { + "epoch": 0.688212927756654, + "grad_norm": 0.22212767183159907, + "learning_rate": 3.469387755102041e-06, + "loss": 0.0005, + "step": 10679 + }, + { + "epoch": 0.6882773732035832, + "grad_norm": 0.0003912980966343014, + "learning_rate": 3.468671679197995e-06, + "loss": 0.0, + "step": 10680 + }, + { + "epoch": 0.6883418186505124, + "grad_norm": 0.0005156493416187653, + "learning_rate": 3.4679556032939493e-06, + "loss": 0.0, + "step": 10681 + }, + { + "epoch": 0.6884062640974415, + "grad_norm": 0.021590134423169444, + "learning_rate": 3.4672395273899036e-06, + "loss": 0.0, + "step": 10682 + }, + { + "epoch": 0.6884707095443707, + "grad_norm": 0.023141501828001405, + "learning_rate": 3.4665234514858575e-06, + "loss": 0.0, + "step": 10683 + }, + { + "epoch": 0.6885351549912999, + "grad_norm": 0.007118016829719887, + "learning_rate": 3.4658073755818118e-06, + "loss": 0.0001, + "step": 10684 + }, + { + "epoch": 0.688599600438229, + "grad_norm": 0.25918209531883035, + "learning_rate": 3.4650912996777665e-06, + "loss": 0.0005, + "step": 10685 + }, + { + "epoch": 0.6886640458851582, + "grad_norm": 0.004237713823522767, + "learning_rate": 3.4643752237737204e-06, + "loss": 0.0, + "step": 10686 + }, + { + "epoch": 0.6887284913320874, + "grad_norm": 0.038278050860711475, + "learning_rate": 3.4636591478696747e-06, + "loss": 0.0, + "step": 10687 + }, + { + "epoch": 0.6887929367790165, + "grad_norm": 0.000426746379782578, + "learning_rate": 3.4629430719656286e-06, + "loss": 0.0, + "step": 10688 + }, + { + "epoch": 0.6888573822259457, + "grad_norm": 0.0034051439268017562, + "learning_rate": 3.462226996061583e-06, + "loss": 0.0, + "step": 10689 + }, + { + "epoch": 0.6889218276728749, + "grad_norm": 0.018312988913133918, + "learning_rate": 3.461510920157537e-06, + "loss": 0.0, + "step": 10690 + }, + { + "epoch": 0.6889862731198041, + "grad_norm": 0.0008704069031677928, + "learning_rate": 3.460794844253491e-06, + "loss": 0.0, + "step": 10691 + }, + { + "epoch": 0.6890507185667333, + "grad_norm": 0.00011138326842831115, + "learning_rate": 3.4600787683494453e-06, + "loss": 0.0, + "step": 10692 + }, + { + "epoch": 0.6891151640136625, + "grad_norm": 6.343770674378296e-05, + "learning_rate": 3.459362692445399e-06, + "loss": 0.0, + "step": 10693 + }, + { + "epoch": 0.6891796094605916, + "grad_norm": 0.000850738362306896, + "learning_rate": 3.4586466165413535e-06, + "loss": 0.0, + "step": 10694 + }, + { + "epoch": 0.6892440549075208, + "grad_norm": 0.15241558875373135, + "learning_rate": 3.457930540637308e-06, + "loss": 0.0005, + "step": 10695 + }, + { + "epoch": 0.6893085003544499, + "grad_norm": 0.0037602512408664913, + "learning_rate": 3.457214464733262e-06, + "loss": 0.0, + "step": 10696 + }, + { + "epoch": 0.6893729458013791, + "grad_norm": 0.16921284611583368, + "learning_rate": 3.4564983888292164e-06, + "loss": 0.0014, + "step": 10697 + }, + { + "epoch": 0.6894373912483083, + "grad_norm": 0.04126671726871299, + "learning_rate": 3.4557823129251707e-06, + "loss": 0.0001, + "step": 10698 + }, + { + "epoch": 0.6895018366952375, + "grad_norm": 0.24241588960094448, + "learning_rate": 3.4550662370211246e-06, + "loss": 0.0006, + "step": 10699 + }, + { + "epoch": 0.6895662821421666, + "grad_norm": 0.0011653326035232546, + "learning_rate": 3.454350161117079e-06, + "loss": 0.0, + "step": 10700 + }, + { + "epoch": 0.6896307275890958, + "grad_norm": 0.0053412736709698705, + "learning_rate": 3.4536340852130327e-06, + "loss": 0.0001, + "step": 10701 + }, + { + "epoch": 0.689695173036025, + "grad_norm": 0.0027695711960786054, + "learning_rate": 3.452918009308987e-06, + "loss": 0.0, + "step": 10702 + }, + { + "epoch": 0.6897596184829542, + "grad_norm": 0.000903931335369121, + "learning_rate": 3.4522019334049413e-06, + "loss": 0.0, + "step": 10703 + }, + { + "epoch": 0.6898240639298834, + "grad_norm": 0.0032606382890580307, + "learning_rate": 3.4514858575008952e-06, + "loss": 0.0, + "step": 10704 + }, + { + "epoch": 0.6898885093768126, + "grad_norm": 0.0019891495099166236, + "learning_rate": 3.4507697815968495e-06, + "loss": 0.0, + "step": 10705 + }, + { + "epoch": 0.6899529548237417, + "grad_norm": 0.014469597284214894, + "learning_rate": 3.4500537056928034e-06, + "loss": 0.0001, + "step": 10706 + }, + { + "epoch": 0.6900174002706708, + "grad_norm": 0.0036697474437936265, + "learning_rate": 3.4493376297887577e-06, + "loss": 0.0001, + "step": 10707 + }, + { + "epoch": 0.6900818457176, + "grad_norm": 0.001639690389296713, + "learning_rate": 3.4486215538847124e-06, + "loss": 0.0, + "step": 10708 + }, + { + "epoch": 0.6901462911645292, + "grad_norm": 0.001005555053943566, + "learning_rate": 3.4479054779806663e-06, + "loss": 0.0, + "step": 10709 + }, + { + "epoch": 0.6902107366114584, + "grad_norm": 0.09674031832156237, + "learning_rate": 3.4471894020766206e-06, + "loss": 0.0002, + "step": 10710 + }, + { + "epoch": 0.6902751820583876, + "grad_norm": 0.17584697766118745, + "learning_rate": 3.446473326172575e-06, + "loss": 0.0001, + "step": 10711 + }, + { + "epoch": 0.6903396275053167, + "grad_norm": 0.0003299470982010914, + "learning_rate": 3.4457572502685288e-06, + "loss": 0.0, + "step": 10712 + }, + { + "epoch": 0.6904040729522459, + "grad_norm": 0.0012676769435483054, + "learning_rate": 3.445041174364483e-06, + "loss": 0.0, + "step": 10713 + }, + { + "epoch": 0.6904685183991751, + "grad_norm": 0.0001250973702220075, + "learning_rate": 3.444325098460437e-06, + "loss": 0.0, + "step": 10714 + }, + { + "epoch": 0.6905329638461043, + "grad_norm": 0.0016230453999726533, + "learning_rate": 3.4436090225563912e-06, + "loss": 0.0, + "step": 10715 + }, + { + "epoch": 0.6905974092930335, + "grad_norm": 0.7712714178984845, + "learning_rate": 3.442892946652345e-06, + "loss": 0.0016, + "step": 10716 + }, + { + "epoch": 0.6906618547399627, + "grad_norm": 0.012430636333727142, + "learning_rate": 3.4421768707482994e-06, + "loss": 0.0, + "step": 10717 + }, + { + "epoch": 0.6907263001868919, + "grad_norm": 0.00038744205676761773, + "learning_rate": 3.4414607948442537e-06, + "loss": 0.0, + "step": 10718 + }, + { + "epoch": 0.6907907456338209, + "grad_norm": 0.001783591718322277, + "learning_rate": 3.4407447189402076e-06, + "loss": 0.0, + "step": 10719 + }, + { + "epoch": 0.6908551910807501, + "grad_norm": 0.00012095653921781966, + "learning_rate": 3.4400286430361623e-06, + "loss": 0.0, + "step": 10720 + }, + { + "epoch": 0.6909196365276793, + "grad_norm": 1.0810774582805374, + "learning_rate": 3.4393125671321166e-06, + "loss": 0.0064, + "step": 10721 + }, + { + "epoch": 0.6909840819746085, + "grad_norm": 0.006299809688504188, + "learning_rate": 3.4385964912280705e-06, + "loss": 0.0, + "step": 10722 + }, + { + "epoch": 0.6910485274215377, + "grad_norm": 0.002780990180928392, + "learning_rate": 3.4378804153240248e-06, + "loss": 0.0, + "step": 10723 + }, + { + "epoch": 0.6911129728684668, + "grad_norm": 0.057636782983209595, + "learning_rate": 3.4371643394199786e-06, + "loss": 0.0001, + "step": 10724 + }, + { + "epoch": 0.691177418315396, + "grad_norm": 0.0008725221319251466, + "learning_rate": 3.436448263515933e-06, + "loss": 0.0, + "step": 10725 + }, + { + "epoch": 0.6912418637623252, + "grad_norm": 0.026326179625309587, + "learning_rate": 3.4357321876118872e-06, + "loss": 0.0002, + "step": 10726 + }, + { + "epoch": 0.6913063092092544, + "grad_norm": 0.521066305189868, + "learning_rate": 3.435016111707841e-06, + "loss": 0.0036, + "step": 10727 + }, + { + "epoch": 0.6913707546561836, + "grad_norm": 0.00021508146730564562, + "learning_rate": 3.4343000358037954e-06, + "loss": 0.0, + "step": 10728 + }, + { + "epoch": 0.6914352001031128, + "grad_norm": 5.2332481397573e-05, + "learning_rate": 3.4335839598997493e-06, + "loss": 0.0, + "step": 10729 + }, + { + "epoch": 0.6914996455500418, + "grad_norm": 0.0025788359544813106, + "learning_rate": 3.4328678839957036e-06, + "loss": 0.0, + "step": 10730 + }, + { + "epoch": 0.691564090996971, + "grad_norm": 0.7060573678077907, + "learning_rate": 3.4321518080916583e-06, + "loss": 0.0049, + "step": 10731 + }, + { + "epoch": 0.6916285364439002, + "grad_norm": 0.44814332690464703, + "learning_rate": 3.431435732187612e-06, + "loss": 0.0036, + "step": 10732 + }, + { + "epoch": 0.6916929818908294, + "grad_norm": 0.3525222816594591, + "learning_rate": 3.4307196562835665e-06, + "loss": 0.0026, + "step": 10733 + }, + { + "epoch": 0.6917574273377586, + "grad_norm": 0.10370206238152077, + "learning_rate": 3.4300035803795208e-06, + "loss": 0.0001, + "step": 10734 + }, + { + "epoch": 0.6918218727846878, + "grad_norm": 0.00335698601833503, + "learning_rate": 3.4292875044754747e-06, + "loss": 0.0, + "step": 10735 + }, + { + "epoch": 0.691886318231617, + "grad_norm": 0.006926952458820839, + "learning_rate": 3.428571428571429e-06, + "loss": 0.0001, + "step": 10736 + }, + { + "epoch": 0.6919507636785461, + "grad_norm": 0.0003903578948580782, + "learning_rate": 3.427855352667383e-06, + "loss": 0.0, + "step": 10737 + }, + { + "epoch": 0.6920152091254753, + "grad_norm": 0.06039492128259943, + "learning_rate": 3.427139276763337e-06, + "loss": 0.0016, + "step": 10738 + }, + { + "epoch": 0.6920796545724045, + "grad_norm": 0.21314970053730722, + "learning_rate": 3.4264232008592914e-06, + "loss": 0.0006, + "step": 10739 + }, + { + "epoch": 0.6921441000193337, + "grad_norm": 0.009887192339525711, + "learning_rate": 3.4257071249552453e-06, + "loss": 0.0001, + "step": 10740 + }, + { + "epoch": 0.6922085454662628, + "grad_norm": 0.0023324859903385378, + "learning_rate": 3.4249910490511996e-06, + "loss": 0.0, + "step": 10741 + }, + { + "epoch": 0.6922729909131919, + "grad_norm": 0.0017754964193178054, + "learning_rate": 3.4242749731471535e-06, + "loss": 0.0, + "step": 10742 + }, + { + "epoch": 0.6923374363601211, + "grad_norm": 0.11862733365427512, + "learning_rate": 3.423558897243108e-06, + "loss": 0.0002, + "step": 10743 + }, + { + "epoch": 0.6924018818070503, + "grad_norm": 0.12538965409042613, + "learning_rate": 3.4228428213390625e-06, + "loss": 0.0017, + "step": 10744 + }, + { + "epoch": 0.6924663272539795, + "grad_norm": 0.1790398036759841, + "learning_rate": 3.4221267454350164e-06, + "loss": 0.0012, + "step": 10745 + }, + { + "epoch": 0.6925307727009087, + "grad_norm": 0.016628649121499365, + "learning_rate": 3.4214106695309707e-06, + "loss": 0.0001, + "step": 10746 + }, + { + "epoch": 0.6925952181478379, + "grad_norm": 0.003371619886892593, + "learning_rate": 3.420694593626925e-06, + "loss": 0.0, + "step": 10747 + }, + { + "epoch": 0.692659663594767, + "grad_norm": 0.24389006154688553, + "learning_rate": 3.419978517722879e-06, + "loss": 0.0006, + "step": 10748 + }, + { + "epoch": 0.6927241090416962, + "grad_norm": 0.14232802337674683, + "learning_rate": 3.419262441818833e-06, + "loss": 0.0003, + "step": 10749 + }, + { + "epoch": 0.6927885544886254, + "grad_norm": 0.019736813074936816, + "learning_rate": 3.418546365914787e-06, + "loss": 0.0, + "step": 10750 + }, + { + "epoch": 0.6928529999355546, + "grad_norm": 0.05097154578008529, + "learning_rate": 3.4178302900107413e-06, + "loss": 0.0, + "step": 10751 + }, + { + "epoch": 0.6929174453824837, + "grad_norm": 0.00107446008995132, + "learning_rate": 3.4171142141066956e-06, + "loss": 0.0, + "step": 10752 + }, + { + "epoch": 0.6929818908294129, + "grad_norm": 0.012435354325548929, + "learning_rate": 3.4163981382026495e-06, + "loss": 0.0, + "step": 10753 + }, + { + "epoch": 0.693046336276342, + "grad_norm": 0.021957133028454156, + "learning_rate": 3.415682062298604e-06, + "loss": 0.0, + "step": 10754 + }, + { + "epoch": 0.6931107817232712, + "grad_norm": 0.0002480213227601337, + "learning_rate": 3.4149659863945585e-06, + "loss": 0.0, + "step": 10755 + }, + { + "epoch": 0.6931752271702004, + "grad_norm": 0.0623140810818036, + "learning_rate": 3.4142499104905124e-06, + "loss": 0.0, + "step": 10756 + }, + { + "epoch": 0.6932396726171296, + "grad_norm": 0.0017141559011994213, + "learning_rate": 3.4135338345864667e-06, + "loss": 0.0, + "step": 10757 + }, + { + "epoch": 0.6933041180640588, + "grad_norm": 0.014615192039343805, + "learning_rate": 3.4128177586824206e-06, + "loss": 0.0, + "step": 10758 + }, + { + "epoch": 0.693368563510988, + "grad_norm": 0.0017439914087220587, + "learning_rate": 3.412101682778375e-06, + "loss": 0.0, + "step": 10759 + }, + { + "epoch": 0.6934330089579172, + "grad_norm": 0.0012153270549224956, + "learning_rate": 3.411385606874329e-06, + "loss": 0.0, + "step": 10760 + }, + { + "epoch": 0.6934974544048463, + "grad_norm": 0.014303665596357573, + "learning_rate": 3.410669530970283e-06, + "loss": 0.0, + "step": 10761 + }, + { + "epoch": 0.6935618998517755, + "grad_norm": 7.439093973990937e-05, + "learning_rate": 3.4099534550662373e-06, + "loss": 0.0, + "step": 10762 + }, + { + "epoch": 0.6936263452987046, + "grad_norm": 0.19960271073953648, + "learning_rate": 3.4092373791621912e-06, + "loss": 0.0003, + "step": 10763 + }, + { + "epoch": 0.6936907907456338, + "grad_norm": 0.018784679469903552, + "learning_rate": 3.4085213032581455e-06, + "loss": 0.0001, + "step": 10764 + }, + { + "epoch": 0.693755236192563, + "grad_norm": 0.06285531773678558, + "learning_rate": 3.4078052273540994e-06, + "loss": 0.0003, + "step": 10765 + }, + { + "epoch": 0.6938196816394921, + "grad_norm": 0.02711289526911007, + "learning_rate": 3.407089151450054e-06, + "loss": 0.0, + "step": 10766 + }, + { + "epoch": 0.6938841270864213, + "grad_norm": 0.005589801722090483, + "learning_rate": 3.4063730755460084e-06, + "loss": 0.0, + "step": 10767 + }, + { + "epoch": 0.6939485725333505, + "grad_norm": 0.012897300775219056, + "learning_rate": 3.4056569996419627e-06, + "loss": 0.0001, + "step": 10768 + }, + { + "epoch": 0.6940130179802797, + "grad_norm": 0.01099284164409652, + "learning_rate": 3.4049409237379166e-06, + "loss": 0.0001, + "step": 10769 + }, + { + "epoch": 0.6940774634272089, + "grad_norm": 0.0016891558399507187, + "learning_rate": 3.404224847833871e-06, + "loss": 0.0, + "step": 10770 + }, + { + "epoch": 0.6941419088741381, + "grad_norm": 0.006634654576318617, + "learning_rate": 3.4035087719298248e-06, + "loss": 0.0, + "step": 10771 + }, + { + "epoch": 0.6942063543210673, + "grad_norm": 0.13746926170005874, + "learning_rate": 3.402792696025779e-06, + "loss": 0.0001, + "step": 10772 + }, + { + "epoch": 0.6942707997679964, + "grad_norm": 0.02645937380972612, + "learning_rate": 3.402076620121733e-06, + "loss": 0.0001, + "step": 10773 + }, + { + "epoch": 0.6943352452149255, + "grad_norm": 0.0033664323788412985, + "learning_rate": 3.4013605442176872e-06, + "loss": 0.0, + "step": 10774 + }, + { + "epoch": 0.6943996906618547, + "grad_norm": 0.0022188361754276117, + "learning_rate": 3.4006444683136415e-06, + "loss": 0.0, + "step": 10775 + }, + { + "epoch": 0.6944641361087839, + "grad_norm": 0.0004594087068661742, + "learning_rate": 3.3999283924095954e-06, + "loss": 0.0, + "step": 10776 + }, + { + "epoch": 0.6945285815557131, + "grad_norm": 0.0032890205841026223, + "learning_rate": 3.3992123165055497e-06, + "loss": 0.0, + "step": 10777 + }, + { + "epoch": 0.6945930270026422, + "grad_norm": 0.07355642935563832, + "learning_rate": 3.3984962406015044e-06, + "loss": 0.0001, + "step": 10778 + }, + { + "epoch": 0.6946574724495714, + "grad_norm": 0.019127169783181307, + "learning_rate": 3.3977801646974583e-06, + "loss": 0.0, + "step": 10779 + }, + { + "epoch": 0.6947219178965006, + "grad_norm": 0.0010884441376112783, + "learning_rate": 3.3970640887934126e-06, + "loss": 0.0, + "step": 10780 + }, + { + "epoch": 0.6947863633434298, + "grad_norm": 0.0023750329760530457, + "learning_rate": 3.3963480128893665e-06, + "loss": 0.0, + "step": 10781 + }, + { + "epoch": 0.694850808790359, + "grad_norm": 0.0007830149276151957, + "learning_rate": 3.3956319369853208e-06, + "loss": 0.0, + "step": 10782 + }, + { + "epoch": 0.6949152542372882, + "grad_norm": 0.04364455540834853, + "learning_rate": 3.394915861081275e-06, + "loss": 0.0001, + "step": 10783 + }, + { + "epoch": 0.6949796996842174, + "grad_norm": 0.016637574609427337, + "learning_rate": 3.394199785177229e-06, + "loss": 0.0002, + "step": 10784 + }, + { + "epoch": 0.6950441451311464, + "grad_norm": 0.002434681407404538, + "learning_rate": 3.3934837092731832e-06, + "loss": 0.0, + "step": 10785 + }, + { + "epoch": 0.6951085905780756, + "grad_norm": 0.006948871061763611, + "learning_rate": 3.392767633369137e-06, + "loss": 0.0, + "step": 10786 + }, + { + "epoch": 0.6951730360250048, + "grad_norm": 0.004640961971091515, + "learning_rate": 3.3920515574650914e-06, + "loss": 0.0, + "step": 10787 + }, + { + "epoch": 0.695237481471934, + "grad_norm": 0.00041903898312994556, + "learning_rate": 3.3913354815610457e-06, + "loss": 0.0, + "step": 10788 + }, + { + "epoch": 0.6953019269188632, + "grad_norm": 0.053400623619324995, + "learning_rate": 3.3906194056569996e-06, + "loss": 0.0001, + "step": 10789 + }, + { + "epoch": 0.6953663723657924, + "grad_norm": 0.0006922881994538185, + "learning_rate": 3.3899033297529543e-06, + "loss": 0.0, + "step": 10790 + }, + { + "epoch": 0.6954308178127215, + "grad_norm": 0.009405572997517235, + "learning_rate": 3.3891872538489086e-06, + "loss": 0.0, + "step": 10791 + }, + { + "epoch": 0.6954952632596507, + "grad_norm": 0.00023536262169622017, + "learning_rate": 3.3884711779448625e-06, + "loss": 0.0, + "step": 10792 + }, + { + "epoch": 0.6955597087065799, + "grad_norm": 0.000701178056448638, + "learning_rate": 3.3877551020408168e-06, + "loss": 0.0, + "step": 10793 + }, + { + "epoch": 0.6956241541535091, + "grad_norm": 0.04493621669798482, + "learning_rate": 3.3870390261367707e-06, + "loss": 0.0001, + "step": 10794 + }, + { + "epoch": 0.6956885996004383, + "grad_norm": 0.03422308278740534, + "learning_rate": 3.386322950232725e-06, + "loss": 0.0, + "step": 10795 + }, + { + "epoch": 0.6957530450473675, + "grad_norm": 0.0007628641240363434, + "learning_rate": 3.3856068743286793e-06, + "loss": 0.0, + "step": 10796 + }, + { + "epoch": 0.6958174904942965, + "grad_norm": 9.180779913812038e-05, + "learning_rate": 3.384890798424633e-06, + "loss": 0.0, + "step": 10797 + }, + { + "epoch": 0.6958819359412257, + "grad_norm": 0.10470937724474787, + "learning_rate": 3.3841747225205874e-06, + "loss": 0.0004, + "step": 10798 + }, + { + "epoch": 0.6959463813881549, + "grad_norm": 0.05540156375470692, + "learning_rate": 3.3834586466165413e-06, + "loss": 0.0002, + "step": 10799 + }, + { + "epoch": 0.6960108268350841, + "grad_norm": 0.0035984583674340755, + "learning_rate": 3.3827425707124956e-06, + "loss": 0.0, + "step": 10800 + }, + { + "epoch": 0.6960752722820133, + "grad_norm": 0.0015088185612061752, + "learning_rate": 3.3820264948084503e-06, + "loss": 0.0, + "step": 10801 + }, + { + "epoch": 0.6961397177289425, + "grad_norm": 0.04853284722866676, + "learning_rate": 3.381310418904404e-06, + "loss": 0.0001, + "step": 10802 + }, + { + "epoch": 0.6962041631758716, + "grad_norm": 0.0012557955823111358, + "learning_rate": 3.3805943430003585e-06, + "loss": 0.0, + "step": 10803 + }, + { + "epoch": 0.6962686086228008, + "grad_norm": 0.01631435553766888, + "learning_rate": 3.379878267096313e-06, + "loss": 0.0, + "step": 10804 + }, + { + "epoch": 0.69633305406973, + "grad_norm": 0.003211445832762659, + "learning_rate": 3.3791621911922667e-06, + "loss": 0.0, + "step": 10805 + }, + { + "epoch": 0.6963974995166592, + "grad_norm": 0.0006773092919922552, + "learning_rate": 3.378446115288221e-06, + "loss": 0.0, + "step": 10806 + }, + { + "epoch": 0.6964619449635884, + "grad_norm": 0.097851148669192, + "learning_rate": 3.377730039384175e-06, + "loss": 0.0002, + "step": 10807 + }, + { + "epoch": 0.6965263904105174, + "grad_norm": 0.000700090125051526, + "learning_rate": 3.377013963480129e-06, + "loss": 0.0, + "step": 10808 + }, + { + "epoch": 0.6965908358574466, + "grad_norm": 0.01792088747671209, + "learning_rate": 3.376297887576083e-06, + "loss": 0.0001, + "step": 10809 + }, + { + "epoch": 0.6966552813043758, + "grad_norm": 0.004783641588541656, + "learning_rate": 3.3755818116720373e-06, + "loss": 0.0, + "step": 10810 + }, + { + "epoch": 0.696719726751305, + "grad_norm": 0.0020453910864311918, + "learning_rate": 3.3748657357679916e-06, + "loss": 0.0, + "step": 10811 + }, + { + "epoch": 0.6967841721982342, + "grad_norm": 0.0832148634326752, + "learning_rate": 3.3741496598639455e-06, + "loss": 0.0005, + "step": 10812 + }, + { + "epoch": 0.6968486176451634, + "grad_norm": 0.0001007275952549603, + "learning_rate": 3.3734335839599002e-06, + "loss": 0.0, + "step": 10813 + }, + { + "epoch": 0.6969130630920926, + "grad_norm": 0.005044496693702052, + "learning_rate": 3.3727175080558545e-06, + "loss": 0.0, + "step": 10814 + }, + { + "epoch": 0.6969775085390217, + "grad_norm": 0.17172673180819398, + "learning_rate": 3.3720014321518084e-06, + "loss": 0.0019, + "step": 10815 + }, + { + "epoch": 0.6970419539859509, + "grad_norm": 0.0009186030854356705, + "learning_rate": 3.3712853562477627e-06, + "loss": 0.0, + "step": 10816 + }, + { + "epoch": 0.6971063994328801, + "grad_norm": 0.012453707151726626, + "learning_rate": 3.3705692803437166e-06, + "loss": 0.0, + "step": 10817 + }, + { + "epoch": 0.6971708448798093, + "grad_norm": 0.0009751430932883996, + "learning_rate": 3.369853204439671e-06, + "loss": 0.0, + "step": 10818 + }, + { + "epoch": 0.6972352903267384, + "grad_norm": 0.0004079109002148108, + "learning_rate": 3.369137128535625e-06, + "loss": 0.0, + "step": 10819 + }, + { + "epoch": 0.6972997357736676, + "grad_norm": 0.2528612640796825, + "learning_rate": 3.368421052631579e-06, + "loss": 0.0003, + "step": 10820 + }, + { + "epoch": 0.6973641812205967, + "grad_norm": 0.1139363960253434, + "learning_rate": 3.3677049767275333e-06, + "loss": 0.0013, + "step": 10821 + }, + { + "epoch": 0.6974286266675259, + "grad_norm": 0.005500716077919033, + "learning_rate": 3.3669889008234872e-06, + "loss": 0.0, + "step": 10822 + }, + { + "epoch": 0.6974930721144551, + "grad_norm": 0.08116801795662411, + "learning_rate": 3.3662728249194415e-06, + "loss": 0.0001, + "step": 10823 + }, + { + "epoch": 0.6975575175613843, + "grad_norm": 0.0035024566652384916, + "learning_rate": 3.365556749015396e-06, + "loss": 0.0, + "step": 10824 + }, + { + "epoch": 0.6976219630083135, + "grad_norm": 0.015370411768365658, + "learning_rate": 3.36484067311135e-06, + "loss": 0.0001, + "step": 10825 + }, + { + "epoch": 0.6976864084552427, + "grad_norm": 0.06178277284897271, + "learning_rate": 3.3641245972073044e-06, + "loss": 0.0002, + "step": 10826 + }, + { + "epoch": 0.6977508539021718, + "grad_norm": 0.00037744400225636377, + "learning_rate": 3.3634085213032587e-06, + "loss": 0.0, + "step": 10827 + }, + { + "epoch": 0.697815299349101, + "grad_norm": 0.005827135498293183, + "learning_rate": 3.3626924453992126e-06, + "loss": 0.0, + "step": 10828 + }, + { + "epoch": 0.6978797447960302, + "grad_norm": 0.22508163560634026, + "learning_rate": 3.361976369495167e-06, + "loss": 0.0002, + "step": 10829 + }, + { + "epoch": 0.6979441902429593, + "grad_norm": 0.00026929893020132337, + "learning_rate": 3.3612602935911208e-06, + "loss": 0.0, + "step": 10830 + }, + { + "epoch": 0.6980086356898885, + "grad_norm": 0.0020361584289357894, + "learning_rate": 3.360544217687075e-06, + "loss": 0.0, + "step": 10831 + }, + { + "epoch": 0.6980730811368177, + "grad_norm": 0.0004221505435830251, + "learning_rate": 3.3598281417830294e-06, + "loss": 0.0, + "step": 10832 + }, + { + "epoch": 0.6981375265837468, + "grad_norm": 0.05349353661611049, + "learning_rate": 3.3591120658789832e-06, + "loss": 0.0001, + "step": 10833 + }, + { + "epoch": 0.698201972030676, + "grad_norm": 0.05797448742304965, + "learning_rate": 3.3583959899749375e-06, + "loss": 0.0008, + "step": 10834 + }, + { + "epoch": 0.6982664174776052, + "grad_norm": 0.0008506722447421768, + "learning_rate": 3.3576799140708914e-06, + "loss": 0.0, + "step": 10835 + }, + { + "epoch": 0.6983308629245344, + "grad_norm": 0.00021320946359430795, + "learning_rate": 3.356963838166846e-06, + "loss": 0.0, + "step": 10836 + }, + { + "epoch": 0.6983953083714636, + "grad_norm": 0.39711350760972824, + "learning_rate": 3.3562477622628004e-06, + "loss": 0.0011, + "step": 10837 + }, + { + "epoch": 0.6984597538183928, + "grad_norm": 0.04127152969316703, + "learning_rate": 3.3555316863587543e-06, + "loss": 0.0001, + "step": 10838 + }, + { + "epoch": 0.6985241992653219, + "grad_norm": 0.006618943598043729, + "learning_rate": 3.3548156104547086e-06, + "loss": 0.0, + "step": 10839 + }, + { + "epoch": 0.6985886447122511, + "grad_norm": 0.04176915064793223, + "learning_rate": 3.354099534550663e-06, + "loss": 0.0001, + "step": 10840 + }, + { + "epoch": 0.6986530901591802, + "grad_norm": 0.0001714131345175587, + "learning_rate": 3.3533834586466168e-06, + "loss": 0.0, + "step": 10841 + }, + { + "epoch": 0.6987175356061094, + "grad_norm": 0.0010024964582433073, + "learning_rate": 3.352667382742571e-06, + "loss": 0.0, + "step": 10842 + }, + { + "epoch": 0.6987819810530386, + "grad_norm": 0.00040623033050484755, + "learning_rate": 3.351951306838525e-06, + "loss": 0.0, + "step": 10843 + }, + { + "epoch": 0.6988464264999678, + "grad_norm": 0.00402618346109113, + "learning_rate": 3.3512352309344792e-06, + "loss": 0.0, + "step": 10844 + }, + { + "epoch": 0.6989108719468969, + "grad_norm": 0.22191415897871572, + "learning_rate": 3.3505191550304335e-06, + "loss": 0.0002, + "step": 10845 + }, + { + "epoch": 0.6989753173938261, + "grad_norm": 0.014029645455560074, + "learning_rate": 3.3498030791263874e-06, + "loss": 0.0015, + "step": 10846 + }, + { + "epoch": 0.6990397628407553, + "grad_norm": 8.324158896226437e-05, + "learning_rate": 3.3490870032223417e-06, + "loss": 0.0, + "step": 10847 + }, + { + "epoch": 0.6991042082876845, + "grad_norm": 0.0006412068556455778, + "learning_rate": 3.3483709273182964e-06, + "loss": 0.0, + "step": 10848 + }, + { + "epoch": 0.6991686537346137, + "grad_norm": 4.787087265715539e-05, + "learning_rate": 3.3476548514142503e-06, + "loss": 0.0, + "step": 10849 + }, + { + "epoch": 0.6992330991815429, + "grad_norm": 0.00030164510962597276, + "learning_rate": 3.3469387755102046e-06, + "loss": 0.0, + "step": 10850 + }, + { + "epoch": 0.699297544628472, + "grad_norm": 0.30012512478553754, + "learning_rate": 3.3462226996061585e-06, + "loss": 0.0024, + "step": 10851 + }, + { + "epoch": 0.6993619900754011, + "grad_norm": 0.0059113082282672945, + "learning_rate": 3.3455066237021128e-06, + "loss": 0.0001, + "step": 10852 + }, + { + "epoch": 0.6994264355223303, + "grad_norm": 0.000438918178855173, + "learning_rate": 3.344790547798067e-06, + "loss": 0.0, + "step": 10853 + }, + { + "epoch": 0.6994908809692595, + "grad_norm": 0.0004580738369121127, + "learning_rate": 3.344074471894021e-06, + "loss": 0.0, + "step": 10854 + }, + { + "epoch": 0.6995553264161887, + "grad_norm": 0.0008452896284506048, + "learning_rate": 3.3433583959899753e-06, + "loss": 0.0, + "step": 10855 + }, + { + "epoch": 0.6996197718631179, + "grad_norm": 0.0020973779835961976, + "learning_rate": 3.342642320085929e-06, + "loss": 0.0, + "step": 10856 + }, + { + "epoch": 0.699684217310047, + "grad_norm": 0.0008297177221649112, + "learning_rate": 3.3419262441818834e-06, + "loss": 0.0, + "step": 10857 + }, + { + "epoch": 0.6997486627569762, + "grad_norm": 0.0027529704284585976, + "learning_rate": 3.3412101682778373e-06, + "loss": 0.0, + "step": 10858 + }, + { + "epoch": 0.6998131082039054, + "grad_norm": 0.0008741259795175414, + "learning_rate": 3.3404940923737916e-06, + "loss": 0.0, + "step": 10859 + }, + { + "epoch": 0.6998775536508346, + "grad_norm": 0.0035309142146040173, + "learning_rate": 3.3397780164697463e-06, + "loss": 0.0, + "step": 10860 + }, + { + "epoch": 0.6999419990977638, + "grad_norm": 0.4958865867061118, + "learning_rate": 3.3390619405657006e-06, + "loss": 0.0028, + "step": 10861 + }, + { + "epoch": 0.700006444544693, + "grad_norm": 0.0072535462915399975, + "learning_rate": 3.3383458646616545e-06, + "loss": 0.0, + "step": 10862 + }, + { + "epoch": 0.7000708899916221, + "grad_norm": 0.02617736765886348, + "learning_rate": 3.337629788757609e-06, + "loss": 0.0, + "step": 10863 + }, + { + "epoch": 0.7001353354385512, + "grad_norm": 0.025339570434611148, + "learning_rate": 3.3369137128535627e-06, + "loss": 0.0, + "step": 10864 + }, + { + "epoch": 0.7001997808854804, + "grad_norm": 0.0001549518610992421, + "learning_rate": 3.336197636949517e-06, + "loss": 0.0, + "step": 10865 + }, + { + "epoch": 0.7002642263324096, + "grad_norm": 0.0006099219318847603, + "learning_rate": 3.335481561045471e-06, + "loss": 0.0, + "step": 10866 + }, + { + "epoch": 0.7003286717793388, + "grad_norm": 0.00019973177616691002, + "learning_rate": 3.334765485141425e-06, + "loss": 0.0, + "step": 10867 + }, + { + "epoch": 0.700393117226268, + "grad_norm": 0.0203871412425311, + "learning_rate": 3.3340494092373794e-06, + "loss": 0.0, + "step": 10868 + }, + { + "epoch": 0.7004575626731971, + "grad_norm": 0.00012341835821285828, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.0, + "step": 10869 + }, + { + "epoch": 0.7005220081201263, + "grad_norm": 3.691681563023068e-05, + "learning_rate": 3.3326172574292876e-06, + "loss": 0.0, + "step": 10870 + }, + { + "epoch": 0.7005864535670555, + "grad_norm": 0.041492466191393934, + "learning_rate": 3.3319011815252423e-06, + "loss": 0.0002, + "step": 10871 + }, + { + "epoch": 0.7006508990139847, + "grad_norm": 0.16212739040050764, + "learning_rate": 3.3311851056211962e-06, + "loss": 0.0005, + "step": 10872 + }, + { + "epoch": 0.7007153444609139, + "grad_norm": 0.001993052236712686, + "learning_rate": 3.3304690297171505e-06, + "loss": 0.0, + "step": 10873 + }, + { + "epoch": 0.7007797899078431, + "grad_norm": 0.0021670367114100574, + "learning_rate": 3.3297529538131044e-06, + "loss": 0.0, + "step": 10874 + }, + { + "epoch": 0.7008442353547721, + "grad_norm": 0.0013950064005321817, + "learning_rate": 3.3290368779090587e-06, + "loss": 0.0, + "step": 10875 + }, + { + "epoch": 0.7009086808017013, + "grad_norm": 0.0007154109309629973, + "learning_rate": 3.328320802005013e-06, + "loss": 0.0, + "step": 10876 + }, + { + "epoch": 0.7009731262486305, + "grad_norm": 0.03324730647386008, + "learning_rate": 3.327604726100967e-06, + "loss": 0.0003, + "step": 10877 + }, + { + "epoch": 0.7010375716955597, + "grad_norm": 0.001851781740293082, + "learning_rate": 3.326888650196921e-06, + "loss": 0.0, + "step": 10878 + }, + { + "epoch": 0.7011020171424889, + "grad_norm": 0.47544556412340017, + "learning_rate": 3.326172574292875e-06, + "loss": 0.0027, + "step": 10879 + }, + { + "epoch": 0.7011664625894181, + "grad_norm": 0.03185670473705229, + "learning_rate": 3.3254564983888293e-06, + "loss": 0.0, + "step": 10880 + }, + { + "epoch": 0.7012309080363472, + "grad_norm": 7.054423748793358e-05, + "learning_rate": 3.3247404224847836e-06, + "loss": 0.0, + "step": 10881 + }, + { + "epoch": 0.7012953534832764, + "grad_norm": 0.006720652727550893, + "learning_rate": 3.3240243465807375e-06, + "loss": 0.0001, + "step": 10882 + }, + { + "epoch": 0.7013597989302056, + "grad_norm": 0.00926893960988273, + "learning_rate": 3.3233082706766922e-06, + "loss": 0.0, + "step": 10883 + }, + { + "epoch": 0.7014242443771348, + "grad_norm": 0.0037271138191373935, + "learning_rate": 3.3225921947726465e-06, + "loss": 0.0, + "step": 10884 + }, + { + "epoch": 0.701488689824064, + "grad_norm": 0.26028400697964355, + "learning_rate": 3.3218761188686004e-06, + "loss": 0.0001, + "step": 10885 + }, + { + "epoch": 0.7015531352709931, + "grad_norm": 0.008150517832203049, + "learning_rate": 3.3211600429645547e-06, + "loss": 0.0, + "step": 10886 + }, + { + "epoch": 0.7016175807179222, + "grad_norm": 0.016123935218947682, + "learning_rate": 3.3204439670605086e-06, + "loss": 0.0, + "step": 10887 + }, + { + "epoch": 0.7016820261648514, + "grad_norm": 0.0025847379419570832, + "learning_rate": 3.319727891156463e-06, + "loss": 0.0, + "step": 10888 + }, + { + "epoch": 0.7017464716117806, + "grad_norm": 0.02264905884294404, + "learning_rate": 3.319011815252417e-06, + "loss": 0.0, + "step": 10889 + }, + { + "epoch": 0.7018109170587098, + "grad_norm": 0.012890156146756319, + "learning_rate": 3.318295739348371e-06, + "loss": 0.0, + "step": 10890 + }, + { + "epoch": 0.701875362505639, + "grad_norm": 0.0005745350547465109, + "learning_rate": 3.3175796634443254e-06, + "loss": 0.0, + "step": 10891 + }, + { + "epoch": 0.7019398079525682, + "grad_norm": 0.015874661805627098, + "learning_rate": 3.3168635875402792e-06, + "loss": 0.0, + "step": 10892 + }, + { + "epoch": 0.7020042533994973, + "grad_norm": 0.0008491966764384103, + "learning_rate": 3.3161475116362335e-06, + "loss": 0.0, + "step": 10893 + }, + { + "epoch": 0.7020686988464265, + "grad_norm": 0.004415929786913111, + "learning_rate": 3.315431435732188e-06, + "loss": 0.0, + "step": 10894 + }, + { + "epoch": 0.7021331442933557, + "grad_norm": 0.04686867154635354, + "learning_rate": 3.314715359828142e-06, + "loss": 0.0001, + "step": 10895 + }, + { + "epoch": 0.7021975897402849, + "grad_norm": 0.026635296990076995, + "learning_rate": 3.3139992839240964e-06, + "loss": 0.0001, + "step": 10896 + }, + { + "epoch": 0.702262035187214, + "grad_norm": 0.0002269513361057032, + "learning_rate": 3.3132832080200507e-06, + "loss": 0.0, + "step": 10897 + }, + { + "epoch": 0.7023264806341432, + "grad_norm": 0.0008907517858752973, + "learning_rate": 3.3125671321160046e-06, + "loss": 0.0, + "step": 10898 + }, + { + "epoch": 0.7023909260810723, + "grad_norm": 0.13432873522726482, + "learning_rate": 3.311851056211959e-06, + "loss": 0.0003, + "step": 10899 + }, + { + "epoch": 0.7024553715280015, + "grad_norm": 0.005984730876438511, + "learning_rate": 3.3111349803079128e-06, + "loss": 0.0, + "step": 10900 + }, + { + "epoch": 0.7025198169749307, + "grad_norm": 0.17898883995504597, + "learning_rate": 3.310418904403867e-06, + "loss": 0.0002, + "step": 10901 + }, + { + "epoch": 0.7025842624218599, + "grad_norm": 0.00810524339822239, + "learning_rate": 3.3097028284998214e-06, + "loss": 0.0, + "step": 10902 + }, + { + "epoch": 0.7026487078687891, + "grad_norm": 0.002444304481964341, + "learning_rate": 3.3089867525957752e-06, + "loss": 0.0, + "step": 10903 + }, + { + "epoch": 0.7027131533157183, + "grad_norm": 0.0017770346103979729, + "learning_rate": 3.3082706766917295e-06, + "loss": 0.0, + "step": 10904 + }, + { + "epoch": 0.7027775987626474, + "grad_norm": 0.001104809674803772, + "learning_rate": 3.3075546007876834e-06, + "loss": 0.0, + "step": 10905 + }, + { + "epoch": 0.7028420442095766, + "grad_norm": 0.009800180749657906, + "learning_rate": 3.306838524883638e-06, + "loss": 0.0, + "step": 10906 + }, + { + "epoch": 0.7029064896565058, + "grad_norm": 0.0026749097474474373, + "learning_rate": 3.3061224489795924e-06, + "loss": 0.0, + "step": 10907 + }, + { + "epoch": 0.7029709351034349, + "grad_norm": 0.01313033705099765, + "learning_rate": 3.3054063730755463e-06, + "loss": 0.0001, + "step": 10908 + }, + { + "epoch": 0.7030353805503641, + "grad_norm": 0.00436655342451471, + "learning_rate": 3.3046902971715006e-06, + "loss": 0.0, + "step": 10909 + }, + { + "epoch": 0.7030998259972933, + "grad_norm": 0.007957100060090051, + "learning_rate": 3.303974221267455e-06, + "loss": 0.0, + "step": 10910 + }, + { + "epoch": 0.7031642714442224, + "grad_norm": 0.6992021270992388, + "learning_rate": 3.3032581453634088e-06, + "loss": 0.0096, + "step": 10911 + }, + { + "epoch": 0.7032287168911516, + "grad_norm": 0.004975517335532034, + "learning_rate": 3.302542069459363e-06, + "loss": 0.0, + "step": 10912 + }, + { + "epoch": 0.7032931623380808, + "grad_norm": 0.013508631034990674, + "learning_rate": 3.301825993555317e-06, + "loss": 0.0, + "step": 10913 + }, + { + "epoch": 0.70335760778501, + "grad_norm": 0.8323221906874271, + "learning_rate": 3.3011099176512713e-06, + "loss": 0.0153, + "step": 10914 + }, + { + "epoch": 0.7034220532319392, + "grad_norm": 0.0002831184487363148, + "learning_rate": 3.300393841747225e-06, + "loss": 0.0, + "step": 10915 + }, + { + "epoch": 0.7034864986788684, + "grad_norm": 0.014769501888290858, + "learning_rate": 3.2996777658431794e-06, + "loss": 0.0, + "step": 10916 + }, + { + "epoch": 0.7035509441257976, + "grad_norm": 0.4236323875892939, + "learning_rate": 3.2989616899391337e-06, + "loss": 0.0031, + "step": 10917 + }, + { + "epoch": 0.7036153895727267, + "grad_norm": 0.011189639856460357, + "learning_rate": 3.2982456140350885e-06, + "loss": 0.0, + "step": 10918 + }, + { + "epoch": 0.7036798350196558, + "grad_norm": 0.00829430680010469, + "learning_rate": 3.2975295381310423e-06, + "loss": 0.0, + "step": 10919 + }, + { + "epoch": 0.703744280466585, + "grad_norm": 0.0039546963976976515, + "learning_rate": 3.2968134622269966e-06, + "loss": 0.0, + "step": 10920 + }, + { + "epoch": 0.7038087259135142, + "grad_norm": 0.01591126280223268, + "learning_rate": 3.2960973863229505e-06, + "loss": 0.0002, + "step": 10921 + }, + { + "epoch": 0.7038731713604434, + "grad_norm": 0.012323091831577818, + "learning_rate": 3.295381310418905e-06, + "loss": 0.0, + "step": 10922 + }, + { + "epoch": 0.7039376168073725, + "grad_norm": 0.0007462944453321257, + "learning_rate": 3.2946652345148587e-06, + "loss": 0.0, + "step": 10923 + }, + { + "epoch": 0.7040020622543017, + "grad_norm": 0.0035263945187977517, + "learning_rate": 3.293949158610813e-06, + "loss": 0.0, + "step": 10924 + }, + { + "epoch": 0.7040665077012309, + "grad_norm": 0.006692252532699696, + "learning_rate": 3.2932330827067673e-06, + "loss": 0.0, + "step": 10925 + }, + { + "epoch": 0.7041309531481601, + "grad_norm": 0.011862744356579047, + "learning_rate": 3.292517006802721e-06, + "loss": 0.0, + "step": 10926 + }, + { + "epoch": 0.7041953985950893, + "grad_norm": 0.0009588384878591355, + "learning_rate": 3.2918009308986754e-06, + "loss": 0.0, + "step": 10927 + }, + { + "epoch": 0.7042598440420185, + "grad_norm": 0.06882280888848988, + "learning_rate": 3.2910848549946293e-06, + "loss": 0.0002, + "step": 10928 + }, + { + "epoch": 0.7043242894889477, + "grad_norm": 0.03040901424031329, + "learning_rate": 3.2903687790905836e-06, + "loss": 0.0, + "step": 10929 + }, + { + "epoch": 0.7043887349358767, + "grad_norm": 0.003522038167252063, + "learning_rate": 3.2896527031865383e-06, + "loss": 0.0, + "step": 10930 + }, + { + "epoch": 0.7044531803828059, + "grad_norm": 0.15972103322031145, + "learning_rate": 3.2889366272824922e-06, + "loss": 0.0018, + "step": 10931 + }, + { + "epoch": 0.7045176258297351, + "grad_norm": 0.06893447717889657, + "learning_rate": 3.2882205513784465e-06, + "loss": 0.0001, + "step": 10932 + }, + { + "epoch": 0.7045820712766643, + "grad_norm": 3.0882135037847522, + "learning_rate": 3.287504475474401e-06, + "loss": 0.0268, + "step": 10933 + }, + { + "epoch": 0.7046465167235935, + "grad_norm": 0.139790129637713, + "learning_rate": 3.2867883995703547e-06, + "loss": 0.0003, + "step": 10934 + }, + { + "epoch": 0.7047109621705226, + "grad_norm": 0.01926119261248183, + "learning_rate": 3.286072323666309e-06, + "loss": 0.0, + "step": 10935 + }, + { + "epoch": 0.7047754076174518, + "grad_norm": 0.003360752891597998, + "learning_rate": 3.285356247762263e-06, + "loss": 0.0, + "step": 10936 + }, + { + "epoch": 0.704839853064381, + "grad_norm": 0.02846292441579501, + "learning_rate": 3.284640171858217e-06, + "loss": 0.0, + "step": 10937 + }, + { + "epoch": 0.7049042985113102, + "grad_norm": 0.010289595574202861, + "learning_rate": 3.2839240959541715e-06, + "loss": 0.0, + "step": 10938 + }, + { + "epoch": 0.7049687439582394, + "grad_norm": 0.022801437024493606, + "learning_rate": 3.2832080200501253e-06, + "loss": 0.0, + "step": 10939 + }, + { + "epoch": 0.7050331894051686, + "grad_norm": 0.00020393405837112934, + "learning_rate": 3.2824919441460796e-06, + "loss": 0.0, + "step": 10940 + }, + { + "epoch": 0.7050976348520978, + "grad_norm": 0.012538392880569913, + "learning_rate": 3.2817758682420335e-06, + "loss": 0.0, + "step": 10941 + }, + { + "epoch": 0.7051620802990268, + "grad_norm": 0.02402381009882897, + "learning_rate": 3.2810597923379882e-06, + "loss": 0.0, + "step": 10942 + }, + { + "epoch": 0.705226525745956, + "grad_norm": 0.03254472748774419, + "learning_rate": 3.2803437164339425e-06, + "loss": 0.0, + "step": 10943 + }, + { + "epoch": 0.7052909711928852, + "grad_norm": 0.004207380008318214, + "learning_rate": 3.2796276405298964e-06, + "loss": 0.0, + "step": 10944 + }, + { + "epoch": 0.7053554166398144, + "grad_norm": 0.020072217846481624, + "learning_rate": 3.2789115646258507e-06, + "loss": 0.0, + "step": 10945 + }, + { + "epoch": 0.7054198620867436, + "grad_norm": 0.016132958636072284, + "learning_rate": 3.278195488721805e-06, + "loss": 0.0, + "step": 10946 + }, + { + "epoch": 0.7054843075336727, + "grad_norm": 0.04939703204957824, + "learning_rate": 3.277479412817759e-06, + "loss": 0.0001, + "step": 10947 + }, + { + "epoch": 0.7055487529806019, + "grad_norm": 0.0008245602376543607, + "learning_rate": 3.276763336913713e-06, + "loss": 0.0, + "step": 10948 + }, + { + "epoch": 0.7056131984275311, + "grad_norm": 0.0005412021396897577, + "learning_rate": 3.276047261009667e-06, + "loss": 0.0, + "step": 10949 + }, + { + "epoch": 0.7056776438744603, + "grad_norm": 0.0034232693202888597, + "learning_rate": 3.2753311851056214e-06, + "loss": 0.0, + "step": 10950 + }, + { + "epoch": 0.7057420893213895, + "grad_norm": 0.005485021994327029, + "learning_rate": 3.2746151092015752e-06, + "loss": 0.0, + "step": 10951 + }, + { + "epoch": 0.7058065347683187, + "grad_norm": 0.017868621028440626, + "learning_rate": 3.2738990332975295e-06, + "loss": 0.0001, + "step": 10952 + }, + { + "epoch": 0.7058709802152477, + "grad_norm": 0.002559377594809387, + "learning_rate": 3.2731829573934843e-06, + "loss": 0.0, + "step": 10953 + }, + { + "epoch": 0.7059354256621769, + "grad_norm": 0.10073643138781171, + "learning_rate": 3.2724668814894385e-06, + "loss": 0.0001, + "step": 10954 + }, + { + "epoch": 0.7059998711091061, + "grad_norm": 0.005557456868990858, + "learning_rate": 3.2717508055853924e-06, + "loss": 0.0, + "step": 10955 + }, + { + "epoch": 0.7060643165560353, + "grad_norm": 0.018022847987917433, + "learning_rate": 3.2710347296813467e-06, + "loss": 0.0, + "step": 10956 + }, + { + "epoch": 0.7061287620029645, + "grad_norm": 0.0010845042955528442, + "learning_rate": 3.2703186537773006e-06, + "loss": 0.0, + "step": 10957 + }, + { + "epoch": 0.7061932074498937, + "grad_norm": 0.006678590974714848, + "learning_rate": 3.269602577873255e-06, + "loss": 0.0, + "step": 10958 + }, + { + "epoch": 0.7062576528968229, + "grad_norm": 0.24238967916520135, + "learning_rate": 3.2688865019692088e-06, + "loss": 0.0003, + "step": 10959 + }, + { + "epoch": 0.706322098343752, + "grad_norm": 0.0005615450731238256, + "learning_rate": 3.268170426065163e-06, + "loss": 0.0, + "step": 10960 + }, + { + "epoch": 0.7063865437906812, + "grad_norm": 0.001237721068779436, + "learning_rate": 3.2674543501611174e-06, + "loss": 0.0, + "step": 10961 + }, + { + "epoch": 0.7064509892376104, + "grad_norm": 0.0022154238217625168, + "learning_rate": 3.2667382742570712e-06, + "loss": 0.0, + "step": 10962 + }, + { + "epoch": 0.7065154346845396, + "grad_norm": 0.00036969182107827645, + "learning_rate": 3.2660221983530255e-06, + "loss": 0.0, + "step": 10963 + }, + { + "epoch": 0.7065798801314687, + "grad_norm": 0.14243230155867326, + "learning_rate": 3.2653061224489794e-06, + "loss": 0.0003, + "step": 10964 + }, + { + "epoch": 0.7066443255783978, + "grad_norm": 0.005058644391544198, + "learning_rate": 3.264590046544934e-06, + "loss": 0.0, + "step": 10965 + }, + { + "epoch": 0.706708771025327, + "grad_norm": 0.0032059561848995183, + "learning_rate": 3.2638739706408884e-06, + "loss": 0.0, + "step": 10966 + }, + { + "epoch": 0.7067732164722562, + "grad_norm": 0.013179660912521117, + "learning_rate": 3.2631578947368423e-06, + "loss": 0.0001, + "step": 10967 + }, + { + "epoch": 0.7068376619191854, + "grad_norm": 0.11744684687956847, + "learning_rate": 3.2624418188327966e-06, + "loss": 0.0002, + "step": 10968 + }, + { + "epoch": 0.7069021073661146, + "grad_norm": 0.004042410681713448, + "learning_rate": 3.261725742928751e-06, + "loss": 0.0, + "step": 10969 + }, + { + "epoch": 0.7069665528130438, + "grad_norm": 4.283922259766499e-05, + "learning_rate": 3.2610096670247048e-06, + "loss": 0.0, + "step": 10970 + }, + { + "epoch": 0.707030998259973, + "grad_norm": 0.9901557003270737, + "learning_rate": 3.260293591120659e-06, + "loss": 0.006, + "step": 10971 + }, + { + "epoch": 0.7070954437069021, + "grad_norm": 0.037523802452588115, + "learning_rate": 3.259577515216613e-06, + "loss": 0.0, + "step": 10972 + }, + { + "epoch": 0.7071598891538313, + "grad_norm": 0.015195530897357019, + "learning_rate": 3.2588614393125673e-06, + "loss": 0.0, + "step": 10973 + }, + { + "epoch": 0.7072243346007605, + "grad_norm": 0.0022295571638922154, + "learning_rate": 3.2581453634085216e-06, + "loss": 0.0, + "step": 10974 + }, + { + "epoch": 0.7072887800476896, + "grad_norm": 0.8156517229050663, + "learning_rate": 3.2574292875044754e-06, + "loss": 0.0035, + "step": 10975 + }, + { + "epoch": 0.7073532254946188, + "grad_norm": 0.004237609517313484, + "learning_rate": 3.2567132116004297e-06, + "loss": 0.0, + "step": 10976 + }, + { + "epoch": 0.707417670941548, + "grad_norm": 0.005571668982925098, + "learning_rate": 3.2559971356963845e-06, + "loss": 0.0, + "step": 10977 + }, + { + "epoch": 0.7074821163884771, + "grad_norm": 0.0036520828137762886, + "learning_rate": 3.2552810597923383e-06, + "loss": 0.0, + "step": 10978 + }, + { + "epoch": 0.7075465618354063, + "grad_norm": 0.016016935812411235, + "learning_rate": 3.2545649838882926e-06, + "loss": 0.0, + "step": 10979 + }, + { + "epoch": 0.7076110072823355, + "grad_norm": 0.3980306343157112, + "learning_rate": 3.2538489079842465e-06, + "loss": 0.0041, + "step": 10980 + }, + { + "epoch": 0.7076754527292647, + "grad_norm": 0.0009197268556172491, + "learning_rate": 3.253132832080201e-06, + "loss": 0.0, + "step": 10981 + }, + { + "epoch": 0.7077398981761939, + "grad_norm": 0.00043302879034731197, + "learning_rate": 3.252416756176155e-06, + "loss": 0.0, + "step": 10982 + }, + { + "epoch": 0.7078043436231231, + "grad_norm": 0.004272269559729301, + "learning_rate": 3.251700680272109e-06, + "loss": 0.0, + "step": 10983 + }, + { + "epoch": 0.7078687890700522, + "grad_norm": 0.0024739338398368465, + "learning_rate": 3.2509846043680633e-06, + "loss": 0.0, + "step": 10984 + }, + { + "epoch": 0.7079332345169814, + "grad_norm": 0.1845882851059872, + "learning_rate": 3.250268528464017e-06, + "loss": 0.0028, + "step": 10985 + }, + { + "epoch": 0.7079976799639105, + "grad_norm": 0.08929369829022431, + "learning_rate": 3.2495524525599714e-06, + "loss": 0.0002, + "step": 10986 + }, + { + "epoch": 0.7080621254108397, + "grad_norm": 0.0027870529419560435, + "learning_rate": 3.2488363766559257e-06, + "loss": 0.0, + "step": 10987 + }, + { + "epoch": 0.7081265708577689, + "grad_norm": 0.0017530348572803891, + "learning_rate": 3.24812030075188e-06, + "loss": 0.0, + "step": 10988 + }, + { + "epoch": 0.708191016304698, + "grad_norm": 0.006516597776420439, + "learning_rate": 3.2474042248478343e-06, + "loss": 0.0001, + "step": 10989 + }, + { + "epoch": 0.7082554617516272, + "grad_norm": 0.05491665317800124, + "learning_rate": 3.2466881489437886e-06, + "loss": 0.0006, + "step": 10990 + }, + { + "epoch": 0.7083199071985564, + "grad_norm": 0.007344119285479627, + "learning_rate": 3.2459720730397425e-06, + "loss": 0.0, + "step": 10991 + }, + { + "epoch": 0.7083843526454856, + "grad_norm": 0.004823036421794768, + "learning_rate": 3.245255997135697e-06, + "loss": 0.0, + "step": 10992 + }, + { + "epoch": 0.7084487980924148, + "grad_norm": 0.001133053437795517, + "learning_rate": 3.2445399212316507e-06, + "loss": 0.0, + "step": 10993 + }, + { + "epoch": 0.708513243539344, + "grad_norm": 0.010246483494655423, + "learning_rate": 3.243823845327605e-06, + "loss": 0.0001, + "step": 10994 + }, + { + "epoch": 0.7085776889862732, + "grad_norm": 0.00031286769307990606, + "learning_rate": 3.2431077694235593e-06, + "loss": 0.0, + "step": 10995 + }, + { + "epoch": 0.7086421344332023, + "grad_norm": 0.025983279433569137, + "learning_rate": 3.242391693519513e-06, + "loss": 0.0003, + "step": 10996 + }, + { + "epoch": 0.7087065798801314, + "grad_norm": 0.002991161752113345, + "learning_rate": 3.2416756176154675e-06, + "loss": 0.0, + "step": 10997 + }, + { + "epoch": 0.7087710253270606, + "grad_norm": 0.00019414406743275953, + "learning_rate": 3.2409595417114213e-06, + "loss": 0.0, + "step": 10998 + }, + { + "epoch": 0.7088354707739898, + "grad_norm": 0.0051728910727569175, + "learning_rate": 3.2402434658073756e-06, + "loss": 0.0, + "step": 10999 + }, + { + "epoch": 0.708899916220919, + "grad_norm": 0.032033257450606374, + "learning_rate": 3.2395273899033304e-06, + "loss": 0.0, + "step": 11000 + }, + { + "epoch": 0.7089643616678482, + "grad_norm": 0.0013291294574410593, + "learning_rate": 3.2388113139992842e-06, + "loss": 0.0, + "step": 11001 + }, + { + "epoch": 0.7090288071147773, + "grad_norm": 0.0030029524874015485, + "learning_rate": 3.2380952380952385e-06, + "loss": 0.0, + "step": 11002 + }, + { + "epoch": 0.7090932525617065, + "grad_norm": 0.018705699350202824, + "learning_rate": 3.237379162191193e-06, + "loss": 0.0, + "step": 11003 + }, + { + "epoch": 0.7091576980086357, + "grad_norm": 0.12145905230771745, + "learning_rate": 3.2366630862871467e-06, + "loss": 0.0004, + "step": 11004 + }, + { + "epoch": 0.7092221434555649, + "grad_norm": 0.002445939444972882, + "learning_rate": 3.235947010383101e-06, + "loss": 0.0, + "step": 11005 + }, + { + "epoch": 0.7092865889024941, + "grad_norm": 0.0030465461515954964, + "learning_rate": 3.235230934479055e-06, + "loss": 0.0, + "step": 11006 + }, + { + "epoch": 0.7093510343494233, + "grad_norm": 0.0022634303186151597, + "learning_rate": 3.234514858575009e-06, + "loss": 0.0, + "step": 11007 + }, + { + "epoch": 0.7094154797963523, + "grad_norm": 0.1156427131436469, + "learning_rate": 3.233798782670963e-06, + "loss": 0.0002, + "step": 11008 + }, + { + "epoch": 0.7094799252432815, + "grad_norm": 0.002947625272147337, + "learning_rate": 3.2330827067669174e-06, + "loss": 0.0, + "step": 11009 + }, + { + "epoch": 0.7095443706902107, + "grad_norm": 0.04937201848564881, + "learning_rate": 3.2323666308628717e-06, + "loss": 0.0001, + "step": 11010 + }, + { + "epoch": 0.7096088161371399, + "grad_norm": 0.011345745946028784, + "learning_rate": 3.2316505549588255e-06, + "loss": 0.0, + "step": 11011 + }, + { + "epoch": 0.7096732615840691, + "grad_norm": 0.13372508837535974, + "learning_rate": 3.2309344790547803e-06, + "loss": 0.0005, + "step": 11012 + }, + { + "epoch": 0.7097377070309983, + "grad_norm": 0.02591297992435188, + "learning_rate": 3.2302184031507345e-06, + "loss": 0.0, + "step": 11013 + }, + { + "epoch": 0.7098021524779274, + "grad_norm": 0.0005216657897116044, + "learning_rate": 3.2295023272466884e-06, + "loss": 0.0, + "step": 11014 + }, + { + "epoch": 0.7098665979248566, + "grad_norm": 0.001228820551302051, + "learning_rate": 3.2287862513426427e-06, + "loss": 0.0, + "step": 11015 + }, + { + "epoch": 0.7099310433717858, + "grad_norm": 0.004049530687622813, + "learning_rate": 3.2280701754385966e-06, + "loss": 0.0, + "step": 11016 + }, + { + "epoch": 0.709995488818715, + "grad_norm": 0.01042890442965104, + "learning_rate": 3.227354099534551e-06, + "loss": 0.0, + "step": 11017 + }, + { + "epoch": 0.7100599342656442, + "grad_norm": 0.20473263111957016, + "learning_rate": 3.226638023630505e-06, + "loss": 0.0013, + "step": 11018 + }, + { + "epoch": 0.7101243797125734, + "grad_norm": 0.003618323682590707, + "learning_rate": 3.225921947726459e-06, + "loss": 0.0, + "step": 11019 + }, + { + "epoch": 0.7101888251595024, + "grad_norm": 0.00013886623571113355, + "learning_rate": 3.2252058718224134e-06, + "loss": 0.0, + "step": 11020 + }, + { + "epoch": 0.7102532706064316, + "grad_norm": 0.00031239600076566764, + "learning_rate": 3.2244897959183672e-06, + "loss": 0.0, + "step": 11021 + }, + { + "epoch": 0.7103177160533608, + "grad_norm": 0.015242570841153947, + "learning_rate": 3.2237737200143215e-06, + "loss": 0.0001, + "step": 11022 + }, + { + "epoch": 0.71038216150029, + "grad_norm": 5.747915247272944e-05, + "learning_rate": 3.2230576441102763e-06, + "loss": 0.0, + "step": 11023 + }, + { + "epoch": 0.7104466069472192, + "grad_norm": 0.0062475296909040865, + "learning_rate": 3.22234156820623e-06, + "loss": 0.0, + "step": 11024 + }, + { + "epoch": 0.7105110523941484, + "grad_norm": 0.0017142098867375052, + "learning_rate": 3.2216254923021844e-06, + "loss": 0.0, + "step": 11025 + }, + { + "epoch": 0.7105754978410775, + "grad_norm": 0.0011041837520175915, + "learning_rate": 3.2209094163981387e-06, + "loss": 0.0, + "step": 11026 + }, + { + "epoch": 0.7106399432880067, + "grad_norm": 0.00122298031984066, + "learning_rate": 3.2201933404940926e-06, + "loss": 0.0, + "step": 11027 + }, + { + "epoch": 0.7107043887349359, + "grad_norm": 0.00011782227449732348, + "learning_rate": 3.219477264590047e-06, + "loss": 0.0, + "step": 11028 + }, + { + "epoch": 0.7107688341818651, + "grad_norm": 0.000478511478482095, + "learning_rate": 3.2187611886860008e-06, + "loss": 0.0, + "step": 11029 + }, + { + "epoch": 0.7108332796287943, + "grad_norm": 0.0075666354507896, + "learning_rate": 3.218045112781955e-06, + "loss": 0.0, + "step": 11030 + }, + { + "epoch": 0.7108977250757234, + "grad_norm": 0.040470238174043854, + "learning_rate": 3.2173290368779094e-06, + "loss": 0.0003, + "step": 11031 + }, + { + "epoch": 0.7109621705226525, + "grad_norm": 0.049252596260576695, + "learning_rate": 3.2166129609738633e-06, + "loss": 0.0005, + "step": 11032 + }, + { + "epoch": 0.7110266159695817, + "grad_norm": 0.5214163060714664, + "learning_rate": 3.2158968850698176e-06, + "loss": 0.0017, + "step": 11033 + }, + { + "epoch": 0.7110910614165109, + "grad_norm": 0.004091547353671431, + "learning_rate": 3.2151808091657714e-06, + "loss": 0.0, + "step": 11034 + }, + { + "epoch": 0.7111555068634401, + "grad_norm": 0.8728484540582238, + "learning_rate": 3.214464733261726e-06, + "loss": 0.0037, + "step": 11035 + }, + { + "epoch": 0.7112199523103693, + "grad_norm": 0.002028648083962951, + "learning_rate": 3.2137486573576805e-06, + "loss": 0.0, + "step": 11036 + }, + { + "epoch": 0.7112843977572985, + "grad_norm": 0.0017844158187033214, + "learning_rate": 3.2130325814536343e-06, + "loss": 0.0, + "step": 11037 + }, + { + "epoch": 0.7113488432042276, + "grad_norm": 0.29092505665033047, + "learning_rate": 3.2123165055495886e-06, + "loss": 0.0003, + "step": 11038 + }, + { + "epoch": 0.7114132886511568, + "grad_norm": 0.03781926653402004, + "learning_rate": 3.211600429645543e-06, + "loss": 0.0001, + "step": 11039 + }, + { + "epoch": 0.711477734098086, + "grad_norm": 0.023017320635540486, + "learning_rate": 3.210884353741497e-06, + "loss": 0.0, + "step": 11040 + }, + { + "epoch": 0.7115421795450152, + "grad_norm": 0.2787249919125269, + "learning_rate": 3.210168277837451e-06, + "loss": 0.001, + "step": 11041 + }, + { + "epoch": 0.7116066249919443, + "grad_norm": 0.0002637866973357019, + "learning_rate": 3.209452201933405e-06, + "loss": 0.0, + "step": 11042 + }, + { + "epoch": 0.7116710704388735, + "grad_norm": 0.003413300700603889, + "learning_rate": 3.2087361260293593e-06, + "loss": 0.0, + "step": 11043 + }, + { + "epoch": 0.7117355158858026, + "grad_norm": 0.008288206540353161, + "learning_rate": 3.2080200501253136e-06, + "loss": 0.0, + "step": 11044 + }, + { + "epoch": 0.7117999613327318, + "grad_norm": 0.0009564137452828161, + "learning_rate": 3.2073039742212674e-06, + "loss": 0.0, + "step": 11045 + }, + { + "epoch": 0.711864406779661, + "grad_norm": 0.011072656092947879, + "learning_rate": 3.2065878983172217e-06, + "loss": 0.0, + "step": 11046 + }, + { + "epoch": 0.7119288522265902, + "grad_norm": 0.1796167379448892, + "learning_rate": 3.2058718224131765e-06, + "loss": 0.0003, + "step": 11047 + }, + { + "epoch": 0.7119932976735194, + "grad_norm": 0.07329979538349857, + "learning_rate": 3.2051557465091303e-06, + "loss": 0.0001, + "step": 11048 + }, + { + "epoch": 0.7120577431204486, + "grad_norm": 0.1903261245429498, + "learning_rate": 3.2044396706050846e-06, + "loss": 0.002, + "step": 11049 + }, + { + "epoch": 0.7121221885673777, + "grad_norm": 0.0020960073602700443, + "learning_rate": 3.2037235947010385e-06, + "loss": 0.0, + "step": 11050 + }, + { + "epoch": 0.7121866340143069, + "grad_norm": 0.15443077795960122, + "learning_rate": 3.203007518796993e-06, + "loss": 0.002, + "step": 11051 + }, + { + "epoch": 0.7122510794612361, + "grad_norm": 0.006115973097704426, + "learning_rate": 3.202291442892947e-06, + "loss": 0.0, + "step": 11052 + }, + { + "epoch": 0.7123155249081652, + "grad_norm": 0.00581395781401189, + "learning_rate": 3.201575366988901e-06, + "loss": 0.0, + "step": 11053 + }, + { + "epoch": 0.7123799703550944, + "grad_norm": 0.0022123917383665612, + "learning_rate": 3.2008592910848553e-06, + "loss": 0.0, + "step": 11054 + }, + { + "epoch": 0.7124444158020236, + "grad_norm": 0.00022742322294811237, + "learning_rate": 3.200143215180809e-06, + "loss": 0.0, + "step": 11055 + }, + { + "epoch": 0.7125088612489527, + "grad_norm": 0.030732401054467395, + "learning_rate": 3.1994271392767635e-06, + "loss": 0.0, + "step": 11056 + }, + { + "epoch": 0.7125733066958819, + "grad_norm": 0.0022031101795498163, + "learning_rate": 3.1987110633727173e-06, + "loss": 0.0, + "step": 11057 + }, + { + "epoch": 0.7126377521428111, + "grad_norm": 0.05268874666893051, + "learning_rate": 3.197994987468672e-06, + "loss": 0.0001, + "step": 11058 + }, + { + "epoch": 0.7127021975897403, + "grad_norm": 0.000596779920373873, + "learning_rate": 3.1972789115646264e-06, + "loss": 0.0, + "step": 11059 + }, + { + "epoch": 0.7127666430366695, + "grad_norm": 0.015269021418092311, + "learning_rate": 3.1965628356605807e-06, + "loss": 0.0, + "step": 11060 + }, + { + "epoch": 0.7128310884835987, + "grad_norm": 0.0007433832035599262, + "learning_rate": 3.1958467597565345e-06, + "loss": 0.0, + "step": 11061 + }, + { + "epoch": 0.7128955339305278, + "grad_norm": 0.00024632248124767153, + "learning_rate": 3.195130683852489e-06, + "loss": 0.0, + "step": 11062 + }, + { + "epoch": 0.712959979377457, + "grad_norm": 6.792495538918946e-05, + "learning_rate": 3.1944146079484427e-06, + "loss": 0.0, + "step": 11063 + }, + { + "epoch": 0.7130244248243861, + "grad_norm": 0.0007723524883490751, + "learning_rate": 3.193698532044397e-06, + "loss": 0.0, + "step": 11064 + }, + { + "epoch": 0.7130888702713153, + "grad_norm": 0.00046314913899469146, + "learning_rate": 3.192982456140351e-06, + "loss": 0.0, + "step": 11065 + }, + { + "epoch": 0.7131533157182445, + "grad_norm": 0.0002443464187616548, + "learning_rate": 3.192266380236305e-06, + "loss": 0.0, + "step": 11066 + }, + { + "epoch": 0.7132177611651737, + "grad_norm": 0.03511749555454775, + "learning_rate": 3.1915503043322595e-06, + "loss": 0.0, + "step": 11067 + }, + { + "epoch": 0.7132822066121028, + "grad_norm": 0.08750064745305847, + "learning_rate": 3.1908342284282134e-06, + "loss": 0.0001, + "step": 11068 + }, + { + "epoch": 0.713346652059032, + "grad_norm": 0.0007083306043677525, + "learning_rate": 3.1901181525241677e-06, + "loss": 0.0, + "step": 11069 + }, + { + "epoch": 0.7134110975059612, + "grad_norm": 0.004821037321704811, + "learning_rate": 3.1894020766201224e-06, + "loss": 0.0, + "step": 11070 + }, + { + "epoch": 0.7134755429528904, + "grad_norm": 6.842244899806434e-05, + "learning_rate": 3.1886860007160762e-06, + "loss": 0.0, + "step": 11071 + }, + { + "epoch": 0.7135399883998196, + "grad_norm": 0.19849096388992776, + "learning_rate": 3.1879699248120305e-06, + "loss": 0.0017, + "step": 11072 + }, + { + "epoch": 0.7136044338467488, + "grad_norm": 0.001744117216499498, + "learning_rate": 3.1872538489079844e-06, + "loss": 0.0, + "step": 11073 + }, + { + "epoch": 0.713668879293678, + "grad_norm": 0.0007771459762669267, + "learning_rate": 3.1865377730039387e-06, + "loss": 0.0, + "step": 11074 + }, + { + "epoch": 0.713733324740607, + "grad_norm": 0.0016081975587549844, + "learning_rate": 3.185821697099893e-06, + "loss": 0.0, + "step": 11075 + }, + { + "epoch": 0.7137977701875362, + "grad_norm": 0.09325613134932795, + "learning_rate": 3.185105621195847e-06, + "loss": 0.0002, + "step": 11076 + }, + { + "epoch": 0.7138622156344654, + "grad_norm": 0.332418115417522, + "learning_rate": 3.184389545291801e-06, + "loss": 0.0014, + "step": 11077 + }, + { + "epoch": 0.7139266610813946, + "grad_norm": 0.04041215457695259, + "learning_rate": 3.183673469387755e-06, + "loss": 0.0001, + "step": 11078 + }, + { + "epoch": 0.7139911065283238, + "grad_norm": 0.08855809440335718, + "learning_rate": 3.1829573934837094e-06, + "loss": 0.0016, + "step": 11079 + }, + { + "epoch": 0.714055551975253, + "grad_norm": 0.037115998367209524, + "learning_rate": 3.1822413175796637e-06, + "loss": 0.0001, + "step": 11080 + }, + { + "epoch": 0.7141199974221821, + "grad_norm": 0.0009066699956542551, + "learning_rate": 3.1815252416756175e-06, + "loss": 0.0, + "step": 11081 + }, + { + "epoch": 0.7141844428691113, + "grad_norm": 0.019780705026331263, + "learning_rate": 3.1808091657715723e-06, + "loss": 0.0, + "step": 11082 + }, + { + "epoch": 0.7142488883160405, + "grad_norm": 0.00261057496389828, + "learning_rate": 3.1800930898675266e-06, + "loss": 0.0, + "step": 11083 + }, + { + "epoch": 0.7143133337629697, + "grad_norm": 0.004270109738808761, + "learning_rate": 3.1793770139634804e-06, + "loss": 0.0, + "step": 11084 + }, + { + "epoch": 0.7143777792098989, + "grad_norm": 0.00031775442414158883, + "learning_rate": 3.1786609380594347e-06, + "loss": 0.0, + "step": 11085 + }, + { + "epoch": 0.7144422246568279, + "grad_norm": 0.12453918901981105, + "learning_rate": 3.1779448621553886e-06, + "loss": 0.0002, + "step": 11086 + }, + { + "epoch": 0.7145066701037571, + "grad_norm": 0.02119295088790315, + "learning_rate": 3.177228786251343e-06, + "loss": 0.0, + "step": 11087 + }, + { + "epoch": 0.7145711155506863, + "grad_norm": 0.0006572972137359756, + "learning_rate": 3.176512710347297e-06, + "loss": 0.0, + "step": 11088 + }, + { + "epoch": 0.7146355609976155, + "grad_norm": 0.0015856123993797646, + "learning_rate": 3.175796634443251e-06, + "loss": 0.0, + "step": 11089 + }, + { + "epoch": 0.7147000064445447, + "grad_norm": 0.005097678348488337, + "learning_rate": 3.1750805585392054e-06, + "loss": 0.0, + "step": 11090 + }, + { + "epoch": 0.7147644518914739, + "grad_norm": 0.03935784061800326, + "learning_rate": 3.1743644826351593e-06, + "loss": 0.0001, + "step": 11091 + }, + { + "epoch": 0.714828897338403, + "grad_norm": 0.00048666767231215894, + "learning_rate": 3.1736484067311136e-06, + "loss": 0.0, + "step": 11092 + }, + { + "epoch": 0.7148933427853322, + "grad_norm": 0.004039960942877218, + "learning_rate": 3.1729323308270683e-06, + "loss": 0.0, + "step": 11093 + }, + { + "epoch": 0.7149577882322614, + "grad_norm": 0.05651111224218124, + "learning_rate": 3.172216254923022e-06, + "loss": 0.0001, + "step": 11094 + }, + { + "epoch": 0.7150222336791906, + "grad_norm": 0.0012988435437589815, + "learning_rate": 3.1715001790189765e-06, + "loss": 0.0, + "step": 11095 + }, + { + "epoch": 0.7150866791261198, + "grad_norm": 0.0006677970549519835, + "learning_rate": 3.1707841031149308e-06, + "loss": 0.0, + "step": 11096 + }, + { + "epoch": 0.715151124573049, + "grad_norm": 0.0030971611143586165, + "learning_rate": 3.1700680272108846e-06, + "loss": 0.0, + "step": 11097 + }, + { + "epoch": 0.715215570019978, + "grad_norm": 0.05772528771724839, + "learning_rate": 3.169351951306839e-06, + "loss": 0.0001, + "step": 11098 + }, + { + "epoch": 0.7152800154669072, + "grad_norm": 0.0017996675808251306, + "learning_rate": 3.168635875402793e-06, + "loss": 0.0, + "step": 11099 + }, + { + "epoch": 0.7153444609138364, + "grad_norm": 0.0022183191323614845, + "learning_rate": 3.167919799498747e-06, + "loss": 0.0, + "step": 11100 + }, + { + "epoch": 0.7154089063607656, + "grad_norm": 0.23467454047809808, + "learning_rate": 3.167203723594701e-06, + "loss": 0.0058, + "step": 11101 + }, + { + "epoch": 0.7154733518076948, + "grad_norm": 0.002101152624454598, + "learning_rate": 3.1664876476906553e-06, + "loss": 0.0, + "step": 11102 + }, + { + "epoch": 0.715537797254624, + "grad_norm": 0.10132137891567196, + "learning_rate": 3.1657715717866096e-06, + "loss": 0.0002, + "step": 11103 + }, + { + "epoch": 0.7156022427015531, + "grad_norm": 0.0013626302825582128, + "learning_rate": 3.1650554958825634e-06, + "loss": 0.0, + "step": 11104 + }, + { + "epoch": 0.7156666881484823, + "grad_norm": 0.013922212605411412, + "learning_rate": 3.164339419978518e-06, + "loss": 0.0, + "step": 11105 + }, + { + "epoch": 0.7157311335954115, + "grad_norm": 0.5795495550330169, + "learning_rate": 3.1636233440744725e-06, + "loss": 0.0019, + "step": 11106 + }, + { + "epoch": 0.7157955790423407, + "grad_norm": 0.019280375892706227, + "learning_rate": 3.1629072681704263e-06, + "loss": 0.0, + "step": 11107 + }, + { + "epoch": 0.7158600244892699, + "grad_norm": 3.448348121566918e-05, + "learning_rate": 3.1621911922663806e-06, + "loss": 0.0, + "step": 11108 + }, + { + "epoch": 0.715924469936199, + "grad_norm": 0.37455556282125935, + "learning_rate": 3.1614751163623345e-06, + "loss": 0.0016, + "step": 11109 + }, + { + "epoch": 0.7159889153831281, + "grad_norm": 0.002586349361368294, + "learning_rate": 3.160759040458289e-06, + "loss": 0.0, + "step": 11110 + }, + { + "epoch": 0.7160533608300573, + "grad_norm": 0.008448127471088534, + "learning_rate": 3.160042964554243e-06, + "loss": 0.0, + "step": 11111 + }, + { + "epoch": 0.7161178062769865, + "grad_norm": 0.0010937285941726932, + "learning_rate": 3.159326888650197e-06, + "loss": 0.0, + "step": 11112 + }, + { + "epoch": 0.7161822517239157, + "grad_norm": 0.0012277663763215767, + "learning_rate": 3.1586108127461513e-06, + "loss": 0.0, + "step": 11113 + }, + { + "epoch": 0.7162466971708449, + "grad_norm": 0.0018488032473446409, + "learning_rate": 3.157894736842105e-06, + "loss": 0.0, + "step": 11114 + }, + { + "epoch": 0.7163111426177741, + "grad_norm": 0.08111634839048355, + "learning_rate": 3.1571786609380595e-06, + "loss": 0.0001, + "step": 11115 + }, + { + "epoch": 0.7163755880647033, + "grad_norm": 0.0031572357244101274, + "learning_rate": 3.1564625850340138e-06, + "loss": 0.0, + "step": 11116 + }, + { + "epoch": 0.7164400335116324, + "grad_norm": 0.0014554546438801737, + "learning_rate": 3.155746509129968e-06, + "loss": 0.0, + "step": 11117 + }, + { + "epoch": 0.7165044789585616, + "grad_norm": 0.028527429420330017, + "learning_rate": 3.1550304332259224e-06, + "loss": 0.0, + "step": 11118 + }, + { + "epoch": 0.7165689244054908, + "grad_norm": 0.0013177336847458132, + "learning_rate": 3.1543143573218767e-06, + "loss": 0.0, + "step": 11119 + }, + { + "epoch": 0.7166333698524199, + "grad_norm": 0.02391522038213578, + "learning_rate": 3.1535982814178305e-06, + "loss": 0.0001, + "step": 11120 + }, + { + "epoch": 0.7166978152993491, + "grad_norm": 0.001059440528141261, + "learning_rate": 3.152882205513785e-06, + "loss": 0.0, + "step": 11121 + }, + { + "epoch": 0.7167622607462782, + "grad_norm": 0.001678356185974784, + "learning_rate": 3.1521661296097387e-06, + "loss": 0.0, + "step": 11122 + }, + { + "epoch": 0.7168267061932074, + "grad_norm": 0.00032899135591734686, + "learning_rate": 3.151450053705693e-06, + "loss": 0.0, + "step": 11123 + }, + { + "epoch": 0.7168911516401366, + "grad_norm": 0.0042420537766143995, + "learning_rate": 3.1507339778016473e-06, + "loss": 0.0, + "step": 11124 + }, + { + "epoch": 0.7169555970870658, + "grad_norm": 0.06737302971743062, + "learning_rate": 3.150017901897601e-06, + "loss": 0.0001, + "step": 11125 + }, + { + "epoch": 0.717020042533995, + "grad_norm": 0.10045379921951969, + "learning_rate": 3.1493018259935555e-06, + "loss": 0.0002, + "step": 11126 + }, + { + "epoch": 0.7170844879809242, + "grad_norm": 0.03187420971150182, + "learning_rate": 3.1485857500895094e-06, + "loss": 0.0002, + "step": 11127 + }, + { + "epoch": 0.7171489334278534, + "grad_norm": 0.001211750648026592, + "learning_rate": 3.147869674185464e-06, + "loss": 0.0, + "step": 11128 + }, + { + "epoch": 0.7172133788747825, + "grad_norm": 0.008495406179175139, + "learning_rate": 3.1471535982814184e-06, + "loss": 0.0, + "step": 11129 + }, + { + "epoch": 0.7172778243217117, + "grad_norm": 0.0019832293144138556, + "learning_rate": 3.1464375223773722e-06, + "loss": 0.0, + "step": 11130 + }, + { + "epoch": 0.7173422697686408, + "grad_norm": 0.001240237132151751, + "learning_rate": 3.1457214464733265e-06, + "loss": 0.0, + "step": 11131 + }, + { + "epoch": 0.71740671521557, + "grad_norm": 0.0025074924361898395, + "learning_rate": 3.145005370569281e-06, + "loss": 0.0, + "step": 11132 + }, + { + "epoch": 0.7174711606624992, + "grad_norm": 0.0027266612771010145, + "learning_rate": 3.1442892946652347e-06, + "loss": 0.0, + "step": 11133 + }, + { + "epoch": 0.7175356061094283, + "grad_norm": 0.05183947841060524, + "learning_rate": 3.143573218761189e-06, + "loss": 0.0001, + "step": 11134 + }, + { + "epoch": 0.7176000515563575, + "grad_norm": 0.00013604534610558102, + "learning_rate": 3.142857142857143e-06, + "loss": 0.0, + "step": 11135 + }, + { + "epoch": 0.7176644970032867, + "grad_norm": 0.004464684547835725, + "learning_rate": 3.142141066953097e-06, + "loss": 0.0, + "step": 11136 + }, + { + "epoch": 0.7177289424502159, + "grad_norm": 0.043388709463875526, + "learning_rate": 3.1414249910490515e-06, + "loss": 0.0002, + "step": 11137 + }, + { + "epoch": 0.7177933878971451, + "grad_norm": 0.563788289759631, + "learning_rate": 3.1407089151450054e-06, + "loss": 0.0048, + "step": 11138 + }, + { + "epoch": 0.7178578333440743, + "grad_norm": 0.00031376440967617104, + "learning_rate": 3.1399928392409597e-06, + "loss": 0.0, + "step": 11139 + }, + { + "epoch": 0.7179222787910035, + "grad_norm": 0.0009322943557485454, + "learning_rate": 3.1392767633369144e-06, + "loss": 0.0, + "step": 11140 + }, + { + "epoch": 0.7179867242379326, + "grad_norm": 0.2445635070487584, + "learning_rate": 3.1385606874328683e-06, + "loss": 0.0011, + "step": 11141 + }, + { + "epoch": 0.7180511696848617, + "grad_norm": 0.0009303590616047754, + "learning_rate": 3.1378446115288226e-06, + "loss": 0.0, + "step": 11142 + }, + { + "epoch": 0.7181156151317909, + "grad_norm": 0.002144644973596857, + "learning_rate": 3.1371285356247764e-06, + "loss": 0.0, + "step": 11143 + }, + { + "epoch": 0.7181800605787201, + "grad_norm": 0.24049082303382727, + "learning_rate": 3.1364124597207307e-06, + "loss": 0.0005, + "step": 11144 + }, + { + "epoch": 0.7182445060256493, + "grad_norm": 0.010361287644884275, + "learning_rate": 3.135696383816685e-06, + "loss": 0.0001, + "step": 11145 + }, + { + "epoch": 0.7183089514725784, + "grad_norm": 0.00045569104002404434, + "learning_rate": 3.134980307912639e-06, + "loss": 0.0, + "step": 11146 + }, + { + "epoch": 0.7183733969195076, + "grad_norm": 0.1184363656128077, + "learning_rate": 3.134264232008593e-06, + "loss": 0.0013, + "step": 11147 + }, + { + "epoch": 0.7184378423664368, + "grad_norm": 0.0018075368988239252, + "learning_rate": 3.133548156104547e-06, + "loss": 0.0, + "step": 11148 + }, + { + "epoch": 0.718502287813366, + "grad_norm": 0.00025330471947959053, + "learning_rate": 3.1328320802005014e-06, + "loss": 0.0, + "step": 11149 + }, + { + "epoch": 0.7185667332602952, + "grad_norm": 0.0018817507730820291, + "learning_rate": 3.1321160042964553e-06, + "loss": 0.0, + "step": 11150 + }, + { + "epoch": 0.7186311787072244, + "grad_norm": 0.14699625858626997, + "learning_rate": 3.1313999283924096e-06, + "loss": 0.001, + "step": 11151 + }, + { + "epoch": 0.7186956241541536, + "grad_norm": 0.010185056320389333, + "learning_rate": 3.1306838524883643e-06, + "loss": 0.0015, + "step": 11152 + }, + { + "epoch": 0.7187600696010826, + "grad_norm": 0.00017482119536334955, + "learning_rate": 3.1299677765843186e-06, + "loss": 0.0, + "step": 11153 + }, + { + "epoch": 0.7188245150480118, + "grad_norm": 0.008587165702478385, + "learning_rate": 3.1292517006802725e-06, + "loss": 0.0, + "step": 11154 + }, + { + "epoch": 0.718888960494941, + "grad_norm": 0.0009752747086168306, + "learning_rate": 3.1285356247762268e-06, + "loss": 0.0, + "step": 11155 + }, + { + "epoch": 0.7189534059418702, + "grad_norm": 0.00273619332827515, + "learning_rate": 3.1278195488721806e-06, + "loss": 0.0, + "step": 11156 + }, + { + "epoch": 0.7190178513887994, + "grad_norm": 0.0003457374189358038, + "learning_rate": 3.127103472968135e-06, + "loss": 0.0, + "step": 11157 + }, + { + "epoch": 0.7190822968357286, + "grad_norm": 0.001673470044109342, + "learning_rate": 3.126387397064089e-06, + "loss": 0.0, + "step": 11158 + }, + { + "epoch": 0.7191467422826577, + "grad_norm": 0.00021614544359388704, + "learning_rate": 3.125671321160043e-06, + "loss": 0.0, + "step": 11159 + }, + { + "epoch": 0.7192111877295869, + "grad_norm": 0.0036123325054397515, + "learning_rate": 3.1249552452559974e-06, + "loss": 0.0, + "step": 11160 + }, + { + "epoch": 0.7192756331765161, + "grad_norm": 0.01439849637720543, + "learning_rate": 3.1242391693519513e-06, + "loss": 0.0, + "step": 11161 + }, + { + "epoch": 0.7193400786234453, + "grad_norm": 0.0011721288297011038, + "learning_rate": 3.1235230934479056e-06, + "loss": 0.0, + "step": 11162 + }, + { + "epoch": 0.7194045240703745, + "grad_norm": 0.004723643909055465, + "learning_rate": 3.1228070175438603e-06, + "loss": 0.0, + "step": 11163 + }, + { + "epoch": 0.7194689695173035, + "grad_norm": 0.0035602629306015162, + "learning_rate": 3.122090941639814e-06, + "loss": 0.0, + "step": 11164 + }, + { + "epoch": 0.7195334149642327, + "grad_norm": 0.0003956756912840429, + "learning_rate": 3.1213748657357685e-06, + "loss": 0.0, + "step": 11165 + }, + { + "epoch": 0.7195978604111619, + "grad_norm": 0.001660037121350968, + "learning_rate": 3.1206587898317223e-06, + "loss": 0.0, + "step": 11166 + }, + { + "epoch": 0.7196623058580911, + "grad_norm": 0.00039859561483115464, + "learning_rate": 3.1199427139276766e-06, + "loss": 0.0, + "step": 11167 + }, + { + "epoch": 0.7197267513050203, + "grad_norm": 0.0060584983061866965, + "learning_rate": 3.119226638023631e-06, + "loss": 0.0001, + "step": 11168 + }, + { + "epoch": 0.7197911967519495, + "grad_norm": 0.037109889478630684, + "learning_rate": 3.118510562119585e-06, + "loss": 0.0, + "step": 11169 + }, + { + "epoch": 0.7198556421988787, + "grad_norm": 0.00727883548794619, + "learning_rate": 3.117794486215539e-06, + "loss": 0.0001, + "step": 11170 + }, + { + "epoch": 0.7199200876458078, + "grad_norm": 0.03501727862860073, + "learning_rate": 3.117078410311493e-06, + "loss": 0.0, + "step": 11171 + }, + { + "epoch": 0.719984533092737, + "grad_norm": 8.61872471854203e-05, + "learning_rate": 3.1163623344074473e-06, + "loss": 0.0, + "step": 11172 + }, + { + "epoch": 0.7200489785396662, + "grad_norm": 0.045333885416277295, + "learning_rate": 3.1156462585034016e-06, + "loss": 0.0001, + "step": 11173 + }, + { + "epoch": 0.7201134239865954, + "grad_norm": 0.0004629402319689133, + "learning_rate": 3.1149301825993555e-06, + "loss": 0.0, + "step": 11174 + }, + { + "epoch": 0.7201778694335246, + "grad_norm": 0.001063566655592568, + "learning_rate": 3.11421410669531e-06, + "loss": 0.0, + "step": 11175 + }, + { + "epoch": 0.7202423148804536, + "grad_norm": 0.0014662245610195432, + "learning_rate": 3.1134980307912645e-06, + "loss": 0.0, + "step": 11176 + }, + { + "epoch": 0.7203067603273828, + "grad_norm": 0.01931647362060871, + "learning_rate": 3.1127819548872184e-06, + "loss": 0.0, + "step": 11177 + }, + { + "epoch": 0.720371205774312, + "grad_norm": 0.0004281946405760085, + "learning_rate": 3.1120658789831727e-06, + "loss": 0.0, + "step": 11178 + }, + { + "epoch": 0.7204356512212412, + "grad_norm": 7.321520534062253e-05, + "learning_rate": 3.1113498030791265e-06, + "loss": 0.0, + "step": 11179 + }, + { + "epoch": 0.7205000966681704, + "grad_norm": 8.563287008322675e-05, + "learning_rate": 3.110633727175081e-06, + "loss": 0.0, + "step": 11180 + }, + { + "epoch": 0.7205645421150996, + "grad_norm": 0.00019715790289080482, + "learning_rate": 3.109917651271035e-06, + "loss": 0.0, + "step": 11181 + }, + { + "epoch": 0.7206289875620288, + "grad_norm": 0.002634901520461059, + "learning_rate": 3.109201575366989e-06, + "loss": 0.0, + "step": 11182 + }, + { + "epoch": 0.7206934330089579, + "grad_norm": 0.0008865863108786099, + "learning_rate": 3.1084854994629433e-06, + "loss": 0.0, + "step": 11183 + }, + { + "epoch": 0.7207578784558871, + "grad_norm": 0.025815671687311366, + "learning_rate": 3.107769423558897e-06, + "loss": 0.0, + "step": 11184 + }, + { + "epoch": 0.7208223239028163, + "grad_norm": 0.0032002885997212812, + "learning_rate": 3.1070533476548515e-06, + "loss": 0.0, + "step": 11185 + }, + { + "epoch": 0.7208867693497455, + "grad_norm": 0.0004901728854833662, + "learning_rate": 3.1063372717508058e-06, + "loss": 0.0, + "step": 11186 + }, + { + "epoch": 0.7209512147966746, + "grad_norm": 0.00753785072550071, + "learning_rate": 3.10562119584676e-06, + "loss": 0.0, + "step": 11187 + }, + { + "epoch": 0.7210156602436038, + "grad_norm": 0.001151215509133265, + "learning_rate": 3.1049051199427144e-06, + "loss": 0.0, + "step": 11188 + }, + { + "epoch": 0.7210801056905329, + "grad_norm": 0.00012703756509971546, + "learning_rate": 3.1041890440386687e-06, + "loss": 0.0, + "step": 11189 + }, + { + "epoch": 0.7211445511374621, + "grad_norm": 0.00027725036286674646, + "learning_rate": 3.1034729681346225e-06, + "loss": 0.0, + "step": 11190 + }, + { + "epoch": 0.7212089965843913, + "grad_norm": 0.00032789403026681533, + "learning_rate": 3.102756892230577e-06, + "loss": 0.0, + "step": 11191 + }, + { + "epoch": 0.7212734420313205, + "grad_norm": 0.5477644907254653, + "learning_rate": 3.1020408163265307e-06, + "loss": 0.0013, + "step": 11192 + }, + { + "epoch": 0.7213378874782497, + "grad_norm": 0.022567781263925505, + "learning_rate": 3.101324740422485e-06, + "loss": 0.0003, + "step": 11193 + }, + { + "epoch": 0.7214023329251789, + "grad_norm": 0.00021412049244247774, + "learning_rate": 3.1006086645184393e-06, + "loss": 0.0, + "step": 11194 + }, + { + "epoch": 0.721466778372108, + "grad_norm": 0.04202279788293058, + "learning_rate": 3.099892588614393e-06, + "loss": 0.0001, + "step": 11195 + }, + { + "epoch": 0.7215312238190372, + "grad_norm": 0.004843800760419462, + "learning_rate": 3.0991765127103475e-06, + "loss": 0.0, + "step": 11196 + }, + { + "epoch": 0.7215956692659664, + "grad_norm": 0.002510341090575078, + "learning_rate": 3.0984604368063014e-06, + "loss": 0.0, + "step": 11197 + }, + { + "epoch": 0.7216601147128955, + "grad_norm": 0.01219112790806607, + "learning_rate": 3.097744360902256e-06, + "loss": 0.0001, + "step": 11198 + }, + { + "epoch": 0.7217245601598247, + "grad_norm": 6.304089065724722e-05, + "learning_rate": 3.0970282849982104e-06, + "loss": 0.0, + "step": 11199 + }, + { + "epoch": 0.7217890056067539, + "grad_norm": 0.5504341113723744, + "learning_rate": 3.0963122090941643e-06, + "loss": 0.002, + "step": 11200 + }, + { + "epoch": 0.721853451053683, + "grad_norm": 0.01023833832935001, + "learning_rate": 3.0955961331901186e-06, + "loss": 0.0001, + "step": 11201 + }, + { + "epoch": 0.7219178965006122, + "grad_norm": 0.1783995912772252, + "learning_rate": 3.094880057286073e-06, + "loss": 0.0037, + "step": 11202 + }, + { + "epoch": 0.7219823419475414, + "grad_norm": 0.0008514347736365616, + "learning_rate": 3.0941639813820267e-06, + "loss": 0.0, + "step": 11203 + }, + { + "epoch": 0.7220467873944706, + "grad_norm": 0.0005467954296606304, + "learning_rate": 3.093447905477981e-06, + "loss": 0.0, + "step": 11204 + }, + { + "epoch": 0.7221112328413998, + "grad_norm": 0.12932414173611667, + "learning_rate": 3.092731829573935e-06, + "loss": 0.0003, + "step": 11205 + }, + { + "epoch": 0.722175678288329, + "grad_norm": 0.00013929640553021798, + "learning_rate": 3.092015753669889e-06, + "loss": 0.0, + "step": 11206 + }, + { + "epoch": 0.7222401237352581, + "grad_norm": 0.14328180054046125, + "learning_rate": 3.091299677765843e-06, + "loss": 0.0006, + "step": 11207 + }, + { + "epoch": 0.7223045691821873, + "grad_norm": 0.0005452546115663261, + "learning_rate": 3.0905836018617974e-06, + "loss": 0.0, + "step": 11208 + }, + { + "epoch": 0.7223690146291164, + "grad_norm": 0.00024169194708091167, + "learning_rate": 3.0898675259577517e-06, + "loss": 0.0, + "step": 11209 + }, + { + "epoch": 0.7224334600760456, + "grad_norm": 0.004371841536006174, + "learning_rate": 3.0891514500537064e-06, + "loss": 0.0, + "step": 11210 + }, + { + "epoch": 0.7224979055229748, + "grad_norm": 0.0039695361977091495, + "learning_rate": 3.0884353741496603e-06, + "loss": 0.0, + "step": 11211 + }, + { + "epoch": 0.722562350969904, + "grad_norm": 0.001597297508301907, + "learning_rate": 3.0877192982456146e-06, + "loss": 0.0, + "step": 11212 + }, + { + "epoch": 0.7226267964168331, + "grad_norm": 0.0149499392375087, + "learning_rate": 3.0870032223415685e-06, + "loss": 0.0001, + "step": 11213 + }, + { + "epoch": 0.7226912418637623, + "grad_norm": 0.0009184210155997939, + "learning_rate": 3.0862871464375228e-06, + "loss": 0.0, + "step": 11214 + }, + { + "epoch": 0.7227556873106915, + "grad_norm": 0.011828507087317438, + "learning_rate": 3.0855710705334766e-06, + "loss": 0.0, + "step": 11215 + }, + { + "epoch": 0.7228201327576207, + "grad_norm": 0.004237269538513051, + "learning_rate": 3.084854994629431e-06, + "loss": 0.0, + "step": 11216 + }, + { + "epoch": 0.7228845782045499, + "grad_norm": 0.000772409782086021, + "learning_rate": 3.0841389187253852e-06, + "loss": 0.0, + "step": 11217 + }, + { + "epoch": 0.7229490236514791, + "grad_norm": 0.00760526489143851, + "learning_rate": 3.083422842821339e-06, + "loss": 0.0001, + "step": 11218 + }, + { + "epoch": 0.7230134690984082, + "grad_norm": 0.03619062946544116, + "learning_rate": 3.0827067669172934e-06, + "loss": 0.0003, + "step": 11219 + }, + { + "epoch": 0.7230779145453373, + "grad_norm": 0.0007644458517121425, + "learning_rate": 3.0819906910132473e-06, + "loss": 0.0, + "step": 11220 + }, + { + "epoch": 0.7231423599922665, + "grad_norm": 0.0006187374178444931, + "learning_rate": 3.0812746151092016e-06, + "loss": 0.0, + "step": 11221 + }, + { + "epoch": 0.7232068054391957, + "grad_norm": 0.008779720301754806, + "learning_rate": 3.0805585392051563e-06, + "loss": 0.0, + "step": 11222 + }, + { + "epoch": 0.7232712508861249, + "grad_norm": 0.33317532619060475, + "learning_rate": 3.07984246330111e-06, + "loss": 0.0028, + "step": 11223 + }, + { + "epoch": 0.7233356963330541, + "grad_norm": 0.008626626856931026, + "learning_rate": 3.0791263873970645e-06, + "loss": 0.0, + "step": 11224 + }, + { + "epoch": 0.7234001417799832, + "grad_norm": 0.0015988935817651294, + "learning_rate": 3.0784103114930188e-06, + "loss": 0.0, + "step": 11225 + }, + { + "epoch": 0.7234645872269124, + "grad_norm": 0.019773563400669943, + "learning_rate": 3.0776942355889726e-06, + "loss": 0.0, + "step": 11226 + }, + { + "epoch": 0.7235290326738416, + "grad_norm": 0.0044985562525182805, + "learning_rate": 3.076978159684927e-06, + "loss": 0.0, + "step": 11227 + }, + { + "epoch": 0.7235934781207708, + "grad_norm": 0.001497283255147728, + "learning_rate": 3.076262083780881e-06, + "loss": 0.0, + "step": 11228 + }, + { + "epoch": 0.7236579235677, + "grad_norm": 0.00011553391408470788, + "learning_rate": 3.075546007876835e-06, + "loss": 0.0, + "step": 11229 + }, + { + "epoch": 0.7237223690146292, + "grad_norm": 0.0025702919683202085, + "learning_rate": 3.0748299319727894e-06, + "loss": 0.0, + "step": 11230 + }, + { + "epoch": 0.7237868144615582, + "grad_norm": 3.5742902256819365e-05, + "learning_rate": 3.0741138560687433e-06, + "loss": 0.0, + "step": 11231 + }, + { + "epoch": 0.7238512599084874, + "grad_norm": 0.0008341897101581062, + "learning_rate": 3.0733977801646976e-06, + "loss": 0.0, + "step": 11232 + }, + { + "epoch": 0.7239157053554166, + "grad_norm": 0.0004687527850827818, + "learning_rate": 3.0726817042606523e-06, + "loss": 0.0, + "step": 11233 + }, + { + "epoch": 0.7239801508023458, + "grad_norm": 0.06597886275171361, + "learning_rate": 3.071965628356606e-06, + "loss": 0.0002, + "step": 11234 + }, + { + "epoch": 0.724044596249275, + "grad_norm": 0.006935657033462403, + "learning_rate": 3.0712495524525605e-06, + "loss": 0.0, + "step": 11235 + }, + { + "epoch": 0.7241090416962042, + "grad_norm": 0.08593750812790572, + "learning_rate": 3.0705334765485144e-06, + "loss": 0.0005, + "step": 11236 + }, + { + "epoch": 0.7241734871431333, + "grad_norm": 0.002378809302678618, + "learning_rate": 3.0698174006444687e-06, + "loss": 0.0, + "step": 11237 + }, + { + "epoch": 0.7242379325900625, + "grad_norm": 0.45399070124829183, + "learning_rate": 3.069101324740423e-06, + "loss": 0.0022, + "step": 11238 + }, + { + "epoch": 0.7243023780369917, + "grad_norm": 0.26184424907155546, + "learning_rate": 3.068385248836377e-06, + "loss": 0.0009, + "step": 11239 + }, + { + "epoch": 0.7243668234839209, + "grad_norm": 0.0033589809308402887, + "learning_rate": 3.067669172932331e-06, + "loss": 0.0, + "step": 11240 + }, + { + "epoch": 0.7244312689308501, + "grad_norm": 0.005790182700251807, + "learning_rate": 3.066953097028285e-06, + "loss": 0.0, + "step": 11241 + }, + { + "epoch": 0.7244957143777793, + "grad_norm": 0.0002524290695434493, + "learning_rate": 3.0662370211242393e-06, + "loss": 0.0, + "step": 11242 + }, + { + "epoch": 0.7245601598247083, + "grad_norm": 0.0005162926392841873, + "learning_rate": 3.065520945220193e-06, + "loss": 0.0, + "step": 11243 + }, + { + "epoch": 0.7246246052716375, + "grad_norm": 0.0017082764232077385, + "learning_rate": 3.0648048693161475e-06, + "loss": 0.0, + "step": 11244 + }, + { + "epoch": 0.7246890507185667, + "grad_norm": 0.004266903234189341, + "learning_rate": 3.064088793412102e-06, + "loss": 0.0, + "step": 11245 + }, + { + "epoch": 0.7247534961654959, + "grad_norm": 0.0001998843517710784, + "learning_rate": 3.0633727175080565e-06, + "loss": 0.0, + "step": 11246 + }, + { + "epoch": 0.7248179416124251, + "grad_norm": 0.16482393362610753, + "learning_rate": 3.0626566416040104e-06, + "loss": 0.0002, + "step": 11247 + }, + { + "epoch": 0.7248823870593543, + "grad_norm": 0.00958134520982764, + "learning_rate": 3.0619405656999647e-06, + "loss": 0.0, + "step": 11248 + }, + { + "epoch": 0.7249468325062834, + "grad_norm": 0.005892490520614762, + "learning_rate": 3.0612244897959185e-06, + "loss": 0.0, + "step": 11249 + }, + { + "epoch": 0.7250112779532126, + "grad_norm": 0.01922783843529163, + "learning_rate": 3.060508413891873e-06, + "loss": 0.0, + "step": 11250 + }, + { + "epoch": 0.7250757234001418, + "grad_norm": 9.092396979500205e-05, + "learning_rate": 3.0597923379878267e-06, + "loss": 0.0, + "step": 11251 + }, + { + "epoch": 0.725140168847071, + "grad_norm": 0.0004333292273602369, + "learning_rate": 3.059076262083781e-06, + "loss": 0.0, + "step": 11252 + }, + { + "epoch": 0.7252046142940002, + "grad_norm": 0.017930935702477148, + "learning_rate": 3.0583601861797353e-06, + "loss": 0.0, + "step": 11253 + }, + { + "epoch": 0.7252690597409293, + "grad_norm": 0.31085538834256204, + "learning_rate": 3.057644110275689e-06, + "loss": 0.0065, + "step": 11254 + }, + { + "epoch": 0.7253335051878584, + "grad_norm": 0.014556196484398517, + "learning_rate": 3.0569280343716435e-06, + "loss": 0.0001, + "step": 11255 + }, + { + "epoch": 0.7253979506347876, + "grad_norm": 0.00015255389938576366, + "learning_rate": 3.0562119584675974e-06, + "loss": 0.0, + "step": 11256 + }, + { + "epoch": 0.7254623960817168, + "grad_norm": 0.00020395906453181, + "learning_rate": 3.055495882563552e-06, + "loss": 0.0, + "step": 11257 + }, + { + "epoch": 0.725526841528646, + "grad_norm": 0.0003205706918907401, + "learning_rate": 3.0547798066595064e-06, + "loss": 0.0, + "step": 11258 + }, + { + "epoch": 0.7255912869755752, + "grad_norm": 4.8430433611672415e-05, + "learning_rate": 3.0540637307554603e-06, + "loss": 0.0, + "step": 11259 + }, + { + "epoch": 0.7256557324225044, + "grad_norm": 0.0034224555781167698, + "learning_rate": 3.0533476548514146e-06, + "loss": 0.0, + "step": 11260 + }, + { + "epoch": 0.7257201778694335, + "grad_norm": 0.0007747417139003617, + "learning_rate": 3.052631578947369e-06, + "loss": 0.0, + "step": 11261 + }, + { + "epoch": 0.7257846233163627, + "grad_norm": 0.005629201743521242, + "learning_rate": 3.0519155030433227e-06, + "loss": 0.0016, + "step": 11262 + }, + { + "epoch": 0.7258490687632919, + "grad_norm": 0.05939066877792539, + "learning_rate": 3.051199427139277e-06, + "loss": 0.0002, + "step": 11263 + }, + { + "epoch": 0.7259135142102211, + "grad_norm": 0.024681152626680736, + "learning_rate": 3.050483351235231e-06, + "loss": 0.0002, + "step": 11264 + }, + { + "epoch": 0.7259779596571502, + "grad_norm": 0.16541939988288423, + "learning_rate": 3.049767275331185e-06, + "loss": 0.0003, + "step": 11265 + }, + { + "epoch": 0.7260424051040794, + "grad_norm": 0.006080011470615735, + "learning_rate": 3.0490511994271395e-06, + "loss": 0.0001, + "step": 11266 + }, + { + "epoch": 0.7261068505510085, + "grad_norm": 0.0047498769675451856, + "learning_rate": 3.0483351235230934e-06, + "loss": 0.0, + "step": 11267 + }, + { + "epoch": 0.7261712959979377, + "grad_norm": 0.04948049102232195, + "learning_rate": 3.047619047619048e-06, + "loss": 0.0001, + "step": 11268 + }, + { + "epoch": 0.7262357414448669, + "grad_norm": 0.0006189984344180252, + "learning_rate": 3.0469029717150024e-06, + "loss": 0.0, + "step": 11269 + }, + { + "epoch": 0.7263001868917961, + "grad_norm": 0.005652951470014697, + "learning_rate": 3.0461868958109563e-06, + "loss": 0.0, + "step": 11270 + }, + { + "epoch": 0.7263646323387253, + "grad_norm": 0.0005117226456541811, + "learning_rate": 3.0454708199069106e-06, + "loss": 0.0, + "step": 11271 + }, + { + "epoch": 0.7264290777856545, + "grad_norm": 0.0007810068636911916, + "learning_rate": 3.0447547440028645e-06, + "loss": 0.0, + "step": 11272 + }, + { + "epoch": 0.7264935232325836, + "grad_norm": 0.06590527306262674, + "learning_rate": 3.0440386680988188e-06, + "loss": 0.0004, + "step": 11273 + }, + { + "epoch": 0.7265579686795128, + "grad_norm": 0.0005164033597064126, + "learning_rate": 3.043322592194773e-06, + "loss": 0.0, + "step": 11274 + }, + { + "epoch": 0.726622414126442, + "grad_norm": 0.0003381609834740342, + "learning_rate": 3.042606516290727e-06, + "loss": 0.0, + "step": 11275 + }, + { + "epoch": 0.7266868595733711, + "grad_norm": 0.021830852884517263, + "learning_rate": 3.0418904403866812e-06, + "loss": 0.0, + "step": 11276 + }, + { + "epoch": 0.7267513050203003, + "grad_norm": 6.346864446019381e-05, + "learning_rate": 3.041174364482635e-06, + "loss": 0.0, + "step": 11277 + }, + { + "epoch": 0.7268157504672295, + "grad_norm": 0.03467683947264384, + "learning_rate": 3.0404582885785894e-06, + "loss": 0.0, + "step": 11278 + }, + { + "epoch": 0.7268801959141586, + "grad_norm": 0.004254908353937039, + "learning_rate": 3.0397422126745437e-06, + "loss": 0.0, + "step": 11279 + }, + { + "epoch": 0.7269446413610878, + "grad_norm": 0.003063179042156029, + "learning_rate": 3.039026136770498e-06, + "loss": 0.0, + "step": 11280 + }, + { + "epoch": 0.727009086808017, + "grad_norm": 0.0001314564867009563, + "learning_rate": 3.0383100608664523e-06, + "loss": 0.0, + "step": 11281 + }, + { + "epoch": 0.7270735322549462, + "grad_norm": 0.004146407912709877, + "learning_rate": 3.0375939849624066e-06, + "loss": 0.0, + "step": 11282 + }, + { + "epoch": 0.7271379777018754, + "grad_norm": 0.00010302604658438643, + "learning_rate": 3.0368779090583605e-06, + "loss": 0.0, + "step": 11283 + }, + { + "epoch": 0.7272024231488046, + "grad_norm": 0.00013965483383936777, + "learning_rate": 3.0361618331543148e-06, + "loss": 0.0, + "step": 11284 + }, + { + "epoch": 0.7272668685957338, + "grad_norm": 0.00018378616446001092, + "learning_rate": 3.0354457572502686e-06, + "loss": 0.0, + "step": 11285 + }, + { + "epoch": 0.7273313140426629, + "grad_norm": 0.001957099914498733, + "learning_rate": 3.034729681346223e-06, + "loss": 0.0, + "step": 11286 + }, + { + "epoch": 0.727395759489592, + "grad_norm": 0.0008828377932276057, + "learning_rate": 3.0340136054421772e-06, + "loss": 0.0, + "step": 11287 + }, + { + "epoch": 0.7274602049365212, + "grad_norm": 0.0029698710575783546, + "learning_rate": 3.033297529538131e-06, + "loss": 0.0, + "step": 11288 + }, + { + "epoch": 0.7275246503834504, + "grad_norm": 0.00019433618893119242, + "learning_rate": 3.0325814536340854e-06, + "loss": 0.0, + "step": 11289 + }, + { + "epoch": 0.7275890958303796, + "grad_norm": 0.0006416192589271509, + "learning_rate": 3.0318653777300393e-06, + "loss": 0.0, + "step": 11290 + }, + { + "epoch": 0.7276535412773087, + "grad_norm": 0.00838866934093486, + "learning_rate": 3.0311493018259936e-06, + "loss": 0.0, + "step": 11291 + }, + { + "epoch": 0.7277179867242379, + "grad_norm": 0.000245455502279775, + "learning_rate": 3.0304332259219483e-06, + "loss": 0.0, + "step": 11292 + }, + { + "epoch": 0.7277824321711671, + "grad_norm": 0.004930906444727167, + "learning_rate": 3.029717150017902e-06, + "loss": 0.0, + "step": 11293 + }, + { + "epoch": 0.7278468776180963, + "grad_norm": 4.776988774088927e-05, + "learning_rate": 3.0290010741138565e-06, + "loss": 0.0, + "step": 11294 + }, + { + "epoch": 0.7279113230650255, + "grad_norm": 0.27195599982658003, + "learning_rate": 3.0282849982098108e-06, + "loss": 0.001, + "step": 11295 + }, + { + "epoch": 0.7279757685119547, + "grad_norm": 0.0030353622788795963, + "learning_rate": 3.0275689223057647e-06, + "loss": 0.0, + "step": 11296 + }, + { + "epoch": 0.7280402139588839, + "grad_norm": 0.5768505948030167, + "learning_rate": 3.026852846401719e-06, + "loss": 0.003, + "step": 11297 + }, + { + "epoch": 0.7281046594058129, + "grad_norm": 0.0025597431430050125, + "learning_rate": 3.026136770497673e-06, + "loss": 0.0, + "step": 11298 + }, + { + "epoch": 0.7281691048527421, + "grad_norm": 0.0016598811747845761, + "learning_rate": 3.025420694593627e-06, + "loss": 0.0, + "step": 11299 + }, + { + "epoch": 0.7282335502996713, + "grad_norm": 0.00014893458879150577, + "learning_rate": 3.024704618689581e-06, + "loss": 0.0, + "step": 11300 + }, + { + "epoch": 0.7282979957466005, + "grad_norm": 0.0021818727213651556, + "learning_rate": 3.0239885427855353e-06, + "loss": 0.0, + "step": 11301 + }, + { + "epoch": 0.7283624411935297, + "grad_norm": 0.008553871083575986, + "learning_rate": 3.0232724668814896e-06, + "loss": 0.0, + "step": 11302 + }, + { + "epoch": 0.7284268866404588, + "grad_norm": 0.001350344278458923, + "learning_rate": 3.0225563909774443e-06, + "loss": 0.0, + "step": 11303 + }, + { + "epoch": 0.728491332087388, + "grad_norm": 0.00034752832518681015, + "learning_rate": 3.021840315073398e-06, + "loss": 0.0, + "step": 11304 + }, + { + "epoch": 0.7285557775343172, + "grad_norm": 0.004296723301807933, + "learning_rate": 3.0211242391693525e-06, + "loss": 0.0, + "step": 11305 + }, + { + "epoch": 0.7286202229812464, + "grad_norm": 0.003349462100284539, + "learning_rate": 3.0204081632653064e-06, + "loss": 0.0, + "step": 11306 + }, + { + "epoch": 0.7286846684281756, + "grad_norm": 0.13197393612356048, + "learning_rate": 3.0196920873612607e-06, + "loss": 0.0005, + "step": 11307 + }, + { + "epoch": 0.7287491138751048, + "grad_norm": 0.000510281932853362, + "learning_rate": 3.0189760114572145e-06, + "loss": 0.0, + "step": 11308 + }, + { + "epoch": 0.7288135593220338, + "grad_norm": 0.0011085601485967163, + "learning_rate": 3.018259935553169e-06, + "loss": 0.0, + "step": 11309 + }, + { + "epoch": 0.728878004768963, + "grad_norm": 0.005277812025546389, + "learning_rate": 3.017543859649123e-06, + "loss": 0.0, + "step": 11310 + }, + { + "epoch": 0.7289424502158922, + "grad_norm": 0.0002395444898397595, + "learning_rate": 3.016827783745077e-06, + "loss": 0.0, + "step": 11311 + }, + { + "epoch": 0.7290068956628214, + "grad_norm": 0.0831582389340663, + "learning_rate": 3.0161117078410313e-06, + "loss": 0.0011, + "step": 11312 + }, + { + "epoch": 0.7290713411097506, + "grad_norm": 0.0017825745778012954, + "learning_rate": 3.015395631936985e-06, + "loss": 0.0, + "step": 11313 + }, + { + "epoch": 0.7291357865566798, + "grad_norm": 0.0011956396270402388, + "learning_rate": 3.0146795560329395e-06, + "loss": 0.0, + "step": 11314 + }, + { + "epoch": 0.729200232003609, + "grad_norm": 0.00043885243242865044, + "learning_rate": 3.0139634801288942e-06, + "loss": 0.0, + "step": 11315 + }, + { + "epoch": 0.7292646774505381, + "grad_norm": 0.0009921284919679632, + "learning_rate": 3.013247404224848e-06, + "loss": 0.0, + "step": 11316 + }, + { + "epoch": 0.7293291228974673, + "grad_norm": 0.0023250831159840257, + "learning_rate": 3.0125313283208024e-06, + "loss": 0.0, + "step": 11317 + }, + { + "epoch": 0.7293935683443965, + "grad_norm": 0.15707155844292203, + "learning_rate": 3.0118152524167567e-06, + "loss": 0.0004, + "step": 11318 + }, + { + "epoch": 0.7294580137913257, + "grad_norm": 0.013931934554137165, + "learning_rate": 3.0110991765127106e-06, + "loss": 0.0, + "step": 11319 + }, + { + "epoch": 0.7295224592382549, + "grad_norm": 0.0014724451177281826, + "learning_rate": 3.010383100608665e-06, + "loss": 0.0, + "step": 11320 + }, + { + "epoch": 0.729586904685184, + "grad_norm": 9.713692903118522e-05, + "learning_rate": 3.0096670247046187e-06, + "loss": 0.0, + "step": 11321 + }, + { + "epoch": 0.7296513501321131, + "grad_norm": 0.0010043175423519069, + "learning_rate": 3.008950948800573e-06, + "loss": 0.0, + "step": 11322 + }, + { + "epoch": 0.7297157955790423, + "grad_norm": 0.003977753280873503, + "learning_rate": 3.0082348728965273e-06, + "loss": 0.0, + "step": 11323 + }, + { + "epoch": 0.7297802410259715, + "grad_norm": 7.66866136069101e-05, + "learning_rate": 3.007518796992481e-06, + "loss": 0.0, + "step": 11324 + }, + { + "epoch": 0.7298446864729007, + "grad_norm": 0.03245453503710955, + "learning_rate": 3.0068027210884355e-06, + "loss": 0.0001, + "step": 11325 + }, + { + "epoch": 0.7299091319198299, + "grad_norm": 0.017230089378299205, + "learning_rate": 3.0060866451843894e-06, + "loss": 0.0002, + "step": 11326 + }, + { + "epoch": 0.729973577366759, + "grad_norm": 0.0011237248614723047, + "learning_rate": 3.005370569280344e-06, + "loss": 0.0, + "step": 11327 + }, + { + "epoch": 0.7300380228136882, + "grad_norm": 0.001596873892730706, + "learning_rate": 3.0046544933762984e-06, + "loss": 0.0, + "step": 11328 + }, + { + "epoch": 0.7301024682606174, + "grad_norm": 0.0025830303630132935, + "learning_rate": 3.0039384174722523e-06, + "loss": 0.0, + "step": 11329 + }, + { + "epoch": 0.7301669137075466, + "grad_norm": 0.0003530421299883205, + "learning_rate": 3.0032223415682066e-06, + "loss": 0.0, + "step": 11330 + }, + { + "epoch": 0.7302313591544758, + "grad_norm": 0.002322914063727893, + "learning_rate": 3.002506265664161e-06, + "loss": 0.0, + "step": 11331 + }, + { + "epoch": 0.7302958046014049, + "grad_norm": 0.015656113380858726, + "learning_rate": 3.0017901897601148e-06, + "loss": 0.0, + "step": 11332 + }, + { + "epoch": 0.730360250048334, + "grad_norm": 0.002232419987401452, + "learning_rate": 3.001074113856069e-06, + "loss": 0.0, + "step": 11333 + }, + { + "epoch": 0.7304246954952632, + "grad_norm": 0.0006180026645144517, + "learning_rate": 3.000358037952023e-06, + "loss": 0.0, + "step": 11334 + }, + { + "epoch": 0.7304891409421924, + "grad_norm": 0.0053043607240062145, + "learning_rate": 2.9996419620479772e-06, + "loss": 0.0, + "step": 11335 + }, + { + "epoch": 0.7305535863891216, + "grad_norm": 0.0007339025781432594, + "learning_rate": 2.9989258861439315e-06, + "loss": 0.0, + "step": 11336 + }, + { + "epoch": 0.7306180318360508, + "grad_norm": 0.014264670100293255, + "learning_rate": 2.9982098102398854e-06, + "loss": 0.0, + "step": 11337 + }, + { + "epoch": 0.73068247728298, + "grad_norm": 0.0012291208711675198, + "learning_rate": 2.9974937343358397e-06, + "loss": 0.0, + "step": 11338 + }, + { + "epoch": 0.7307469227299092, + "grad_norm": 0.06005159391534337, + "learning_rate": 2.9967776584317944e-06, + "loss": 0.0001, + "step": 11339 + }, + { + "epoch": 0.7308113681768383, + "grad_norm": 0.0003933094764526526, + "learning_rate": 2.9960615825277483e-06, + "loss": 0.0, + "step": 11340 + }, + { + "epoch": 0.7308758136237675, + "grad_norm": 0.021090790890940164, + "learning_rate": 2.9953455066237026e-06, + "loss": 0.0002, + "step": 11341 + }, + { + "epoch": 0.7309402590706967, + "grad_norm": 0.025931634133735976, + "learning_rate": 2.9946294307196565e-06, + "loss": 0.0, + "step": 11342 + }, + { + "epoch": 0.7310047045176258, + "grad_norm": 0.02852376562835928, + "learning_rate": 2.9939133548156108e-06, + "loss": 0.0, + "step": 11343 + }, + { + "epoch": 0.731069149964555, + "grad_norm": 0.015667323444314415, + "learning_rate": 2.993197278911565e-06, + "loss": 0.0, + "step": 11344 + }, + { + "epoch": 0.7311335954114841, + "grad_norm": 0.00218994004770224, + "learning_rate": 2.992481203007519e-06, + "loss": 0.0, + "step": 11345 + }, + { + "epoch": 0.7311980408584133, + "grad_norm": 0.16275101952445878, + "learning_rate": 2.9917651271034732e-06, + "loss": 0.0028, + "step": 11346 + }, + { + "epoch": 0.7312624863053425, + "grad_norm": 0.0003862983051810963, + "learning_rate": 2.991049051199427e-06, + "loss": 0.0, + "step": 11347 + }, + { + "epoch": 0.7313269317522717, + "grad_norm": 0.003829513862433199, + "learning_rate": 2.9903329752953814e-06, + "loss": 0.0, + "step": 11348 + }, + { + "epoch": 0.7313913771992009, + "grad_norm": 0.0034865630005024286, + "learning_rate": 2.9896168993913353e-06, + "loss": 0.0, + "step": 11349 + }, + { + "epoch": 0.7314558226461301, + "grad_norm": 0.0009932220979224295, + "learning_rate": 2.98890082348729e-06, + "loss": 0.0, + "step": 11350 + }, + { + "epoch": 0.7315202680930593, + "grad_norm": 0.002630709002841604, + "learning_rate": 2.9881847475832443e-06, + "loss": 0.0, + "step": 11351 + }, + { + "epoch": 0.7315847135399884, + "grad_norm": 0.008694154945653666, + "learning_rate": 2.9874686716791986e-06, + "loss": 0.0, + "step": 11352 + }, + { + "epoch": 0.7316491589869176, + "grad_norm": 0.001717471994335099, + "learning_rate": 2.9867525957751525e-06, + "loss": 0.0, + "step": 11353 + }, + { + "epoch": 0.7317136044338467, + "grad_norm": 0.08697505214756895, + "learning_rate": 2.9860365198711068e-06, + "loss": 0.0017, + "step": 11354 + }, + { + "epoch": 0.7317780498807759, + "grad_norm": 0.0014773768805635785, + "learning_rate": 2.9853204439670607e-06, + "loss": 0.0, + "step": 11355 + }, + { + "epoch": 0.7318424953277051, + "grad_norm": 0.01304011785695086, + "learning_rate": 2.984604368063015e-06, + "loss": 0.0, + "step": 11356 + }, + { + "epoch": 0.7319069407746343, + "grad_norm": 0.006473541821742216, + "learning_rate": 2.983888292158969e-06, + "loss": 0.0, + "step": 11357 + }, + { + "epoch": 0.7319713862215634, + "grad_norm": 0.0013719339669280896, + "learning_rate": 2.983172216254923e-06, + "loss": 0.0, + "step": 11358 + }, + { + "epoch": 0.7320358316684926, + "grad_norm": 0.0002129689555400633, + "learning_rate": 2.9824561403508774e-06, + "loss": 0.0, + "step": 11359 + }, + { + "epoch": 0.7321002771154218, + "grad_norm": 0.0001586731060494783, + "learning_rate": 2.9817400644468313e-06, + "loss": 0.0, + "step": 11360 + }, + { + "epoch": 0.732164722562351, + "grad_norm": 0.013050724962506173, + "learning_rate": 2.9810239885427856e-06, + "loss": 0.0, + "step": 11361 + }, + { + "epoch": 0.7322291680092802, + "grad_norm": 0.019111154363726266, + "learning_rate": 2.9803079126387403e-06, + "loss": 0.0001, + "step": 11362 + }, + { + "epoch": 0.7322936134562094, + "grad_norm": 0.023404573866207774, + "learning_rate": 2.979591836734694e-06, + "loss": 0.0001, + "step": 11363 + }, + { + "epoch": 0.7323580589031385, + "grad_norm": 0.0005499391081068312, + "learning_rate": 2.9788757608306485e-06, + "loss": 0.0, + "step": 11364 + }, + { + "epoch": 0.7324225043500676, + "grad_norm": 0.0021230133353467673, + "learning_rate": 2.9781596849266024e-06, + "loss": 0.0, + "step": 11365 + }, + { + "epoch": 0.7324869497969968, + "grad_norm": 0.014250397877147453, + "learning_rate": 2.9774436090225567e-06, + "loss": 0.0001, + "step": 11366 + }, + { + "epoch": 0.732551395243926, + "grad_norm": 0.00023056297783971086, + "learning_rate": 2.976727533118511e-06, + "loss": 0.0, + "step": 11367 + }, + { + "epoch": 0.7326158406908552, + "grad_norm": 0.073680816757015, + "learning_rate": 2.976011457214465e-06, + "loss": 0.0002, + "step": 11368 + }, + { + "epoch": 0.7326802861377844, + "grad_norm": 0.011644553199364758, + "learning_rate": 2.975295381310419e-06, + "loss": 0.0, + "step": 11369 + }, + { + "epoch": 0.7327447315847135, + "grad_norm": 1.2168431158448905, + "learning_rate": 2.974579305406373e-06, + "loss": 0.0042, + "step": 11370 + }, + { + "epoch": 0.7328091770316427, + "grad_norm": 0.0011119148824099833, + "learning_rate": 2.9738632295023273e-06, + "loss": 0.0, + "step": 11371 + }, + { + "epoch": 0.7328736224785719, + "grad_norm": 0.14873476128698843, + "learning_rate": 2.9731471535982816e-06, + "loss": 0.0002, + "step": 11372 + }, + { + "epoch": 0.7329380679255011, + "grad_norm": 0.0005180624511212282, + "learning_rate": 2.9724310776942355e-06, + "loss": 0.0, + "step": 11373 + }, + { + "epoch": 0.7330025133724303, + "grad_norm": 0.08457680714713561, + "learning_rate": 2.9717150017901902e-06, + "loss": 0.0002, + "step": 11374 + }, + { + "epoch": 0.7330669588193595, + "grad_norm": 3.189354502948727e-05, + "learning_rate": 2.9709989258861445e-06, + "loss": 0.0, + "step": 11375 + }, + { + "epoch": 0.7331314042662885, + "grad_norm": 0.00015374242569765145, + "learning_rate": 2.9702828499820984e-06, + "loss": 0.0, + "step": 11376 + }, + { + "epoch": 0.7331958497132177, + "grad_norm": 0.004303427852003826, + "learning_rate": 2.9695667740780527e-06, + "loss": 0.0, + "step": 11377 + }, + { + "epoch": 0.7332602951601469, + "grad_norm": 0.01627340553711377, + "learning_rate": 2.9688506981740066e-06, + "loss": 0.0, + "step": 11378 + }, + { + "epoch": 0.7333247406070761, + "grad_norm": 0.1106417562165362, + "learning_rate": 2.968134622269961e-06, + "loss": 0.0001, + "step": 11379 + }, + { + "epoch": 0.7333891860540053, + "grad_norm": 0.11460116547485791, + "learning_rate": 2.967418546365915e-06, + "loss": 0.0018, + "step": 11380 + }, + { + "epoch": 0.7334536315009345, + "grad_norm": 0.0073095201484695465, + "learning_rate": 2.966702470461869e-06, + "loss": 0.0, + "step": 11381 + }, + { + "epoch": 0.7335180769478636, + "grad_norm": 0.014872503234698634, + "learning_rate": 2.9659863945578233e-06, + "loss": 0.0001, + "step": 11382 + }, + { + "epoch": 0.7335825223947928, + "grad_norm": 0.042749717691514845, + "learning_rate": 2.965270318653777e-06, + "loss": 0.0001, + "step": 11383 + }, + { + "epoch": 0.733646967841722, + "grad_norm": 0.0008445524787041037, + "learning_rate": 2.9645542427497315e-06, + "loss": 0.0, + "step": 11384 + }, + { + "epoch": 0.7337114132886512, + "grad_norm": 0.005167055323404655, + "learning_rate": 2.9638381668456862e-06, + "loss": 0.0, + "step": 11385 + }, + { + "epoch": 0.7337758587355804, + "grad_norm": 0.026027118945750397, + "learning_rate": 2.96312209094164e-06, + "loss": 0.0, + "step": 11386 + }, + { + "epoch": 0.7338403041825095, + "grad_norm": 0.0004145667714222251, + "learning_rate": 2.9624060150375944e-06, + "loss": 0.0, + "step": 11387 + }, + { + "epoch": 0.7339047496294386, + "grad_norm": 5.982031954885564e-05, + "learning_rate": 2.9616899391335487e-06, + "loss": 0.0, + "step": 11388 + }, + { + "epoch": 0.7339691950763678, + "grad_norm": 0.04745125465754833, + "learning_rate": 2.9609738632295026e-06, + "loss": 0.0003, + "step": 11389 + }, + { + "epoch": 0.734033640523297, + "grad_norm": 0.0004990294223088284, + "learning_rate": 2.960257787325457e-06, + "loss": 0.0, + "step": 11390 + }, + { + "epoch": 0.7340980859702262, + "grad_norm": 0.0006998216447846934, + "learning_rate": 2.9595417114214108e-06, + "loss": 0.0, + "step": 11391 + }, + { + "epoch": 0.7341625314171554, + "grad_norm": 0.01677587625693549, + "learning_rate": 2.958825635517365e-06, + "loss": 0.0, + "step": 11392 + }, + { + "epoch": 0.7342269768640846, + "grad_norm": 0.0003467193528171593, + "learning_rate": 2.958109559613319e-06, + "loss": 0.0, + "step": 11393 + }, + { + "epoch": 0.7342914223110137, + "grad_norm": 0.0005840219191719138, + "learning_rate": 2.9573934837092732e-06, + "loss": 0.0, + "step": 11394 + }, + { + "epoch": 0.7343558677579429, + "grad_norm": 0.0032256336054706956, + "learning_rate": 2.9566774078052275e-06, + "loss": 0.0, + "step": 11395 + }, + { + "epoch": 0.7344203132048721, + "grad_norm": 0.00010625610456919803, + "learning_rate": 2.9559613319011814e-06, + "loss": 0.0, + "step": 11396 + }, + { + "epoch": 0.7344847586518013, + "grad_norm": 0.00030531125861322296, + "learning_rate": 2.955245255997136e-06, + "loss": 0.0, + "step": 11397 + }, + { + "epoch": 0.7345492040987305, + "grad_norm": 0.2009604092585717, + "learning_rate": 2.9545291800930904e-06, + "loss": 0.0005, + "step": 11398 + }, + { + "epoch": 0.7346136495456596, + "grad_norm": 0.33045670282215517, + "learning_rate": 2.9538131041890443e-06, + "loss": 0.0024, + "step": 11399 + }, + { + "epoch": 0.7346780949925887, + "grad_norm": 0.0005136405285533002, + "learning_rate": 2.9530970282849986e-06, + "loss": 0.0, + "step": 11400 + }, + { + "epoch": 0.7347425404395179, + "grad_norm": 0.009765878316454831, + "learning_rate": 2.9523809523809525e-06, + "loss": 0.0, + "step": 11401 + }, + { + "epoch": 0.7348069858864471, + "grad_norm": 0.33457723500160075, + "learning_rate": 2.9516648764769068e-06, + "loss": 0.0013, + "step": 11402 + }, + { + "epoch": 0.7348714313333763, + "grad_norm": 0.08505562734413115, + "learning_rate": 2.950948800572861e-06, + "loss": 0.0001, + "step": 11403 + }, + { + "epoch": 0.7349358767803055, + "grad_norm": 0.0005396746602388885, + "learning_rate": 2.950232724668815e-06, + "loss": 0.0, + "step": 11404 + }, + { + "epoch": 0.7350003222272347, + "grad_norm": 0.0007219231506156975, + "learning_rate": 2.9495166487647692e-06, + "loss": 0.0, + "step": 11405 + }, + { + "epoch": 0.7350647676741638, + "grad_norm": 0.00080918341992736, + "learning_rate": 2.948800572860723e-06, + "loss": 0.0, + "step": 11406 + }, + { + "epoch": 0.735129213121093, + "grad_norm": 0.008291451385984977, + "learning_rate": 2.9480844969566774e-06, + "loss": 0.0001, + "step": 11407 + }, + { + "epoch": 0.7351936585680222, + "grad_norm": 3.7420126980391375e-05, + "learning_rate": 2.9473684210526317e-06, + "loss": 0.0, + "step": 11408 + }, + { + "epoch": 0.7352581040149514, + "grad_norm": 0.00547087372155844, + "learning_rate": 2.9466523451485864e-06, + "loss": 0.0, + "step": 11409 + }, + { + "epoch": 0.7353225494618805, + "grad_norm": 0.0017479068818298368, + "learning_rate": 2.9459362692445403e-06, + "loss": 0.0, + "step": 11410 + }, + { + "epoch": 0.7353869949088097, + "grad_norm": 0.00347103415906202, + "learning_rate": 2.9452201933404946e-06, + "loss": 0.0, + "step": 11411 + }, + { + "epoch": 0.7354514403557388, + "grad_norm": 0.004133792163996183, + "learning_rate": 2.9445041174364485e-06, + "loss": 0.0, + "step": 11412 + }, + { + "epoch": 0.735515885802668, + "grad_norm": 0.00042971752764806866, + "learning_rate": 2.9437880415324028e-06, + "loss": 0.0, + "step": 11413 + }, + { + "epoch": 0.7355803312495972, + "grad_norm": 0.00010637823235444148, + "learning_rate": 2.9430719656283567e-06, + "loss": 0.0, + "step": 11414 + }, + { + "epoch": 0.7356447766965264, + "grad_norm": 0.00012087427014443836, + "learning_rate": 2.942355889724311e-06, + "loss": 0.0, + "step": 11415 + }, + { + "epoch": 0.7357092221434556, + "grad_norm": 0.00217104518436385, + "learning_rate": 2.9416398138202653e-06, + "loss": 0.0, + "step": 11416 + }, + { + "epoch": 0.7357736675903848, + "grad_norm": 6.510885686800687e-05, + "learning_rate": 2.940923737916219e-06, + "loss": 0.0, + "step": 11417 + }, + { + "epoch": 0.735838113037314, + "grad_norm": 0.013142905983270175, + "learning_rate": 2.9402076620121734e-06, + "loss": 0.0001, + "step": 11418 + }, + { + "epoch": 0.7359025584842431, + "grad_norm": 0.002358491054728065, + "learning_rate": 2.9394915861081273e-06, + "loss": 0.0, + "step": 11419 + }, + { + "epoch": 0.7359670039311723, + "grad_norm": 0.0026521542996968076, + "learning_rate": 2.938775510204082e-06, + "loss": 0.0, + "step": 11420 + }, + { + "epoch": 0.7360314493781014, + "grad_norm": 0.003946334973341138, + "learning_rate": 2.9380594343000363e-06, + "loss": 0.0, + "step": 11421 + }, + { + "epoch": 0.7360958948250306, + "grad_norm": 0.0088235968052502, + "learning_rate": 2.93734335839599e-06, + "loss": 0.0, + "step": 11422 + }, + { + "epoch": 0.7361603402719598, + "grad_norm": 0.004964299051000416, + "learning_rate": 2.9366272824919445e-06, + "loss": 0.0, + "step": 11423 + }, + { + "epoch": 0.7362247857188889, + "grad_norm": 0.008458645840074462, + "learning_rate": 2.935911206587899e-06, + "loss": 0.0, + "step": 11424 + }, + { + "epoch": 0.7362892311658181, + "grad_norm": 0.003313165458895752, + "learning_rate": 2.9351951306838527e-06, + "loss": 0.0, + "step": 11425 + }, + { + "epoch": 0.7363536766127473, + "grad_norm": 0.0005009463709045276, + "learning_rate": 2.934479054779807e-06, + "loss": 0.0, + "step": 11426 + }, + { + "epoch": 0.7364181220596765, + "grad_norm": 0.0006078475946347758, + "learning_rate": 2.933762978875761e-06, + "loss": 0.0, + "step": 11427 + }, + { + "epoch": 0.7364825675066057, + "grad_norm": 0.007668545880177946, + "learning_rate": 2.933046902971715e-06, + "loss": 0.0, + "step": 11428 + }, + { + "epoch": 0.7365470129535349, + "grad_norm": 0.16581019987401066, + "learning_rate": 2.9323308270676694e-06, + "loss": 0.0004, + "step": 11429 + }, + { + "epoch": 0.736611458400464, + "grad_norm": 9.086367411350327e-05, + "learning_rate": 2.9316147511636233e-06, + "loss": 0.0, + "step": 11430 + }, + { + "epoch": 0.7366759038473932, + "grad_norm": 0.002234896439144746, + "learning_rate": 2.9308986752595776e-06, + "loss": 0.0, + "step": 11431 + }, + { + "epoch": 0.7367403492943223, + "grad_norm": 0.005501577603166098, + "learning_rate": 2.9301825993555323e-06, + "loss": 0.0, + "step": 11432 + }, + { + "epoch": 0.7368047947412515, + "grad_norm": 0.014186799638279296, + "learning_rate": 2.9294665234514862e-06, + "loss": 0.0, + "step": 11433 + }, + { + "epoch": 0.7368692401881807, + "grad_norm": 0.007490603212982399, + "learning_rate": 2.9287504475474405e-06, + "loss": 0.0, + "step": 11434 + }, + { + "epoch": 0.7369336856351099, + "grad_norm": 0.003929949417439596, + "learning_rate": 2.9280343716433944e-06, + "loss": 0.0, + "step": 11435 + }, + { + "epoch": 0.736998131082039, + "grad_norm": 0.5977107253835082, + "learning_rate": 2.9273182957393487e-06, + "loss": 0.0005, + "step": 11436 + }, + { + "epoch": 0.7370625765289682, + "grad_norm": 0.018352840375388126, + "learning_rate": 2.926602219835303e-06, + "loss": 0.0, + "step": 11437 + }, + { + "epoch": 0.7371270219758974, + "grad_norm": 0.00020872345792618345, + "learning_rate": 2.925886143931257e-06, + "loss": 0.0, + "step": 11438 + }, + { + "epoch": 0.7371914674228266, + "grad_norm": 0.00234018454065431, + "learning_rate": 2.925170068027211e-06, + "loss": 0.0, + "step": 11439 + }, + { + "epoch": 0.7372559128697558, + "grad_norm": 0.0016579721245929928, + "learning_rate": 2.924453992123165e-06, + "loss": 0.0, + "step": 11440 + }, + { + "epoch": 0.737320358316685, + "grad_norm": 0.08998472100608822, + "learning_rate": 2.9237379162191193e-06, + "loss": 0.0001, + "step": 11441 + }, + { + "epoch": 0.7373848037636141, + "grad_norm": 0.0016083044416349665, + "learning_rate": 2.923021840315073e-06, + "loss": 0.0, + "step": 11442 + }, + { + "epoch": 0.7374492492105432, + "grad_norm": 0.4860207992251301, + "learning_rate": 2.9223057644110275e-06, + "loss": 0.0011, + "step": 11443 + }, + { + "epoch": 0.7375136946574724, + "grad_norm": 0.0023780665069745095, + "learning_rate": 2.9215896885069822e-06, + "loss": 0.0, + "step": 11444 + }, + { + "epoch": 0.7375781401044016, + "grad_norm": 0.18297514282903246, + "learning_rate": 2.9208736126029365e-06, + "loss": 0.0008, + "step": 11445 + }, + { + "epoch": 0.7376425855513308, + "grad_norm": 7.226204698247018e-05, + "learning_rate": 2.9201575366988904e-06, + "loss": 0.0, + "step": 11446 + }, + { + "epoch": 0.73770703099826, + "grad_norm": 0.005099393862582168, + "learning_rate": 2.9194414607948447e-06, + "loss": 0.0, + "step": 11447 + }, + { + "epoch": 0.7377714764451891, + "grad_norm": 0.04962834057641299, + "learning_rate": 2.9187253848907986e-06, + "loss": 0.0001, + "step": 11448 + }, + { + "epoch": 0.7378359218921183, + "grad_norm": 0.0006057873141577277, + "learning_rate": 2.918009308986753e-06, + "loss": 0.0, + "step": 11449 + }, + { + "epoch": 0.7379003673390475, + "grad_norm": 0.0028990582394420833, + "learning_rate": 2.9172932330827068e-06, + "loss": 0.0, + "step": 11450 + }, + { + "epoch": 0.7379648127859767, + "grad_norm": 0.050842658381563356, + "learning_rate": 2.916577157178661e-06, + "loss": 0.0007, + "step": 11451 + }, + { + "epoch": 0.7380292582329059, + "grad_norm": 0.0024151748871815318, + "learning_rate": 2.9158610812746154e-06, + "loss": 0.0, + "step": 11452 + }, + { + "epoch": 0.7380937036798351, + "grad_norm": 0.0003905186066450872, + "learning_rate": 2.9151450053705692e-06, + "loss": 0.0, + "step": 11453 + }, + { + "epoch": 0.7381581491267641, + "grad_norm": 0.09842042434272515, + "learning_rate": 2.9144289294665235e-06, + "loss": 0.0003, + "step": 11454 + }, + { + "epoch": 0.7382225945736933, + "grad_norm": 0.03346659978636565, + "learning_rate": 2.9137128535624782e-06, + "loss": 0.0001, + "step": 11455 + }, + { + "epoch": 0.7382870400206225, + "grad_norm": 0.006367096281842634, + "learning_rate": 2.912996777658432e-06, + "loss": 0.0, + "step": 11456 + }, + { + "epoch": 0.7383514854675517, + "grad_norm": 0.00023461946393520187, + "learning_rate": 2.9122807017543864e-06, + "loss": 0.0, + "step": 11457 + }, + { + "epoch": 0.7384159309144809, + "grad_norm": 0.00038369322384038427, + "learning_rate": 2.9115646258503403e-06, + "loss": 0.0, + "step": 11458 + }, + { + "epoch": 0.7384803763614101, + "grad_norm": 0.005769438618029025, + "learning_rate": 2.9108485499462946e-06, + "loss": 0.0, + "step": 11459 + }, + { + "epoch": 0.7385448218083392, + "grad_norm": 0.0017036441431630288, + "learning_rate": 2.910132474042249e-06, + "loss": 0.0, + "step": 11460 + }, + { + "epoch": 0.7386092672552684, + "grad_norm": 0.0001072216240296, + "learning_rate": 2.9094163981382028e-06, + "loss": 0.0, + "step": 11461 + }, + { + "epoch": 0.7386737127021976, + "grad_norm": 0.01801058241218933, + "learning_rate": 2.908700322234157e-06, + "loss": 0.0001, + "step": 11462 + }, + { + "epoch": 0.7387381581491268, + "grad_norm": 0.22848583907627634, + "learning_rate": 2.907984246330111e-06, + "loss": 0.0015, + "step": 11463 + }, + { + "epoch": 0.738802603596056, + "grad_norm": 0.01842134254047434, + "learning_rate": 2.9072681704260652e-06, + "loss": 0.0002, + "step": 11464 + }, + { + "epoch": 0.7388670490429851, + "grad_norm": 0.1558477289825116, + "learning_rate": 2.9065520945220195e-06, + "loss": 0.0003, + "step": 11465 + }, + { + "epoch": 0.7389314944899142, + "grad_norm": 0.0013242711822388608, + "learning_rate": 2.9058360186179734e-06, + "loss": 0.0, + "step": 11466 + }, + { + "epoch": 0.7389959399368434, + "grad_norm": 0.00503076842425939, + "learning_rate": 2.905119942713928e-06, + "loss": 0.0, + "step": 11467 + }, + { + "epoch": 0.7390603853837726, + "grad_norm": 0.05842548289512627, + "learning_rate": 2.9044038668098824e-06, + "loss": 0.0006, + "step": 11468 + }, + { + "epoch": 0.7391248308307018, + "grad_norm": 0.003572431656853577, + "learning_rate": 2.9036877909058363e-06, + "loss": 0.0, + "step": 11469 + }, + { + "epoch": 0.739189276277631, + "grad_norm": 0.0209967272130641, + "learning_rate": 2.9029717150017906e-06, + "loss": 0.0001, + "step": 11470 + }, + { + "epoch": 0.7392537217245602, + "grad_norm": 0.0006626698904435314, + "learning_rate": 2.9022556390977445e-06, + "loss": 0.0, + "step": 11471 + }, + { + "epoch": 0.7393181671714893, + "grad_norm": 0.0793006035205354, + "learning_rate": 2.9015395631936988e-06, + "loss": 0.0007, + "step": 11472 + }, + { + "epoch": 0.7393826126184185, + "grad_norm": 0.0037272037398564804, + "learning_rate": 2.900823487289653e-06, + "loss": 0.0, + "step": 11473 + }, + { + "epoch": 0.7394470580653477, + "grad_norm": 0.2464952448338863, + "learning_rate": 2.900107411385607e-06, + "loss": 0.0058, + "step": 11474 + }, + { + "epoch": 0.7395115035122769, + "grad_norm": 4.4250068883029826e-05, + "learning_rate": 2.8993913354815613e-06, + "loss": 0.0, + "step": 11475 + }, + { + "epoch": 0.7395759489592061, + "grad_norm": 0.00012204855908644014, + "learning_rate": 2.898675259577515e-06, + "loss": 0.0, + "step": 11476 + }, + { + "epoch": 0.7396403944061352, + "grad_norm": 0.002904846725812882, + "learning_rate": 2.8979591836734694e-06, + "loss": 0.0, + "step": 11477 + }, + { + "epoch": 0.7397048398530643, + "grad_norm": 0.002904846725812882, + "learning_rate": 2.8979591836734694e-06, + "loss": 0.0034, + "step": 11478 + }, + { + "epoch": 0.7397692852999935, + "grad_norm": 0.004992793364236146, + "learning_rate": 2.8972431077694237e-06, + "loss": 0.0, + "step": 11479 + }, + { + "epoch": 0.7398337307469227, + "grad_norm": 0.04042075975410935, + "learning_rate": 2.896527031865378e-06, + "loss": 0.0001, + "step": 11480 + }, + { + "epoch": 0.7398981761938519, + "grad_norm": 0.006872133714897192, + "learning_rate": 2.8958109559613323e-06, + "loss": 0.0, + "step": 11481 + }, + { + "epoch": 0.7399626216407811, + "grad_norm": 0.10795343834250004, + "learning_rate": 2.8950948800572866e-06, + "loss": 0.0002, + "step": 11482 + }, + { + "epoch": 0.7400270670877103, + "grad_norm": 0.0005270637838438341, + "learning_rate": 2.8943788041532405e-06, + "loss": 0.0, + "step": 11483 + }, + { + "epoch": 0.7400915125346395, + "grad_norm": 0.015242437661451919, + "learning_rate": 2.893662728249195e-06, + "loss": 0.0, + "step": 11484 + }, + { + "epoch": 0.7401559579815686, + "grad_norm": 0.11215733937589789, + "learning_rate": 2.8929466523451487e-06, + "loss": 0.0001, + "step": 11485 + }, + { + "epoch": 0.7402204034284978, + "grad_norm": 5.972577802370192e-05, + "learning_rate": 2.892230576441103e-06, + "loss": 0.0, + "step": 11486 + }, + { + "epoch": 0.740284848875427, + "grad_norm": 0.0011349598866917305, + "learning_rate": 2.8915145005370573e-06, + "loss": 0.0, + "step": 11487 + }, + { + "epoch": 0.7403492943223561, + "grad_norm": 0.0022451144249306092, + "learning_rate": 2.890798424633011e-06, + "loss": 0.0, + "step": 11488 + }, + { + "epoch": 0.7404137397692853, + "grad_norm": 0.0007307866650342127, + "learning_rate": 2.8900823487289654e-06, + "loss": 0.0, + "step": 11489 + }, + { + "epoch": 0.7404781852162144, + "grad_norm": 0.0025448175047036894, + "learning_rate": 2.8893662728249193e-06, + "loss": 0.0, + "step": 11490 + }, + { + "epoch": 0.7405426306631436, + "grad_norm": 7.213562714107801e-05, + "learning_rate": 2.888650196920874e-06, + "loss": 0.0, + "step": 11491 + }, + { + "epoch": 0.7406070761100728, + "grad_norm": 0.00037888931948071514, + "learning_rate": 2.8879341210168283e-06, + "loss": 0.0, + "step": 11492 + }, + { + "epoch": 0.740671521557002, + "grad_norm": 0.0003706806676715015, + "learning_rate": 2.8872180451127822e-06, + "loss": 0.0, + "step": 11493 + }, + { + "epoch": 0.7407359670039312, + "grad_norm": 0.011404830342325069, + "learning_rate": 2.8865019692087365e-06, + "loss": 0.0, + "step": 11494 + }, + { + "epoch": 0.7408004124508604, + "grad_norm": 0.0014902201044040247, + "learning_rate": 2.885785893304691e-06, + "loss": 0.0, + "step": 11495 + }, + { + "epoch": 0.7408648578977896, + "grad_norm": 0.00031089468463307054, + "learning_rate": 2.8850698174006447e-06, + "loss": 0.0, + "step": 11496 + }, + { + "epoch": 0.7409293033447187, + "grad_norm": 0.0004114111374888369, + "learning_rate": 2.884353741496599e-06, + "loss": 0.0, + "step": 11497 + }, + { + "epoch": 0.7409937487916479, + "grad_norm": 0.002545742978935913, + "learning_rate": 2.883637665592553e-06, + "loss": 0.0, + "step": 11498 + }, + { + "epoch": 0.741058194238577, + "grad_norm": 8.01331414854357e-05, + "learning_rate": 2.882921589688507e-06, + "loss": 0.0, + "step": 11499 + }, + { + "epoch": 0.7411226396855062, + "grad_norm": 0.826731859534718, + "learning_rate": 2.882205513784461e-06, + "loss": 0.0006, + "step": 11500 + }, + { + "epoch": 0.7411870851324354, + "grad_norm": 0.1948683552474743, + "learning_rate": 2.8814894378804153e-06, + "loss": 0.0004, + "step": 11501 + }, + { + "epoch": 0.7412515305793645, + "grad_norm": 0.008470631267963194, + "learning_rate": 2.8807733619763696e-06, + "loss": 0.0, + "step": 11502 + }, + { + "epoch": 0.7413159760262937, + "grad_norm": 0.008737941093447015, + "learning_rate": 2.8800572860723244e-06, + "loss": 0.0001, + "step": 11503 + }, + { + "epoch": 0.7413804214732229, + "grad_norm": 0.01340398639142613, + "learning_rate": 2.8793412101682782e-06, + "loss": 0.0002, + "step": 11504 + }, + { + "epoch": 0.7414448669201521, + "grad_norm": 0.000581258970536343, + "learning_rate": 2.8786251342642325e-06, + "loss": 0.0, + "step": 11505 + }, + { + "epoch": 0.7415093123670813, + "grad_norm": 0.004788253913425937, + "learning_rate": 2.8779090583601864e-06, + "loss": 0.0, + "step": 11506 + }, + { + "epoch": 0.7415737578140105, + "grad_norm": 0.00012619639194410797, + "learning_rate": 2.8771929824561407e-06, + "loss": 0.0, + "step": 11507 + }, + { + "epoch": 0.7416382032609397, + "grad_norm": 0.011955899024996343, + "learning_rate": 2.8764769065520946e-06, + "loss": 0.0, + "step": 11508 + }, + { + "epoch": 0.7417026487078688, + "grad_norm": 0.0011804670870373755, + "learning_rate": 2.875760830648049e-06, + "loss": 0.0, + "step": 11509 + }, + { + "epoch": 0.7417670941547979, + "grad_norm": 0.002210108798975343, + "learning_rate": 2.875044754744003e-06, + "loss": 0.0, + "step": 11510 + }, + { + "epoch": 0.7418315396017271, + "grad_norm": 0.0004053548305945269, + "learning_rate": 2.874328678839957e-06, + "loss": 0.0, + "step": 11511 + }, + { + "epoch": 0.7418959850486563, + "grad_norm": 0.00013120753274026113, + "learning_rate": 2.8736126029359114e-06, + "loss": 0.0, + "step": 11512 + }, + { + "epoch": 0.7419604304955855, + "grad_norm": 0.12246954292956626, + "learning_rate": 2.8728965270318652e-06, + "loss": 0.0002, + "step": 11513 + }, + { + "epoch": 0.7420248759425147, + "grad_norm": 0.0031773706184475043, + "learning_rate": 2.8721804511278195e-06, + "loss": 0.0, + "step": 11514 + }, + { + "epoch": 0.7420893213894438, + "grad_norm": 0.0003136670518566507, + "learning_rate": 2.8714643752237742e-06, + "loss": 0.0, + "step": 11515 + }, + { + "epoch": 0.742153766836373, + "grad_norm": 0.05410939153081128, + "learning_rate": 2.870748299319728e-06, + "loss": 0.0001, + "step": 11516 + }, + { + "epoch": 0.7422182122833022, + "grad_norm": 7.286480718874506e-05, + "learning_rate": 2.8700322234156824e-06, + "loss": 0.0, + "step": 11517 + }, + { + "epoch": 0.7422826577302314, + "grad_norm": 0.005174802579942772, + "learning_rate": 2.8693161475116367e-06, + "loss": 0.0, + "step": 11518 + }, + { + "epoch": 0.7423471031771606, + "grad_norm": 0.001427121198095007, + "learning_rate": 2.8686000716075906e-06, + "loss": 0.0, + "step": 11519 + }, + { + "epoch": 0.7424115486240898, + "grad_norm": 0.021820159725184658, + "learning_rate": 2.867883995703545e-06, + "loss": 0.0001, + "step": 11520 + }, + { + "epoch": 0.7424759940710188, + "grad_norm": 0.0030847242572352293, + "learning_rate": 2.8671679197994988e-06, + "loss": 0.0, + "step": 11521 + }, + { + "epoch": 0.742540439517948, + "grad_norm": 0.10641283012131907, + "learning_rate": 2.866451843895453e-06, + "loss": 0.0017, + "step": 11522 + }, + { + "epoch": 0.7426048849648772, + "grad_norm": 0.009833028385451683, + "learning_rate": 2.8657357679914074e-06, + "loss": 0.0001, + "step": 11523 + }, + { + "epoch": 0.7426693304118064, + "grad_norm": 0.003582891327112572, + "learning_rate": 2.8650196920873612e-06, + "loss": 0.0, + "step": 11524 + }, + { + "epoch": 0.7427337758587356, + "grad_norm": 0.00897878326744134, + "learning_rate": 2.8643036161833155e-06, + "loss": 0.0001, + "step": 11525 + }, + { + "epoch": 0.7427982213056648, + "grad_norm": 0.00011438453578087207, + "learning_rate": 2.8635875402792703e-06, + "loss": 0.0, + "step": 11526 + }, + { + "epoch": 0.7428626667525939, + "grad_norm": 1.8637766949118943, + "learning_rate": 2.862871464375224e-06, + "loss": 0.0185, + "step": 11527 + }, + { + "epoch": 0.7429271121995231, + "grad_norm": 8.769021280880539e-05, + "learning_rate": 2.8621553884711784e-06, + "loss": 0.0, + "step": 11528 + }, + { + "epoch": 0.7429915576464523, + "grad_norm": 0.009708635368675824, + "learning_rate": 2.8614393125671323e-06, + "loss": 0.0001, + "step": 11529 + }, + { + "epoch": 0.7430560030933815, + "grad_norm": 0.25208524448416436, + "learning_rate": 2.8607232366630866e-06, + "loss": 0.001, + "step": 11530 + }, + { + "epoch": 0.7431204485403107, + "grad_norm": 0.23889954660465637, + "learning_rate": 2.860007160759041e-06, + "loss": 0.0013, + "step": 11531 + }, + { + "epoch": 0.7431848939872397, + "grad_norm": 0.003267420010600027, + "learning_rate": 2.8592910848549948e-06, + "loss": 0.0, + "step": 11532 + }, + { + "epoch": 0.7432493394341689, + "grad_norm": 8.740468378403458e-05, + "learning_rate": 2.858575008950949e-06, + "loss": 0.0, + "step": 11533 + }, + { + "epoch": 0.7433137848810981, + "grad_norm": 6.484494334233853e-05, + "learning_rate": 2.857858933046903e-06, + "loss": 0.0, + "step": 11534 + }, + { + "epoch": 0.7433782303280273, + "grad_norm": 0.33328327409046465, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.0026, + "step": 11535 + }, + { + "epoch": 0.7434426757749565, + "grad_norm": 0.03705969421250206, + "learning_rate": 2.856426781238811e-06, + "loss": 0.0001, + "step": 11536 + }, + { + "epoch": 0.7435071212218857, + "grad_norm": 0.08214135387497098, + "learning_rate": 2.8557107053347654e-06, + "loss": 0.0001, + "step": 11537 + }, + { + "epoch": 0.7435715666688149, + "grad_norm": 0.0009779501742388384, + "learning_rate": 2.85499462943072e-06, + "loss": 0.0, + "step": 11538 + }, + { + "epoch": 0.743636012115744, + "grad_norm": 0.0019099608653063945, + "learning_rate": 2.8542785535266745e-06, + "loss": 0.0, + "step": 11539 + }, + { + "epoch": 0.7437004575626732, + "grad_norm": 6.36266112464164e-05, + "learning_rate": 2.8535624776226283e-06, + "loss": 0.0, + "step": 11540 + }, + { + "epoch": 0.7437649030096024, + "grad_norm": 0.01838611981256415, + "learning_rate": 2.8528464017185826e-06, + "loss": 0.0, + "step": 11541 + }, + { + "epoch": 0.7438293484565316, + "grad_norm": 0.00011629883493401809, + "learning_rate": 2.8521303258145365e-06, + "loss": 0.0, + "step": 11542 + }, + { + "epoch": 0.7438937939034607, + "grad_norm": 0.002767961297251372, + "learning_rate": 2.851414249910491e-06, + "loss": 0.0, + "step": 11543 + }, + { + "epoch": 0.7439582393503898, + "grad_norm": 0.054474319327986465, + "learning_rate": 2.8506981740064447e-06, + "loss": 0.0004, + "step": 11544 + }, + { + "epoch": 0.744022684797319, + "grad_norm": 0.0006248430618284908, + "learning_rate": 2.849982098102399e-06, + "loss": 0.0, + "step": 11545 + }, + { + "epoch": 0.7440871302442482, + "grad_norm": 0.00481655779190564, + "learning_rate": 2.8492660221983533e-06, + "loss": 0.0, + "step": 11546 + }, + { + "epoch": 0.7441515756911774, + "grad_norm": 0.015192968024787968, + "learning_rate": 2.848549946294307e-06, + "loss": 0.0001, + "step": 11547 + }, + { + "epoch": 0.7442160211381066, + "grad_norm": 0.02535298427612439, + "learning_rate": 2.8478338703902614e-06, + "loss": 0.0001, + "step": 11548 + }, + { + "epoch": 0.7442804665850358, + "grad_norm": 0.008441149317855135, + "learning_rate": 2.8471177944862153e-06, + "loss": 0.0001, + "step": 11549 + }, + { + "epoch": 0.744344912031965, + "grad_norm": 0.00043440331282244664, + "learning_rate": 2.84640171858217e-06, + "loss": 0.0, + "step": 11550 + }, + { + "epoch": 0.7444093574788941, + "grad_norm": 0.0028974444813782536, + "learning_rate": 2.8456856426781243e-06, + "loss": 0.0, + "step": 11551 + }, + { + "epoch": 0.7444738029258233, + "grad_norm": 0.00011725375211290053, + "learning_rate": 2.8449695667740786e-06, + "loss": 0.0, + "step": 11552 + }, + { + "epoch": 0.7445382483727525, + "grad_norm": 0.0003650860736984975, + "learning_rate": 2.8442534908700325e-06, + "loss": 0.0, + "step": 11553 + }, + { + "epoch": 0.7446026938196817, + "grad_norm": 4.315019431511173e-05, + "learning_rate": 2.843537414965987e-06, + "loss": 0.0, + "step": 11554 + }, + { + "epoch": 0.7446671392666108, + "grad_norm": 0.002278690880074027, + "learning_rate": 2.8428213390619407e-06, + "loss": 0.0, + "step": 11555 + }, + { + "epoch": 0.74473158471354, + "grad_norm": 0.0001178197491432631, + "learning_rate": 2.842105263157895e-06, + "loss": 0.0, + "step": 11556 + }, + { + "epoch": 0.7447960301604691, + "grad_norm": 0.003881942521512389, + "learning_rate": 2.841389187253849e-06, + "loss": 0.0, + "step": 11557 + }, + { + "epoch": 0.7448604756073983, + "grad_norm": 0.0021296588472513025, + "learning_rate": 2.840673111349803e-06, + "loss": 0.0, + "step": 11558 + }, + { + "epoch": 0.7449249210543275, + "grad_norm": 0.00046791558014023247, + "learning_rate": 2.8399570354457575e-06, + "loss": 0.0, + "step": 11559 + }, + { + "epoch": 0.7449893665012567, + "grad_norm": 0.00020211463966043466, + "learning_rate": 2.8392409595417113e-06, + "loss": 0.0, + "step": 11560 + }, + { + "epoch": 0.7450538119481859, + "grad_norm": 0.0003985674663379224, + "learning_rate": 2.838524883637666e-06, + "loss": 0.0, + "step": 11561 + }, + { + "epoch": 0.7451182573951151, + "grad_norm": 0.33669691966511234, + "learning_rate": 2.8378088077336204e-06, + "loss": 0.0031, + "step": 11562 + }, + { + "epoch": 0.7451827028420442, + "grad_norm": 0.012881644125098787, + "learning_rate": 2.8370927318295742e-06, + "loss": 0.0001, + "step": 11563 + }, + { + "epoch": 0.7452471482889734, + "grad_norm": 0.0004095707305752869, + "learning_rate": 2.8363766559255285e-06, + "loss": 0.0, + "step": 11564 + }, + { + "epoch": 0.7453115937359026, + "grad_norm": 0.0553894190092041, + "learning_rate": 2.8356605800214824e-06, + "loss": 0.0002, + "step": 11565 + }, + { + "epoch": 0.7453760391828317, + "grad_norm": 0.0015646026292246813, + "learning_rate": 2.8349445041174367e-06, + "loss": 0.0, + "step": 11566 + }, + { + "epoch": 0.7454404846297609, + "grad_norm": 0.0007490978582765871, + "learning_rate": 2.834228428213391e-06, + "loss": 0.0, + "step": 11567 + }, + { + "epoch": 0.74550493007669, + "grad_norm": 0.0008107603181492814, + "learning_rate": 2.833512352309345e-06, + "loss": 0.0, + "step": 11568 + }, + { + "epoch": 0.7455693755236192, + "grad_norm": 0.0005066625926412042, + "learning_rate": 2.832796276405299e-06, + "loss": 0.0, + "step": 11569 + }, + { + "epoch": 0.7456338209705484, + "grad_norm": 0.0031491577925305417, + "learning_rate": 2.832080200501253e-06, + "loss": 0.0, + "step": 11570 + }, + { + "epoch": 0.7456982664174776, + "grad_norm": 0.0008086278143267719, + "learning_rate": 2.8313641245972074e-06, + "loss": 0.0, + "step": 11571 + }, + { + "epoch": 0.7457627118644068, + "grad_norm": 0.0003411885658767725, + "learning_rate": 2.8306480486931616e-06, + "loss": 0.0, + "step": 11572 + }, + { + "epoch": 0.745827157311336, + "grad_norm": 0.0005964478571595986, + "learning_rate": 2.829931972789116e-06, + "loss": 0.0, + "step": 11573 + }, + { + "epoch": 0.7458916027582652, + "grad_norm": 0.12147032343687166, + "learning_rate": 2.8292158968850702e-06, + "loss": 0.0002, + "step": 11574 + }, + { + "epoch": 0.7459560482051943, + "grad_norm": 0.03803393591887575, + "learning_rate": 2.8284998209810245e-06, + "loss": 0.0016, + "step": 11575 + }, + { + "epoch": 0.7460204936521235, + "grad_norm": 0.00016790024335527423, + "learning_rate": 2.8277837450769784e-06, + "loss": 0.0, + "step": 11576 + }, + { + "epoch": 0.7460849390990526, + "grad_norm": 0.0024457187488064213, + "learning_rate": 2.8270676691729327e-06, + "loss": 0.0, + "step": 11577 + }, + { + "epoch": 0.7461493845459818, + "grad_norm": 6.03264194184613e-05, + "learning_rate": 2.8263515932688866e-06, + "loss": 0.0, + "step": 11578 + }, + { + "epoch": 0.746213829992911, + "grad_norm": 0.0009593868231441341, + "learning_rate": 2.825635517364841e-06, + "loss": 0.0, + "step": 11579 + }, + { + "epoch": 0.7462782754398402, + "grad_norm": 0.00015227808015561272, + "learning_rate": 2.824919441460795e-06, + "loss": 0.0, + "step": 11580 + }, + { + "epoch": 0.7463427208867693, + "grad_norm": 0.0005544944467892892, + "learning_rate": 2.824203365556749e-06, + "loss": 0.0, + "step": 11581 + }, + { + "epoch": 0.7464071663336985, + "grad_norm": 0.14816729522203623, + "learning_rate": 2.8234872896527034e-06, + "loss": 0.0003, + "step": 11582 + }, + { + "epoch": 0.7464716117806277, + "grad_norm": 0.12938729032013577, + "learning_rate": 2.8227712137486572e-06, + "loss": 0.0004, + "step": 11583 + }, + { + "epoch": 0.7465360572275569, + "grad_norm": 0.0844716364187078, + "learning_rate": 2.8220551378446115e-06, + "loss": 0.0006, + "step": 11584 + }, + { + "epoch": 0.7466005026744861, + "grad_norm": 0.002823985210489743, + "learning_rate": 2.8213390619405663e-06, + "loss": 0.0, + "step": 11585 + }, + { + "epoch": 0.7466649481214153, + "grad_norm": 0.05066442807138474, + "learning_rate": 2.82062298603652e-06, + "loss": 0.0002, + "step": 11586 + }, + { + "epoch": 0.7467293935683444, + "grad_norm": 0.004276016365310969, + "learning_rate": 2.8199069101324744e-06, + "loss": 0.0, + "step": 11587 + }, + { + "epoch": 0.7467938390152735, + "grad_norm": 0.0028644723760939557, + "learning_rate": 2.8191908342284287e-06, + "loss": 0.0, + "step": 11588 + }, + { + "epoch": 0.7468582844622027, + "grad_norm": 0.0002588828785945799, + "learning_rate": 2.8184747583243826e-06, + "loss": 0.0, + "step": 11589 + }, + { + "epoch": 0.7469227299091319, + "grad_norm": 0.00039665747800869124, + "learning_rate": 2.817758682420337e-06, + "loss": 0.0, + "step": 11590 + }, + { + "epoch": 0.7469871753560611, + "grad_norm": 0.0003482603860227377, + "learning_rate": 2.8170426065162908e-06, + "loss": 0.0, + "step": 11591 + }, + { + "epoch": 0.7470516208029903, + "grad_norm": 0.0008336352241551569, + "learning_rate": 2.816326530612245e-06, + "loss": 0.0, + "step": 11592 + }, + { + "epoch": 0.7471160662499194, + "grad_norm": 5.235047905893616e-05, + "learning_rate": 2.815610454708199e-06, + "loss": 0.0, + "step": 11593 + }, + { + "epoch": 0.7471805116968486, + "grad_norm": 0.0014324225534880688, + "learning_rate": 2.8148943788041533e-06, + "loss": 0.0, + "step": 11594 + }, + { + "epoch": 0.7472449571437778, + "grad_norm": 0.0017366095073440056, + "learning_rate": 2.8141783029001076e-06, + "loss": 0.0, + "step": 11595 + }, + { + "epoch": 0.747309402590707, + "grad_norm": 0.0004592679869086307, + "learning_rate": 2.8134622269960623e-06, + "loss": 0.0, + "step": 11596 + }, + { + "epoch": 0.7473738480376362, + "grad_norm": 0.0057053165657561505, + "learning_rate": 2.812746151092016e-06, + "loss": 0.0, + "step": 11597 + }, + { + "epoch": 0.7474382934845654, + "grad_norm": 0.0006819042703925117, + "learning_rate": 2.8120300751879705e-06, + "loss": 0.0015, + "step": 11598 + }, + { + "epoch": 0.7475027389314944, + "grad_norm": 0.6504672810054322, + "learning_rate": 2.8113139992839243e-06, + "loss": 0.002, + "step": 11599 + }, + { + "epoch": 0.7475671843784236, + "grad_norm": 0.00035804196067088125, + "learning_rate": 2.8105979233798786e-06, + "loss": 0.0, + "step": 11600 + }, + { + "epoch": 0.7476316298253528, + "grad_norm": 0.0016188882058790126, + "learning_rate": 2.8098818474758325e-06, + "loss": 0.0, + "step": 11601 + }, + { + "epoch": 0.747696075272282, + "grad_norm": 0.00036099919084685566, + "learning_rate": 2.809165771571787e-06, + "loss": 0.0, + "step": 11602 + }, + { + "epoch": 0.7477605207192112, + "grad_norm": 0.0029460432117195917, + "learning_rate": 2.808449695667741e-06, + "loss": 0.0, + "step": 11603 + }, + { + "epoch": 0.7478249661661404, + "grad_norm": 0.00011651976692317094, + "learning_rate": 2.807733619763695e-06, + "loss": 0.0, + "step": 11604 + }, + { + "epoch": 0.7478894116130695, + "grad_norm": 0.0025859682305249014, + "learning_rate": 2.8070175438596493e-06, + "loss": 0.0, + "step": 11605 + }, + { + "epoch": 0.7479538570599987, + "grad_norm": 4.958741962410607e-05, + "learning_rate": 2.806301467955603e-06, + "loss": 0.0, + "step": 11606 + }, + { + "epoch": 0.7480183025069279, + "grad_norm": 0.0029761885102953744, + "learning_rate": 2.8055853920515574e-06, + "loss": 0.0, + "step": 11607 + }, + { + "epoch": 0.7480827479538571, + "grad_norm": 0.0008891637672764099, + "learning_rate": 2.804869316147512e-06, + "loss": 0.0, + "step": 11608 + }, + { + "epoch": 0.7481471934007863, + "grad_norm": 2.754389762637177e-05, + "learning_rate": 2.804153240243466e-06, + "loss": 0.0, + "step": 11609 + }, + { + "epoch": 0.7482116388477154, + "grad_norm": 0.006316527622726746, + "learning_rate": 2.8034371643394203e-06, + "loss": 0.0, + "step": 11610 + }, + { + "epoch": 0.7482760842946445, + "grad_norm": 0.02277983004237064, + "learning_rate": 2.8027210884353746e-06, + "loss": 0.0001, + "step": 11611 + }, + { + "epoch": 0.7483405297415737, + "grad_norm": 5.643851932003774e-05, + "learning_rate": 2.8020050125313285e-06, + "loss": 0.0, + "step": 11612 + }, + { + "epoch": 0.7484049751885029, + "grad_norm": 0.00037393311065744746, + "learning_rate": 2.801288936627283e-06, + "loss": 0.0, + "step": 11613 + }, + { + "epoch": 0.7484694206354321, + "grad_norm": 0.002534550843795288, + "learning_rate": 2.8005728607232367e-06, + "loss": 0.0, + "step": 11614 + }, + { + "epoch": 0.7485338660823613, + "grad_norm": 0.024649051818809553, + "learning_rate": 2.799856784819191e-06, + "loss": 0.0002, + "step": 11615 + }, + { + "epoch": 0.7485983115292905, + "grad_norm": 0.041082903201940656, + "learning_rate": 2.7991407089151453e-06, + "loss": 0.0002, + "step": 11616 + }, + { + "epoch": 0.7486627569762196, + "grad_norm": 0.1127003747936899, + "learning_rate": 2.798424633011099e-06, + "loss": 0.0002, + "step": 11617 + }, + { + "epoch": 0.7487272024231488, + "grad_norm": 0.0001859838946770348, + "learning_rate": 2.7977085571070535e-06, + "loss": 0.0, + "step": 11618 + }, + { + "epoch": 0.748791647870078, + "grad_norm": 5.065890990128626e-05, + "learning_rate": 2.7969924812030073e-06, + "loss": 0.0, + "step": 11619 + }, + { + "epoch": 0.7488560933170072, + "grad_norm": 9.265103468572512e-05, + "learning_rate": 2.796276405298962e-06, + "loss": 0.0, + "step": 11620 + }, + { + "epoch": 0.7489205387639364, + "grad_norm": 0.0008966920858239456, + "learning_rate": 2.7955603293949164e-06, + "loss": 0.0, + "step": 11621 + }, + { + "epoch": 0.7489849842108655, + "grad_norm": 0.00012183848043519522, + "learning_rate": 2.7948442534908702e-06, + "loss": 0.0, + "step": 11622 + }, + { + "epoch": 0.7490494296577946, + "grad_norm": 0.0016598313811174246, + "learning_rate": 2.7941281775868245e-06, + "loss": 0.0, + "step": 11623 + }, + { + "epoch": 0.7491138751047238, + "grad_norm": 1.1238977335514882e-05, + "learning_rate": 2.793412101682779e-06, + "loss": 0.0, + "step": 11624 + }, + { + "epoch": 0.749178320551653, + "grad_norm": 0.00034594887045504333, + "learning_rate": 2.7926960257787327e-06, + "loss": 0.0, + "step": 11625 + }, + { + "epoch": 0.7492427659985822, + "grad_norm": 0.04615189947607823, + "learning_rate": 2.791979949874687e-06, + "loss": 0.0001, + "step": 11626 + }, + { + "epoch": 0.7493072114455114, + "grad_norm": 0.0002986722078818211, + "learning_rate": 2.791263873970641e-06, + "loss": 0.0, + "step": 11627 + }, + { + "epoch": 0.7493716568924406, + "grad_norm": 0.005092092729537614, + "learning_rate": 2.790547798066595e-06, + "loss": 0.0, + "step": 11628 + }, + { + "epoch": 0.7494361023393697, + "grad_norm": 0.1200249868569926, + "learning_rate": 2.7898317221625495e-06, + "loss": 0.0002, + "step": 11629 + }, + { + "epoch": 0.7495005477862989, + "grad_norm": 0.0049599157186567015, + "learning_rate": 2.7891156462585034e-06, + "loss": 0.0, + "step": 11630 + }, + { + "epoch": 0.7495649932332281, + "grad_norm": 8.649864165149563e-05, + "learning_rate": 2.788399570354458e-06, + "loss": 0.0, + "step": 11631 + }, + { + "epoch": 0.7496294386801573, + "grad_norm": 0.0016241807887423438, + "learning_rate": 2.7876834944504124e-06, + "loss": 0.0, + "step": 11632 + }, + { + "epoch": 0.7496938841270864, + "grad_norm": 0.000766953844427467, + "learning_rate": 2.7869674185463662e-06, + "loss": 0.0, + "step": 11633 + }, + { + "epoch": 0.7497583295740156, + "grad_norm": 0.00042285343101394534, + "learning_rate": 2.7862513426423205e-06, + "loss": 0.0, + "step": 11634 + }, + { + "epoch": 0.7498227750209447, + "grad_norm": 0.000205183922965492, + "learning_rate": 2.7855352667382744e-06, + "loss": 0.0, + "step": 11635 + }, + { + "epoch": 0.7498872204678739, + "grad_norm": 5.591482784916908e-05, + "learning_rate": 2.7848191908342287e-06, + "loss": 0.0, + "step": 11636 + }, + { + "epoch": 0.7499516659148031, + "grad_norm": 0.0005972326634359615, + "learning_rate": 2.784103114930183e-06, + "loss": 0.0, + "step": 11637 + }, + { + "epoch": 0.7500161113617323, + "grad_norm": 0.08351007143777367, + "learning_rate": 2.783387039026137e-06, + "loss": 0.0016, + "step": 11638 + }, + { + "epoch": 0.7500805568086615, + "grad_norm": 1.1101820253498027, + "learning_rate": 2.782670963122091e-06, + "loss": 0.0085, + "step": 11639 + }, + { + "epoch": 0.7501450022555907, + "grad_norm": 4.642928998663188e-05, + "learning_rate": 2.781954887218045e-06, + "loss": 0.0, + "step": 11640 + }, + { + "epoch": 0.7502094477025198, + "grad_norm": 0.003023120807805911, + "learning_rate": 2.7812388113139994e-06, + "loss": 0.0, + "step": 11641 + }, + { + "epoch": 0.750273893149449, + "grad_norm": 0.00029685534288482725, + "learning_rate": 2.7805227354099532e-06, + "loss": 0.0, + "step": 11642 + }, + { + "epoch": 0.7503383385963782, + "grad_norm": 0.00021844772164633, + "learning_rate": 2.779806659505908e-06, + "loss": 0.0, + "step": 11643 + }, + { + "epoch": 0.7504027840433073, + "grad_norm": 0.00017051293991199247, + "learning_rate": 2.7790905836018623e-06, + "loss": 0.0, + "step": 11644 + }, + { + "epoch": 0.7504672294902365, + "grad_norm": 0.0012412810367730285, + "learning_rate": 2.7783745076978166e-06, + "loss": 0.0, + "step": 11645 + }, + { + "epoch": 0.7505316749371657, + "grad_norm": 9.528078393517384e-05, + "learning_rate": 2.7776584317937704e-06, + "loss": 0.0, + "step": 11646 + }, + { + "epoch": 0.7505961203840948, + "grad_norm": 0.0011691385906994708, + "learning_rate": 2.7769423558897247e-06, + "loss": 0.0, + "step": 11647 + }, + { + "epoch": 0.750660565831024, + "grad_norm": 0.00023046299115114463, + "learning_rate": 2.7762262799856786e-06, + "loss": 0.0, + "step": 11648 + }, + { + "epoch": 0.7507250112779532, + "grad_norm": 0.0002655424185747253, + "learning_rate": 2.775510204081633e-06, + "loss": 0.0, + "step": 11649 + }, + { + "epoch": 0.7507894567248824, + "grad_norm": 0.19039229979656985, + "learning_rate": 2.7747941281775868e-06, + "loss": 0.0004, + "step": 11650 + }, + { + "epoch": 0.7508539021718116, + "grad_norm": 0.0019108833699937504, + "learning_rate": 2.774078052273541e-06, + "loss": 0.0, + "step": 11651 + }, + { + "epoch": 0.7509183476187408, + "grad_norm": 0.0007730187024007991, + "learning_rate": 2.7733619763694954e-06, + "loss": 0.0, + "step": 11652 + }, + { + "epoch": 0.75098279306567, + "grad_norm": 0.003987731423439629, + "learning_rate": 2.7726459004654493e-06, + "loss": 0.0, + "step": 11653 + }, + { + "epoch": 0.7510472385125991, + "grad_norm": 0.000375934201446267, + "learning_rate": 2.7719298245614036e-06, + "loss": 0.0, + "step": 11654 + }, + { + "epoch": 0.7511116839595282, + "grad_norm": 0.31494069653808116, + "learning_rate": 2.7712137486573583e-06, + "loss": 0.0014, + "step": 11655 + }, + { + "epoch": 0.7511761294064574, + "grad_norm": 0.005691559960223658, + "learning_rate": 2.770497672753312e-06, + "loss": 0.0001, + "step": 11656 + }, + { + "epoch": 0.7512405748533866, + "grad_norm": 0.0004438557435561563, + "learning_rate": 2.7697815968492665e-06, + "loss": 0.0, + "step": 11657 + }, + { + "epoch": 0.7513050203003158, + "grad_norm": 0.05629256404691263, + "learning_rate": 2.7690655209452203e-06, + "loss": 0.0001, + "step": 11658 + }, + { + "epoch": 0.751369465747245, + "grad_norm": 0.00039722302627628154, + "learning_rate": 2.7683494450411746e-06, + "loss": 0.0, + "step": 11659 + }, + { + "epoch": 0.7514339111941741, + "grad_norm": 0.025686982877658825, + "learning_rate": 2.767633369137129e-06, + "loss": 0.0002, + "step": 11660 + }, + { + "epoch": 0.7514983566411033, + "grad_norm": 0.0007727245090655069, + "learning_rate": 2.766917293233083e-06, + "loss": 0.0, + "step": 11661 + }, + { + "epoch": 0.7515628020880325, + "grad_norm": 0.000900820580803089, + "learning_rate": 2.766201217329037e-06, + "loss": 0.0, + "step": 11662 + }, + { + "epoch": 0.7516272475349617, + "grad_norm": 0.00027621965420460257, + "learning_rate": 2.765485141424991e-06, + "loss": 0.0, + "step": 11663 + }, + { + "epoch": 0.7516916929818909, + "grad_norm": 0.0007073996963652853, + "learning_rate": 2.7647690655209453e-06, + "loss": 0.0, + "step": 11664 + }, + { + "epoch": 0.75175613842882, + "grad_norm": 0.0006071636553376335, + "learning_rate": 2.7640529896168996e-06, + "loss": 0.0, + "step": 11665 + }, + { + "epoch": 0.7518205838757491, + "grad_norm": 0.012462280848083922, + "learning_rate": 2.763336913712854e-06, + "loss": 0.0001, + "step": 11666 + }, + { + "epoch": 0.7518850293226783, + "grad_norm": 0.00018560450923334242, + "learning_rate": 2.762620837808808e-06, + "loss": 0.0, + "step": 11667 + }, + { + "epoch": 0.7519494747696075, + "grad_norm": 0.0006644943512278006, + "learning_rate": 2.7619047619047625e-06, + "loss": 0.0, + "step": 11668 + }, + { + "epoch": 0.7520139202165367, + "grad_norm": 0.0020807138950169435, + "learning_rate": 2.7611886860007163e-06, + "loss": 0.0, + "step": 11669 + }, + { + "epoch": 0.7520783656634659, + "grad_norm": 5.37856004501817e-05, + "learning_rate": 2.7604726100966706e-06, + "loss": 0.0, + "step": 11670 + }, + { + "epoch": 0.752142811110395, + "grad_norm": 0.0005517811580377726, + "learning_rate": 2.7597565341926245e-06, + "loss": 0.0, + "step": 11671 + }, + { + "epoch": 0.7522072565573242, + "grad_norm": 0.018654850971019456, + "learning_rate": 2.759040458288579e-06, + "loss": 0.0002, + "step": 11672 + }, + { + "epoch": 0.7522717020042534, + "grad_norm": 0.0015801575484079477, + "learning_rate": 2.758324382384533e-06, + "loss": 0.0, + "step": 11673 + }, + { + "epoch": 0.7523361474511826, + "grad_norm": 0.004039334597610442, + "learning_rate": 2.757608306480487e-06, + "loss": 0.0, + "step": 11674 + }, + { + "epoch": 0.7524005928981118, + "grad_norm": 0.06624770873534068, + "learning_rate": 2.7568922305764413e-06, + "loss": 0.0005, + "step": 11675 + }, + { + "epoch": 0.752465038345041, + "grad_norm": 0.0010852293084164862, + "learning_rate": 2.756176154672395e-06, + "loss": 0.0, + "step": 11676 + }, + { + "epoch": 0.75252948379197, + "grad_norm": 0.0002506029780493605, + "learning_rate": 2.7554600787683495e-06, + "loss": 0.0, + "step": 11677 + }, + { + "epoch": 0.7525939292388992, + "grad_norm": 0.0009374336268016534, + "learning_rate": 2.754744002864304e-06, + "loss": 0.0, + "step": 11678 + }, + { + "epoch": 0.7526583746858284, + "grad_norm": 8.921424638537497e-05, + "learning_rate": 2.754027926960258e-06, + "loss": 0.0, + "step": 11679 + }, + { + "epoch": 0.7527228201327576, + "grad_norm": 0.0015448375941823727, + "learning_rate": 2.7533118510562124e-06, + "loss": 0.0, + "step": 11680 + }, + { + "epoch": 0.7527872655796868, + "grad_norm": 0.0010091009396157359, + "learning_rate": 2.7525957751521667e-06, + "loss": 0.0, + "step": 11681 + }, + { + "epoch": 0.752851711026616, + "grad_norm": 0.0004459042079651145, + "learning_rate": 2.7518796992481205e-06, + "loss": 0.0, + "step": 11682 + }, + { + "epoch": 0.7529161564735452, + "grad_norm": 0.01627399213125485, + "learning_rate": 2.751163623344075e-06, + "loss": 0.0, + "step": 11683 + }, + { + "epoch": 0.7529806019204743, + "grad_norm": 0.013965691610753837, + "learning_rate": 2.7504475474400287e-06, + "loss": 0.0001, + "step": 11684 + }, + { + "epoch": 0.7530450473674035, + "grad_norm": 0.0001156550801431203, + "learning_rate": 2.749731471535983e-06, + "loss": 0.0, + "step": 11685 + }, + { + "epoch": 0.7531094928143327, + "grad_norm": 0.3935840113011604, + "learning_rate": 2.7490153956319373e-06, + "loss": 0.0004, + "step": 11686 + }, + { + "epoch": 0.7531739382612619, + "grad_norm": 0.0007292888015694302, + "learning_rate": 2.748299319727891e-06, + "loss": 0.0, + "step": 11687 + }, + { + "epoch": 0.753238383708191, + "grad_norm": 0.00018506515606916054, + "learning_rate": 2.7475832438238455e-06, + "loss": 0.0, + "step": 11688 + }, + { + "epoch": 0.7533028291551201, + "grad_norm": 0.0002825209862627467, + "learning_rate": 2.7468671679197994e-06, + "loss": 0.0, + "step": 11689 + }, + { + "epoch": 0.7533672746020493, + "grad_norm": 0.0049305875242178976, + "learning_rate": 2.746151092015754e-06, + "loss": 0.0, + "step": 11690 + }, + { + "epoch": 0.7534317200489785, + "grad_norm": 0.0003643963192400323, + "learning_rate": 2.7454350161117084e-06, + "loss": 0.0, + "step": 11691 + }, + { + "epoch": 0.7534961654959077, + "grad_norm": 0.0035886233576998884, + "learning_rate": 2.7447189402076622e-06, + "loss": 0.0, + "step": 11692 + }, + { + "epoch": 0.7535606109428369, + "grad_norm": 0.03080716919407748, + "learning_rate": 2.7440028643036165e-06, + "loss": 0.0001, + "step": 11693 + }, + { + "epoch": 0.7536250563897661, + "grad_norm": 0.00470493868439932, + "learning_rate": 2.743286788399571e-06, + "loss": 0.0, + "step": 11694 + }, + { + "epoch": 0.7536895018366953, + "grad_norm": 0.00019118115247683613, + "learning_rate": 2.7425707124955247e-06, + "loss": 0.0, + "step": 11695 + }, + { + "epoch": 0.7537539472836244, + "grad_norm": 5.5976844224649946e-05, + "learning_rate": 2.741854636591479e-06, + "loss": 0.0, + "step": 11696 + }, + { + "epoch": 0.7538183927305536, + "grad_norm": 0.0237676543025234, + "learning_rate": 2.741138560687433e-06, + "loss": 0.0, + "step": 11697 + }, + { + "epoch": 0.7538828381774828, + "grad_norm": 0.38799647013666066, + "learning_rate": 2.740422484783387e-06, + "loss": 0.0029, + "step": 11698 + }, + { + "epoch": 0.753947283624412, + "grad_norm": 0.003417863242828486, + "learning_rate": 2.739706408879341e-06, + "loss": 0.0, + "step": 11699 + }, + { + "epoch": 0.7540117290713411, + "grad_norm": 3.090706308481155e-05, + "learning_rate": 2.7389903329752954e-06, + "loss": 0.0, + "step": 11700 + }, + { + "epoch": 0.7540761745182702, + "grad_norm": 6.625108551354473e-05, + "learning_rate": 2.73827425707125e-06, + "loss": 0.0, + "step": 11701 + }, + { + "epoch": 0.7541406199651994, + "grad_norm": 0.00021590261960305907, + "learning_rate": 2.7375581811672044e-06, + "loss": 0.0, + "step": 11702 + }, + { + "epoch": 0.7542050654121286, + "grad_norm": 0.0006234648806642372, + "learning_rate": 2.7368421052631583e-06, + "loss": 0.0, + "step": 11703 + }, + { + "epoch": 0.7542695108590578, + "grad_norm": 0.0071332380691034, + "learning_rate": 2.7361260293591126e-06, + "loss": 0.0, + "step": 11704 + }, + { + "epoch": 0.754333956305987, + "grad_norm": 1.2562028050453755, + "learning_rate": 2.7354099534550664e-06, + "loss": 0.0093, + "step": 11705 + }, + { + "epoch": 0.7543984017529162, + "grad_norm": 0.0029726964662291043, + "learning_rate": 2.7346938775510207e-06, + "loss": 0.0, + "step": 11706 + }, + { + "epoch": 0.7544628471998454, + "grad_norm": 0.014132551918677235, + "learning_rate": 2.7339778016469746e-06, + "loss": 0.0002, + "step": 11707 + }, + { + "epoch": 0.7545272926467745, + "grad_norm": 0.001203151290063077, + "learning_rate": 2.733261725742929e-06, + "loss": 0.0, + "step": 11708 + }, + { + "epoch": 0.7545917380937037, + "grad_norm": 0.009634512422446848, + "learning_rate": 2.732545649838883e-06, + "loss": 0.0001, + "step": 11709 + }, + { + "epoch": 0.7546561835406329, + "grad_norm": 0.16926328691142287, + "learning_rate": 2.731829573934837e-06, + "loss": 0.0003, + "step": 11710 + }, + { + "epoch": 0.754720628987562, + "grad_norm": 0.021506676559323645, + "learning_rate": 2.7311134980307914e-06, + "loss": 0.0001, + "step": 11711 + }, + { + "epoch": 0.7547850744344912, + "grad_norm": 0.12201709350159476, + "learning_rate": 2.7303974221267453e-06, + "loss": 0.0001, + "step": 11712 + }, + { + "epoch": 0.7548495198814203, + "grad_norm": 0.010025205131539256, + "learning_rate": 2.7296813462227e-06, + "loss": 0.0001, + "step": 11713 + }, + { + "epoch": 0.7549139653283495, + "grad_norm": 0.00034131962690424583, + "learning_rate": 2.7289652703186543e-06, + "loss": 0.0, + "step": 11714 + }, + { + "epoch": 0.7549784107752787, + "grad_norm": 0.0005576713005746142, + "learning_rate": 2.728249194414608e-06, + "loss": 0.0, + "step": 11715 + }, + { + "epoch": 0.7550428562222079, + "grad_norm": 0.00021151105237174136, + "learning_rate": 2.7275331185105625e-06, + "loss": 0.0, + "step": 11716 + }, + { + "epoch": 0.7551073016691371, + "grad_norm": 0.0005511650236481126, + "learning_rate": 2.7268170426065167e-06, + "loss": 0.0, + "step": 11717 + }, + { + "epoch": 0.7551717471160663, + "grad_norm": 0.0005032934039541653, + "learning_rate": 2.7261009667024706e-06, + "loss": 0.0, + "step": 11718 + }, + { + "epoch": 0.7552361925629955, + "grad_norm": 0.004268815835783067, + "learning_rate": 2.725384890798425e-06, + "loss": 0.0, + "step": 11719 + }, + { + "epoch": 0.7553006380099246, + "grad_norm": 0.029657345675551724, + "learning_rate": 2.724668814894379e-06, + "loss": 0.0001, + "step": 11720 + }, + { + "epoch": 0.7553650834568538, + "grad_norm": 0.00019803287793178183, + "learning_rate": 2.723952738990333e-06, + "loss": 0.0, + "step": 11721 + }, + { + "epoch": 0.7554295289037829, + "grad_norm": 0.017483873308816737, + "learning_rate": 2.7232366630862874e-06, + "loss": 0.0001, + "step": 11722 + }, + { + "epoch": 0.7554939743507121, + "grad_norm": 0.0002098614959974441, + "learning_rate": 2.7225205871822413e-06, + "loss": 0.0, + "step": 11723 + }, + { + "epoch": 0.7555584197976413, + "grad_norm": 0.0066547328197811, + "learning_rate": 2.7218045112781956e-06, + "loss": 0.0, + "step": 11724 + }, + { + "epoch": 0.7556228652445705, + "grad_norm": 0.00320167998285512, + "learning_rate": 2.7210884353741503e-06, + "loss": 0.0, + "step": 11725 + }, + { + "epoch": 0.7556873106914996, + "grad_norm": 0.00010177039287106444, + "learning_rate": 2.720372359470104e-06, + "loss": 0.0, + "step": 11726 + }, + { + "epoch": 0.7557517561384288, + "grad_norm": 0.00154407389412439, + "learning_rate": 2.7196562835660585e-06, + "loss": 0.0, + "step": 11727 + }, + { + "epoch": 0.755816201585358, + "grad_norm": 0.0033565564377196987, + "learning_rate": 2.7189402076620123e-06, + "loss": 0.0, + "step": 11728 + }, + { + "epoch": 0.7558806470322872, + "grad_norm": 0.1146090764871986, + "learning_rate": 2.7182241317579666e-06, + "loss": 0.0001, + "step": 11729 + }, + { + "epoch": 0.7559450924792164, + "grad_norm": 0.0142495845011723, + "learning_rate": 2.717508055853921e-06, + "loss": 0.0, + "step": 11730 + }, + { + "epoch": 0.7560095379261456, + "grad_norm": 0.028604510943794178, + "learning_rate": 2.716791979949875e-06, + "loss": 0.0, + "step": 11731 + }, + { + "epoch": 0.7560739833730747, + "grad_norm": 0.0015702490887233036, + "learning_rate": 2.716075904045829e-06, + "loss": 0.0, + "step": 11732 + }, + { + "epoch": 0.7561384288200038, + "grad_norm": 0.0037016426026286998, + "learning_rate": 2.715359828141783e-06, + "loss": 0.0, + "step": 11733 + }, + { + "epoch": 0.756202874266933, + "grad_norm": 0.05622580946870829, + "learning_rate": 2.7146437522377373e-06, + "loss": 0.0016, + "step": 11734 + }, + { + "epoch": 0.7562673197138622, + "grad_norm": 0.016572480640073047, + "learning_rate": 2.713927676333691e-06, + "loss": 0.0001, + "step": 11735 + }, + { + "epoch": 0.7563317651607914, + "grad_norm": 0.011137781346677419, + "learning_rate": 2.7132116004296455e-06, + "loss": 0.0, + "step": 11736 + }, + { + "epoch": 0.7563962106077206, + "grad_norm": 0.05387597953175312, + "learning_rate": 2.7124955245256e-06, + "loss": 0.0017, + "step": 11737 + }, + { + "epoch": 0.7564606560546497, + "grad_norm": 0.007463667476841625, + "learning_rate": 2.7117794486215545e-06, + "loss": 0.0, + "step": 11738 + }, + { + "epoch": 0.7565251015015789, + "grad_norm": 0.006396773303127624, + "learning_rate": 2.7110633727175084e-06, + "loss": 0.0, + "step": 11739 + }, + { + "epoch": 0.7565895469485081, + "grad_norm": 0.21658050634270629, + "learning_rate": 2.7103472968134627e-06, + "loss": 0.0003, + "step": 11740 + }, + { + "epoch": 0.7566539923954373, + "grad_norm": 0.009636629385455547, + "learning_rate": 2.7096312209094165e-06, + "loss": 0.0, + "step": 11741 + }, + { + "epoch": 0.7567184378423665, + "grad_norm": 0.003825152018230289, + "learning_rate": 2.708915145005371e-06, + "loss": 0.0, + "step": 11742 + }, + { + "epoch": 0.7567828832892957, + "grad_norm": 0.003434779293330581, + "learning_rate": 2.7081990691013247e-06, + "loss": 0.0, + "step": 11743 + }, + { + "epoch": 0.7568473287362247, + "grad_norm": 0.005043666369474453, + "learning_rate": 2.707482993197279e-06, + "loss": 0.0, + "step": 11744 + }, + { + "epoch": 0.7569117741831539, + "grad_norm": 0.20329336378052815, + "learning_rate": 2.7067669172932333e-06, + "loss": 0.0007, + "step": 11745 + }, + { + "epoch": 0.7569762196300831, + "grad_norm": 0.08628995642796942, + "learning_rate": 2.706050841389187e-06, + "loss": 0.0001, + "step": 11746 + }, + { + "epoch": 0.7570406650770123, + "grad_norm": 0.0038641870067376086, + "learning_rate": 2.7053347654851415e-06, + "loss": 0.0, + "step": 11747 + }, + { + "epoch": 0.7571051105239415, + "grad_norm": 0.00018371592263419093, + "learning_rate": 2.704618689581096e-06, + "loss": 0.0, + "step": 11748 + }, + { + "epoch": 0.7571695559708707, + "grad_norm": 0.15424518081402985, + "learning_rate": 2.70390261367705e-06, + "loss": 0.002, + "step": 11749 + }, + { + "epoch": 0.7572340014177998, + "grad_norm": 0.013526881003292895, + "learning_rate": 2.7031865377730044e-06, + "loss": 0.0, + "step": 11750 + }, + { + "epoch": 0.757298446864729, + "grad_norm": 0.12452717105498468, + "learning_rate": 2.7024704618689582e-06, + "loss": 0.0019, + "step": 11751 + }, + { + "epoch": 0.7573628923116582, + "grad_norm": 0.003727796039911149, + "learning_rate": 2.7017543859649125e-06, + "loss": 0.0, + "step": 11752 + }, + { + "epoch": 0.7574273377585874, + "grad_norm": 0.0013507419381989824, + "learning_rate": 2.701038310060867e-06, + "loss": 0.0, + "step": 11753 + }, + { + "epoch": 0.7574917832055166, + "grad_norm": 0.003996855090206607, + "learning_rate": 2.7003222341568207e-06, + "loss": 0.0, + "step": 11754 + }, + { + "epoch": 0.7575562286524457, + "grad_norm": 0.1571776864987052, + "learning_rate": 2.699606158252775e-06, + "loss": 0.0003, + "step": 11755 + }, + { + "epoch": 0.7576206740993748, + "grad_norm": 0.012616492852407405, + "learning_rate": 2.698890082348729e-06, + "loss": 0.0, + "step": 11756 + }, + { + "epoch": 0.757685119546304, + "grad_norm": 0.0265738536170251, + "learning_rate": 2.698174006444683e-06, + "loss": 0.0, + "step": 11757 + }, + { + "epoch": 0.7577495649932332, + "grad_norm": 0.001051202791457292, + "learning_rate": 2.6974579305406375e-06, + "loss": 0.0, + "step": 11758 + }, + { + "epoch": 0.7578140104401624, + "grad_norm": 0.009863241518408, + "learning_rate": 2.6967418546365914e-06, + "loss": 0.0001, + "step": 11759 + }, + { + "epoch": 0.7578784558870916, + "grad_norm": 0.00017777405807486947, + "learning_rate": 2.696025778732546e-06, + "loss": 0.0, + "step": 11760 + }, + { + "epoch": 0.7579429013340208, + "grad_norm": 0.0215673841155166, + "learning_rate": 2.6953097028285004e-06, + "loss": 0.0, + "step": 11761 + }, + { + "epoch": 0.7580073467809499, + "grad_norm": 0.00022594751348228815, + "learning_rate": 2.6945936269244543e-06, + "loss": 0.0, + "step": 11762 + }, + { + "epoch": 0.7580717922278791, + "grad_norm": 0.03331891898553955, + "learning_rate": 2.6938775510204086e-06, + "loss": 0.0001, + "step": 11763 + }, + { + "epoch": 0.7581362376748083, + "grad_norm": 0.001087133402002619, + "learning_rate": 2.6931614751163624e-06, + "loss": 0.0, + "step": 11764 + }, + { + "epoch": 0.7582006831217375, + "grad_norm": 1.1937595826408476, + "learning_rate": 2.6924453992123167e-06, + "loss": 0.0228, + "step": 11765 + }, + { + "epoch": 0.7582651285686666, + "grad_norm": 2.6517257757050547, + "learning_rate": 2.691729323308271e-06, + "loss": 0.0089, + "step": 11766 + }, + { + "epoch": 0.7583295740155958, + "grad_norm": 0.0009098672745745888, + "learning_rate": 2.691013247404225e-06, + "loss": 0.0, + "step": 11767 + }, + { + "epoch": 0.7583940194625249, + "grad_norm": 0.0005929477012532038, + "learning_rate": 2.690297171500179e-06, + "loss": 0.0, + "step": 11768 + }, + { + "epoch": 0.7584584649094541, + "grad_norm": 0.04149749930801474, + "learning_rate": 2.689581095596133e-06, + "loss": 0.0, + "step": 11769 + }, + { + "epoch": 0.7585229103563833, + "grad_norm": 0.0042939977993353496, + "learning_rate": 2.6888650196920874e-06, + "loss": 0.0, + "step": 11770 + }, + { + "epoch": 0.7585873558033125, + "grad_norm": 0.000777963915366081, + "learning_rate": 2.6881489437880417e-06, + "loss": 0.0, + "step": 11771 + }, + { + "epoch": 0.7586518012502417, + "grad_norm": 0.0033128294424841676, + "learning_rate": 2.687432867883996e-06, + "loss": 0.0, + "step": 11772 + }, + { + "epoch": 0.7587162466971709, + "grad_norm": 0.0009968778887012326, + "learning_rate": 2.6867167919799503e-06, + "loss": 0.0, + "step": 11773 + }, + { + "epoch": 0.7587806921441, + "grad_norm": 0.008245827496948636, + "learning_rate": 2.6860007160759046e-06, + "loss": 0.0001, + "step": 11774 + }, + { + "epoch": 0.7588451375910292, + "grad_norm": 0.006805220256750272, + "learning_rate": 2.6852846401718585e-06, + "loss": 0.0001, + "step": 11775 + }, + { + "epoch": 0.7589095830379584, + "grad_norm": 0.006761178031938303, + "learning_rate": 2.6845685642678127e-06, + "loss": 0.0, + "step": 11776 + }, + { + "epoch": 0.7589740284848876, + "grad_norm": 0.24914090247152243, + "learning_rate": 2.6838524883637666e-06, + "loss": 0.0019, + "step": 11777 + }, + { + "epoch": 0.7590384739318167, + "grad_norm": 0.0001379510648379672, + "learning_rate": 2.683136412459721e-06, + "loss": 0.0, + "step": 11778 + }, + { + "epoch": 0.7591029193787459, + "grad_norm": 0.49096497583264964, + "learning_rate": 2.6824203365556752e-06, + "loss": 0.0012, + "step": 11779 + }, + { + "epoch": 0.759167364825675, + "grad_norm": 0.010198572628584911, + "learning_rate": 2.681704260651629e-06, + "loss": 0.0, + "step": 11780 + }, + { + "epoch": 0.7592318102726042, + "grad_norm": 0.02135744720818544, + "learning_rate": 2.6809881847475834e-06, + "loss": 0.0, + "step": 11781 + }, + { + "epoch": 0.7592962557195334, + "grad_norm": 0.0610014097381115, + "learning_rate": 2.6802721088435373e-06, + "loss": 0.0001, + "step": 11782 + }, + { + "epoch": 0.7593607011664626, + "grad_norm": 0.003880145932573114, + "learning_rate": 2.679556032939492e-06, + "loss": 0.0, + "step": 11783 + }, + { + "epoch": 0.7594251466133918, + "grad_norm": 0.00020370923103518696, + "learning_rate": 2.6788399570354463e-06, + "loss": 0.0, + "step": 11784 + }, + { + "epoch": 0.759489592060321, + "grad_norm": 0.0014196499306884838, + "learning_rate": 2.6781238811314e-06, + "loss": 0.0, + "step": 11785 + }, + { + "epoch": 0.7595540375072501, + "grad_norm": 0.0004107186318848994, + "learning_rate": 2.6774078052273545e-06, + "loss": 0.0, + "step": 11786 + }, + { + "epoch": 0.7596184829541793, + "grad_norm": 0.004399499841082977, + "learning_rate": 2.6766917293233088e-06, + "loss": 0.0, + "step": 11787 + }, + { + "epoch": 0.7596829284011085, + "grad_norm": 0.043867588185750386, + "learning_rate": 2.6759756534192626e-06, + "loss": 0.0004, + "step": 11788 + }, + { + "epoch": 0.7597473738480376, + "grad_norm": 0.0009829067833408381, + "learning_rate": 2.675259577515217e-06, + "loss": 0.0, + "step": 11789 + }, + { + "epoch": 0.7598118192949668, + "grad_norm": 0.23802523069820278, + "learning_rate": 2.674543501611171e-06, + "loss": 0.001, + "step": 11790 + }, + { + "epoch": 0.759876264741896, + "grad_norm": 0.0033577163553118712, + "learning_rate": 2.673827425707125e-06, + "loss": 0.0, + "step": 11791 + }, + { + "epoch": 0.7599407101888251, + "grad_norm": 0.02319084706000951, + "learning_rate": 2.673111349803079e-06, + "loss": 0.0001, + "step": 11792 + }, + { + "epoch": 0.7600051556357543, + "grad_norm": 0.056277099181222555, + "learning_rate": 2.6723952738990333e-06, + "loss": 0.0001, + "step": 11793 + }, + { + "epoch": 0.7600696010826835, + "grad_norm": 0.00015177896495238968, + "learning_rate": 2.6716791979949876e-06, + "loss": 0.0, + "step": 11794 + }, + { + "epoch": 0.7601340465296127, + "grad_norm": 0.0015311593093557537, + "learning_rate": 2.6709631220909423e-06, + "loss": 0.0, + "step": 11795 + }, + { + "epoch": 0.7601984919765419, + "grad_norm": 0.021320862762892245, + "learning_rate": 2.670247046186896e-06, + "loss": 0.0, + "step": 11796 + }, + { + "epoch": 0.7602629374234711, + "grad_norm": 0.031283424056328626, + "learning_rate": 2.6695309702828505e-06, + "loss": 0.0001, + "step": 11797 + }, + { + "epoch": 0.7603273828704002, + "grad_norm": 0.23894887948035884, + "learning_rate": 2.6688148943788044e-06, + "loss": 0.0002, + "step": 11798 + }, + { + "epoch": 0.7603918283173294, + "grad_norm": 0.04260379057087094, + "learning_rate": 2.6680988184747587e-06, + "loss": 0.0003, + "step": 11799 + }, + { + "epoch": 0.7604562737642585, + "grad_norm": 9.578836850924028e-05, + "learning_rate": 2.6673827425707125e-06, + "loss": 0.0, + "step": 11800 + }, + { + "epoch": 0.7605207192111877, + "grad_norm": 0.006114623869051579, + "learning_rate": 2.666666666666667e-06, + "loss": 0.0, + "step": 11801 + }, + { + "epoch": 0.7605851646581169, + "grad_norm": 0.0006305918796649802, + "learning_rate": 2.665950590762621e-06, + "loss": 0.0, + "step": 11802 + }, + { + "epoch": 0.7606496101050461, + "grad_norm": 0.0018584589838389813, + "learning_rate": 2.665234514858575e-06, + "loss": 0.0, + "step": 11803 + }, + { + "epoch": 0.7607140555519752, + "grad_norm": 0.0934540428233906, + "learning_rate": 2.6645184389545293e-06, + "loss": 0.0003, + "step": 11804 + }, + { + "epoch": 0.7607785009989044, + "grad_norm": 0.017028167671529763, + "learning_rate": 2.663802363050483e-06, + "loss": 0.0, + "step": 11805 + }, + { + "epoch": 0.7608429464458336, + "grad_norm": 0.3192495760082157, + "learning_rate": 2.6630862871464375e-06, + "loss": 0.004, + "step": 11806 + }, + { + "epoch": 0.7609073918927628, + "grad_norm": 0.000859402811223285, + "learning_rate": 2.662370211242392e-06, + "loss": 0.0, + "step": 11807 + }, + { + "epoch": 0.760971837339692, + "grad_norm": 0.001708086676881105, + "learning_rate": 2.661654135338346e-06, + "loss": 0.0, + "step": 11808 + }, + { + "epoch": 0.7610362827866212, + "grad_norm": 0.0002548697259070124, + "learning_rate": 2.6609380594343004e-06, + "loss": 0.0, + "step": 11809 + }, + { + "epoch": 0.7611007282335503, + "grad_norm": 0.032245765729999896, + "learning_rate": 2.6602219835302547e-06, + "loss": 0.0001, + "step": 11810 + }, + { + "epoch": 0.7611651736804794, + "grad_norm": 0.07445310934893193, + "learning_rate": 2.6595059076262085e-06, + "loss": 0.0001, + "step": 11811 + }, + { + "epoch": 0.7612296191274086, + "grad_norm": 0.17762862192819528, + "learning_rate": 2.658789831722163e-06, + "loss": 0.0005, + "step": 11812 + }, + { + "epoch": 0.7612940645743378, + "grad_norm": 0.011117645753348048, + "learning_rate": 2.6580737558181167e-06, + "loss": 0.0001, + "step": 11813 + }, + { + "epoch": 0.761358510021267, + "grad_norm": 0.34508473103897397, + "learning_rate": 2.657357679914071e-06, + "loss": 0.0005, + "step": 11814 + }, + { + "epoch": 0.7614229554681962, + "grad_norm": 0.1738423116659773, + "learning_rate": 2.6566416040100253e-06, + "loss": 0.0006, + "step": 11815 + }, + { + "epoch": 0.7614874009151253, + "grad_norm": 0.005119695379919854, + "learning_rate": 2.655925528105979e-06, + "loss": 0.0, + "step": 11816 + }, + { + "epoch": 0.7615518463620545, + "grad_norm": 1.4379237835705299, + "learning_rate": 2.6552094522019335e-06, + "loss": 0.0012, + "step": 11817 + }, + { + "epoch": 0.7616162918089837, + "grad_norm": 0.0022575272986593542, + "learning_rate": 2.6544933762978882e-06, + "loss": 0.0, + "step": 11818 + }, + { + "epoch": 0.7616807372559129, + "grad_norm": 0.5371539565571194, + "learning_rate": 2.653777300393842e-06, + "loss": 0.0042, + "step": 11819 + }, + { + "epoch": 0.7617451827028421, + "grad_norm": 0.00024939560620323515, + "learning_rate": 2.6530612244897964e-06, + "loss": 0.0, + "step": 11820 + }, + { + "epoch": 0.7618096281497713, + "grad_norm": 0.0006033900395677962, + "learning_rate": 2.6523451485857503e-06, + "loss": 0.0, + "step": 11821 + }, + { + "epoch": 0.7618740735967003, + "grad_norm": 8.636202424418066e-05, + "learning_rate": 2.6516290726817046e-06, + "loss": 0.0, + "step": 11822 + }, + { + "epoch": 0.7619385190436295, + "grad_norm": 0.00012699654655637103, + "learning_rate": 2.650912996777659e-06, + "loss": 0.0, + "step": 11823 + }, + { + "epoch": 0.7620029644905587, + "grad_norm": 0.001349256260770123, + "learning_rate": 2.6501969208736127e-06, + "loss": 0.0, + "step": 11824 + }, + { + "epoch": 0.7620674099374879, + "grad_norm": 0.00020679210237969375, + "learning_rate": 2.649480844969567e-06, + "loss": 0.0, + "step": 11825 + }, + { + "epoch": 0.7621318553844171, + "grad_norm": 0.002953117774268297, + "learning_rate": 2.648764769065521e-06, + "loss": 0.0, + "step": 11826 + }, + { + "epoch": 0.7621963008313463, + "grad_norm": 0.003053642823270433, + "learning_rate": 2.648048693161475e-06, + "loss": 0.0, + "step": 11827 + }, + { + "epoch": 0.7622607462782754, + "grad_norm": 0.01158112876131613, + "learning_rate": 2.6473326172574295e-06, + "loss": 0.0, + "step": 11828 + }, + { + "epoch": 0.7623251917252046, + "grad_norm": 0.00046034852405999084, + "learning_rate": 2.6466165413533834e-06, + "loss": 0.0, + "step": 11829 + }, + { + "epoch": 0.7623896371721338, + "grad_norm": 0.0006405258341291647, + "learning_rate": 2.645900465449338e-06, + "loss": 0.0, + "step": 11830 + }, + { + "epoch": 0.762454082619063, + "grad_norm": 0.0016408110038443101, + "learning_rate": 2.6451843895452924e-06, + "loss": 0.0, + "step": 11831 + }, + { + "epoch": 0.7625185280659922, + "grad_norm": 0.001069109636870012, + "learning_rate": 2.6444683136412463e-06, + "loss": 0.0, + "step": 11832 + }, + { + "epoch": 0.7625829735129213, + "grad_norm": 0.015143400205832954, + "learning_rate": 2.6437522377372006e-06, + "loss": 0.0, + "step": 11833 + }, + { + "epoch": 0.7626474189598504, + "grad_norm": 0.0010125760276858443, + "learning_rate": 2.6430361618331545e-06, + "loss": 0.0, + "step": 11834 + }, + { + "epoch": 0.7627118644067796, + "grad_norm": 0.022163290000592962, + "learning_rate": 2.6423200859291087e-06, + "loss": 0.0016, + "step": 11835 + }, + { + "epoch": 0.7627763098537088, + "grad_norm": 0.7956127941337039, + "learning_rate": 2.641604010025063e-06, + "loss": 0.0027, + "step": 11836 + }, + { + "epoch": 0.762840755300638, + "grad_norm": 0.16291320241804333, + "learning_rate": 2.640887934121017e-06, + "loss": 0.0019, + "step": 11837 + }, + { + "epoch": 0.7629052007475672, + "grad_norm": 0.00014900202124249368, + "learning_rate": 2.6401718582169712e-06, + "loss": 0.0, + "step": 11838 + }, + { + "epoch": 0.7629696461944964, + "grad_norm": 0.0017136274959623572, + "learning_rate": 2.639455782312925e-06, + "loss": 0.0, + "step": 11839 + }, + { + "epoch": 0.7630340916414255, + "grad_norm": 0.05000114029835755, + "learning_rate": 2.6387397064088794e-06, + "loss": 0.0, + "step": 11840 + }, + { + "epoch": 0.7630985370883547, + "grad_norm": 7.336247695839558e-05, + "learning_rate": 2.6380236305048333e-06, + "loss": 0.0, + "step": 11841 + }, + { + "epoch": 0.7631629825352839, + "grad_norm": 0.6437919769674801, + "learning_rate": 2.637307554600788e-06, + "loss": 0.0017, + "step": 11842 + }, + { + "epoch": 0.7632274279822131, + "grad_norm": 0.009178616250083763, + "learning_rate": 2.6365914786967423e-06, + "loss": 0.0, + "step": 11843 + }, + { + "epoch": 0.7632918734291422, + "grad_norm": 0.0001536315240165565, + "learning_rate": 2.6358754027926966e-06, + "loss": 0.0, + "step": 11844 + }, + { + "epoch": 0.7633563188760714, + "grad_norm": 0.04400854197836655, + "learning_rate": 2.6351593268886505e-06, + "loss": 0.0, + "step": 11845 + }, + { + "epoch": 0.7634207643230005, + "grad_norm": 0.05531086042366422, + "learning_rate": 2.6344432509846048e-06, + "loss": 0.0001, + "step": 11846 + }, + { + "epoch": 0.7634852097699297, + "grad_norm": 0.00016720311908218334, + "learning_rate": 2.6337271750805586e-06, + "loss": 0.0, + "step": 11847 + }, + { + "epoch": 0.7635496552168589, + "grad_norm": 0.0006551267521307725, + "learning_rate": 2.633011099176513e-06, + "loss": 0.0, + "step": 11848 + }, + { + "epoch": 0.7636141006637881, + "grad_norm": 6.0717022634338646e-05, + "learning_rate": 2.632295023272467e-06, + "loss": 0.0, + "step": 11849 + }, + { + "epoch": 0.7636785461107173, + "grad_norm": 0.010353891275486336, + "learning_rate": 2.631578947368421e-06, + "loss": 0.0, + "step": 11850 + }, + { + "epoch": 0.7637429915576465, + "grad_norm": 0.0015369640711080093, + "learning_rate": 2.6308628714643754e-06, + "loss": 0.0, + "step": 11851 + }, + { + "epoch": 0.7638074370045757, + "grad_norm": 0.0003941849795398379, + "learning_rate": 2.6301467955603293e-06, + "loss": 0.0, + "step": 11852 + }, + { + "epoch": 0.7638718824515048, + "grad_norm": 0.0003463927405450879, + "learning_rate": 2.629430719656284e-06, + "loss": 0.0, + "step": 11853 + }, + { + "epoch": 0.763936327898434, + "grad_norm": 0.0008114464169782369, + "learning_rate": 2.6287146437522383e-06, + "loss": 0.0, + "step": 11854 + }, + { + "epoch": 0.7640007733453632, + "grad_norm": 6.755891019105164e-05, + "learning_rate": 2.627998567848192e-06, + "loss": 0.0, + "step": 11855 + }, + { + "epoch": 0.7640652187922923, + "grad_norm": 0.0011958914319152606, + "learning_rate": 2.6272824919441465e-06, + "loss": 0.0, + "step": 11856 + }, + { + "epoch": 0.7641296642392215, + "grad_norm": 0.0038164852243763306, + "learning_rate": 2.6265664160401004e-06, + "loss": 0.0, + "step": 11857 + }, + { + "epoch": 0.7641941096861506, + "grad_norm": 0.00030386703955184003, + "learning_rate": 2.6258503401360547e-06, + "loss": 0.0, + "step": 11858 + }, + { + "epoch": 0.7642585551330798, + "grad_norm": 0.00043240885358736416, + "learning_rate": 2.625134264232009e-06, + "loss": 0.0, + "step": 11859 + }, + { + "epoch": 0.764323000580009, + "grad_norm": 0.002957734448481022, + "learning_rate": 2.624418188327963e-06, + "loss": 0.0, + "step": 11860 + }, + { + "epoch": 0.7643874460269382, + "grad_norm": 0.0002885876430917981, + "learning_rate": 2.623702112423917e-06, + "loss": 0.0, + "step": 11861 + }, + { + "epoch": 0.7644518914738674, + "grad_norm": 0.005890731307086754, + "learning_rate": 2.622986036519871e-06, + "loss": 0.0, + "step": 11862 + }, + { + "epoch": 0.7645163369207966, + "grad_norm": 6.651252772496995e-05, + "learning_rate": 2.6222699606158253e-06, + "loss": 0.0, + "step": 11863 + }, + { + "epoch": 0.7645807823677258, + "grad_norm": 0.09255987482417974, + "learning_rate": 2.6215538847117796e-06, + "loss": 0.0017, + "step": 11864 + }, + { + "epoch": 0.7646452278146549, + "grad_norm": 0.0030729239305532265, + "learning_rate": 2.620837808807734e-06, + "loss": 0.0, + "step": 11865 + }, + { + "epoch": 0.7647096732615841, + "grad_norm": 0.0004729470478401344, + "learning_rate": 2.620121732903688e-06, + "loss": 0.0, + "step": 11866 + }, + { + "epoch": 0.7647741187085132, + "grad_norm": 0.00031197574419735606, + "learning_rate": 2.6194056569996425e-06, + "loss": 0.0, + "step": 11867 + }, + { + "epoch": 0.7648385641554424, + "grad_norm": 0.554805689295591, + "learning_rate": 2.6186895810955964e-06, + "loss": 0.0024, + "step": 11868 + }, + { + "epoch": 0.7649030096023716, + "grad_norm": 0.0005885395825914211, + "learning_rate": 2.6179735051915507e-06, + "loss": 0.0, + "step": 11869 + }, + { + "epoch": 0.7649674550493007, + "grad_norm": 0.0012661613214064538, + "learning_rate": 2.6172574292875045e-06, + "loss": 0.0, + "step": 11870 + }, + { + "epoch": 0.7650319004962299, + "grad_norm": 8.82429252709678e-05, + "learning_rate": 2.616541353383459e-06, + "loss": 0.0, + "step": 11871 + }, + { + "epoch": 0.7650963459431591, + "grad_norm": 0.1790998115578762, + "learning_rate": 2.615825277479413e-06, + "loss": 0.0002, + "step": 11872 + }, + { + "epoch": 0.7651607913900883, + "grad_norm": 0.08301635025098202, + "learning_rate": 2.615109201575367e-06, + "loss": 0.0006, + "step": 11873 + }, + { + "epoch": 0.7652252368370175, + "grad_norm": 0.0008072109844447568, + "learning_rate": 2.6143931256713213e-06, + "loss": 0.0, + "step": 11874 + }, + { + "epoch": 0.7652896822839467, + "grad_norm": 5.068903277494801e-05, + "learning_rate": 2.613677049767275e-06, + "loss": 0.0, + "step": 11875 + }, + { + "epoch": 0.7653541277308759, + "grad_norm": 0.003124247831575227, + "learning_rate": 2.6129609738632295e-06, + "loss": 0.0, + "step": 11876 + }, + { + "epoch": 0.765418573177805, + "grad_norm": 0.00019207895262184278, + "learning_rate": 2.6122448979591842e-06, + "loss": 0.0, + "step": 11877 + }, + { + "epoch": 0.7654830186247341, + "grad_norm": 0.2378545285856134, + "learning_rate": 2.611528822055138e-06, + "loss": 0.0004, + "step": 11878 + }, + { + "epoch": 0.7655474640716633, + "grad_norm": 0.008514530768873717, + "learning_rate": 2.6108127461510924e-06, + "loss": 0.0, + "step": 11879 + }, + { + "epoch": 0.7656119095185925, + "grad_norm": 0.00046250907666416433, + "learning_rate": 2.6100966702470467e-06, + "loss": 0.0, + "step": 11880 + }, + { + "epoch": 0.7656763549655217, + "grad_norm": 0.13777567500132695, + "learning_rate": 2.6093805943430006e-06, + "loss": 0.0023, + "step": 11881 + }, + { + "epoch": 0.7657408004124509, + "grad_norm": 0.0003629261576732833, + "learning_rate": 2.608664518438955e-06, + "loss": 0.0, + "step": 11882 + }, + { + "epoch": 0.76580524585938, + "grad_norm": 0.0009412332498742197, + "learning_rate": 2.6079484425349087e-06, + "loss": 0.0, + "step": 11883 + }, + { + "epoch": 0.7658696913063092, + "grad_norm": 0.00012120714432166618, + "learning_rate": 2.607232366630863e-06, + "loss": 0.0, + "step": 11884 + }, + { + "epoch": 0.7659341367532384, + "grad_norm": 0.0012524471309115777, + "learning_rate": 2.606516290726817e-06, + "loss": 0.0, + "step": 11885 + }, + { + "epoch": 0.7659985822001676, + "grad_norm": 0.0004439158991068107, + "learning_rate": 2.605800214822771e-06, + "loss": 0.0, + "step": 11886 + }, + { + "epoch": 0.7660630276470968, + "grad_norm": 0.014125048239822585, + "learning_rate": 2.6050841389187255e-06, + "loss": 0.0, + "step": 11887 + }, + { + "epoch": 0.766127473094026, + "grad_norm": 0.01923080340759084, + "learning_rate": 2.6043680630146802e-06, + "loss": 0.0, + "step": 11888 + }, + { + "epoch": 0.766191918540955, + "grad_norm": 0.00011607705395213511, + "learning_rate": 2.603651987110634e-06, + "loss": 0.0, + "step": 11889 + }, + { + "epoch": 0.7662563639878842, + "grad_norm": 0.0006579140278466091, + "learning_rate": 2.6029359112065884e-06, + "loss": 0.0, + "step": 11890 + }, + { + "epoch": 0.7663208094348134, + "grad_norm": 0.0002124806354313648, + "learning_rate": 2.6022198353025423e-06, + "loss": 0.0, + "step": 11891 + }, + { + "epoch": 0.7663852548817426, + "grad_norm": 0.0013674333354162045, + "learning_rate": 2.6015037593984966e-06, + "loss": 0.0, + "step": 11892 + }, + { + "epoch": 0.7664497003286718, + "grad_norm": 0.014675526109410085, + "learning_rate": 2.6007876834944505e-06, + "loss": 0.0001, + "step": 11893 + }, + { + "epoch": 0.766514145775601, + "grad_norm": 0.03573676645358599, + "learning_rate": 2.6000716075904047e-06, + "loss": 0.0001, + "step": 11894 + }, + { + "epoch": 0.7665785912225301, + "grad_norm": 0.0008220368009597546, + "learning_rate": 2.599355531686359e-06, + "loss": 0.0, + "step": 11895 + }, + { + "epoch": 0.7666430366694593, + "grad_norm": 0.021377558030884054, + "learning_rate": 2.598639455782313e-06, + "loss": 0.0, + "step": 11896 + }, + { + "epoch": 0.7667074821163885, + "grad_norm": 0.01966843926829134, + "learning_rate": 2.5979233798782672e-06, + "loss": 0.0001, + "step": 11897 + }, + { + "epoch": 0.7667719275633177, + "grad_norm": 0.00019038251652734773, + "learning_rate": 2.597207303974221e-06, + "loss": 0.0, + "step": 11898 + }, + { + "epoch": 0.7668363730102469, + "grad_norm": 0.002648309532818556, + "learning_rate": 2.5964912280701754e-06, + "loss": 0.0, + "step": 11899 + }, + { + "epoch": 0.766900818457176, + "grad_norm": 0.0001730522603491945, + "learning_rate": 2.59577515216613e-06, + "loss": 0.0, + "step": 11900 + }, + { + "epoch": 0.7669652639041051, + "grad_norm": 1.6432124179162014e-05, + "learning_rate": 2.595059076262084e-06, + "loss": 0.0, + "step": 11901 + }, + { + "epoch": 0.7670297093510343, + "grad_norm": 1.7618660333607035e-05, + "learning_rate": 2.5943430003580383e-06, + "loss": 0.0, + "step": 11902 + }, + { + "epoch": 0.7670941547979635, + "grad_norm": 0.03666116270808565, + "learning_rate": 2.5936269244539926e-06, + "loss": 0.0, + "step": 11903 + }, + { + "epoch": 0.7671586002448927, + "grad_norm": 0.11028554089182246, + "learning_rate": 2.5929108485499465e-06, + "loss": 0.0002, + "step": 11904 + }, + { + "epoch": 0.7672230456918219, + "grad_norm": 0.22051480791536138, + "learning_rate": 2.5921947726459008e-06, + "loss": 0.0013, + "step": 11905 + }, + { + "epoch": 0.767287491138751, + "grad_norm": 0.006515029704666616, + "learning_rate": 2.5914786967418546e-06, + "loss": 0.0, + "step": 11906 + }, + { + "epoch": 0.7673519365856802, + "grad_norm": 0.006650815831030154, + "learning_rate": 2.590762620837809e-06, + "loss": 0.0, + "step": 11907 + }, + { + "epoch": 0.7674163820326094, + "grad_norm": 0.0051438193964023025, + "learning_rate": 2.5900465449337632e-06, + "loss": 0.0, + "step": 11908 + }, + { + "epoch": 0.7674808274795386, + "grad_norm": 0.4898605593241484, + "learning_rate": 2.589330469029717e-06, + "loss": 0.0023, + "step": 11909 + }, + { + "epoch": 0.7675452729264678, + "grad_norm": 0.0006288678826158701, + "learning_rate": 2.5886143931256714e-06, + "loss": 0.0, + "step": 11910 + }, + { + "epoch": 0.7676097183733969, + "grad_norm": 0.003386373418255314, + "learning_rate": 2.5878983172216253e-06, + "loss": 0.0, + "step": 11911 + }, + { + "epoch": 0.767674163820326, + "grad_norm": 0.002668168384645223, + "learning_rate": 2.58718224131758e-06, + "loss": 0.0, + "step": 11912 + }, + { + "epoch": 0.7677386092672552, + "grad_norm": 0.001082914875932513, + "learning_rate": 2.5864661654135343e-06, + "loss": 0.0, + "step": 11913 + }, + { + "epoch": 0.7678030547141844, + "grad_norm": 0.06223376012771815, + "learning_rate": 2.585750089509488e-06, + "loss": 0.0003, + "step": 11914 + }, + { + "epoch": 0.7678675001611136, + "grad_norm": 0.31937400054402, + "learning_rate": 2.5850340136054425e-06, + "loss": 0.0019, + "step": 11915 + }, + { + "epoch": 0.7679319456080428, + "grad_norm": 0.018148016411417802, + "learning_rate": 2.5843179377013968e-06, + "loss": 0.0001, + "step": 11916 + }, + { + "epoch": 0.767996391054972, + "grad_norm": 0.0038383521751228934, + "learning_rate": 2.5836018617973507e-06, + "loss": 0.0, + "step": 11917 + }, + { + "epoch": 0.7680608365019012, + "grad_norm": 0.1600973730490817, + "learning_rate": 2.582885785893305e-06, + "loss": 0.0012, + "step": 11918 + }, + { + "epoch": 0.7681252819488303, + "grad_norm": 0.0460683108904585, + "learning_rate": 2.582169709989259e-06, + "loss": 0.0001, + "step": 11919 + }, + { + "epoch": 0.7681897273957595, + "grad_norm": 0.0016058862460527552, + "learning_rate": 2.581453634085213e-06, + "loss": 0.0, + "step": 11920 + }, + { + "epoch": 0.7682541728426887, + "grad_norm": 0.002115022812249171, + "learning_rate": 2.5807375581811674e-06, + "loss": 0.0, + "step": 11921 + }, + { + "epoch": 0.7683186182896179, + "grad_norm": 0.004045692377456239, + "learning_rate": 2.5800214822771213e-06, + "loss": 0.0, + "step": 11922 + }, + { + "epoch": 0.768383063736547, + "grad_norm": 0.0005268503339750655, + "learning_rate": 2.579305406373076e-06, + "loss": 0.0, + "step": 11923 + }, + { + "epoch": 0.7684475091834762, + "grad_norm": 0.0009562755514328496, + "learning_rate": 2.5785893304690303e-06, + "loss": 0.0, + "step": 11924 + }, + { + "epoch": 0.7685119546304053, + "grad_norm": 0.21633146350250407, + "learning_rate": 2.577873254564984e-06, + "loss": 0.0003, + "step": 11925 + }, + { + "epoch": 0.7685764000773345, + "grad_norm": 0.0018381836193263551, + "learning_rate": 2.5771571786609385e-06, + "loss": 0.0, + "step": 11926 + }, + { + "epoch": 0.7686408455242637, + "grad_norm": 0.007529660661730795, + "learning_rate": 2.5764411027568924e-06, + "loss": 0.0001, + "step": 11927 + }, + { + "epoch": 0.7687052909711929, + "grad_norm": 0.0025654502952239984, + "learning_rate": 2.5757250268528467e-06, + "loss": 0.0, + "step": 11928 + }, + { + "epoch": 0.7687697364181221, + "grad_norm": 0.0316317751952979, + "learning_rate": 2.575008950948801e-06, + "loss": 0.0002, + "step": 11929 + }, + { + "epoch": 0.7688341818650513, + "grad_norm": 0.1274536097738183, + "learning_rate": 2.574292875044755e-06, + "loss": 0.0004, + "step": 11930 + }, + { + "epoch": 0.7688986273119804, + "grad_norm": 0.0013855358441066388, + "learning_rate": 2.573576799140709e-06, + "loss": 0.0, + "step": 11931 + }, + { + "epoch": 0.7689630727589096, + "grad_norm": 0.03422381413667952, + "learning_rate": 2.572860723236663e-06, + "loss": 0.0001, + "step": 11932 + }, + { + "epoch": 0.7690275182058388, + "grad_norm": 0.00036604118401955133, + "learning_rate": 2.5721446473326173e-06, + "loss": 0.0, + "step": 11933 + }, + { + "epoch": 0.7690919636527679, + "grad_norm": 0.007119691489192297, + "learning_rate": 2.571428571428571e-06, + "loss": 0.0, + "step": 11934 + }, + { + "epoch": 0.7691564090996971, + "grad_norm": 0.0007796408203194449, + "learning_rate": 2.570712495524526e-06, + "loss": 0.0, + "step": 11935 + }, + { + "epoch": 0.7692208545466263, + "grad_norm": 0.0046414742114352535, + "learning_rate": 2.5699964196204802e-06, + "loss": 0.0, + "step": 11936 + }, + { + "epoch": 0.7692852999935554, + "grad_norm": 0.0026985613912551645, + "learning_rate": 2.5692803437164345e-06, + "loss": 0.0, + "step": 11937 + }, + { + "epoch": 0.7693497454404846, + "grad_norm": 0.015896688905246856, + "learning_rate": 2.5685642678123884e-06, + "loss": 0.0001, + "step": 11938 + }, + { + "epoch": 0.7694141908874138, + "grad_norm": 0.002225209735481927, + "learning_rate": 2.5678481919083427e-06, + "loss": 0.0, + "step": 11939 + }, + { + "epoch": 0.769478636334343, + "grad_norm": 0.000544120900602758, + "learning_rate": 2.5671321160042966e-06, + "loss": 0.0, + "step": 11940 + }, + { + "epoch": 0.7695430817812722, + "grad_norm": 0.000987951875979495, + "learning_rate": 2.566416040100251e-06, + "loss": 0.0, + "step": 11941 + }, + { + "epoch": 0.7696075272282014, + "grad_norm": 0.2139565355227848, + "learning_rate": 2.5656999641962047e-06, + "loss": 0.0011, + "step": 11942 + }, + { + "epoch": 0.7696719726751305, + "grad_norm": 0.0014181229277454124, + "learning_rate": 2.564983888292159e-06, + "loss": 0.0, + "step": 11943 + }, + { + "epoch": 0.7697364181220597, + "grad_norm": 0.0011511183313510068, + "learning_rate": 2.5642678123881133e-06, + "loss": 0.0, + "step": 11944 + }, + { + "epoch": 0.7698008635689888, + "grad_norm": 0.021332500647752366, + "learning_rate": 2.563551736484067e-06, + "loss": 0.0, + "step": 11945 + }, + { + "epoch": 0.769865309015918, + "grad_norm": 0.0006443705067154068, + "learning_rate": 2.5628356605800215e-06, + "loss": 0.0, + "step": 11946 + }, + { + "epoch": 0.7699297544628472, + "grad_norm": 0.018157607163298645, + "learning_rate": 2.5621195846759762e-06, + "loss": 0.0, + "step": 11947 + }, + { + "epoch": 0.7699941999097764, + "grad_norm": 0.0006373038105464806, + "learning_rate": 2.56140350877193e-06, + "loss": 0.0, + "step": 11948 + }, + { + "epoch": 0.7700586453567055, + "grad_norm": 0.1702311649693865, + "learning_rate": 2.5606874328678844e-06, + "loss": 0.0007, + "step": 11949 + }, + { + "epoch": 0.7701230908036347, + "grad_norm": 0.006839913357284363, + "learning_rate": 2.5599713569638383e-06, + "loss": 0.0, + "step": 11950 + }, + { + "epoch": 0.7701875362505639, + "grad_norm": 0.020449848798393098, + "learning_rate": 2.5592552810597926e-06, + "loss": 0.0, + "step": 11951 + }, + { + "epoch": 0.7702519816974931, + "grad_norm": 0.0186318491828164, + "learning_rate": 2.558539205155747e-06, + "loss": 0.0, + "step": 11952 + }, + { + "epoch": 0.7703164271444223, + "grad_norm": 0.012205018831311388, + "learning_rate": 2.5578231292517007e-06, + "loss": 0.0, + "step": 11953 + }, + { + "epoch": 0.7703808725913515, + "grad_norm": 0.014573322368295935, + "learning_rate": 2.557107053347655e-06, + "loss": 0.0001, + "step": 11954 + }, + { + "epoch": 0.7704453180382806, + "grad_norm": 0.3025764545657545, + "learning_rate": 2.556390977443609e-06, + "loss": 0.0009, + "step": 11955 + }, + { + "epoch": 0.7705097634852097, + "grad_norm": 0.003638116650934389, + "learning_rate": 2.5556749015395632e-06, + "loss": 0.0, + "step": 11956 + }, + { + "epoch": 0.7705742089321389, + "grad_norm": 0.005688319311506136, + "learning_rate": 2.5549588256355175e-06, + "loss": 0.0, + "step": 11957 + }, + { + "epoch": 0.7706386543790681, + "grad_norm": 0.00278424510278735, + "learning_rate": 2.554242749731472e-06, + "loss": 0.0, + "step": 11958 + }, + { + "epoch": 0.7707030998259973, + "grad_norm": 0.01585624886430006, + "learning_rate": 2.553526673827426e-06, + "loss": 0.0, + "step": 11959 + }, + { + "epoch": 0.7707675452729265, + "grad_norm": 0.0003486352458013042, + "learning_rate": 2.5528105979233804e-06, + "loss": 0.0, + "step": 11960 + }, + { + "epoch": 0.7708319907198556, + "grad_norm": 0.013720875341040538, + "learning_rate": 2.5520945220193343e-06, + "loss": 0.0, + "step": 11961 + }, + { + "epoch": 0.7708964361667848, + "grad_norm": 0.0011977463656281952, + "learning_rate": 2.5513784461152886e-06, + "loss": 0.0, + "step": 11962 + }, + { + "epoch": 0.770960881613714, + "grad_norm": 0.0006557547530678418, + "learning_rate": 2.5506623702112425e-06, + "loss": 0.0, + "step": 11963 + }, + { + "epoch": 0.7710253270606432, + "grad_norm": 7.640697653079052e-05, + "learning_rate": 2.5499462943071968e-06, + "loss": 0.0, + "step": 11964 + }, + { + "epoch": 0.7710897725075724, + "grad_norm": 0.0005535518755850447, + "learning_rate": 2.549230218403151e-06, + "loss": 0.0, + "step": 11965 + }, + { + "epoch": 0.7711542179545016, + "grad_norm": 0.014170017898537866, + "learning_rate": 2.548514142499105e-06, + "loss": 0.0, + "step": 11966 + }, + { + "epoch": 0.7712186634014306, + "grad_norm": 0.025082221368806706, + "learning_rate": 2.5477980665950592e-06, + "loss": 0.0, + "step": 11967 + }, + { + "epoch": 0.7712831088483598, + "grad_norm": 0.06604501701956178, + "learning_rate": 2.547081990691013e-06, + "loss": 0.0001, + "step": 11968 + }, + { + "epoch": 0.771347554295289, + "grad_norm": 0.5134007423695695, + "learning_rate": 2.5463659147869674e-06, + "loss": 0.0026, + "step": 11969 + }, + { + "epoch": 0.7714119997422182, + "grad_norm": 9.906826757261537e-05, + "learning_rate": 2.545649838882922e-06, + "loss": 0.0, + "step": 11970 + }, + { + "epoch": 0.7714764451891474, + "grad_norm": 0.0019939027732718065, + "learning_rate": 2.544933762978876e-06, + "loss": 0.0, + "step": 11971 + }, + { + "epoch": 0.7715408906360766, + "grad_norm": 0.0009953462564640401, + "learning_rate": 2.5442176870748303e-06, + "loss": 0.0, + "step": 11972 + }, + { + "epoch": 0.7716053360830057, + "grad_norm": 0.00022765940038362478, + "learning_rate": 2.5435016111707846e-06, + "loss": 0.0, + "step": 11973 + }, + { + "epoch": 0.7716697815299349, + "grad_norm": 0.0034598503765954076, + "learning_rate": 2.5427855352667385e-06, + "loss": 0.0, + "step": 11974 + }, + { + "epoch": 0.7717342269768641, + "grad_norm": 0.0015874933110641958, + "learning_rate": 2.5420694593626928e-06, + "loss": 0.0, + "step": 11975 + }, + { + "epoch": 0.7717986724237933, + "grad_norm": 0.19490331702864275, + "learning_rate": 2.5413533834586467e-06, + "loss": 0.0002, + "step": 11976 + }, + { + "epoch": 0.7718631178707225, + "grad_norm": 0.046350962259317584, + "learning_rate": 2.540637307554601e-06, + "loss": 0.0002, + "step": 11977 + }, + { + "epoch": 0.7719275633176516, + "grad_norm": 0.0029893966150884525, + "learning_rate": 2.5399212316505553e-06, + "loss": 0.0, + "step": 11978 + }, + { + "epoch": 0.7719920087645807, + "grad_norm": 0.006386238741863867, + "learning_rate": 2.539205155746509e-06, + "loss": 0.0, + "step": 11979 + }, + { + "epoch": 0.7720564542115099, + "grad_norm": 0.08005569888313953, + "learning_rate": 2.5384890798424634e-06, + "loss": 0.0001, + "step": 11980 + }, + { + "epoch": 0.7721208996584391, + "grad_norm": 0.00017401220544152146, + "learning_rate": 2.5377730039384173e-06, + "loss": 0.0, + "step": 11981 + }, + { + "epoch": 0.7721853451053683, + "grad_norm": 0.00920950918241963, + "learning_rate": 2.537056928034372e-06, + "loss": 0.0001, + "step": 11982 + }, + { + "epoch": 0.7722497905522975, + "grad_norm": 4.005209131921557e-05, + "learning_rate": 2.5363408521303263e-06, + "loss": 0.0, + "step": 11983 + }, + { + "epoch": 0.7723142359992267, + "grad_norm": 0.0041205538081291486, + "learning_rate": 2.53562477622628e-06, + "loss": 0.0, + "step": 11984 + }, + { + "epoch": 0.7723786814461558, + "grad_norm": 0.21781045231342352, + "learning_rate": 2.5349087003222345e-06, + "loss": 0.0006, + "step": 11985 + }, + { + "epoch": 0.772443126893085, + "grad_norm": 0.4514950507189504, + "learning_rate": 2.534192624418189e-06, + "loss": 0.0019, + "step": 11986 + }, + { + "epoch": 0.7725075723400142, + "grad_norm": 0.0022551838848633467, + "learning_rate": 2.5334765485141427e-06, + "loss": 0.0, + "step": 11987 + }, + { + "epoch": 0.7725720177869434, + "grad_norm": 0.03850665996937269, + "learning_rate": 2.532760472610097e-06, + "loss": 0.0, + "step": 11988 + }, + { + "epoch": 0.7726364632338725, + "grad_norm": 0.003876283490998469, + "learning_rate": 2.532044396706051e-06, + "loss": 0.0, + "step": 11989 + }, + { + "epoch": 0.7727009086808017, + "grad_norm": 0.001072320260391634, + "learning_rate": 2.531328320802005e-06, + "loss": 0.0, + "step": 11990 + }, + { + "epoch": 0.7727653541277308, + "grad_norm": 0.0027809967217183487, + "learning_rate": 2.530612244897959e-06, + "loss": 0.0, + "step": 11991 + }, + { + "epoch": 0.77282979957466, + "grad_norm": 8.871064129087865e-05, + "learning_rate": 2.5298961689939133e-06, + "loss": 0.0, + "step": 11992 + }, + { + "epoch": 0.7728942450215892, + "grad_norm": 0.004071476358003262, + "learning_rate": 2.529180093089868e-06, + "loss": 0.0, + "step": 11993 + }, + { + "epoch": 0.7729586904685184, + "grad_norm": 0.0002506879102149424, + "learning_rate": 2.5284640171858223e-06, + "loss": 0.0, + "step": 11994 + }, + { + "epoch": 0.7730231359154476, + "grad_norm": 0.0063177136721583385, + "learning_rate": 2.5277479412817762e-06, + "loss": 0.0, + "step": 11995 + }, + { + "epoch": 0.7730875813623768, + "grad_norm": 0.00446458534063843, + "learning_rate": 2.5270318653777305e-06, + "loss": 0.0, + "step": 11996 + }, + { + "epoch": 0.773152026809306, + "grad_norm": 0.0008947799086842924, + "learning_rate": 2.5263157894736844e-06, + "loss": 0.0, + "step": 11997 + }, + { + "epoch": 0.7732164722562351, + "grad_norm": 0.0004906066965026694, + "learning_rate": 2.5255997135696387e-06, + "loss": 0.0, + "step": 11998 + }, + { + "epoch": 0.7732809177031643, + "grad_norm": 0.0006771847353210726, + "learning_rate": 2.5248836376655926e-06, + "loss": 0.0, + "step": 11999 + }, + { + "epoch": 0.7733453631500935, + "grad_norm": 0.03204525715449161, + "learning_rate": 2.524167561761547e-06, + "loss": 0.0, + "step": 12000 + }, + { + "epoch": 0.7734098085970226, + "grad_norm": 0.17269605440999874, + "learning_rate": 2.523451485857501e-06, + "loss": 0.0004, + "step": 12001 + }, + { + "epoch": 0.7734742540439518, + "grad_norm": 0.0002331402822554568, + "learning_rate": 2.522735409953455e-06, + "loss": 0.0, + "step": 12002 + }, + { + "epoch": 0.7735386994908809, + "grad_norm": 0.0018706894203856107, + "learning_rate": 2.5220193340494093e-06, + "loss": 0.0, + "step": 12003 + }, + { + "epoch": 0.7736031449378101, + "grad_norm": 0.23577475587510444, + "learning_rate": 2.521303258145363e-06, + "loss": 0.003, + "step": 12004 + }, + { + "epoch": 0.7736675903847393, + "grad_norm": 0.09998982366549632, + "learning_rate": 2.520587182241318e-06, + "loss": 0.0002, + "step": 12005 + }, + { + "epoch": 0.7737320358316685, + "grad_norm": 0.00016423824356981455, + "learning_rate": 2.5198711063372722e-06, + "loss": 0.0, + "step": 12006 + }, + { + "epoch": 0.7737964812785977, + "grad_norm": 0.03779216557272751, + "learning_rate": 2.519155030433226e-06, + "loss": 0.0, + "step": 12007 + }, + { + "epoch": 0.7738609267255269, + "grad_norm": 0.0012791941326287728, + "learning_rate": 2.5184389545291804e-06, + "loss": 0.0, + "step": 12008 + }, + { + "epoch": 0.773925372172456, + "grad_norm": 0.012356869073728675, + "learning_rate": 2.5177228786251347e-06, + "loss": 0.0, + "step": 12009 + }, + { + "epoch": 0.7739898176193852, + "grad_norm": 0.4994760987231003, + "learning_rate": 2.5170068027210886e-06, + "loss": 0.0029, + "step": 12010 + }, + { + "epoch": 0.7740542630663144, + "grad_norm": 0.06908226621424454, + "learning_rate": 2.516290726817043e-06, + "loss": 0.0016, + "step": 12011 + }, + { + "epoch": 0.7741187085132435, + "grad_norm": 0.28788372637119103, + "learning_rate": 2.5155746509129967e-06, + "loss": 0.0004, + "step": 12012 + }, + { + "epoch": 0.7741831539601727, + "grad_norm": 0.0007493520973933247, + "learning_rate": 2.514858575008951e-06, + "loss": 0.0, + "step": 12013 + }, + { + "epoch": 0.7742475994071019, + "grad_norm": 0.2166681766839472, + "learning_rate": 2.5141424991049053e-06, + "loss": 0.0003, + "step": 12014 + }, + { + "epoch": 0.774312044854031, + "grad_norm": 0.01656720079967917, + "learning_rate": 2.5134264232008592e-06, + "loss": 0.0, + "step": 12015 + }, + { + "epoch": 0.7743764903009602, + "grad_norm": 0.028964880535246422, + "learning_rate": 2.5127103472968135e-06, + "loss": 0.0, + "step": 12016 + }, + { + "epoch": 0.7744409357478894, + "grad_norm": 0.01668855371166636, + "learning_rate": 2.5119942713927682e-06, + "loss": 0.0, + "step": 12017 + }, + { + "epoch": 0.7745053811948186, + "grad_norm": 0.00826921975490535, + "learning_rate": 2.511278195488722e-06, + "loss": 0.0, + "step": 12018 + }, + { + "epoch": 0.7745698266417478, + "grad_norm": 0.0019967600267275804, + "learning_rate": 2.5105621195846764e-06, + "loss": 0.0, + "step": 12019 + }, + { + "epoch": 0.774634272088677, + "grad_norm": 0.1981025072169954, + "learning_rate": 2.5098460436806303e-06, + "loss": 0.001, + "step": 12020 + }, + { + "epoch": 0.7746987175356062, + "grad_norm": 0.44295811245102745, + "learning_rate": 2.5091299677765846e-06, + "loss": 0.0034, + "step": 12021 + }, + { + "epoch": 0.7747631629825353, + "grad_norm": 0.0007132173739187416, + "learning_rate": 2.508413891872539e-06, + "loss": 0.0, + "step": 12022 + }, + { + "epoch": 0.7748276084294644, + "grad_norm": 0.0025241999101835977, + "learning_rate": 2.5076978159684928e-06, + "loss": 0.0, + "step": 12023 + }, + { + "epoch": 0.7748920538763936, + "grad_norm": 0.00020836271478278545, + "learning_rate": 2.506981740064447e-06, + "loss": 0.0, + "step": 12024 + }, + { + "epoch": 0.7749564993233228, + "grad_norm": 0.004587700629129224, + "learning_rate": 2.506265664160401e-06, + "loss": 0.0, + "step": 12025 + }, + { + "epoch": 0.775020944770252, + "grad_norm": 0.017800457394732166, + "learning_rate": 2.5055495882563552e-06, + "loss": 0.0, + "step": 12026 + }, + { + "epoch": 0.7750853902171811, + "grad_norm": 0.005903831600595229, + "learning_rate": 2.504833512352309e-06, + "loss": 0.0, + "step": 12027 + }, + { + "epoch": 0.7751498356641103, + "grad_norm": 0.0019456858051113458, + "learning_rate": 2.504117436448264e-06, + "loss": 0.0, + "step": 12028 + }, + { + "epoch": 0.7752142811110395, + "grad_norm": 0.008377258458590922, + "learning_rate": 2.503401360544218e-06, + "loss": 0.0, + "step": 12029 + }, + { + "epoch": 0.7752787265579687, + "grad_norm": 0.01561660979057207, + "learning_rate": 2.5026852846401724e-06, + "loss": 0.0, + "step": 12030 + }, + { + "epoch": 0.7753431720048979, + "grad_norm": 0.02577598930821863, + "learning_rate": 2.5019692087361263e-06, + "loss": 0.0, + "step": 12031 + }, + { + "epoch": 0.7754076174518271, + "grad_norm": 0.0010712492468354091, + "learning_rate": 2.5012531328320806e-06, + "loss": 0.0, + "step": 12032 + }, + { + "epoch": 0.7754720628987563, + "grad_norm": 0.0944316984142256, + "learning_rate": 2.5005370569280345e-06, + "loss": 0.0001, + "step": 12033 + }, + { + "epoch": 0.7755365083456853, + "grad_norm": 0.0232625944343488, + "learning_rate": 2.4998209810239888e-06, + "loss": 0.0003, + "step": 12034 + }, + { + "epoch": 0.7756009537926145, + "grad_norm": 1.4485049070539173, + "learning_rate": 2.4991049051199427e-06, + "loss": 0.0025, + "step": 12035 + }, + { + "epoch": 0.7756653992395437, + "grad_norm": 0.0016307701840922474, + "learning_rate": 2.498388829215897e-06, + "loss": 0.0, + "step": 12036 + }, + { + "epoch": 0.7757298446864729, + "grad_norm": 0.02336175794591136, + "learning_rate": 2.4976727533118513e-06, + "loss": 0.0001, + "step": 12037 + }, + { + "epoch": 0.7757942901334021, + "grad_norm": 0.16227938458248187, + "learning_rate": 2.4969566774078056e-06, + "loss": 0.0022, + "step": 12038 + }, + { + "epoch": 0.7758587355803312, + "grad_norm": 0.07302846880666206, + "learning_rate": 2.4962406015037594e-06, + "loss": 0.0004, + "step": 12039 + }, + { + "epoch": 0.7759231810272604, + "grad_norm": 0.00041019317192437306, + "learning_rate": 2.4955245255997137e-06, + "loss": 0.0, + "step": 12040 + }, + { + "epoch": 0.7759876264741896, + "grad_norm": 0.002584801277987377, + "learning_rate": 2.494808449695668e-06, + "loss": 0.0, + "step": 12041 + }, + { + "epoch": 0.7760520719211188, + "grad_norm": 0.002223323087132319, + "learning_rate": 2.494092373791622e-06, + "loss": 0.0, + "step": 12042 + }, + { + "epoch": 0.776116517368048, + "grad_norm": 0.01440653692624194, + "learning_rate": 2.493376297887576e-06, + "loss": 0.0, + "step": 12043 + }, + { + "epoch": 0.7761809628149772, + "grad_norm": 0.053300064123326024, + "learning_rate": 2.4926602219835305e-06, + "loss": 0.0001, + "step": 12044 + }, + { + "epoch": 0.7762454082619062, + "grad_norm": 0.20566420595865056, + "learning_rate": 2.491944146079485e-06, + "loss": 0.0023, + "step": 12045 + }, + { + "epoch": 0.7763098537088354, + "grad_norm": 0.0007623805293543374, + "learning_rate": 2.4912280701754387e-06, + "loss": 0.0, + "step": 12046 + }, + { + "epoch": 0.7763742991557646, + "grad_norm": 0.0014681434347622946, + "learning_rate": 2.490511994271393e-06, + "loss": 0.0, + "step": 12047 + }, + { + "epoch": 0.7764387446026938, + "grad_norm": 0.00043488636991413735, + "learning_rate": 2.489795918367347e-06, + "loss": 0.0, + "step": 12048 + }, + { + "epoch": 0.776503190049623, + "grad_norm": 0.0017831550431723615, + "learning_rate": 2.4890798424633016e-06, + "loss": 0.0, + "step": 12049 + }, + { + "epoch": 0.7765676354965522, + "grad_norm": 0.0018114956405963806, + "learning_rate": 2.4883637665592554e-06, + "loss": 0.0, + "step": 12050 + }, + { + "epoch": 0.7766320809434814, + "grad_norm": 0.18950622836851014, + "learning_rate": 2.4876476906552097e-06, + "loss": 0.0002, + "step": 12051 + }, + { + "epoch": 0.7766965263904105, + "grad_norm": 0.00418238718089347, + "learning_rate": 2.4869316147511636e-06, + "loss": 0.0, + "step": 12052 + }, + { + "epoch": 0.7767609718373397, + "grad_norm": 0.0004478533245452791, + "learning_rate": 2.486215538847118e-06, + "loss": 0.0, + "step": 12053 + }, + { + "epoch": 0.7768254172842689, + "grad_norm": 0.00024396889565855113, + "learning_rate": 2.4854994629430722e-06, + "loss": 0.0, + "step": 12054 + }, + { + "epoch": 0.7768898627311981, + "grad_norm": 0.2903369953181515, + "learning_rate": 2.4847833870390265e-06, + "loss": 0.0008, + "step": 12055 + }, + { + "epoch": 0.7769543081781272, + "grad_norm": 0.00020092059146728687, + "learning_rate": 2.4840673111349804e-06, + "loss": 0.0, + "step": 12056 + }, + { + "epoch": 0.7770187536250563, + "grad_norm": 0.0011666064461578786, + "learning_rate": 2.4833512352309347e-06, + "loss": 0.0, + "step": 12057 + }, + { + "epoch": 0.7770831990719855, + "grad_norm": 0.001190503348124305, + "learning_rate": 2.482635159326889e-06, + "loss": 0.0, + "step": 12058 + }, + { + "epoch": 0.7771476445189147, + "grad_norm": 0.3657997162209834, + "learning_rate": 2.481919083422843e-06, + "loss": 0.0032, + "step": 12059 + }, + { + "epoch": 0.7772120899658439, + "grad_norm": 0.001634408520878064, + "learning_rate": 2.481203007518797e-06, + "loss": 0.0, + "step": 12060 + }, + { + "epoch": 0.7772765354127731, + "grad_norm": 0.1469337056910139, + "learning_rate": 2.4804869316147515e-06, + "loss": 0.0017, + "step": 12061 + }, + { + "epoch": 0.7773409808597023, + "grad_norm": 0.06710843879393069, + "learning_rate": 2.4797708557107058e-06, + "loss": 0.0002, + "step": 12062 + }, + { + "epoch": 0.7774054263066315, + "grad_norm": 0.0017892333181064397, + "learning_rate": 2.4790547798066596e-06, + "loss": 0.0, + "step": 12063 + }, + { + "epoch": 0.7774698717535606, + "grad_norm": 0.004690414808615758, + "learning_rate": 2.478338703902614e-06, + "loss": 0.0, + "step": 12064 + }, + { + "epoch": 0.7775343172004898, + "grad_norm": 0.004454153225457359, + "learning_rate": 2.477622627998568e-06, + "loss": 0.0, + "step": 12065 + }, + { + "epoch": 0.777598762647419, + "grad_norm": 0.31551044717576315, + "learning_rate": 2.4769065520945225e-06, + "loss": 0.0011, + "step": 12066 + }, + { + "epoch": 0.7776632080943481, + "grad_norm": 0.0014824010634681267, + "learning_rate": 2.4761904761904764e-06, + "loss": 0.0, + "step": 12067 + }, + { + "epoch": 0.7777276535412773, + "grad_norm": 0.005493271680943317, + "learning_rate": 2.4754744002864307e-06, + "loss": 0.0, + "step": 12068 + }, + { + "epoch": 0.7777920989882064, + "grad_norm": 0.002344001587170874, + "learning_rate": 2.4747583243823846e-06, + "loss": 0.0, + "step": 12069 + }, + { + "epoch": 0.7778565444351356, + "grad_norm": 0.00015707887304980444, + "learning_rate": 2.474042248478339e-06, + "loss": 0.0, + "step": 12070 + }, + { + "epoch": 0.7779209898820648, + "grad_norm": 0.0006823565350090584, + "learning_rate": 2.473326172574293e-06, + "loss": 0.0, + "step": 12071 + }, + { + "epoch": 0.777985435328994, + "grad_norm": 0.0017209300771120863, + "learning_rate": 2.4726100966702475e-06, + "loss": 0.0, + "step": 12072 + }, + { + "epoch": 0.7780498807759232, + "grad_norm": 0.2138218417932704, + "learning_rate": 2.4718940207662013e-06, + "loss": 0.0006, + "step": 12073 + }, + { + "epoch": 0.7781143262228524, + "grad_norm": 0.0065011778796621855, + "learning_rate": 2.4711779448621556e-06, + "loss": 0.0, + "step": 12074 + }, + { + "epoch": 0.7781787716697816, + "grad_norm": 0.0003251508710584543, + "learning_rate": 2.47046186895811e-06, + "loss": 0.0, + "step": 12075 + }, + { + "epoch": 0.7782432171167107, + "grad_norm": 0.0010564877939732219, + "learning_rate": 2.469745793054064e-06, + "loss": 0.0, + "step": 12076 + }, + { + "epoch": 0.7783076625636399, + "grad_norm": 0.0001559384697446135, + "learning_rate": 2.469029717150018e-06, + "loss": 0.0, + "step": 12077 + }, + { + "epoch": 0.7783721080105691, + "grad_norm": 0.0007897786642414037, + "learning_rate": 2.4683136412459724e-06, + "loss": 0.0, + "step": 12078 + }, + { + "epoch": 0.7784365534574982, + "grad_norm": 0.008367158669417396, + "learning_rate": 2.4675975653419267e-06, + "loss": 0.0, + "step": 12079 + }, + { + "epoch": 0.7785009989044274, + "grad_norm": 0.0024458826459670666, + "learning_rate": 2.4668814894378806e-06, + "loss": 0.0, + "step": 12080 + }, + { + "epoch": 0.7785654443513566, + "grad_norm": 0.02535862235484841, + "learning_rate": 2.466165413533835e-06, + "loss": 0.0001, + "step": 12081 + }, + { + "epoch": 0.7786298897982857, + "grad_norm": 0.0008567730125366892, + "learning_rate": 2.4654493376297888e-06, + "loss": 0.0, + "step": 12082 + }, + { + "epoch": 0.7786943352452149, + "grad_norm": 0.0010730528697962104, + "learning_rate": 2.464733261725743e-06, + "loss": 0.0, + "step": 12083 + }, + { + "epoch": 0.7787587806921441, + "grad_norm": 0.05461918233566015, + "learning_rate": 2.4640171858216974e-06, + "loss": 0.0001, + "step": 12084 + }, + { + "epoch": 0.7788232261390733, + "grad_norm": 0.15517757863659667, + "learning_rate": 2.4633011099176517e-06, + "loss": 0.0015, + "step": 12085 + }, + { + "epoch": 0.7788876715860025, + "grad_norm": 0.1818583424467823, + "learning_rate": 2.4625850340136055e-06, + "loss": 0.0013, + "step": 12086 + }, + { + "epoch": 0.7789521170329317, + "grad_norm": 0.001713274648651585, + "learning_rate": 2.46186895810956e-06, + "loss": 0.0, + "step": 12087 + }, + { + "epoch": 0.7790165624798608, + "grad_norm": 0.14944862140584098, + "learning_rate": 2.4611528822055137e-06, + "loss": 0.0011, + "step": 12088 + }, + { + "epoch": 0.77908100792679, + "grad_norm": 0.12680037185162404, + "learning_rate": 2.460436806301468e-06, + "loss": 0.0012, + "step": 12089 + }, + { + "epoch": 0.7791454533737191, + "grad_norm": 2.549845910452418e-05, + "learning_rate": 2.4597207303974223e-06, + "loss": 0.0, + "step": 12090 + }, + { + "epoch": 0.7792098988206483, + "grad_norm": 0.000564051977485508, + "learning_rate": 2.4590046544933766e-06, + "loss": 0.0, + "step": 12091 + }, + { + "epoch": 0.7792743442675775, + "grad_norm": 0.0010766900145023287, + "learning_rate": 2.4582885785893305e-06, + "loss": 0.0, + "step": 12092 + }, + { + "epoch": 0.7793387897145067, + "grad_norm": 0.7329335370994735, + "learning_rate": 2.4575725026852848e-06, + "loss": 0.001, + "step": 12093 + }, + { + "epoch": 0.7794032351614358, + "grad_norm": 0.001353292375859486, + "learning_rate": 2.456856426781239e-06, + "loss": 0.0, + "step": 12094 + }, + { + "epoch": 0.779467680608365, + "grad_norm": 0.053991346554312696, + "learning_rate": 2.456140350877193e-06, + "loss": 0.0, + "step": 12095 + }, + { + "epoch": 0.7795321260552942, + "grad_norm": 0.005993064838952124, + "learning_rate": 2.4554242749731473e-06, + "loss": 0.0, + "step": 12096 + }, + { + "epoch": 0.7795965715022234, + "grad_norm": 0.0021416859490622545, + "learning_rate": 2.4547081990691016e-06, + "loss": 0.0, + "step": 12097 + }, + { + "epoch": 0.7796610169491526, + "grad_norm": 0.014471636175070532, + "learning_rate": 2.453992123165056e-06, + "loss": 0.0, + "step": 12098 + }, + { + "epoch": 0.7797254623960818, + "grad_norm": 0.006637386067261587, + "learning_rate": 2.4532760472610097e-06, + "loss": 0.0, + "step": 12099 + }, + { + "epoch": 0.7797899078430109, + "grad_norm": 0.01238649530879906, + "learning_rate": 2.452559971356964e-06, + "loss": 0.0001, + "step": 12100 + }, + { + "epoch": 0.77985435328994, + "grad_norm": 0.04190091815805419, + "learning_rate": 2.4518438954529183e-06, + "loss": 0.0016, + "step": 12101 + }, + { + "epoch": 0.7799187987368692, + "grad_norm": 0.00034396283472747845, + "learning_rate": 2.4511278195488726e-06, + "loss": 0.0, + "step": 12102 + }, + { + "epoch": 0.7799832441837984, + "grad_norm": 0.00787000151745771, + "learning_rate": 2.4504117436448265e-06, + "loss": 0.0, + "step": 12103 + }, + { + "epoch": 0.7800476896307276, + "grad_norm": 0.0040082247399448896, + "learning_rate": 2.449695667740781e-06, + "loss": 0.0, + "step": 12104 + }, + { + "epoch": 0.7801121350776568, + "grad_norm": 0.4142050407785845, + "learning_rate": 2.4489795918367347e-06, + "loss": 0.0009, + "step": 12105 + }, + { + "epoch": 0.7801765805245859, + "grad_norm": 0.0002606664246261931, + "learning_rate": 2.448263515932689e-06, + "loss": 0.0, + "step": 12106 + }, + { + "epoch": 0.7802410259715151, + "grad_norm": 0.003579972965774035, + "learning_rate": 2.4475474400286433e-06, + "loss": 0.0, + "step": 12107 + }, + { + "epoch": 0.7803054714184443, + "grad_norm": 0.021305318846473693, + "learning_rate": 2.4468313641245976e-06, + "loss": 0.0002, + "step": 12108 + }, + { + "epoch": 0.7803699168653735, + "grad_norm": 0.012691156115001318, + "learning_rate": 2.4461152882205514e-06, + "loss": 0.0, + "step": 12109 + }, + { + "epoch": 0.7804343623123027, + "grad_norm": 0.0017728354092780912, + "learning_rate": 2.4453992123165057e-06, + "loss": 0.0, + "step": 12110 + }, + { + "epoch": 0.7804988077592319, + "grad_norm": 0.0016533355333587835, + "learning_rate": 2.44468313641246e-06, + "loss": 0.0, + "step": 12111 + }, + { + "epoch": 0.7805632532061609, + "grad_norm": 0.0022065467282514017, + "learning_rate": 2.443967060508414e-06, + "loss": 0.0, + "step": 12112 + }, + { + "epoch": 0.7806276986530901, + "grad_norm": 0.004123723032585986, + "learning_rate": 2.4432509846043682e-06, + "loss": 0.0, + "step": 12113 + }, + { + "epoch": 0.7806921441000193, + "grad_norm": 0.0005452104333529033, + "learning_rate": 2.4425349087003225e-06, + "loss": 0.0, + "step": 12114 + }, + { + "epoch": 0.7807565895469485, + "grad_norm": 0.0005542779206685047, + "learning_rate": 2.441818832796277e-06, + "loss": 0.0, + "step": 12115 + }, + { + "epoch": 0.7808210349938777, + "grad_norm": 0.10948264410785608, + "learning_rate": 2.4411027568922307e-06, + "loss": 0.0004, + "step": 12116 + }, + { + "epoch": 0.7808854804408069, + "grad_norm": 0.0001848680504991575, + "learning_rate": 2.440386680988185e-06, + "loss": 0.0, + "step": 12117 + }, + { + "epoch": 0.780949925887736, + "grad_norm": 0.0007432499498959208, + "learning_rate": 2.439670605084139e-06, + "loss": 0.0, + "step": 12118 + }, + { + "epoch": 0.7810143713346652, + "grad_norm": 0.004389440175323089, + "learning_rate": 2.4389545291800936e-06, + "loss": 0.0, + "step": 12119 + }, + { + "epoch": 0.7810788167815944, + "grad_norm": 0.002810186785865301, + "learning_rate": 2.4382384532760475e-06, + "loss": 0.0, + "step": 12120 + }, + { + "epoch": 0.7811432622285236, + "grad_norm": 0.007075445688301599, + "learning_rate": 2.4375223773720018e-06, + "loss": 0.0, + "step": 12121 + }, + { + "epoch": 0.7812077076754528, + "grad_norm": 0.00019089657477344002, + "learning_rate": 2.4368063014679556e-06, + "loss": 0.0, + "step": 12122 + }, + { + "epoch": 0.7812721531223819, + "grad_norm": 0.002233569077508966, + "learning_rate": 2.43609022556391e-06, + "loss": 0.0, + "step": 12123 + }, + { + "epoch": 0.781336598569311, + "grad_norm": 2.4103195654440213, + "learning_rate": 2.4353741496598642e-06, + "loss": 0.0292, + "step": 12124 + }, + { + "epoch": 0.7814010440162402, + "grad_norm": 8.232393805314394e-05, + "learning_rate": 2.4346580737558185e-06, + "loss": 0.0, + "step": 12125 + }, + { + "epoch": 0.7814654894631694, + "grad_norm": 0.16571786769047236, + "learning_rate": 2.4339419978517724e-06, + "loss": 0.0003, + "step": 12126 + }, + { + "epoch": 0.7815299349100986, + "grad_norm": 0.006719157453833545, + "learning_rate": 2.4332259219477267e-06, + "loss": 0.0, + "step": 12127 + }, + { + "epoch": 0.7815943803570278, + "grad_norm": 0.004906313933490102, + "learning_rate": 2.432509846043681e-06, + "loss": 0.0, + "step": 12128 + }, + { + "epoch": 0.781658825803957, + "grad_norm": 0.006681028163229526, + "learning_rate": 2.431793770139635e-06, + "loss": 0.0, + "step": 12129 + }, + { + "epoch": 0.7817232712508861, + "grad_norm": 0.2901713907485199, + "learning_rate": 2.431077694235589e-06, + "loss": 0.0, + "step": 12130 + }, + { + "epoch": 0.7817877166978153, + "grad_norm": 0.0014225891402345502, + "learning_rate": 2.4303616183315435e-06, + "loss": 0.0, + "step": 12131 + }, + { + "epoch": 0.7818521621447445, + "grad_norm": 0.08323650832116088, + "learning_rate": 2.4296455424274978e-06, + "loss": 0.0001, + "step": 12132 + }, + { + "epoch": 0.7819166075916737, + "grad_norm": 0.003190025050149661, + "learning_rate": 2.4289294665234516e-06, + "loss": 0.0, + "step": 12133 + }, + { + "epoch": 0.7819810530386028, + "grad_norm": 0.00029784653909833324, + "learning_rate": 2.428213390619406e-06, + "loss": 0.0, + "step": 12134 + }, + { + "epoch": 0.782045498485532, + "grad_norm": 0.0007026094397731929, + "learning_rate": 2.42749731471536e-06, + "loss": 0.0, + "step": 12135 + }, + { + "epoch": 0.7821099439324611, + "grad_norm": 0.0012636307050004592, + "learning_rate": 2.4267812388113145e-06, + "loss": 0.0, + "step": 12136 + }, + { + "epoch": 0.7821743893793903, + "grad_norm": 0.0029543542002693716, + "learning_rate": 2.4260651629072684e-06, + "loss": 0.0, + "step": 12137 + }, + { + "epoch": 0.7822388348263195, + "grad_norm": 0.00022484532724232343, + "learning_rate": 2.4253490870032227e-06, + "loss": 0.0, + "step": 12138 + }, + { + "epoch": 0.7823032802732487, + "grad_norm": 0.09689424289044542, + "learning_rate": 2.4246330110991766e-06, + "loss": 0.0002, + "step": 12139 + }, + { + "epoch": 0.7823677257201779, + "grad_norm": 0.0004091185705752037, + "learning_rate": 2.423916935195131e-06, + "loss": 0.0, + "step": 12140 + }, + { + "epoch": 0.7824321711671071, + "grad_norm": 0.03076440841027304, + "learning_rate": 2.4232008592910848e-06, + "loss": 0.0, + "step": 12141 + }, + { + "epoch": 0.7824966166140362, + "grad_norm": 0.00017963182636153072, + "learning_rate": 2.4224847833870395e-06, + "loss": 0.0, + "step": 12142 + }, + { + "epoch": 0.7825610620609654, + "grad_norm": 5.2125403499151375e-05, + "learning_rate": 2.4217687074829934e-06, + "loss": 0.0, + "step": 12143 + }, + { + "epoch": 0.7826255075078946, + "grad_norm": 0.000837962959501766, + "learning_rate": 2.4210526315789477e-06, + "loss": 0.0, + "step": 12144 + }, + { + "epoch": 0.7826899529548237, + "grad_norm": 0.0031376415468503094, + "learning_rate": 2.4203365556749015e-06, + "loss": 0.0, + "step": 12145 + }, + { + "epoch": 0.7827543984017529, + "grad_norm": 5.146406773654627e-05, + "learning_rate": 2.419620479770856e-06, + "loss": 0.0, + "step": 12146 + }, + { + "epoch": 0.782818843848682, + "grad_norm": 0.00029061667402159076, + "learning_rate": 2.41890440386681e-06, + "loss": 0.0, + "step": 12147 + }, + { + "epoch": 0.7828832892956112, + "grad_norm": 0.00689310914039741, + "learning_rate": 2.4181883279627644e-06, + "loss": 0.0, + "step": 12148 + }, + { + "epoch": 0.7829477347425404, + "grad_norm": 0.00255918569442203, + "learning_rate": 2.4174722520587183e-06, + "loss": 0.0, + "step": 12149 + }, + { + "epoch": 0.7830121801894696, + "grad_norm": 0.14416244853354604, + "learning_rate": 2.4167561761546726e-06, + "loss": 0.0012, + "step": 12150 + }, + { + "epoch": 0.7830766256363988, + "grad_norm": 0.3272660684480119, + "learning_rate": 2.416040100250627e-06, + "loss": 0.0005, + "step": 12151 + }, + { + "epoch": 0.783141071083328, + "grad_norm": 0.30817576965074384, + "learning_rate": 2.4153240243465808e-06, + "loss": 0.0019, + "step": 12152 + }, + { + "epoch": 0.7832055165302572, + "grad_norm": 0.07889921893529749, + "learning_rate": 2.414607948442535e-06, + "loss": 0.0001, + "step": 12153 + }, + { + "epoch": 0.7832699619771863, + "grad_norm": 0.00043163331430451227, + "learning_rate": 2.4138918725384894e-06, + "loss": 0.0, + "step": 12154 + }, + { + "epoch": 0.7833344074241155, + "grad_norm": 0.0001289400500351095, + "learning_rate": 2.4131757966344437e-06, + "loss": 0.0, + "step": 12155 + }, + { + "epoch": 0.7833988528710447, + "grad_norm": 0.0007038620007471168, + "learning_rate": 2.4124597207303976e-06, + "loss": 0.0, + "step": 12156 + }, + { + "epoch": 0.7834632983179738, + "grad_norm": 0.009012677822004099, + "learning_rate": 2.411743644826352e-06, + "loss": 0.0, + "step": 12157 + }, + { + "epoch": 0.783527743764903, + "grad_norm": 0.015514184778514704, + "learning_rate": 2.4110275689223057e-06, + "loss": 0.0, + "step": 12158 + }, + { + "epoch": 0.7835921892118322, + "grad_norm": 0.0007476084735745873, + "learning_rate": 2.41031149301826e-06, + "loss": 0.0, + "step": 12159 + }, + { + "epoch": 0.7836566346587613, + "grad_norm": 0.004590340510394398, + "learning_rate": 2.4095954171142143e-06, + "loss": 0.0, + "step": 12160 + }, + { + "epoch": 0.7837210801056905, + "grad_norm": 0.010306499838861465, + "learning_rate": 2.4088793412101686e-06, + "loss": 0.0, + "step": 12161 + }, + { + "epoch": 0.7837855255526197, + "grad_norm": 0.0005240210405741492, + "learning_rate": 2.4081632653061225e-06, + "loss": 0.0, + "step": 12162 + }, + { + "epoch": 0.7838499709995489, + "grad_norm": 0.23179992839626576, + "learning_rate": 2.407447189402077e-06, + "loss": 0.0022, + "step": 12163 + }, + { + "epoch": 0.7839144164464781, + "grad_norm": 0.00023400309095611104, + "learning_rate": 2.406731113498031e-06, + "loss": 0.0, + "step": 12164 + }, + { + "epoch": 0.7839788618934073, + "grad_norm": 0.03840447149473234, + "learning_rate": 2.406015037593985e-06, + "loss": 0.0001, + "step": 12165 + }, + { + "epoch": 0.7840433073403364, + "grad_norm": 0.33425590007271, + "learning_rate": 2.4052989616899393e-06, + "loss": 0.0024, + "step": 12166 + }, + { + "epoch": 0.7841077527872656, + "grad_norm": 0.013922142574820724, + "learning_rate": 2.4045828857858936e-06, + "loss": 0.0001, + "step": 12167 + }, + { + "epoch": 0.7841721982341947, + "grad_norm": 0.0012904758371460228, + "learning_rate": 2.403866809881848e-06, + "loss": 0.0, + "step": 12168 + }, + { + "epoch": 0.7842366436811239, + "grad_norm": 0.0009271433348567991, + "learning_rate": 2.4031507339778017e-06, + "loss": 0.0, + "step": 12169 + }, + { + "epoch": 0.7843010891280531, + "grad_norm": 0.029267167884655023, + "learning_rate": 2.402434658073756e-06, + "loss": 0.0001, + "step": 12170 + }, + { + "epoch": 0.7843655345749823, + "grad_norm": 0.006470476356729586, + "learning_rate": 2.4017185821697103e-06, + "loss": 0.0001, + "step": 12171 + }, + { + "epoch": 0.7844299800219114, + "grad_norm": 0.006364758656668254, + "learning_rate": 2.4010025062656646e-06, + "loss": 0.0, + "step": 12172 + }, + { + "epoch": 0.7844944254688406, + "grad_norm": 0.34521296607212343, + "learning_rate": 2.4002864303616185e-06, + "loss": 0.0032, + "step": 12173 + }, + { + "epoch": 0.7845588709157698, + "grad_norm": 0.003681485751642218, + "learning_rate": 2.399570354457573e-06, + "loss": 0.0, + "step": 12174 + }, + { + "epoch": 0.784623316362699, + "grad_norm": 0.00445226304354565, + "learning_rate": 2.3988542785535267e-06, + "loss": 0.0, + "step": 12175 + }, + { + "epoch": 0.7846877618096282, + "grad_norm": 0.0015989638304230221, + "learning_rate": 2.398138202649481e-06, + "loss": 0.0, + "step": 12176 + }, + { + "epoch": 0.7847522072565574, + "grad_norm": 0.0043978620529324984, + "learning_rate": 2.3974221267454353e-06, + "loss": 0.0, + "step": 12177 + }, + { + "epoch": 0.7848166527034866, + "grad_norm": 0.017444955525010995, + "learning_rate": 2.3967060508413896e-06, + "loss": 0.0, + "step": 12178 + }, + { + "epoch": 0.7848810981504156, + "grad_norm": 0.01364086457456686, + "learning_rate": 2.3959899749373435e-06, + "loss": 0.0015, + "step": 12179 + }, + { + "epoch": 0.7849455435973448, + "grad_norm": 0.005634009224861755, + "learning_rate": 2.3952738990332978e-06, + "loss": 0.0, + "step": 12180 + }, + { + "epoch": 0.785009989044274, + "grad_norm": 0.0002739373318359726, + "learning_rate": 2.3945578231292516e-06, + "loss": 0.0, + "step": 12181 + }, + { + "epoch": 0.7850744344912032, + "grad_norm": 0.013928744806136031, + "learning_rate": 2.393841747225206e-06, + "loss": 0.0, + "step": 12182 + }, + { + "epoch": 0.7851388799381324, + "grad_norm": 0.00152916755038011, + "learning_rate": 2.3931256713211602e-06, + "loss": 0.0, + "step": 12183 + }, + { + "epoch": 0.7852033253850615, + "grad_norm": 0.005809892577009027, + "learning_rate": 2.3924095954171145e-06, + "loss": 0.0, + "step": 12184 + }, + { + "epoch": 0.7852677708319907, + "grad_norm": 0.013241687900794809, + "learning_rate": 2.3916935195130684e-06, + "loss": 0.0001, + "step": 12185 + }, + { + "epoch": 0.7853322162789199, + "grad_norm": 0.00023618599780431406, + "learning_rate": 2.3909774436090227e-06, + "loss": 0.0, + "step": 12186 + }, + { + "epoch": 0.7853966617258491, + "grad_norm": 0.030064050017800315, + "learning_rate": 2.390261367704977e-06, + "loss": 0.0003, + "step": 12187 + }, + { + "epoch": 0.7854611071727783, + "grad_norm": 0.01977551719248264, + "learning_rate": 2.389545291800931e-06, + "loss": 0.0015, + "step": 12188 + }, + { + "epoch": 0.7855255526197075, + "grad_norm": 0.0002665702840403745, + "learning_rate": 2.388829215896885e-06, + "loss": 0.0, + "step": 12189 + }, + { + "epoch": 0.7855899980666365, + "grad_norm": 0.016736773736830732, + "learning_rate": 2.3881131399928395e-06, + "loss": 0.0, + "step": 12190 + }, + { + "epoch": 0.7856544435135657, + "grad_norm": 0.011101890454886009, + "learning_rate": 2.3873970640887938e-06, + "loss": 0.0, + "step": 12191 + }, + { + "epoch": 0.7857188889604949, + "grad_norm": 0.23320839139528582, + "learning_rate": 2.3866809881847476e-06, + "loss": 0.0002, + "step": 12192 + }, + { + "epoch": 0.7857833344074241, + "grad_norm": 0.001548839832227801, + "learning_rate": 2.385964912280702e-06, + "loss": 0.0, + "step": 12193 + }, + { + "epoch": 0.7858477798543533, + "grad_norm": 0.008140804390900756, + "learning_rate": 2.385248836376656e-06, + "loss": 0.0, + "step": 12194 + }, + { + "epoch": 0.7859122253012825, + "grad_norm": 0.002026366280862881, + "learning_rate": 2.3845327604726105e-06, + "loss": 0.0, + "step": 12195 + }, + { + "epoch": 0.7859766707482116, + "grad_norm": 0.0012847861580297382, + "learning_rate": 2.3838166845685644e-06, + "loss": 0.0, + "step": 12196 + }, + { + "epoch": 0.7860411161951408, + "grad_norm": 0.0006644772422334051, + "learning_rate": 2.3831006086645187e-06, + "loss": 0.0, + "step": 12197 + }, + { + "epoch": 0.78610556164207, + "grad_norm": 0.03664619062678337, + "learning_rate": 2.3823845327604726e-06, + "loss": 0.0, + "step": 12198 + }, + { + "epoch": 0.7861700070889992, + "grad_norm": 0.004651097115143568, + "learning_rate": 2.381668456856427e-06, + "loss": 0.0, + "step": 12199 + }, + { + "epoch": 0.7862344525359284, + "grad_norm": 0.015155141237009116, + "learning_rate": 2.380952380952381e-06, + "loss": 0.0, + "step": 12200 + }, + { + "epoch": 0.7862988979828575, + "grad_norm": 0.14372060879564724, + "learning_rate": 2.3802363050483355e-06, + "loss": 0.0021, + "step": 12201 + }, + { + "epoch": 0.7863633434297866, + "grad_norm": 0.012198772020524575, + "learning_rate": 2.3795202291442894e-06, + "loss": 0.0001, + "step": 12202 + }, + { + "epoch": 0.7864277888767158, + "grad_norm": 0.003081126606843541, + "learning_rate": 2.3788041532402437e-06, + "loss": 0.0, + "step": 12203 + }, + { + "epoch": 0.786492234323645, + "grad_norm": 0.0045107827367743885, + "learning_rate": 2.378088077336198e-06, + "loss": 0.0, + "step": 12204 + }, + { + "epoch": 0.7865566797705742, + "grad_norm": 0.14999968012139864, + "learning_rate": 2.377372001432152e-06, + "loss": 0.0013, + "step": 12205 + }, + { + "epoch": 0.7866211252175034, + "grad_norm": 0.09314629267320713, + "learning_rate": 2.376655925528106e-06, + "loss": 0.0001, + "step": 12206 + }, + { + "epoch": 0.7866855706644326, + "grad_norm": 0.0022869868699410103, + "learning_rate": 2.3759398496240604e-06, + "loss": 0.0, + "step": 12207 + }, + { + "epoch": 0.7867500161113617, + "grad_norm": 0.00021443939616199012, + "learning_rate": 2.3752237737200147e-06, + "loss": 0.0, + "step": 12208 + }, + { + "epoch": 0.7868144615582909, + "grad_norm": 0.007957057997663745, + "learning_rate": 2.3745076978159686e-06, + "loss": 0.0, + "step": 12209 + }, + { + "epoch": 0.7868789070052201, + "grad_norm": 0.047592136178799455, + "learning_rate": 2.373791621911923e-06, + "loss": 0.0, + "step": 12210 + }, + { + "epoch": 0.7869433524521493, + "grad_norm": 0.004593912436612267, + "learning_rate": 2.3730755460078768e-06, + "loss": 0.0, + "step": 12211 + }, + { + "epoch": 0.7870077978990784, + "grad_norm": 0.012097084100934885, + "learning_rate": 2.3723594701038315e-06, + "loss": 0.0001, + "step": 12212 + }, + { + "epoch": 0.7870722433460076, + "grad_norm": 0.02758894133465858, + "learning_rate": 2.3716433941997854e-06, + "loss": 0.0, + "step": 12213 + }, + { + "epoch": 0.7871366887929367, + "grad_norm": 0.000602953256711079, + "learning_rate": 2.3709273182957397e-06, + "loss": 0.0, + "step": 12214 + }, + { + "epoch": 0.7872011342398659, + "grad_norm": 0.004375077774056914, + "learning_rate": 2.3702112423916936e-06, + "loss": 0.0, + "step": 12215 + }, + { + "epoch": 0.7872655796867951, + "grad_norm": 0.0025716119276872535, + "learning_rate": 2.369495166487648e-06, + "loss": 0.0, + "step": 12216 + }, + { + "epoch": 0.7873300251337243, + "grad_norm": 0.01916180191521439, + "learning_rate": 2.368779090583602e-06, + "loss": 0.0001, + "step": 12217 + }, + { + "epoch": 0.7873944705806535, + "grad_norm": 0.15724028651836391, + "learning_rate": 2.3680630146795564e-06, + "loss": 0.0004, + "step": 12218 + }, + { + "epoch": 0.7874589160275827, + "grad_norm": 0.14482280604470232, + "learning_rate": 2.3673469387755103e-06, + "loss": 0.0017, + "step": 12219 + }, + { + "epoch": 0.7875233614745119, + "grad_norm": 0.17595384919038112, + "learning_rate": 2.3666308628714646e-06, + "loss": 0.0005, + "step": 12220 + }, + { + "epoch": 0.787587806921441, + "grad_norm": 0.11437295345132624, + "learning_rate": 2.365914786967419e-06, + "loss": 0.0003, + "step": 12221 + }, + { + "epoch": 0.7876522523683702, + "grad_norm": 0.027399682842273777, + "learning_rate": 2.365198711063373e-06, + "loss": 0.0, + "step": 12222 + }, + { + "epoch": 0.7877166978152993, + "grad_norm": 0.00710695761001995, + "learning_rate": 2.364482635159327e-06, + "loss": 0.0, + "step": 12223 + }, + { + "epoch": 0.7877811432622285, + "grad_norm": 0.0010690633257261582, + "learning_rate": 2.3637665592552814e-06, + "loss": 0.0, + "step": 12224 + }, + { + "epoch": 0.7878455887091577, + "grad_norm": 0.3357881946120721, + "learning_rate": 2.3630504833512357e-06, + "loss": 0.0006, + "step": 12225 + }, + { + "epoch": 0.7879100341560868, + "grad_norm": 0.011652891799082953, + "learning_rate": 2.3623344074471896e-06, + "loss": 0.0, + "step": 12226 + }, + { + "epoch": 0.787974479603016, + "grad_norm": 0.002222747962640288, + "learning_rate": 2.361618331543144e-06, + "loss": 0.0, + "step": 12227 + }, + { + "epoch": 0.7880389250499452, + "grad_norm": 0.010843309689273661, + "learning_rate": 2.3609022556390977e-06, + "loss": 0.0001, + "step": 12228 + }, + { + "epoch": 0.7881033704968744, + "grad_norm": 0.46336933926030965, + "learning_rate": 2.360186179735052e-06, + "loss": 0.0068, + "step": 12229 + }, + { + "epoch": 0.7881678159438036, + "grad_norm": 0.0005158168464518133, + "learning_rate": 2.3594701038310063e-06, + "loss": 0.0015, + "step": 12230 + }, + { + "epoch": 0.7882322613907328, + "grad_norm": 0.00042800521227334665, + "learning_rate": 2.3587540279269606e-06, + "loss": 0.0, + "step": 12231 + }, + { + "epoch": 0.788296706837662, + "grad_norm": 0.0375830123299696, + "learning_rate": 2.3580379520229145e-06, + "loss": 0.0003, + "step": 12232 + }, + { + "epoch": 0.7883611522845911, + "grad_norm": 0.3342013072829814, + "learning_rate": 2.357321876118869e-06, + "loss": 0.0003, + "step": 12233 + }, + { + "epoch": 0.7884255977315203, + "grad_norm": 0.0008485553032904481, + "learning_rate": 2.3566058002148227e-06, + "loss": 0.0, + "step": 12234 + }, + { + "epoch": 0.7884900431784494, + "grad_norm": 0.4544385418544847, + "learning_rate": 2.355889724310777e-06, + "loss": 0.0008, + "step": 12235 + }, + { + "epoch": 0.7885544886253786, + "grad_norm": 0.0010409078297272856, + "learning_rate": 2.3551736484067313e-06, + "loss": 0.0, + "step": 12236 + }, + { + "epoch": 0.7886189340723078, + "grad_norm": 0.002169620912121323, + "learning_rate": 2.3544575725026856e-06, + "loss": 0.0, + "step": 12237 + }, + { + "epoch": 0.788683379519237, + "grad_norm": 0.0017981680341119573, + "learning_rate": 2.3537414965986395e-06, + "loss": 0.0, + "step": 12238 + }, + { + "epoch": 0.7887478249661661, + "grad_norm": 0.04692421753212694, + "learning_rate": 2.3530254206945938e-06, + "loss": 0.0004, + "step": 12239 + }, + { + "epoch": 0.7888122704130953, + "grad_norm": 0.040648232692013384, + "learning_rate": 2.352309344790548e-06, + "loss": 0.0001, + "step": 12240 + }, + { + "epoch": 0.7888767158600245, + "grad_norm": 0.21630014614031065, + "learning_rate": 2.351593268886502e-06, + "loss": 0.0003, + "step": 12241 + }, + { + "epoch": 0.7889411613069537, + "grad_norm": 0.0016455505937524788, + "learning_rate": 2.3508771929824562e-06, + "loss": 0.0, + "step": 12242 + }, + { + "epoch": 0.7890056067538829, + "grad_norm": 1.7173800802001882e-05, + "learning_rate": 2.3501611170784105e-06, + "loss": 0.0, + "step": 12243 + }, + { + "epoch": 0.789070052200812, + "grad_norm": 0.3804484963166623, + "learning_rate": 2.349445041174365e-06, + "loss": 0.0029, + "step": 12244 + }, + { + "epoch": 0.7891344976477412, + "grad_norm": 0.0006245605744261292, + "learning_rate": 2.3487289652703187e-06, + "loss": 0.0, + "step": 12245 + }, + { + "epoch": 0.7891989430946703, + "grad_norm": 0.0016185882460237843, + "learning_rate": 2.348012889366273e-06, + "loss": 0.0, + "step": 12246 + }, + { + "epoch": 0.7892633885415995, + "grad_norm": 6.474478917892943e-05, + "learning_rate": 2.3472968134622273e-06, + "loss": 0.0, + "step": 12247 + }, + { + "epoch": 0.7893278339885287, + "grad_norm": 0.0010739064109665784, + "learning_rate": 2.3465807375581816e-06, + "loss": 0.0, + "step": 12248 + }, + { + "epoch": 0.7893922794354579, + "grad_norm": 0.005548181129234342, + "learning_rate": 2.3458646616541355e-06, + "loss": 0.0001, + "step": 12249 + }, + { + "epoch": 0.789456724882387, + "grad_norm": 0.008573517040880186, + "learning_rate": 2.3451485857500898e-06, + "loss": 0.0, + "step": 12250 + }, + { + "epoch": 0.7895211703293162, + "grad_norm": 0.0006694098065241521, + "learning_rate": 2.3444325098460436e-06, + "loss": 0.0, + "step": 12251 + }, + { + "epoch": 0.7895856157762454, + "grad_norm": 0.027516989197968755, + "learning_rate": 2.343716433941998e-06, + "loss": 0.0, + "step": 12252 + }, + { + "epoch": 0.7896500612231746, + "grad_norm": 0.0049549690740599, + "learning_rate": 2.3430003580379522e-06, + "loss": 0.0, + "step": 12253 + }, + { + "epoch": 0.7897145066701038, + "grad_norm": 0.0029326547432867644, + "learning_rate": 2.3422842821339065e-06, + "loss": 0.0, + "step": 12254 + }, + { + "epoch": 0.789778952117033, + "grad_norm": 0.025886055570603583, + "learning_rate": 2.3415682062298604e-06, + "loss": 0.0, + "step": 12255 + }, + { + "epoch": 0.7898433975639622, + "grad_norm": 0.012209315086344184, + "learning_rate": 2.3408521303258147e-06, + "loss": 0.0, + "step": 12256 + }, + { + "epoch": 0.7899078430108912, + "grad_norm": 6.541967178543215e-05, + "learning_rate": 2.340136054421769e-06, + "loss": 0.0, + "step": 12257 + }, + { + "epoch": 0.7899722884578204, + "grad_norm": 0.002371812470589662, + "learning_rate": 2.339419978517723e-06, + "loss": 0.0, + "step": 12258 + }, + { + "epoch": 0.7900367339047496, + "grad_norm": 0.11401620408703504, + "learning_rate": 2.338703902613677e-06, + "loss": 0.0019, + "step": 12259 + }, + { + "epoch": 0.7901011793516788, + "grad_norm": 0.012610422179252519, + "learning_rate": 2.3379878267096315e-06, + "loss": 0.0, + "step": 12260 + }, + { + "epoch": 0.790165624798608, + "grad_norm": 0.00046666391484797036, + "learning_rate": 2.3372717508055858e-06, + "loss": 0.0, + "step": 12261 + }, + { + "epoch": 0.7902300702455372, + "grad_norm": 0.00217984363643231, + "learning_rate": 2.3365556749015397e-06, + "loss": 0.0, + "step": 12262 + }, + { + "epoch": 0.7902945156924663, + "grad_norm": 0.008862938690758203, + "learning_rate": 2.335839598997494e-06, + "loss": 0.0, + "step": 12263 + }, + { + "epoch": 0.7903589611393955, + "grad_norm": 0.05935538910762845, + "learning_rate": 2.335123523093448e-06, + "loss": 0.0001, + "step": 12264 + }, + { + "epoch": 0.7904234065863247, + "grad_norm": 0.0005315012449867554, + "learning_rate": 2.3344074471894026e-06, + "loss": 0.0, + "step": 12265 + }, + { + "epoch": 0.7904878520332539, + "grad_norm": 0.0012946143773809717, + "learning_rate": 2.3336913712853564e-06, + "loss": 0.0, + "step": 12266 + }, + { + "epoch": 0.7905522974801831, + "grad_norm": 0.022759214076861673, + "learning_rate": 2.3329752953813107e-06, + "loss": 0.0, + "step": 12267 + }, + { + "epoch": 0.7906167429271121, + "grad_norm": 0.0003268068807733046, + "learning_rate": 2.3322592194772646e-06, + "loss": 0.0, + "step": 12268 + }, + { + "epoch": 0.7906811883740413, + "grad_norm": 0.3537525626450311, + "learning_rate": 2.331543143573219e-06, + "loss": 0.0012, + "step": 12269 + }, + { + "epoch": 0.7907456338209705, + "grad_norm": 0.01784420948702125, + "learning_rate": 2.330827067669173e-06, + "loss": 0.0001, + "step": 12270 + }, + { + "epoch": 0.7908100792678997, + "grad_norm": 0.001048223111041715, + "learning_rate": 2.3301109917651275e-06, + "loss": 0.0, + "step": 12271 + }, + { + "epoch": 0.7908745247148289, + "grad_norm": 0.0011720714576711842, + "learning_rate": 2.3293949158610814e-06, + "loss": 0.0, + "step": 12272 + }, + { + "epoch": 0.7909389701617581, + "grad_norm": 0.09711526275150101, + "learning_rate": 2.3286788399570357e-06, + "loss": 0.0001, + "step": 12273 + }, + { + "epoch": 0.7910034156086873, + "grad_norm": 0.0004784918328230193, + "learning_rate": 2.32796276405299e-06, + "loss": 0.0, + "step": 12274 + }, + { + "epoch": 0.7910678610556164, + "grad_norm": 0.0019445352077134038, + "learning_rate": 2.327246688148944e-06, + "loss": 0.0, + "step": 12275 + }, + { + "epoch": 0.7911323065025456, + "grad_norm": 0.0010747047967742114, + "learning_rate": 2.326530612244898e-06, + "loss": 0.0, + "step": 12276 + }, + { + "epoch": 0.7911967519494748, + "grad_norm": 0.0008913434106858376, + "learning_rate": 2.3258145363408524e-06, + "loss": 0.0, + "step": 12277 + }, + { + "epoch": 0.791261197396404, + "grad_norm": 0.0008809684577119583, + "learning_rate": 2.3250984604368067e-06, + "loss": 0.0, + "step": 12278 + }, + { + "epoch": 0.7913256428433331, + "grad_norm": 0.0033671814651020987, + "learning_rate": 2.3243823845327606e-06, + "loss": 0.0, + "step": 12279 + }, + { + "epoch": 0.7913900882902623, + "grad_norm": 0.0011540542898446035, + "learning_rate": 2.323666308628715e-06, + "loss": 0.0, + "step": 12280 + }, + { + "epoch": 0.7914545337371914, + "grad_norm": 0.003371865755117032, + "learning_rate": 2.322950232724669e-06, + "loss": 0.0, + "step": 12281 + }, + { + "epoch": 0.7915189791841206, + "grad_norm": 0.00016563388657528873, + "learning_rate": 2.3222341568206235e-06, + "loss": 0.0, + "step": 12282 + }, + { + "epoch": 0.7915834246310498, + "grad_norm": 0.09632593516717095, + "learning_rate": 2.3215180809165774e-06, + "loss": 0.0013, + "step": 12283 + }, + { + "epoch": 0.791647870077979, + "grad_norm": 0.0017298544745243945, + "learning_rate": 2.3208020050125317e-06, + "loss": 0.0, + "step": 12284 + }, + { + "epoch": 0.7917123155249082, + "grad_norm": 0.010223582251694568, + "learning_rate": 2.3200859291084856e-06, + "loss": 0.0001, + "step": 12285 + }, + { + "epoch": 0.7917767609718374, + "grad_norm": 0.003706090422825231, + "learning_rate": 2.31936985320444e-06, + "loss": 0.0, + "step": 12286 + }, + { + "epoch": 0.7918412064187665, + "grad_norm": 0.5455833713255104, + "learning_rate": 2.3186537773003937e-06, + "loss": 0.0033, + "step": 12287 + }, + { + "epoch": 0.7919056518656957, + "grad_norm": 0.005634153863743244, + "learning_rate": 2.3179377013963485e-06, + "loss": 0.0, + "step": 12288 + }, + { + "epoch": 0.7919700973126249, + "grad_norm": 0.00811497967985401, + "learning_rate": 2.3172216254923023e-06, + "loss": 0.0, + "step": 12289 + }, + { + "epoch": 0.792034542759554, + "grad_norm": 0.023416422764268287, + "learning_rate": 2.3165055495882566e-06, + "loss": 0.0, + "step": 12290 + }, + { + "epoch": 0.7920989882064832, + "grad_norm": 0.000397575294584166, + "learning_rate": 2.3157894736842105e-06, + "loss": 0.0, + "step": 12291 + }, + { + "epoch": 0.7921634336534124, + "grad_norm": 0.09261146259030603, + "learning_rate": 2.315073397780165e-06, + "loss": 0.0007, + "step": 12292 + }, + { + "epoch": 0.7922278791003415, + "grad_norm": 6.736243921635722e-05, + "learning_rate": 2.314357321876119e-06, + "loss": 0.0, + "step": 12293 + }, + { + "epoch": 0.7922923245472707, + "grad_norm": 8.188848771461251e-05, + "learning_rate": 2.3136412459720734e-06, + "loss": 0.0, + "step": 12294 + }, + { + "epoch": 0.7923567699941999, + "grad_norm": 0.5248949627024653, + "learning_rate": 2.3129251700680273e-06, + "loss": 0.0005, + "step": 12295 + }, + { + "epoch": 0.7924212154411291, + "grad_norm": 0.0022065020062548503, + "learning_rate": 2.3122090941639816e-06, + "loss": 0.0, + "step": 12296 + }, + { + "epoch": 0.7924856608880583, + "grad_norm": 0.0012940578166477242, + "learning_rate": 2.311493018259936e-06, + "loss": 0.0, + "step": 12297 + }, + { + "epoch": 0.7925501063349875, + "grad_norm": 0.0008661635742292246, + "learning_rate": 2.3107769423558898e-06, + "loss": 0.0, + "step": 12298 + }, + { + "epoch": 0.7926145517819166, + "grad_norm": 0.012853991928710864, + "learning_rate": 2.310060866451844e-06, + "loss": 0.0, + "step": 12299 + }, + { + "epoch": 0.7926789972288458, + "grad_norm": 0.0013576044205097536, + "learning_rate": 2.3093447905477984e-06, + "loss": 0.0, + "step": 12300 + }, + { + "epoch": 0.792743442675775, + "grad_norm": 6.910762728236609e-05, + "learning_rate": 2.3086287146437527e-06, + "loss": 0.0, + "step": 12301 + }, + { + "epoch": 0.7928078881227041, + "grad_norm": 0.0067343948025941945, + "learning_rate": 2.3079126387397065e-06, + "loss": 0.0, + "step": 12302 + }, + { + "epoch": 0.7928723335696333, + "grad_norm": 0.13638891214778198, + "learning_rate": 2.307196562835661e-06, + "loss": 0.0004, + "step": 12303 + }, + { + "epoch": 0.7929367790165625, + "grad_norm": 0.32272106701892816, + "learning_rate": 2.3064804869316147e-06, + "loss": 0.0012, + "step": 12304 + }, + { + "epoch": 0.7930012244634916, + "grad_norm": 0.0008684087055918425, + "learning_rate": 2.305764411027569e-06, + "loss": 0.0, + "step": 12305 + }, + { + "epoch": 0.7930656699104208, + "grad_norm": 0.0001796671162931569, + "learning_rate": 2.3050483351235233e-06, + "loss": 0.0, + "step": 12306 + }, + { + "epoch": 0.79313011535735, + "grad_norm": 0.22511224430501187, + "learning_rate": 2.3043322592194776e-06, + "loss": 0.0007, + "step": 12307 + }, + { + "epoch": 0.7931945608042792, + "grad_norm": 0.0004992730688903502, + "learning_rate": 2.3036161833154315e-06, + "loss": 0.0, + "step": 12308 + }, + { + "epoch": 0.7932590062512084, + "grad_norm": 0.006909952409031217, + "learning_rate": 2.3029001074113858e-06, + "loss": 0.0, + "step": 12309 + }, + { + "epoch": 0.7933234516981376, + "grad_norm": 0.0040115938022819, + "learning_rate": 2.30218403150734e-06, + "loss": 0.0, + "step": 12310 + }, + { + "epoch": 0.7933878971450667, + "grad_norm": 0.0014633534190212837, + "learning_rate": 2.301467955603294e-06, + "loss": 0.0, + "step": 12311 + }, + { + "epoch": 0.7934523425919959, + "grad_norm": 0.012486874126142355, + "learning_rate": 2.3007518796992482e-06, + "loss": 0.0, + "step": 12312 + }, + { + "epoch": 0.793516788038925, + "grad_norm": 0.0010191498823039192, + "learning_rate": 2.3000358037952025e-06, + "loss": 0.0, + "step": 12313 + }, + { + "epoch": 0.7935812334858542, + "grad_norm": 0.004574874004510897, + "learning_rate": 2.299319727891157e-06, + "loss": 0.0, + "step": 12314 + }, + { + "epoch": 0.7936456789327834, + "grad_norm": 0.00018292927670207949, + "learning_rate": 2.2986036519871107e-06, + "loss": 0.0, + "step": 12315 + }, + { + "epoch": 0.7937101243797126, + "grad_norm": 0.2433858495795913, + "learning_rate": 2.297887576083065e-06, + "loss": 0.0025, + "step": 12316 + }, + { + "epoch": 0.7937745698266417, + "grad_norm": 0.000507205422647784, + "learning_rate": 2.2971715001790193e-06, + "loss": 0.0, + "step": 12317 + }, + { + "epoch": 0.7938390152735709, + "grad_norm": 0.1609688126555202, + "learning_rate": 2.2964554242749736e-06, + "loss": 0.0019, + "step": 12318 + }, + { + "epoch": 0.7939034607205001, + "grad_norm": 0.0012293050600295469, + "learning_rate": 2.2957393483709275e-06, + "loss": 0.0, + "step": 12319 + }, + { + "epoch": 0.7939679061674293, + "grad_norm": 0.0009011055666482163, + "learning_rate": 2.2950232724668818e-06, + "loss": 0.0, + "step": 12320 + }, + { + "epoch": 0.7940323516143585, + "grad_norm": 0.002458300198121493, + "learning_rate": 2.2943071965628357e-06, + "loss": 0.0, + "step": 12321 + }, + { + "epoch": 0.7940967970612877, + "grad_norm": 0.010545672807899122, + "learning_rate": 2.29359112065879e-06, + "loss": 0.0001, + "step": 12322 + }, + { + "epoch": 0.7941612425082168, + "grad_norm": 0.0006009889248218322, + "learning_rate": 2.2928750447547443e-06, + "loss": 0.0, + "step": 12323 + }, + { + "epoch": 0.7942256879551459, + "grad_norm": 0.0002882234106889472, + "learning_rate": 2.2921589688506986e-06, + "loss": 0.0, + "step": 12324 + }, + { + "epoch": 0.7942901334020751, + "grad_norm": 7.939152417382084e-05, + "learning_rate": 2.2914428929466524e-06, + "loss": 0.0, + "step": 12325 + }, + { + "epoch": 0.7943545788490043, + "grad_norm": 0.0004449329234502113, + "learning_rate": 2.2907268170426067e-06, + "loss": 0.0, + "step": 12326 + }, + { + "epoch": 0.7944190242959335, + "grad_norm": 0.00014424612203982502, + "learning_rate": 2.2900107411385606e-06, + "loss": 0.0, + "step": 12327 + }, + { + "epoch": 0.7944834697428627, + "grad_norm": 0.0031572437902694242, + "learning_rate": 2.289294665234515e-06, + "loss": 0.0, + "step": 12328 + }, + { + "epoch": 0.7945479151897918, + "grad_norm": 0.02013882993644046, + "learning_rate": 2.288578589330469e-06, + "loss": 0.0, + "step": 12329 + }, + { + "epoch": 0.794612360636721, + "grad_norm": 0.0006641821282922399, + "learning_rate": 2.2878625134264235e-06, + "loss": 0.0, + "step": 12330 + }, + { + "epoch": 0.7946768060836502, + "grad_norm": 0.0964850576276222, + "learning_rate": 2.2871464375223774e-06, + "loss": 0.0003, + "step": 12331 + }, + { + "epoch": 0.7947412515305794, + "grad_norm": 0.0007458173316273714, + "learning_rate": 2.2864303616183317e-06, + "loss": 0.0, + "step": 12332 + }, + { + "epoch": 0.7948056969775086, + "grad_norm": 0.044678187758849984, + "learning_rate": 2.285714285714286e-06, + "loss": 0.0001, + "step": 12333 + }, + { + "epoch": 0.7948701424244378, + "grad_norm": 0.012587534549491937, + "learning_rate": 2.28499820981024e-06, + "loss": 0.0001, + "step": 12334 + }, + { + "epoch": 0.7949345878713668, + "grad_norm": 0.11678656098745568, + "learning_rate": 2.284282133906194e-06, + "loss": 0.0002, + "step": 12335 + }, + { + "epoch": 0.794999033318296, + "grad_norm": 0.0010133240072671896, + "learning_rate": 2.2835660580021484e-06, + "loss": 0.0, + "step": 12336 + }, + { + "epoch": 0.7950634787652252, + "grad_norm": 0.01128362238810577, + "learning_rate": 2.2828499820981027e-06, + "loss": 0.0, + "step": 12337 + }, + { + "epoch": 0.7951279242121544, + "grad_norm": 0.325144468806227, + "learning_rate": 2.2821339061940566e-06, + "loss": 0.0024, + "step": 12338 + }, + { + "epoch": 0.7951923696590836, + "grad_norm": 0.0003143201195948108, + "learning_rate": 2.281417830290011e-06, + "loss": 0.0, + "step": 12339 + }, + { + "epoch": 0.7952568151060128, + "grad_norm": 0.00018327716875614307, + "learning_rate": 2.280701754385965e-06, + "loss": 0.0, + "step": 12340 + }, + { + "epoch": 0.7953212605529419, + "grad_norm": 0.00038591333705380033, + "learning_rate": 2.2799856784819195e-06, + "loss": 0.0, + "step": 12341 + }, + { + "epoch": 0.7953857059998711, + "grad_norm": 0.0012024442904717586, + "learning_rate": 2.2792696025778734e-06, + "loss": 0.0, + "step": 12342 + }, + { + "epoch": 0.7954501514468003, + "grad_norm": 0.00036475198609929637, + "learning_rate": 2.2785535266738277e-06, + "loss": 0.0, + "step": 12343 + }, + { + "epoch": 0.7955145968937295, + "grad_norm": 0.009867178122339677, + "learning_rate": 2.2778374507697816e-06, + "loss": 0.0, + "step": 12344 + }, + { + "epoch": 0.7955790423406587, + "grad_norm": 0.00080797752438696, + "learning_rate": 2.277121374865736e-06, + "loss": 0.0, + "step": 12345 + }, + { + "epoch": 0.7956434877875878, + "grad_norm": 0.000350938664247268, + "learning_rate": 2.27640529896169e-06, + "loss": 0.0, + "step": 12346 + }, + { + "epoch": 0.7957079332345169, + "grad_norm": 0.0004998791925092817, + "learning_rate": 2.2756892230576445e-06, + "loss": 0.0, + "step": 12347 + }, + { + "epoch": 0.7957723786814461, + "grad_norm": 0.023357191681495117, + "learning_rate": 2.2749731471535983e-06, + "loss": 0.0001, + "step": 12348 + }, + { + "epoch": 0.7958368241283753, + "grad_norm": 0.3889522787023688, + "learning_rate": 2.2742570712495526e-06, + "loss": 0.0028, + "step": 12349 + }, + { + "epoch": 0.7959012695753045, + "grad_norm": 0.026527525468430544, + "learning_rate": 2.273540995345507e-06, + "loss": 0.0, + "step": 12350 + }, + { + "epoch": 0.7959657150222337, + "grad_norm": 0.0003701205214895632, + "learning_rate": 2.272824919441461e-06, + "loss": 0.0, + "step": 12351 + }, + { + "epoch": 0.7960301604691629, + "grad_norm": 0.048662335749158375, + "learning_rate": 2.272108843537415e-06, + "loss": 0.0005, + "step": 12352 + }, + { + "epoch": 0.796094605916092, + "grad_norm": 0.0028033741150151723, + "learning_rate": 2.2713927676333694e-06, + "loss": 0.0, + "step": 12353 + }, + { + "epoch": 0.7961590513630212, + "grad_norm": 0.005704388075964442, + "learning_rate": 2.2706766917293237e-06, + "loss": 0.0, + "step": 12354 + }, + { + "epoch": 0.7962234968099504, + "grad_norm": 0.002117519304700571, + "learning_rate": 2.2699606158252776e-06, + "loss": 0.0, + "step": 12355 + }, + { + "epoch": 0.7962879422568796, + "grad_norm": 0.02104627910229218, + "learning_rate": 2.269244539921232e-06, + "loss": 0.0001, + "step": 12356 + }, + { + "epoch": 0.7963523877038087, + "grad_norm": 0.0002981008190228777, + "learning_rate": 2.2685284640171858e-06, + "loss": 0.0, + "step": 12357 + }, + { + "epoch": 0.7964168331507379, + "grad_norm": 0.15222814648679983, + "learning_rate": 2.2678123881131405e-06, + "loss": 0.0034, + "step": 12358 + }, + { + "epoch": 0.796481278597667, + "grad_norm": 0.0007378246127458366, + "learning_rate": 2.2670963122090944e-06, + "loss": 0.0, + "step": 12359 + }, + { + "epoch": 0.7965457240445962, + "grad_norm": 0.0003285574545467159, + "learning_rate": 2.2663802363050487e-06, + "loss": 0.0, + "step": 12360 + }, + { + "epoch": 0.7966101694915254, + "grad_norm": 0.011981375678915126, + "learning_rate": 2.2656641604010025e-06, + "loss": 0.0, + "step": 12361 + }, + { + "epoch": 0.7966746149384546, + "grad_norm": 0.0715815572735966, + "learning_rate": 2.264948084496957e-06, + "loss": 0.0, + "step": 12362 + }, + { + "epoch": 0.7967390603853838, + "grad_norm": 0.0077975512422411635, + "learning_rate": 2.264232008592911e-06, + "loss": 0.0, + "step": 12363 + }, + { + "epoch": 0.796803505832313, + "grad_norm": 0.00018230401913338704, + "learning_rate": 2.2635159326888654e-06, + "loss": 0.0, + "step": 12364 + }, + { + "epoch": 0.7968679512792421, + "grad_norm": 0.00044439336476779, + "learning_rate": 2.2627998567848193e-06, + "loss": 0.0, + "step": 12365 + }, + { + "epoch": 0.7969323967261713, + "grad_norm": 0.0558334291996121, + "learning_rate": 2.2620837808807736e-06, + "loss": 0.0, + "step": 12366 + }, + { + "epoch": 0.7969968421731005, + "grad_norm": 0.0017058114736429463, + "learning_rate": 2.261367704976728e-06, + "loss": 0.0, + "step": 12367 + }, + { + "epoch": 0.7970612876200296, + "grad_norm": 0.0031099253657042403, + "learning_rate": 2.2606516290726818e-06, + "loss": 0.0, + "step": 12368 + }, + { + "epoch": 0.7971257330669588, + "grad_norm": 0.07728687070940958, + "learning_rate": 2.259935553168636e-06, + "loss": 0.0008, + "step": 12369 + }, + { + "epoch": 0.797190178513888, + "grad_norm": 0.1450721760764216, + "learning_rate": 2.2592194772645904e-06, + "loss": 0.0005, + "step": 12370 + }, + { + "epoch": 0.7972546239608171, + "grad_norm": 0.2237326802977375, + "learning_rate": 2.2585034013605447e-06, + "loss": 0.0004, + "step": 12371 + }, + { + "epoch": 0.7973190694077463, + "grad_norm": 0.010944975406888244, + "learning_rate": 2.2577873254564985e-06, + "loss": 0.0, + "step": 12372 + }, + { + "epoch": 0.7973835148546755, + "grad_norm": 0.0016884389008774191, + "learning_rate": 2.257071249552453e-06, + "loss": 0.0, + "step": 12373 + }, + { + "epoch": 0.7974479603016047, + "grad_norm": 0.00034746512207015046, + "learning_rate": 2.2563551736484067e-06, + "loss": 0.0, + "step": 12374 + }, + { + "epoch": 0.7975124057485339, + "grad_norm": 0.005475084699684978, + "learning_rate": 2.255639097744361e-06, + "loss": 0.0, + "step": 12375 + }, + { + "epoch": 0.7975768511954631, + "grad_norm": 0.2716014115799345, + "learning_rate": 2.2549230218403153e-06, + "loss": 0.001, + "step": 12376 + }, + { + "epoch": 0.7976412966423923, + "grad_norm": 0.001062484250198185, + "learning_rate": 2.2542069459362696e-06, + "loss": 0.0, + "step": 12377 + }, + { + "epoch": 0.7977057420893214, + "grad_norm": 0.007033136720229323, + "learning_rate": 2.2534908700322235e-06, + "loss": 0.0001, + "step": 12378 + }, + { + "epoch": 0.7977701875362506, + "grad_norm": 0.022493932942061604, + "learning_rate": 2.2527747941281778e-06, + "loss": 0.0, + "step": 12379 + }, + { + "epoch": 0.7978346329831797, + "grad_norm": 0.0005868051093204366, + "learning_rate": 2.2520587182241317e-06, + "loss": 0.0, + "step": 12380 + }, + { + "epoch": 0.7978990784301089, + "grad_norm": 0.01206821548237533, + "learning_rate": 2.251342642320086e-06, + "loss": 0.0, + "step": 12381 + }, + { + "epoch": 0.7979635238770381, + "grad_norm": 0.003972101955416267, + "learning_rate": 2.2506265664160403e-06, + "loss": 0.0, + "step": 12382 + }, + { + "epoch": 0.7980279693239672, + "grad_norm": 0.019091583821505498, + "learning_rate": 2.2499104905119946e-06, + "loss": 0.0002, + "step": 12383 + }, + { + "epoch": 0.7980924147708964, + "grad_norm": 0.0028018271371107813, + "learning_rate": 2.2491944146079484e-06, + "loss": 0.0, + "step": 12384 + }, + { + "epoch": 0.7981568602178256, + "grad_norm": 0.018689290061926607, + "learning_rate": 2.2484783387039027e-06, + "loss": 0.0, + "step": 12385 + }, + { + "epoch": 0.7982213056647548, + "grad_norm": 0.0010994308213438746, + "learning_rate": 2.247762262799857e-06, + "loss": 0.0, + "step": 12386 + }, + { + "epoch": 0.798285751111684, + "grad_norm": 0.0011902206978131014, + "learning_rate": 2.2470461868958113e-06, + "loss": 0.0, + "step": 12387 + }, + { + "epoch": 0.7983501965586132, + "grad_norm": 0.00018473605557963436, + "learning_rate": 2.246330110991765e-06, + "loss": 0.0, + "step": 12388 + }, + { + "epoch": 0.7984146420055424, + "grad_norm": 0.0027828798294523296, + "learning_rate": 2.2456140350877195e-06, + "loss": 0.0, + "step": 12389 + }, + { + "epoch": 0.7984790874524715, + "grad_norm": 0.0026909045095040935, + "learning_rate": 2.244897959183674e-06, + "loss": 0.0, + "step": 12390 + }, + { + "epoch": 0.7985435328994006, + "grad_norm": 0.04215782510421399, + "learning_rate": 2.2441818832796277e-06, + "loss": 0.0, + "step": 12391 + }, + { + "epoch": 0.7986079783463298, + "grad_norm": 0.004400490777957583, + "learning_rate": 2.243465807375582e-06, + "loss": 0.0, + "step": 12392 + }, + { + "epoch": 0.798672423793259, + "grad_norm": 0.0009927995263724551, + "learning_rate": 2.2427497314715363e-06, + "loss": 0.0, + "step": 12393 + }, + { + "epoch": 0.7987368692401882, + "grad_norm": 0.0008656766188128951, + "learning_rate": 2.2420336555674906e-06, + "loss": 0.0, + "step": 12394 + }, + { + "epoch": 0.7988013146871173, + "grad_norm": 0.11693899508875823, + "learning_rate": 2.2413175796634444e-06, + "loss": 0.0012, + "step": 12395 + }, + { + "epoch": 0.7988657601340465, + "grad_norm": 0.015184840303952818, + "learning_rate": 2.2406015037593987e-06, + "loss": 0.0, + "step": 12396 + }, + { + "epoch": 0.7989302055809757, + "grad_norm": 0.0008131608440570534, + "learning_rate": 2.2398854278553526e-06, + "loss": 0.0, + "step": 12397 + }, + { + "epoch": 0.7989946510279049, + "grad_norm": 0.008973112267477039, + "learning_rate": 2.239169351951307e-06, + "loss": 0.0, + "step": 12398 + }, + { + "epoch": 0.7990590964748341, + "grad_norm": 0.4256889173153369, + "learning_rate": 2.2384532760472612e-06, + "loss": 0.0007, + "step": 12399 + }, + { + "epoch": 0.7991235419217633, + "grad_norm": 0.0014311981142707912, + "learning_rate": 2.2377372001432155e-06, + "loss": 0.0, + "step": 12400 + }, + { + "epoch": 0.7991879873686925, + "grad_norm": 0.0038494384225200677, + "learning_rate": 2.2370211242391694e-06, + "loss": 0.0, + "step": 12401 + }, + { + "epoch": 0.7992524328156215, + "grad_norm": 0.00021152556432885007, + "learning_rate": 2.2363050483351237e-06, + "loss": 0.0, + "step": 12402 + }, + { + "epoch": 0.7993168782625507, + "grad_norm": 0.0004490586247319541, + "learning_rate": 2.235588972431078e-06, + "loss": 0.0, + "step": 12403 + }, + { + "epoch": 0.7993813237094799, + "grad_norm": 0.0006710799683837646, + "learning_rate": 2.234872896527032e-06, + "loss": 0.0, + "step": 12404 + }, + { + "epoch": 0.7994457691564091, + "grad_norm": 0.010726106254010393, + "learning_rate": 2.234156820622986e-06, + "loss": 0.0, + "step": 12405 + }, + { + "epoch": 0.7995102146033383, + "grad_norm": 0.0014574245169015954, + "learning_rate": 2.2334407447189405e-06, + "loss": 0.0, + "step": 12406 + }, + { + "epoch": 0.7995746600502674, + "grad_norm": 0.0005909390737096638, + "learning_rate": 2.2327246688148948e-06, + "loss": 0.0, + "step": 12407 + }, + { + "epoch": 0.7996391054971966, + "grad_norm": 0.00010923331761016114, + "learning_rate": 2.2320085929108486e-06, + "loss": 0.0, + "step": 12408 + }, + { + "epoch": 0.7997035509441258, + "grad_norm": 0.03501950265571546, + "learning_rate": 2.231292517006803e-06, + "loss": 0.0, + "step": 12409 + }, + { + "epoch": 0.799767996391055, + "grad_norm": 0.0004077594829991419, + "learning_rate": 2.230576441102757e-06, + "loss": 0.0, + "step": 12410 + }, + { + "epoch": 0.7998324418379842, + "grad_norm": 2.0668252348953236e-05, + "learning_rate": 2.2298603651987115e-06, + "loss": 0.0, + "step": 12411 + }, + { + "epoch": 0.7998968872849134, + "grad_norm": 0.0005704418239510764, + "learning_rate": 2.2291442892946654e-06, + "loss": 0.0, + "step": 12412 + }, + { + "epoch": 0.7999613327318424, + "grad_norm": 0.01770676458062447, + "learning_rate": 2.2284282133906197e-06, + "loss": 0.0, + "step": 12413 + }, + { + "epoch": 0.8000257781787716, + "grad_norm": 0.00979534521749774, + "learning_rate": 2.2277121374865736e-06, + "loss": 0.0001, + "step": 12414 + }, + { + "epoch": 0.8000902236257008, + "grad_norm": 0.006045575204337346, + "learning_rate": 2.226996061582528e-06, + "loss": 0.0, + "step": 12415 + }, + { + "epoch": 0.80015466907263, + "grad_norm": 0.006182677580691062, + "learning_rate": 2.226279985678482e-06, + "loss": 0.0, + "step": 12416 + }, + { + "epoch": 0.8002191145195592, + "grad_norm": 0.01825002921528511, + "learning_rate": 2.2255639097744365e-06, + "loss": 0.0, + "step": 12417 + }, + { + "epoch": 0.8002835599664884, + "grad_norm": 0.11108596716148732, + "learning_rate": 2.2248478338703904e-06, + "loss": 0.0002, + "step": 12418 + }, + { + "epoch": 0.8003480054134176, + "grad_norm": 0.004849966220652724, + "learning_rate": 2.2241317579663447e-06, + "loss": 0.0, + "step": 12419 + }, + { + "epoch": 0.8004124508603467, + "grad_norm": 0.01844083369199926, + "learning_rate": 2.223415682062299e-06, + "loss": 0.0001, + "step": 12420 + }, + { + "epoch": 0.8004768963072759, + "grad_norm": 0.011662986322466401, + "learning_rate": 2.222699606158253e-06, + "loss": 0.0015, + "step": 12421 + }, + { + "epoch": 0.8005413417542051, + "grad_norm": 0.00012785641594670615, + "learning_rate": 2.221983530254207e-06, + "loss": 0.0, + "step": 12422 + }, + { + "epoch": 0.8006057872011343, + "grad_norm": 0.00016684387752677132, + "learning_rate": 2.2212674543501614e-06, + "loss": 0.0, + "step": 12423 + }, + { + "epoch": 0.8006702326480634, + "grad_norm": 0.00021000222066320482, + "learning_rate": 2.2205513784461157e-06, + "loss": 0.0, + "step": 12424 + }, + { + "epoch": 0.8007346780949925, + "grad_norm": 0.00046518345635586757, + "learning_rate": 2.2198353025420696e-06, + "loss": 0.0, + "step": 12425 + }, + { + "epoch": 0.8007991235419217, + "grad_norm": 0.18923775731011344, + "learning_rate": 2.219119226638024e-06, + "loss": 0.0003, + "step": 12426 + }, + { + "epoch": 0.8008635689888509, + "grad_norm": 0.007316504578402545, + "learning_rate": 2.2184031507339778e-06, + "loss": 0.0001, + "step": 12427 + }, + { + "epoch": 0.8009280144357801, + "grad_norm": 0.004194083563563775, + "learning_rate": 2.2176870748299325e-06, + "loss": 0.0, + "step": 12428 + }, + { + "epoch": 0.8009924598827093, + "grad_norm": 0.0009184694452434844, + "learning_rate": 2.2169709989258864e-06, + "loss": 0.0, + "step": 12429 + }, + { + "epoch": 0.8010569053296385, + "grad_norm": 0.02332515788220043, + "learning_rate": 2.2162549230218407e-06, + "loss": 0.0, + "step": 12430 + }, + { + "epoch": 0.8011213507765677, + "grad_norm": 0.031507976952465794, + "learning_rate": 2.2155388471177945e-06, + "loss": 0.0001, + "step": 12431 + }, + { + "epoch": 0.8011857962234968, + "grad_norm": 2.666919954509735, + "learning_rate": 2.214822771213749e-06, + "loss": 0.0356, + "step": 12432 + }, + { + "epoch": 0.801250241670426, + "grad_norm": 0.001449000874599054, + "learning_rate": 2.2141066953097027e-06, + "loss": 0.0, + "step": 12433 + }, + { + "epoch": 0.8013146871173552, + "grad_norm": 0.004870778020393193, + "learning_rate": 2.2133906194056574e-06, + "loss": 0.0, + "step": 12434 + }, + { + "epoch": 0.8013791325642843, + "grad_norm": 7.898662380784237e-05, + "learning_rate": 2.2126745435016113e-06, + "loss": 0.0, + "step": 12435 + }, + { + "epoch": 0.8014435780112135, + "grad_norm": 0.0002028904110712959, + "learning_rate": 2.2119584675975656e-06, + "loss": 0.0, + "step": 12436 + }, + { + "epoch": 0.8015080234581426, + "grad_norm": 0.005889722100197126, + "learning_rate": 2.2112423916935195e-06, + "loss": 0.0, + "step": 12437 + }, + { + "epoch": 0.8015724689050718, + "grad_norm": 0.0014307871393988698, + "learning_rate": 2.2105263157894738e-06, + "loss": 0.0, + "step": 12438 + }, + { + "epoch": 0.801636914352001, + "grad_norm": 0.0011634336621486382, + "learning_rate": 2.209810239885428e-06, + "loss": 0.0, + "step": 12439 + }, + { + "epoch": 0.8017013597989302, + "grad_norm": 0.020376505300973437, + "learning_rate": 2.2090941639813824e-06, + "loss": 0.0, + "step": 12440 + }, + { + "epoch": 0.8017658052458594, + "grad_norm": 0.003821548746330608, + "learning_rate": 2.2083780880773363e-06, + "loss": 0.0, + "step": 12441 + }, + { + "epoch": 0.8018302506927886, + "grad_norm": 0.0014602166162969207, + "learning_rate": 2.2076620121732906e-06, + "loss": 0.0, + "step": 12442 + }, + { + "epoch": 0.8018946961397178, + "grad_norm": 0.14062339066538232, + "learning_rate": 2.206945936269245e-06, + "loss": 0.0018, + "step": 12443 + }, + { + "epoch": 0.8019591415866469, + "grad_norm": 0.020081862810718113, + "learning_rate": 2.2062298603651987e-06, + "loss": 0.0, + "step": 12444 + }, + { + "epoch": 0.8020235870335761, + "grad_norm": 0.13397164659681082, + "learning_rate": 2.205513784461153e-06, + "loss": 0.0003, + "step": 12445 + }, + { + "epoch": 0.8020880324805052, + "grad_norm": 0.0022000253668111184, + "learning_rate": 2.2047977085571073e-06, + "loss": 0.0, + "step": 12446 + }, + { + "epoch": 0.8021524779274344, + "grad_norm": 0.006739471661244481, + "learning_rate": 2.2040816326530616e-06, + "loss": 0.0, + "step": 12447 + }, + { + "epoch": 0.8022169233743636, + "grad_norm": 0.007373562634504472, + "learning_rate": 2.2033655567490155e-06, + "loss": 0.0, + "step": 12448 + }, + { + "epoch": 0.8022813688212928, + "grad_norm": 0.056199122722132976, + "learning_rate": 2.20264948084497e-06, + "loss": 0.0001, + "step": 12449 + }, + { + "epoch": 0.8023458142682219, + "grad_norm": 0.16691897834307712, + "learning_rate": 2.2019334049409237e-06, + "loss": 0.002, + "step": 12450 + }, + { + "epoch": 0.8024102597151511, + "grad_norm": 0.00024640163647747016, + "learning_rate": 2.201217329036878e-06, + "loss": 0.0, + "step": 12451 + }, + { + "epoch": 0.8024747051620803, + "grad_norm": 0.013440612014857863, + "learning_rate": 2.2005012531328323e-06, + "loss": 0.0, + "step": 12452 + }, + { + "epoch": 0.8025391506090095, + "grad_norm": 0.006342392524851095, + "learning_rate": 2.1997851772287866e-06, + "loss": 0.0, + "step": 12453 + }, + { + "epoch": 0.8026035960559387, + "grad_norm": 0.00017604408165197298, + "learning_rate": 2.1990691013247404e-06, + "loss": 0.0, + "step": 12454 + }, + { + "epoch": 0.8026680415028679, + "grad_norm": 0.0022245882873562933, + "learning_rate": 2.1983530254206947e-06, + "loss": 0.0, + "step": 12455 + }, + { + "epoch": 0.802732486949797, + "grad_norm": 5.712391774772362e-05, + "learning_rate": 2.197636949516649e-06, + "loss": 0.0, + "step": 12456 + }, + { + "epoch": 0.8027969323967262, + "grad_norm": 0.0006953470399971264, + "learning_rate": 2.196920873612603e-06, + "loss": 0.0, + "step": 12457 + }, + { + "epoch": 0.8028613778436553, + "grad_norm": 0.0016720927108341782, + "learning_rate": 2.1962047977085572e-06, + "loss": 0.0, + "step": 12458 + }, + { + "epoch": 0.8029258232905845, + "grad_norm": 0.00015475809128655122, + "learning_rate": 2.1954887218045115e-06, + "loss": 0.0, + "step": 12459 + }, + { + "epoch": 0.8029902687375137, + "grad_norm": 0.0013262927475274097, + "learning_rate": 2.194772645900466e-06, + "loss": 0.0, + "step": 12460 + }, + { + "epoch": 0.8030547141844429, + "grad_norm": 0.0011045115282342043, + "learning_rate": 2.1940565699964197e-06, + "loss": 0.0, + "step": 12461 + }, + { + "epoch": 0.803119159631372, + "grad_norm": 0.0021931957013456202, + "learning_rate": 2.193340494092374e-06, + "loss": 0.0, + "step": 12462 + }, + { + "epoch": 0.8031836050783012, + "grad_norm": 5.5322073605114676e-05, + "learning_rate": 2.1926244181883283e-06, + "loss": 0.0, + "step": 12463 + }, + { + "epoch": 0.8032480505252304, + "grad_norm": 0.18646149330729211, + "learning_rate": 2.1919083422842826e-06, + "loss": 0.0006, + "step": 12464 + }, + { + "epoch": 0.8033124959721596, + "grad_norm": 0.00282455793739817, + "learning_rate": 2.1911922663802365e-06, + "loss": 0.0, + "step": 12465 + }, + { + "epoch": 0.8033769414190888, + "grad_norm": 0.004386932538795974, + "learning_rate": 2.1904761904761908e-06, + "loss": 0.0, + "step": 12466 + }, + { + "epoch": 0.803441386866018, + "grad_norm": 1.3727636621027356e-05, + "learning_rate": 2.1897601145721446e-06, + "loss": 0.0, + "step": 12467 + }, + { + "epoch": 0.8035058323129471, + "grad_norm": 0.00011849457349212985, + "learning_rate": 2.189044038668099e-06, + "loss": 0.0, + "step": 12468 + }, + { + "epoch": 0.8035702777598762, + "grad_norm": 0.0011204254283753746, + "learning_rate": 2.1883279627640532e-06, + "loss": 0.0, + "step": 12469 + }, + { + "epoch": 0.8036347232068054, + "grad_norm": 0.005514206749045671, + "learning_rate": 2.1876118868600075e-06, + "loss": 0.0, + "step": 12470 + }, + { + "epoch": 0.8036991686537346, + "grad_norm": 0.002547410752228369, + "learning_rate": 2.1868958109559614e-06, + "loss": 0.0, + "step": 12471 + }, + { + "epoch": 0.8037636141006638, + "grad_norm": 0.004749226856846408, + "learning_rate": 2.1861797350519157e-06, + "loss": 0.0, + "step": 12472 + }, + { + "epoch": 0.803828059547593, + "grad_norm": 0.008761383470593858, + "learning_rate": 2.1854636591478696e-06, + "loss": 0.0, + "step": 12473 + }, + { + "epoch": 0.8038925049945221, + "grad_norm": 0.0002561698488996513, + "learning_rate": 2.184747583243824e-06, + "loss": 0.0, + "step": 12474 + }, + { + "epoch": 0.8039569504414513, + "grad_norm": 0.004750373251371964, + "learning_rate": 2.184031507339778e-06, + "loss": 0.0, + "step": 12475 + }, + { + "epoch": 0.8040213958883805, + "grad_norm": 0.00010574556100120908, + "learning_rate": 2.1833154314357325e-06, + "loss": 0.0, + "step": 12476 + }, + { + "epoch": 0.8040858413353097, + "grad_norm": 0.312174436736941, + "learning_rate": 2.1825993555316864e-06, + "loss": 0.0056, + "step": 12477 + }, + { + "epoch": 0.8041502867822389, + "grad_norm": 0.023472666434982383, + "learning_rate": 2.1818832796276407e-06, + "loss": 0.0002, + "step": 12478 + }, + { + "epoch": 0.8042147322291681, + "grad_norm": 0.0014214595665206025, + "learning_rate": 2.181167203723595e-06, + "loss": 0.0, + "step": 12479 + }, + { + "epoch": 0.8042791776760971, + "grad_norm": 0.004601320748266051, + "learning_rate": 2.180451127819549e-06, + "loss": 0.0, + "step": 12480 + }, + { + "epoch": 0.8043436231230263, + "grad_norm": 0.0004118759793522063, + "learning_rate": 2.179735051915503e-06, + "loss": 0.0, + "step": 12481 + }, + { + "epoch": 0.8044080685699555, + "grad_norm": 0.007549649471261577, + "learning_rate": 2.1790189760114574e-06, + "loss": 0.0001, + "step": 12482 + }, + { + "epoch": 0.8044725140168847, + "grad_norm": 0.0036913314331920907, + "learning_rate": 2.1783029001074117e-06, + "loss": 0.0, + "step": 12483 + }, + { + "epoch": 0.8045369594638139, + "grad_norm": 0.2587022743807844, + "learning_rate": 2.1775868242033656e-06, + "loss": 0.0009, + "step": 12484 + }, + { + "epoch": 0.8046014049107431, + "grad_norm": 0.00025450952995651784, + "learning_rate": 2.17687074829932e-06, + "loss": 0.0, + "step": 12485 + }, + { + "epoch": 0.8046658503576722, + "grad_norm": 0.7104865939347345, + "learning_rate": 2.1761546723952738e-06, + "loss": 0.002, + "step": 12486 + }, + { + "epoch": 0.8047302958046014, + "grad_norm": 0.004899011875170007, + "learning_rate": 2.1754385964912285e-06, + "loss": 0.0, + "step": 12487 + }, + { + "epoch": 0.8047947412515306, + "grad_norm": 0.0022642938689466183, + "learning_rate": 2.1747225205871824e-06, + "loss": 0.0, + "step": 12488 + }, + { + "epoch": 0.8048591866984598, + "grad_norm": 0.0004189518177420199, + "learning_rate": 2.1740064446831367e-06, + "loss": 0.0, + "step": 12489 + }, + { + "epoch": 0.804923632145389, + "grad_norm": 2.2536943495337073e-05, + "learning_rate": 2.1732903687790905e-06, + "loss": 0.0, + "step": 12490 + }, + { + "epoch": 0.804988077592318, + "grad_norm": 0.0035356629953806427, + "learning_rate": 2.172574292875045e-06, + "loss": 0.0, + "step": 12491 + }, + { + "epoch": 0.8050525230392472, + "grad_norm": 0.4910174343224499, + "learning_rate": 2.171858216970999e-06, + "loss": 0.001, + "step": 12492 + }, + { + "epoch": 0.8051169684861764, + "grad_norm": 0.0033785242190478037, + "learning_rate": 2.1711421410669534e-06, + "loss": 0.0, + "step": 12493 + }, + { + "epoch": 0.8051814139331056, + "grad_norm": 0.00046202312693729595, + "learning_rate": 2.1704260651629073e-06, + "loss": 0.0, + "step": 12494 + }, + { + "epoch": 0.8052458593800348, + "grad_norm": 0.0003226347943040989, + "learning_rate": 2.1697099892588616e-06, + "loss": 0.0, + "step": 12495 + }, + { + "epoch": 0.805310304826964, + "grad_norm": 0.00031251158052698306, + "learning_rate": 2.168993913354816e-06, + "loss": 0.0, + "step": 12496 + }, + { + "epoch": 0.8053747502738932, + "grad_norm": 0.023313700776633278, + "learning_rate": 2.1682778374507698e-06, + "loss": 0.0, + "step": 12497 + }, + { + "epoch": 0.8054391957208223, + "grad_norm": 0.01404507452701196, + "learning_rate": 2.167561761546724e-06, + "loss": 0.0, + "step": 12498 + }, + { + "epoch": 0.8055036411677515, + "grad_norm": 0.00015273987081015428, + "learning_rate": 2.1668456856426784e-06, + "loss": 0.0, + "step": 12499 + }, + { + "epoch": 0.8055680866146807, + "grad_norm": 0.002063542098063826, + "learning_rate": 2.1661296097386327e-06, + "loss": 0.0, + "step": 12500 + }, + { + "epoch": 0.8056325320616099, + "grad_norm": 0.0011278421061533598, + "learning_rate": 2.1654135338345866e-06, + "loss": 0.0, + "step": 12501 + }, + { + "epoch": 0.805696977508539, + "grad_norm": 0.006941577005496535, + "learning_rate": 2.164697457930541e-06, + "loss": 0.0, + "step": 12502 + }, + { + "epoch": 0.8057614229554682, + "grad_norm": 0.0012370293237017856, + "learning_rate": 2.1639813820264947e-06, + "loss": 0.0, + "step": 12503 + }, + { + "epoch": 0.8058258684023973, + "grad_norm": 0.017707468873228566, + "learning_rate": 2.1632653061224495e-06, + "loss": 0.0002, + "step": 12504 + }, + { + "epoch": 0.8058903138493265, + "grad_norm": 0.00018853056554883304, + "learning_rate": 2.1625492302184033e-06, + "loss": 0.0, + "step": 12505 + }, + { + "epoch": 0.8059547592962557, + "grad_norm": 0.004347717010727028, + "learning_rate": 2.1618331543143576e-06, + "loss": 0.0, + "step": 12506 + }, + { + "epoch": 0.8060192047431849, + "grad_norm": 0.0013038376412325182, + "learning_rate": 2.1611170784103115e-06, + "loss": 0.0, + "step": 12507 + }, + { + "epoch": 0.8060836501901141, + "grad_norm": 0.09628063265594201, + "learning_rate": 2.160401002506266e-06, + "loss": 0.0002, + "step": 12508 + }, + { + "epoch": 0.8061480956370433, + "grad_norm": 0.0007463655062723487, + "learning_rate": 2.15968492660222e-06, + "loss": 0.0, + "step": 12509 + }, + { + "epoch": 0.8062125410839724, + "grad_norm": 0.00048655696128334684, + "learning_rate": 2.1589688506981744e-06, + "loss": 0.0, + "step": 12510 + }, + { + "epoch": 0.8062769865309016, + "grad_norm": 0.005872971745491133, + "learning_rate": 2.1582527747941283e-06, + "loss": 0.0, + "step": 12511 + }, + { + "epoch": 0.8063414319778308, + "grad_norm": 0.002717922444481002, + "learning_rate": 2.1575366988900826e-06, + "loss": 0.0, + "step": 12512 + }, + { + "epoch": 0.8064058774247599, + "grad_norm": 0.014760318307408935, + "learning_rate": 2.156820622986037e-06, + "loss": 0.0, + "step": 12513 + }, + { + "epoch": 0.8064703228716891, + "grad_norm": 7.437945918253065e-05, + "learning_rate": 2.1561045470819907e-06, + "loss": 0.0, + "step": 12514 + }, + { + "epoch": 0.8065347683186183, + "grad_norm": 0.0002872171139769259, + "learning_rate": 2.155388471177945e-06, + "loss": 0.0, + "step": 12515 + }, + { + "epoch": 0.8065992137655474, + "grad_norm": 0.4398679174420515, + "learning_rate": 2.1546723952738993e-06, + "loss": 0.0032, + "step": 12516 + }, + { + "epoch": 0.8066636592124766, + "grad_norm": 0.0012609922360681577, + "learning_rate": 2.1539563193698536e-06, + "loss": 0.0, + "step": 12517 + }, + { + "epoch": 0.8067281046594058, + "grad_norm": 0.00022379636048257173, + "learning_rate": 2.1532402434658075e-06, + "loss": 0.0, + "step": 12518 + }, + { + "epoch": 0.806792550106335, + "grad_norm": 0.0017465105291618174, + "learning_rate": 2.152524167561762e-06, + "loss": 0.0, + "step": 12519 + }, + { + "epoch": 0.8068569955532642, + "grad_norm": 0.00038133249943826756, + "learning_rate": 2.1518080916577157e-06, + "loss": 0.0, + "step": 12520 + }, + { + "epoch": 0.8069214410001934, + "grad_norm": 0.00132960592924005, + "learning_rate": 2.15109201575367e-06, + "loss": 0.0, + "step": 12521 + }, + { + "epoch": 0.8069858864471225, + "grad_norm": 0.017596323143278837, + "learning_rate": 2.1503759398496243e-06, + "loss": 0.0, + "step": 12522 + }, + { + "epoch": 0.8070503318940517, + "grad_norm": 0.002651803633894323, + "learning_rate": 2.1496598639455786e-06, + "loss": 0.0, + "step": 12523 + }, + { + "epoch": 0.8071147773409808, + "grad_norm": 0.0018630916251092704, + "learning_rate": 2.1489437880415325e-06, + "loss": 0.0, + "step": 12524 + }, + { + "epoch": 0.80717922278791, + "grad_norm": 0.00027101958664413014, + "learning_rate": 2.1482277121374868e-06, + "loss": 0.0, + "step": 12525 + }, + { + "epoch": 0.8072436682348392, + "grad_norm": 0.014118955231454783, + "learning_rate": 2.1475116362334406e-06, + "loss": 0.0, + "step": 12526 + }, + { + "epoch": 0.8073081136817684, + "grad_norm": 0.0004878170706532088, + "learning_rate": 2.146795560329395e-06, + "loss": 0.0, + "step": 12527 + }, + { + "epoch": 0.8073725591286975, + "grad_norm": 0.7035151141142062, + "learning_rate": 2.1460794844253492e-06, + "loss": 0.0207, + "step": 12528 + }, + { + "epoch": 0.8074370045756267, + "grad_norm": 0.00014633861646761186, + "learning_rate": 2.1453634085213035e-06, + "loss": 0.0, + "step": 12529 + }, + { + "epoch": 0.8075014500225559, + "grad_norm": 0.0007063263377159374, + "learning_rate": 2.1446473326172574e-06, + "loss": 0.0, + "step": 12530 + }, + { + "epoch": 0.8075658954694851, + "grad_norm": 0.031044198535053354, + "learning_rate": 2.1439312567132117e-06, + "loss": 0.0002, + "step": 12531 + }, + { + "epoch": 0.8076303409164143, + "grad_norm": 0.1859323512895166, + "learning_rate": 2.143215180809166e-06, + "loss": 0.0007, + "step": 12532 + }, + { + "epoch": 0.8076947863633435, + "grad_norm": 0.003519137570280147, + "learning_rate": 2.1424991049051203e-06, + "loss": 0.0, + "step": 12533 + }, + { + "epoch": 0.8077592318102726, + "grad_norm": 0.007856042322162549, + "learning_rate": 2.141783029001074e-06, + "loss": 0.0, + "step": 12534 + }, + { + "epoch": 0.8078236772572018, + "grad_norm": 0.00011564863298519748, + "learning_rate": 2.1410669530970285e-06, + "loss": 0.0, + "step": 12535 + }, + { + "epoch": 0.8078881227041309, + "grad_norm": 0.006344745271119972, + "learning_rate": 2.1403508771929828e-06, + "loss": 0.0, + "step": 12536 + }, + { + "epoch": 0.8079525681510601, + "grad_norm": 0.033851168725958655, + "learning_rate": 2.1396348012889367e-06, + "loss": 0.0001, + "step": 12537 + }, + { + "epoch": 0.8080170135979893, + "grad_norm": 0.00015541963414884185, + "learning_rate": 2.138918725384891e-06, + "loss": 0.0, + "step": 12538 + }, + { + "epoch": 0.8080814590449185, + "grad_norm": 0.007355932033656503, + "learning_rate": 2.1382026494808452e-06, + "loss": 0.0, + "step": 12539 + }, + { + "epoch": 0.8081459044918476, + "grad_norm": 0.0014587984007397436, + "learning_rate": 2.1374865735767995e-06, + "loss": 0.0, + "step": 12540 + }, + { + "epoch": 0.8082103499387768, + "grad_norm": 0.3055763484401704, + "learning_rate": 2.1367704976727534e-06, + "loss": 0.0009, + "step": 12541 + }, + { + "epoch": 0.808274795385706, + "grad_norm": 0.015262446442021883, + "learning_rate": 2.1360544217687077e-06, + "loss": 0.0, + "step": 12542 + }, + { + "epoch": 0.8083392408326352, + "grad_norm": 0.007324747722120583, + "learning_rate": 2.1353383458646616e-06, + "loss": 0.0001, + "step": 12543 + }, + { + "epoch": 0.8084036862795644, + "grad_norm": 0.03268571168614651, + "learning_rate": 2.134622269960616e-06, + "loss": 0.0003, + "step": 12544 + }, + { + "epoch": 0.8084681317264936, + "grad_norm": 0.002548795640975255, + "learning_rate": 2.13390619405657e-06, + "loss": 0.0, + "step": 12545 + }, + { + "epoch": 0.8085325771734228, + "grad_norm": 0.0005647212933699163, + "learning_rate": 2.1331901181525245e-06, + "loss": 0.0, + "step": 12546 + }, + { + "epoch": 0.8085970226203518, + "grad_norm": 0.011028547755694413, + "learning_rate": 2.1324740422484784e-06, + "loss": 0.0001, + "step": 12547 + }, + { + "epoch": 0.808661468067281, + "grad_norm": 0.0016098602253972305, + "learning_rate": 2.1317579663444327e-06, + "loss": 0.0, + "step": 12548 + }, + { + "epoch": 0.8087259135142102, + "grad_norm": 0.002141363180888285, + "learning_rate": 2.131041890440387e-06, + "loss": 0.0, + "step": 12549 + }, + { + "epoch": 0.8087903589611394, + "grad_norm": 0.009605867876297334, + "learning_rate": 2.130325814536341e-06, + "loss": 0.0, + "step": 12550 + }, + { + "epoch": 0.8088548044080686, + "grad_norm": 0.012447253129473378, + "learning_rate": 2.129609738632295e-06, + "loss": 0.0, + "step": 12551 + }, + { + "epoch": 0.8089192498549977, + "grad_norm": 0.0005868374143533707, + "learning_rate": 2.1288936627282494e-06, + "loss": 0.0, + "step": 12552 + }, + { + "epoch": 0.8089836953019269, + "grad_norm": 0.005273947446889605, + "learning_rate": 2.1281775868242037e-06, + "loss": 0.0, + "step": 12553 + }, + { + "epoch": 0.8090481407488561, + "grad_norm": 0.0002057675618183742, + "learning_rate": 2.1274615109201576e-06, + "loss": 0.0, + "step": 12554 + }, + { + "epoch": 0.8091125861957853, + "grad_norm": 0.00044356842756500183, + "learning_rate": 2.126745435016112e-06, + "loss": 0.0, + "step": 12555 + }, + { + "epoch": 0.8091770316427145, + "grad_norm": 0.0005960369586189239, + "learning_rate": 2.1260293591120658e-06, + "loss": 0.0, + "step": 12556 + }, + { + "epoch": 0.8092414770896437, + "grad_norm": 0.0005960369586189239, + "learning_rate": 2.1260293591120658e-06, + "loss": 0.0065, + "step": 12557 + }, + { + "epoch": 0.8093059225365727, + "grad_norm": 0.0009959009747822732, + "learning_rate": 2.1253132832080205e-06, + "loss": 0.0, + "step": 12558 + }, + { + "epoch": 0.8093703679835019, + "grad_norm": 0.001079138957866863, + "learning_rate": 2.1245972073039744e-06, + "loss": 0.0, + "step": 12559 + }, + { + "epoch": 0.8094348134304311, + "grad_norm": 0.0001700052524290455, + "learning_rate": 2.1238811313999287e-06, + "loss": 0.0, + "step": 12560 + }, + { + "epoch": 0.8094992588773603, + "grad_norm": 0.3267133866120286, + "learning_rate": 2.1231650554958826e-06, + "loss": 0.0021, + "step": 12561 + }, + { + "epoch": 0.8095637043242895, + "grad_norm": 0.026077396622425923, + "learning_rate": 2.122448979591837e-06, + "loss": 0.0015, + "step": 12562 + }, + { + "epoch": 0.8096281497712187, + "grad_norm": 0.004266359877687984, + "learning_rate": 2.121732903687791e-06, + "loss": 0.0, + "step": 12563 + }, + { + "epoch": 0.8096925952181478, + "grad_norm": 0.0020029363056970465, + "learning_rate": 2.1210168277837455e-06, + "loss": 0.0, + "step": 12564 + }, + { + "epoch": 0.809757040665077, + "grad_norm": 0.0022158301051287744, + "learning_rate": 2.1203007518796993e-06, + "loss": 0.0, + "step": 12565 + }, + { + "epoch": 0.8098214861120062, + "grad_norm": 0.02784966132114145, + "learning_rate": 2.1195846759756536e-06, + "loss": 0.0, + "step": 12566 + }, + { + "epoch": 0.8098859315589354, + "grad_norm": 0.003321398332227599, + "learning_rate": 2.118868600071608e-06, + "loss": 0.0, + "step": 12567 + }, + { + "epoch": 0.8099503770058646, + "grad_norm": 0.0006111349772194386, + "learning_rate": 2.118152524167562e-06, + "loss": 0.0, + "step": 12568 + }, + { + "epoch": 0.8100148224527937, + "grad_norm": 8.239420264536209e-05, + "learning_rate": 2.117436448263516e-06, + "loss": 0.0, + "step": 12569 + }, + { + "epoch": 0.8100792678997228, + "grad_norm": 0.011744492043335403, + "learning_rate": 2.1167203723594704e-06, + "loss": 0.0001, + "step": 12570 + }, + { + "epoch": 0.810143713346652, + "grad_norm": 0.00613206029830229, + "learning_rate": 2.1160042964554247e-06, + "loss": 0.0, + "step": 12571 + }, + { + "epoch": 0.8102081587935812, + "grad_norm": 0.0308008172561735, + "learning_rate": 2.1152882205513786e-06, + "loss": 0.0003, + "step": 12572 + }, + { + "epoch": 0.8102726042405104, + "grad_norm": 0.010741848493233576, + "learning_rate": 2.114572144647333e-06, + "loss": 0.0, + "step": 12573 + }, + { + "epoch": 0.8103370496874396, + "grad_norm": 0.0015504692910859963, + "learning_rate": 2.1138560687432867e-06, + "loss": 0.0, + "step": 12574 + }, + { + "epoch": 0.8104014951343688, + "grad_norm": 0.006183430411555323, + "learning_rate": 2.1131399928392415e-06, + "loss": 0.0, + "step": 12575 + }, + { + "epoch": 0.810465940581298, + "grad_norm": 0.00024011503847110936, + "learning_rate": 2.1124239169351953e-06, + "loss": 0.0, + "step": 12576 + }, + { + "epoch": 0.8105303860282271, + "grad_norm": 0.0013300068832935133, + "learning_rate": 2.1117078410311496e-06, + "loss": 0.0, + "step": 12577 + }, + { + "epoch": 0.8105948314751563, + "grad_norm": 0.0001257257657714678, + "learning_rate": 2.1109917651271035e-06, + "loss": 0.0, + "step": 12578 + }, + { + "epoch": 0.8106592769220855, + "grad_norm": 0.0012701698727820548, + "learning_rate": 2.110275689223058e-06, + "loss": 0.0, + "step": 12579 + }, + { + "epoch": 0.8107237223690146, + "grad_norm": 0.002781898222436216, + "learning_rate": 2.1095596133190117e-06, + "loss": 0.0, + "step": 12580 + }, + { + "epoch": 0.8107881678159438, + "grad_norm": 0.008623381992397053, + "learning_rate": 2.1088435374149664e-06, + "loss": 0.0, + "step": 12581 + }, + { + "epoch": 0.810852613262873, + "grad_norm": 0.0035804817932016133, + "learning_rate": 2.1081274615109203e-06, + "loss": 0.0, + "step": 12582 + }, + { + "epoch": 0.8109170587098021, + "grad_norm": 0.0004178044112188488, + "learning_rate": 2.1074113856068746e-06, + "loss": 0.0, + "step": 12583 + }, + { + "epoch": 0.8109815041567313, + "grad_norm": 0.20006863377531295, + "learning_rate": 2.1066953097028285e-06, + "loss": 0.0039, + "step": 12584 + }, + { + "epoch": 0.8110459496036605, + "grad_norm": 0.023746772059720624, + "learning_rate": 2.1059792337987828e-06, + "loss": 0.0, + "step": 12585 + }, + { + "epoch": 0.8111103950505897, + "grad_norm": 0.00017902644761458916, + "learning_rate": 2.105263157894737e-06, + "loss": 0.0, + "step": 12586 + }, + { + "epoch": 0.8111748404975189, + "grad_norm": 0.0015365147211049636, + "learning_rate": 2.1045470819906914e-06, + "loss": 0.0, + "step": 12587 + }, + { + "epoch": 0.811239285944448, + "grad_norm": 0.03381551467867641, + "learning_rate": 2.1038310060866452e-06, + "loss": 0.0002, + "step": 12588 + }, + { + "epoch": 0.8113037313913772, + "grad_norm": 0.01445457605861845, + "learning_rate": 2.1031149301825995e-06, + "loss": 0.0001, + "step": 12589 + }, + { + "epoch": 0.8113681768383064, + "grad_norm": 0.0031470428802809754, + "learning_rate": 2.102398854278554e-06, + "loss": 0.0, + "step": 12590 + }, + { + "epoch": 0.8114326222852355, + "grad_norm": 0.47407611668946104, + "learning_rate": 2.1016827783745077e-06, + "loss": 0.0027, + "step": 12591 + }, + { + "epoch": 0.8114970677321647, + "grad_norm": 0.001997918103711385, + "learning_rate": 2.100966702470462e-06, + "loss": 0.0, + "step": 12592 + }, + { + "epoch": 0.8115615131790939, + "grad_norm": 0.00022226182021700908, + "learning_rate": 2.1002506265664163e-06, + "loss": 0.0, + "step": 12593 + }, + { + "epoch": 0.811625958626023, + "grad_norm": 0.005495734573647927, + "learning_rate": 2.0995345506623706e-06, + "loss": 0.0, + "step": 12594 + }, + { + "epoch": 0.8116904040729522, + "grad_norm": 0.36579584629506817, + "learning_rate": 2.0988184747583245e-06, + "loss": 0.001, + "step": 12595 + }, + { + "epoch": 0.8117548495198814, + "grad_norm": 0.0017201286993632876, + "learning_rate": 2.0981023988542788e-06, + "loss": 0.0, + "step": 12596 + }, + { + "epoch": 0.8118192949668106, + "grad_norm": 0.0012654749260697195, + "learning_rate": 2.0973863229502327e-06, + "loss": 0.0, + "step": 12597 + }, + { + "epoch": 0.8118837404137398, + "grad_norm": 6.77314804816197e-05, + "learning_rate": 2.096670247046187e-06, + "loss": 0.0, + "step": 12598 + }, + { + "epoch": 0.811948185860669, + "grad_norm": 0.12170673011688415, + "learning_rate": 2.0959541711421412e-06, + "loss": 0.0003, + "step": 12599 + }, + { + "epoch": 0.8120126313075982, + "grad_norm": 0.00039734094272957175, + "learning_rate": 2.0952380952380955e-06, + "loss": 0.0, + "step": 12600 + }, + { + "epoch": 0.8120770767545273, + "grad_norm": 0.0040080291412414185, + "learning_rate": 2.0945220193340494e-06, + "loss": 0.0, + "step": 12601 + }, + { + "epoch": 0.8121415222014564, + "grad_norm": 0.011901667177716067, + "learning_rate": 2.0938059434300037e-06, + "loss": 0.0, + "step": 12602 + }, + { + "epoch": 0.8122059676483856, + "grad_norm": 0.27331548420750906, + "learning_rate": 2.093089867525958e-06, + "loss": 0.0013, + "step": 12603 + }, + { + "epoch": 0.8122704130953148, + "grad_norm": 0.004315035369221101, + "learning_rate": 2.0923737916219123e-06, + "loss": 0.0, + "step": 12604 + }, + { + "epoch": 0.812334858542244, + "grad_norm": 1.478788977076481e-05, + "learning_rate": 2.091657715717866e-06, + "loss": 0.0, + "step": 12605 + }, + { + "epoch": 0.8123993039891731, + "grad_norm": 0.018809907453625072, + "learning_rate": 2.0909416398138205e-06, + "loss": 0.0001, + "step": 12606 + }, + { + "epoch": 0.8124637494361023, + "grad_norm": 0.0015582649472403046, + "learning_rate": 2.090225563909775e-06, + "loss": 0.0, + "step": 12607 + }, + { + "epoch": 0.8125281948830315, + "grad_norm": 0.000712515429465975, + "learning_rate": 2.0895094880057287e-06, + "loss": 0.0, + "step": 12608 + }, + { + "epoch": 0.8125926403299607, + "grad_norm": 0.00017508893570977122, + "learning_rate": 2.088793412101683e-06, + "loss": 0.0, + "step": 12609 + }, + { + "epoch": 0.8126570857768899, + "grad_norm": 0.0010479845875821226, + "learning_rate": 2.0880773361976373e-06, + "loss": 0.0, + "step": 12610 + }, + { + "epoch": 0.8127215312238191, + "grad_norm": 0.001556306928741489, + "learning_rate": 2.0873612602935916e-06, + "loss": 0.0, + "step": 12611 + }, + { + "epoch": 0.8127859766707483, + "grad_norm": 0.006236001901142749, + "learning_rate": 2.0866451843895454e-06, + "loss": 0.0001, + "step": 12612 + }, + { + "epoch": 0.8128504221176774, + "grad_norm": 0.0030114578149282394, + "learning_rate": 2.0859291084854997e-06, + "loss": 0.0, + "step": 12613 + }, + { + "epoch": 0.8129148675646065, + "grad_norm": 0.011364972960464367, + "learning_rate": 2.0852130325814536e-06, + "loss": 0.0, + "step": 12614 + }, + { + "epoch": 0.8129793130115357, + "grad_norm": 0.005280251572317013, + "learning_rate": 2.084496956677408e-06, + "loss": 0.0, + "step": 12615 + }, + { + "epoch": 0.8130437584584649, + "grad_norm": 8.507760533251935e-05, + "learning_rate": 2.083780880773362e-06, + "loss": 0.0, + "step": 12616 + }, + { + "epoch": 0.8131082039053941, + "grad_norm": 0.00038017094694071246, + "learning_rate": 2.0830648048693165e-06, + "loss": 0.0, + "step": 12617 + }, + { + "epoch": 0.8131726493523233, + "grad_norm": 0.2922925010749905, + "learning_rate": 2.0823487289652704e-06, + "loss": 0.0007, + "step": 12618 + }, + { + "epoch": 0.8132370947992524, + "grad_norm": 0.23094773223415585, + "learning_rate": 2.0816326530612247e-06, + "loss": 0.0004, + "step": 12619 + }, + { + "epoch": 0.8133015402461816, + "grad_norm": 0.5233092933901883, + "learning_rate": 2.0809165771571786e-06, + "loss": 0.001, + "step": 12620 + }, + { + "epoch": 0.8133659856931108, + "grad_norm": 0.00033577295685188345, + "learning_rate": 2.080200501253133e-06, + "loss": 0.0, + "step": 12621 + }, + { + "epoch": 0.81343043114004, + "grad_norm": 0.03787885973590073, + "learning_rate": 2.079484425349087e-06, + "loss": 0.0001, + "step": 12622 + }, + { + "epoch": 0.8134948765869692, + "grad_norm": 0.00023672040014827814, + "learning_rate": 2.0787683494450415e-06, + "loss": 0.0, + "step": 12623 + }, + { + "epoch": 0.8135593220338984, + "grad_norm": 0.0004785405222872389, + "learning_rate": 2.0780522735409953e-06, + "loss": 0.0, + "step": 12624 + }, + { + "epoch": 0.8136237674808274, + "grad_norm": 0.002556479343831401, + "learning_rate": 2.0773361976369496e-06, + "loss": 0.0, + "step": 12625 + }, + { + "epoch": 0.8136882129277566, + "grad_norm": 0.071685791015625, + "learning_rate": 2.076620121732904e-06, + "loss": 0.0001, + "step": 12626 + }, + { + "epoch": 0.8137526583746858, + "grad_norm": 0.00015047715241365775, + "learning_rate": 2.075904045828858e-06, + "loss": 0.0, + "step": 12627 + }, + { + "epoch": 0.813817103821615, + "grad_norm": 0.039498544859528056, + "learning_rate": 2.075187969924812e-06, + "loss": 0.0001, + "step": 12628 + }, + { + "epoch": 0.8138815492685442, + "grad_norm": 0.007269135108434769, + "learning_rate": 2.0744718940207664e-06, + "loss": 0.0, + "step": 12629 + }, + { + "epoch": 0.8139459947154734, + "grad_norm": 0.00021724833356658388, + "learning_rate": 2.0737558181167207e-06, + "loss": 0.0, + "step": 12630 + }, + { + "epoch": 0.8140104401624025, + "grad_norm": 0.15544795779068946, + "learning_rate": 2.0730397422126746e-06, + "loss": 0.0005, + "step": 12631 + }, + { + "epoch": 0.8140748856093317, + "grad_norm": 0.007632494535068704, + "learning_rate": 2.072323666308629e-06, + "loss": 0.0, + "step": 12632 + }, + { + "epoch": 0.8141393310562609, + "grad_norm": 0.026181218628717726, + "learning_rate": 2.0716075904045827e-06, + "loss": 0.0, + "step": 12633 + }, + { + "epoch": 0.8142037765031901, + "grad_norm": 0.169198243764644, + "learning_rate": 2.0708915145005375e-06, + "loss": 0.0004, + "step": 12634 + }, + { + "epoch": 0.8142682219501193, + "grad_norm": 0.02404185888492924, + "learning_rate": 2.0701754385964913e-06, + "loss": 0.0001, + "step": 12635 + }, + { + "epoch": 0.8143326673970483, + "grad_norm": 0.003228024572993855, + "learning_rate": 2.0694593626924456e-06, + "loss": 0.0, + "step": 12636 + }, + { + "epoch": 0.8143971128439775, + "grad_norm": 0.0023332422248495926, + "learning_rate": 2.0687432867883995e-06, + "loss": 0.0, + "step": 12637 + }, + { + "epoch": 0.8144615582909067, + "grad_norm": 5.473414157304134e-05, + "learning_rate": 2.068027210884354e-06, + "loss": 0.0, + "step": 12638 + }, + { + "epoch": 0.8145260037378359, + "grad_norm": 0.021708693061578042, + "learning_rate": 2.067311134980308e-06, + "loss": 0.0, + "step": 12639 + }, + { + "epoch": 0.8145904491847651, + "grad_norm": 0.45574811805194243, + "learning_rate": 2.0665950590762624e-06, + "loss": 0.0004, + "step": 12640 + }, + { + "epoch": 0.8146548946316943, + "grad_norm": 0.0013192928213251146, + "learning_rate": 2.0658789831722163e-06, + "loss": 0.0, + "step": 12641 + }, + { + "epoch": 0.8147193400786235, + "grad_norm": 0.11894261211468155, + "learning_rate": 2.0651629072681706e-06, + "loss": 0.0002, + "step": 12642 + }, + { + "epoch": 0.8147837855255526, + "grad_norm": 0.14892849528857, + "learning_rate": 2.064446831364125e-06, + "loss": 0.0002, + "step": 12643 + }, + { + "epoch": 0.8148482309724818, + "grad_norm": 0.5862908378806496, + "learning_rate": 2.0637307554600788e-06, + "loss": 0.0043, + "step": 12644 + }, + { + "epoch": 0.814912676419411, + "grad_norm": 0.00046785723467117026, + "learning_rate": 2.063014679556033e-06, + "loss": 0.0, + "step": 12645 + }, + { + "epoch": 0.8149771218663402, + "grad_norm": 0.0002734640965516467, + "learning_rate": 2.0622986036519874e-06, + "loss": 0.0, + "step": 12646 + }, + { + "epoch": 0.8150415673132693, + "grad_norm": 0.009720572095862221, + "learning_rate": 2.0615825277479417e-06, + "loss": 0.0, + "step": 12647 + }, + { + "epoch": 0.8151060127601985, + "grad_norm": 0.0276897489043289, + "learning_rate": 2.0608664518438955e-06, + "loss": 0.0, + "step": 12648 + }, + { + "epoch": 0.8151704582071276, + "grad_norm": 0.12226153845249521, + "learning_rate": 2.06015037593985e-06, + "loss": 0.0002, + "step": 12649 + }, + { + "epoch": 0.8152349036540568, + "grad_norm": 0.045159530174020975, + "learning_rate": 2.0594343000358037e-06, + "loss": 0.0, + "step": 12650 + }, + { + "epoch": 0.815299349100986, + "grad_norm": 0.0007022768080407791, + "learning_rate": 2.0587182241317584e-06, + "loss": 0.0, + "step": 12651 + }, + { + "epoch": 0.8153637945479152, + "grad_norm": 0.023347433204983205, + "learning_rate": 2.0580021482277123e-06, + "loss": 0.0001, + "step": 12652 + }, + { + "epoch": 0.8154282399948444, + "grad_norm": 0.00021818342706713008, + "learning_rate": 2.0572860723236666e-06, + "loss": 0.0, + "step": 12653 + }, + { + "epoch": 0.8154926854417736, + "grad_norm": 0.19678674301530588, + "learning_rate": 2.0565699964196205e-06, + "loss": 0.0008, + "step": 12654 + }, + { + "epoch": 0.8155571308887027, + "grad_norm": 2.9480723597161855, + "learning_rate": 2.0558539205155748e-06, + "loss": 0.0264, + "step": 12655 + }, + { + "epoch": 0.8156215763356319, + "grad_norm": 0.000604122829339633, + "learning_rate": 2.055137844611529e-06, + "loss": 0.0, + "step": 12656 + }, + { + "epoch": 0.8156860217825611, + "grad_norm": 0.004410635766435355, + "learning_rate": 2.0544217687074834e-06, + "loss": 0.0, + "step": 12657 + }, + { + "epoch": 0.8157504672294902, + "grad_norm": 0.007988108148366502, + "learning_rate": 2.0537056928034372e-06, + "loss": 0.0, + "step": 12658 + }, + { + "epoch": 0.8158149126764194, + "grad_norm": 0.00025976478303401106, + "learning_rate": 2.0529896168993915e-06, + "loss": 0.0, + "step": 12659 + }, + { + "epoch": 0.8158793581233486, + "grad_norm": 0.024995079524881312, + "learning_rate": 2.052273540995346e-06, + "loss": 0.0, + "step": 12660 + }, + { + "epoch": 0.8159438035702777, + "grad_norm": 0.041993960354219664, + "learning_rate": 2.0515574650912997e-06, + "loss": 0.0001, + "step": 12661 + }, + { + "epoch": 0.8160082490172069, + "grad_norm": 0.0006448330306277122, + "learning_rate": 2.050841389187254e-06, + "loss": 0.0, + "step": 12662 + }, + { + "epoch": 0.8160726944641361, + "grad_norm": 0.0007699161659463133, + "learning_rate": 2.0501253132832083e-06, + "loss": 0.0, + "step": 12663 + }, + { + "epoch": 0.8161371399110653, + "grad_norm": 0.0001718285197238581, + "learning_rate": 2.0494092373791626e-06, + "loss": 0.0, + "step": 12664 + }, + { + "epoch": 0.8162015853579945, + "grad_norm": 0.00014869667986260737, + "learning_rate": 2.0486931614751165e-06, + "loss": 0.0, + "step": 12665 + }, + { + "epoch": 0.8162660308049237, + "grad_norm": 0.0001513088943245394, + "learning_rate": 2.047977085571071e-06, + "loss": 0.0, + "step": 12666 + }, + { + "epoch": 0.8163304762518528, + "grad_norm": 0.005079574876596566, + "learning_rate": 2.0472610096670247e-06, + "loss": 0.0, + "step": 12667 + }, + { + "epoch": 0.816394921698782, + "grad_norm": 0.0012937261291333171, + "learning_rate": 2.046544933762979e-06, + "loss": 0.0, + "step": 12668 + }, + { + "epoch": 0.8164593671457111, + "grad_norm": 0.013307215880097614, + "learning_rate": 2.0458288578589333e-06, + "loss": 0.0, + "step": 12669 + }, + { + "epoch": 0.8165238125926403, + "grad_norm": 0.0060048266720873255, + "learning_rate": 2.0451127819548876e-06, + "loss": 0.0, + "step": 12670 + }, + { + "epoch": 0.8165882580395695, + "grad_norm": 0.00019912951092864882, + "learning_rate": 2.0443967060508414e-06, + "loss": 0.0, + "step": 12671 + }, + { + "epoch": 0.8166527034864987, + "grad_norm": 0.00549363525012209, + "learning_rate": 2.0436806301467957e-06, + "loss": 0.0, + "step": 12672 + }, + { + "epoch": 0.8167171489334278, + "grad_norm": 0.00042558856689712486, + "learning_rate": 2.0429645542427496e-06, + "loss": 0.0, + "step": 12673 + }, + { + "epoch": 0.816781594380357, + "grad_norm": 0.00016062680426746665, + "learning_rate": 2.042248478338704e-06, + "loss": 0.0, + "step": 12674 + }, + { + "epoch": 0.8168460398272862, + "grad_norm": 0.08333596551979962, + "learning_rate": 2.041532402434658e-06, + "loss": 0.0004, + "step": 12675 + }, + { + "epoch": 0.8169104852742154, + "grad_norm": 0.2981101617579293, + "learning_rate": 2.0408163265306125e-06, + "loss": 0.0017, + "step": 12676 + }, + { + "epoch": 0.8169749307211446, + "grad_norm": 0.0010572690640603976, + "learning_rate": 2.0401002506265664e-06, + "loss": 0.0, + "step": 12677 + }, + { + "epoch": 0.8170393761680738, + "grad_norm": 0.0012552339913406846, + "learning_rate": 2.0393841747225207e-06, + "loss": 0.0, + "step": 12678 + }, + { + "epoch": 0.817103821615003, + "grad_norm": 0.016703592774660905, + "learning_rate": 2.038668098818475e-06, + "loss": 0.0, + "step": 12679 + }, + { + "epoch": 0.8171682670619321, + "grad_norm": 0.004141902966402822, + "learning_rate": 2.0379520229144293e-06, + "loss": 0.0, + "step": 12680 + }, + { + "epoch": 0.8172327125088612, + "grad_norm": 0.0007111984253445308, + "learning_rate": 2.037235947010383e-06, + "loss": 0.0, + "step": 12681 + }, + { + "epoch": 0.8172971579557904, + "grad_norm": 0.0021643333428704877, + "learning_rate": 2.0365198711063375e-06, + "loss": 0.0, + "step": 12682 + }, + { + "epoch": 0.8173616034027196, + "grad_norm": 0.009506596409528539, + "learning_rate": 2.0358037952022918e-06, + "loss": 0.0, + "step": 12683 + }, + { + "epoch": 0.8174260488496488, + "grad_norm": 0.00015312242409299139, + "learning_rate": 2.0350877192982456e-06, + "loss": 0.0, + "step": 12684 + }, + { + "epoch": 0.8174904942965779, + "grad_norm": 0.0015363426845521342, + "learning_rate": 2.0343716433942e-06, + "loss": 0.0, + "step": 12685 + }, + { + "epoch": 0.8175549397435071, + "grad_norm": 0.0038774986870014085, + "learning_rate": 2.0336555674901542e-06, + "loss": 0.0, + "step": 12686 + }, + { + "epoch": 0.8176193851904363, + "grad_norm": 0.0002841460507729212, + "learning_rate": 2.0329394915861085e-06, + "loss": 0.0, + "step": 12687 + }, + { + "epoch": 0.8176838306373655, + "grad_norm": 0.0004165062032192322, + "learning_rate": 2.0322234156820624e-06, + "loss": 0.0, + "step": 12688 + }, + { + "epoch": 0.8177482760842947, + "grad_norm": 0.0006068016544899525, + "learning_rate": 2.0315073397780167e-06, + "loss": 0.0, + "step": 12689 + }, + { + "epoch": 0.8178127215312239, + "grad_norm": 0.056295926580902295, + "learning_rate": 2.0307912638739706e-06, + "loss": 0.0002, + "step": 12690 + }, + { + "epoch": 0.817877166978153, + "grad_norm": 0.0007701938668082081, + "learning_rate": 2.030075187969925e-06, + "loss": 0.0, + "step": 12691 + }, + { + "epoch": 0.8179416124250821, + "grad_norm": 0.0004344841076649519, + "learning_rate": 2.029359112065879e-06, + "loss": 0.0, + "step": 12692 + }, + { + "epoch": 0.8180060578720113, + "grad_norm": 0.000434818477799362, + "learning_rate": 2.0286430361618335e-06, + "loss": 0.0, + "step": 12693 + }, + { + "epoch": 0.8180705033189405, + "grad_norm": 0.0002604699380230901, + "learning_rate": 2.0279269602577873e-06, + "loss": 0.0, + "step": 12694 + }, + { + "epoch": 0.8181349487658697, + "grad_norm": 0.003211286809710176, + "learning_rate": 2.0272108843537416e-06, + "loss": 0.0, + "step": 12695 + }, + { + "epoch": 0.8181993942127989, + "grad_norm": 0.018087344344049478, + "learning_rate": 2.026494808449696e-06, + "loss": 0.0001, + "step": 12696 + }, + { + "epoch": 0.818263839659728, + "grad_norm": 0.02370398735358649, + "learning_rate": 2.02577873254565e-06, + "loss": 0.0002, + "step": 12697 + }, + { + "epoch": 0.8183282851066572, + "grad_norm": 0.0011283720401023612, + "learning_rate": 2.025062656641604e-06, + "loss": 0.0, + "step": 12698 + }, + { + "epoch": 0.8183927305535864, + "grad_norm": 0.0006159527725816931, + "learning_rate": 2.0243465807375584e-06, + "loss": 0.0, + "step": 12699 + }, + { + "epoch": 0.8184571760005156, + "grad_norm": 0.00618308122004743, + "learning_rate": 2.0236305048335127e-06, + "loss": 0.0, + "step": 12700 + }, + { + "epoch": 0.8185216214474448, + "grad_norm": 0.0012521473666674897, + "learning_rate": 2.0229144289294666e-06, + "loss": 0.0, + "step": 12701 + }, + { + "epoch": 0.818586066894374, + "grad_norm": 0.6123207248370139, + "learning_rate": 2.022198353025421e-06, + "loss": 0.0015, + "step": 12702 + }, + { + "epoch": 0.818650512341303, + "grad_norm": 0.002254610635089249, + "learning_rate": 2.0214822771213748e-06, + "loss": 0.0, + "step": 12703 + }, + { + "epoch": 0.8187149577882322, + "grad_norm": 0.1917155355543102, + "learning_rate": 2.0207662012173295e-06, + "loss": 0.0011, + "step": 12704 + }, + { + "epoch": 0.8187794032351614, + "grad_norm": 0.0010368363220421362, + "learning_rate": 2.0200501253132834e-06, + "loss": 0.0, + "step": 12705 + }, + { + "epoch": 0.8188438486820906, + "grad_norm": 0.0002894131131910139, + "learning_rate": 2.0193340494092377e-06, + "loss": 0.0, + "step": 12706 + }, + { + "epoch": 0.8189082941290198, + "grad_norm": 0.00039159992150386276, + "learning_rate": 2.0186179735051915e-06, + "loss": 0.0, + "step": 12707 + }, + { + "epoch": 0.818972739575949, + "grad_norm": 0.08468321098748963, + "learning_rate": 2.017901897601146e-06, + "loss": 0.0002, + "step": 12708 + }, + { + "epoch": 0.8190371850228781, + "grad_norm": 3.6733720040983004, + "learning_rate": 2.0171858216971e-06, + "loss": 0.0662, + "step": 12709 + }, + { + "epoch": 0.8191016304698073, + "grad_norm": 0.002083130458600624, + "learning_rate": 2.0164697457930544e-06, + "loss": 0.0, + "step": 12710 + }, + { + "epoch": 0.8191660759167365, + "grad_norm": 6.762979958088849e-05, + "learning_rate": 2.0157536698890083e-06, + "loss": 0.0, + "step": 12711 + }, + { + "epoch": 0.8192305213636657, + "grad_norm": 0.07021264761194777, + "learning_rate": 2.0150375939849626e-06, + "loss": 0.0001, + "step": 12712 + }, + { + "epoch": 0.8192949668105949, + "grad_norm": 8.095978465353855e-05, + "learning_rate": 2.014321518080917e-06, + "loss": 0.0, + "step": 12713 + }, + { + "epoch": 0.819359412257524, + "grad_norm": 0.00044268987224894545, + "learning_rate": 2.0136054421768708e-06, + "loss": 0.0, + "step": 12714 + }, + { + "epoch": 0.8194238577044531, + "grad_norm": 0.7423377035637856, + "learning_rate": 2.012889366272825e-06, + "loss": 0.0018, + "step": 12715 + }, + { + "epoch": 0.8194883031513823, + "grad_norm": 0.012233627670962024, + "learning_rate": 2.0121732903687794e-06, + "loss": 0.0, + "step": 12716 + }, + { + "epoch": 0.8195527485983115, + "grad_norm": 0.0039064674577683235, + "learning_rate": 2.0114572144647337e-06, + "loss": 0.0, + "step": 12717 + }, + { + "epoch": 0.8196171940452407, + "grad_norm": 6.302641262364943e-05, + "learning_rate": 2.0107411385606875e-06, + "loss": 0.0, + "step": 12718 + }, + { + "epoch": 0.8196816394921699, + "grad_norm": 0.0005544111992086107, + "learning_rate": 2.010025062656642e-06, + "loss": 0.0, + "step": 12719 + }, + { + "epoch": 0.8197460849390991, + "grad_norm": 0.005699905157546856, + "learning_rate": 2.0093089867525957e-06, + "loss": 0.0, + "step": 12720 + }, + { + "epoch": 0.8198105303860282, + "grad_norm": 0.24517601948537127, + "learning_rate": 2.0085929108485504e-06, + "loss": 0.0037, + "step": 12721 + }, + { + "epoch": 0.8198749758329574, + "grad_norm": 1.4732163879247422, + "learning_rate": 2.0078768349445043e-06, + "loss": 0.0111, + "step": 12722 + }, + { + "epoch": 0.8199394212798866, + "grad_norm": 0.0004949487012933176, + "learning_rate": 2.0071607590404586e-06, + "loss": 0.0, + "step": 12723 + }, + { + "epoch": 0.8200038667268158, + "grad_norm": 0.00566935245972329, + "learning_rate": 2.0064446831364125e-06, + "loss": 0.0, + "step": 12724 + }, + { + "epoch": 0.8200683121737449, + "grad_norm": 0.000825286660599744, + "learning_rate": 2.005728607232367e-06, + "loss": 0.0, + "step": 12725 + }, + { + "epoch": 0.8201327576206741, + "grad_norm": 0.0006255925426041789, + "learning_rate": 2.0050125313283207e-06, + "loss": 0.0, + "step": 12726 + }, + { + "epoch": 0.8201972030676032, + "grad_norm": 0.3893231441875631, + "learning_rate": 2.0042964554242754e-06, + "loss": 0.0011, + "step": 12727 + }, + { + "epoch": 0.8202616485145324, + "grad_norm": 0.3826723426061182, + "learning_rate": 2.0035803795202293e-06, + "loss": 0.0002, + "step": 12728 + }, + { + "epoch": 0.8203260939614616, + "grad_norm": 0.002656368265336166, + "learning_rate": 2.0028643036161836e-06, + "loss": 0.0, + "step": 12729 + }, + { + "epoch": 0.8203905394083908, + "grad_norm": 0.0021246091607805234, + "learning_rate": 2.0021482277121374e-06, + "loss": 0.0, + "step": 12730 + }, + { + "epoch": 0.82045498485532, + "grad_norm": 0.09306680573160549, + "learning_rate": 2.0014321518080917e-06, + "loss": 0.0002, + "step": 12731 + }, + { + "epoch": 0.8205194303022492, + "grad_norm": 0.0013687062223432345, + "learning_rate": 2.000716075904046e-06, + "loss": 0.0, + "step": 12732 + }, + { + "epoch": 0.8205838757491783, + "grad_norm": 0.002352388538741838, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0, + "step": 12733 + }, + { + "epoch": 0.8206483211961075, + "grad_norm": 0.000925862567441048, + "learning_rate": 1.999283924095954e-06, + "loss": 0.0, + "step": 12734 + }, + { + "epoch": 0.8207127666430367, + "grad_norm": 0.0026647197191653005, + "learning_rate": 1.9985678481919085e-06, + "loss": 0.0, + "step": 12735 + }, + { + "epoch": 0.8207772120899658, + "grad_norm": 0.0005046259473754929, + "learning_rate": 1.997851772287863e-06, + "loss": 0.0, + "step": 12736 + }, + { + "epoch": 0.820841657536895, + "grad_norm": 0.0010993903374720605, + "learning_rate": 1.9971356963838167e-06, + "loss": 0.0, + "step": 12737 + }, + { + "epoch": 0.8209061029838242, + "grad_norm": 0.005264727428518476, + "learning_rate": 1.996419620479771e-06, + "loss": 0.0, + "step": 12738 + }, + { + "epoch": 0.8209705484307533, + "grad_norm": 0.002761264554517652, + "learning_rate": 1.9957035445757253e-06, + "loss": 0.0, + "step": 12739 + }, + { + "epoch": 0.8210349938776825, + "grad_norm": 0.027237537922393484, + "learning_rate": 1.9949874686716796e-06, + "loss": 0.0, + "step": 12740 + }, + { + "epoch": 0.8210994393246117, + "grad_norm": 0.00029789924426951506, + "learning_rate": 1.9942713927676335e-06, + "loss": 0.0, + "step": 12741 + }, + { + "epoch": 0.8211638847715409, + "grad_norm": 0.0032934977863916964, + "learning_rate": 1.9935553168635878e-06, + "loss": 0.0, + "step": 12742 + }, + { + "epoch": 0.8212283302184701, + "grad_norm": 0.05155409225999489, + "learning_rate": 1.9928392409595416e-06, + "loss": 0.0006, + "step": 12743 + }, + { + "epoch": 0.8212927756653993, + "grad_norm": 0.03518922239741282, + "learning_rate": 1.992123165055496e-06, + "loss": 0.0001, + "step": 12744 + }, + { + "epoch": 0.8213572211123285, + "grad_norm": 0.0006332805434407577, + "learning_rate": 1.9914070891514502e-06, + "loss": 0.0, + "step": 12745 + }, + { + "epoch": 0.8214216665592576, + "grad_norm": 0.001911123414774432, + "learning_rate": 1.9906910132474045e-06, + "loss": 0.0, + "step": 12746 + }, + { + "epoch": 0.8214861120061867, + "grad_norm": 0.002936392954632133, + "learning_rate": 1.9899749373433584e-06, + "loss": 0.0, + "step": 12747 + }, + { + "epoch": 0.8215505574531159, + "grad_norm": 0.004308939923328707, + "learning_rate": 1.9892588614393127e-06, + "loss": 0.0, + "step": 12748 + }, + { + "epoch": 0.8216150029000451, + "grad_norm": 0.07829764482853954, + "learning_rate": 1.988542785535267e-06, + "loss": 0.0001, + "step": 12749 + }, + { + "epoch": 0.8216794483469743, + "grad_norm": 0.002653395061435763, + "learning_rate": 1.9878267096312213e-06, + "loss": 0.0, + "step": 12750 + }, + { + "epoch": 0.8217438937939034, + "grad_norm": 0.0009104643964468059, + "learning_rate": 1.987110633727175e-06, + "loss": 0.0, + "step": 12751 + }, + { + "epoch": 0.8218083392408326, + "grad_norm": 0.008782726808995691, + "learning_rate": 1.9863945578231295e-06, + "loss": 0.0015, + "step": 12752 + }, + { + "epoch": 0.8218727846877618, + "grad_norm": 0.01460205373826194, + "learning_rate": 1.9856784819190838e-06, + "loss": 0.0, + "step": 12753 + }, + { + "epoch": 0.821937230134691, + "grad_norm": 0.4726310597162175, + "learning_rate": 1.9849624060150376e-06, + "loss": 0.0026, + "step": 12754 + }, + { + "epoch": 0.8220016755816202, + "grad_norm": 0.04270153103544648, + "learning_rate": 1.984246330110992e-06, + "loss": 0.0001, + "step": 12755 + }, + { + "epoch": 0.8220661210285494, + "grad_norm": 0.5393062261715389, + "learning_rate": 1.9835302542069462e-06, + "loss": 0.0009, + "step": 12756 + }, + { + "epoch": 0.8221305664754786, + "grad_norm": 2.490178844356341, + "learning_rate": 1.9828141783029005e-06, + "loss": 0.0009, + "step": 12757 + }, + { + "epoch": 0.8221950119224077, + "grad_norm": 0.2574040327126001, + "learning_rate": 1.9820981023988544e-06, + "loss": 0.0014, + "step": 12758 + }, + { + "epoch": 0.8222594573693368, + "grad_norm": 0.1584653388532432, + "learning_rate": 1.9813820264948087e-06, + "loss": 0.0004, + "step": 12759 + }, + { + "epoch": 0.822323902816266, + "grad_norm": 0.04805075777073572, + "learning_rate": 1.9806659505907626e-06, + "loss": 0.0003, + "step": 12760 + }, + { + "epoch": 0.8223883482631952, + "grad_norm": 0.0788094696601303, + "learning_rate": 1.979949874686717e-06, + "loss": 0.0001, + "step": 12761 + }, + { + "epoch": 0.8224527937101244, + "grad_norm": 0.004995515025960436, + "learning_rate": 1.979233798782671e-06, + "loss": 0.0001, + "step": 12762 + }, + { + "epoch": 0.8225172391570535, + "grad_norm": 0.0001973465953139601, + "learning_rate": 1.9785177228786255e-06, + "loss": 0.0, + "step": 12763 + }, + { + "epoch": 0.8225816846039827, + "grad_norm": 0.0012884267076477087, + "learning_rate": 1.9778016469745794e-06, + "loss": 0.0, + "step": 12764 + }, + { + "epoch": 0.8226461300509119, + "grad_norm": 0.005440834923307024, + "learning_rate": 1.9770855710705337e-06, + "loss": 0.0, + "step": 12765 + }, + { + "epoch": 0.8227105754978411, + "grad_norm": 0.293044830616161, + "learning_rate": 1.9763694951664875e-06, + "loss": 0.0007, + "step": 12766 + }, + { + "epoch": 0.8227750209447703, + "grad_norm": 0.00020292315307484786, + "learning_rate": 1.975653419262442e-06, + "loss": 0.0, + "step": 12767 + }, + { + "epoch": 0.8228394663916995, + "grad_norm": 0.014746585006457157, + "learning_rate": 1.974937343358396e-06, + "loss": 0.0001, + "step": 12768 + }, + { + "epoch": 0.8229039118386287, + "grad_norm": 0.0029612802244079247, + "learning_rate": 1.9742212674543504e-06, + "loss": 0.0, + "step": 12769 + }, + { + "epoch": 0.8229683572855577, + "grad_norm": 0.0009473144841386316, + "learning_rate": 1.9735051915503043e-06, + "loss": 0.0, + "step": 12770 + }, + { + "epoch": 0.8230328027324869, + "grad_norm": 0.0028644580881699412, + "learning_rate": 1.9727891156462586e-06, + "loss": 0.0, + "step": 12771 + }, + { + "epoch": 0.8230972481794161, + "grad_norm": 0.016254687805129026, + "learning_rate": 1.972073039742213e-06, + "loss": 0.0, + "step": 12772 + }, + { + "epoch": 0.8231616936263453, + "grad_norm": 0.0022775558697995233, + "learning_rate": 1.9713569638381668e-06, + "loss": 0.0, + "step": 12773 + }, + { + "epoch": 0.8232261390732745, + "grad_norm": 0.00014820028206541386, + "learning_rate": 1.970640887934121e-06, + "loss": 0.0, + "step": 12774 + }, + { + "epoch": 0.8232905845202036, + "grad_norm": 0.11497893146273225, + "learning_rate": 1.9699248120300754e-06, + "loss": 0.0016, + "step": 12775 + }, + { + "epoch": 0.8233550299671328, + "grad_norm": 0.02543805923047829, + "learning_rate": 1.9692087361260297e-06, + "loss": 0.0, + "step": 12776 + }, + { + "epoch": 0.823419475414062, + "grad_norm": 0.0007624847949835808, + "learning_rate": 1.9684926602219835e-06, + "loss": 0.0, + "step": 12777 + }, + { + "epoch": 0.8234839208609912, + "grad_norm": 0.0002072227547720038, + "learning_rate": 1.967776584317938e-06, + "loss": 0.0, + "step": 12778 + }, + { + "epoch": 0.8235483663079204, + "grad_norm": 0.0020613742597048756, + "learning_rate": 1.9670605084138917e-06, + "loss": 0.0, + "step": 12779 + }, + { + "epoch": 0.8236128117548496, + "grad_norm": 0.07955951346787625, + "learning_rate": 1.9663444325098464e-06, + "loss": 0.0002, + "step": 12780 + }, + { + "epoch": 0.8236772572017786, + "grad_norm": 0.0011062340486165578, + "learning_rate": 1.9656283566058003e-06, + "loss": 0.0, + "step": 12781 + }, + { + "epoch": 0.8237417026487078, + "grad_norm": 0.0018729547487564492, + "learning_rate": 1.9649122807017546e-06, + "loss": 0.0, + "step": 12782 + }, + { + "epoch": 0.823806148095637, + "grad_norm": 1.9180195290861062, + "learning_rate": 1.9641962047977085e-06, + "loss": 0.0044, + "step": 12783 + }, + { + "epoch": 0.8238705935425662, + "grad_norm": 0.0004110962381387348, + "learning_rate": 1.963480128893663e-06, + "loss": 0.0, + "step": 12784 + }, + { + "epoch": 0.8239350389894954, + "grad_norm": 0.002992485947360091, + "learning_rate": 1.962764052989617e-06, + "loss": 0.0, + "step": 12785 + }, + { + "epoch": 0.8239994844364246, + "grad_norm": 0.23977967782600768, + "learning_rate": 1.9620479770855714e-06, + "loss": 0.0028, + "step": 12786 + }, + { + "epoch": 0.8240639298833538, + "grad_norm": 0.0001052902595383848, + "learning_rate": 1.9613319011815253e-06, + "loss": 0.0, + "step": 12787 + }, + { + "epoch": 0.8241283753302829, + "grad_norm": 0.0009807068807572042, + "learning_rate": 1.9606158252774796e-06, + "loss": 0.0, + "step": 12788 + }, + { + "epoch": 0.8241928207772121, + "grad_norm": 0.00017703910016623285, + "learning_rate": 1.959899749373434e-06, + "loss": 0.0, + "step": 12789 + }, + { + "epoch": 0.8242572662241413, + "grad_norm": 0.0008779980011984871, + "learning_rate": 1.9591836734693877e-06, + "loss": 0.0, + "step": 12790 + }, + { + "epoch": 0.8243217116710705, + "grad_norm": 0.0007441098091688068, + "learning_rate": 1.958467597565342e-06, + "loss": 0.0, + "step": 12791 + }, + { + "epoch": 0.8243861571179996, + "grad_norm": 0.0012419554472922133, + "learning_rate": 1.9577515216612963e-06, + "loss": 0.0, + "step": 12792 + }, + { + "epoch": 0.8244506025649287, + "grad_norm": 0.0016284702447365287, + "learning_rate": 1.9570354457572506e-06, + "loss": 0.0, + "step": 12793 + }, + { + "epoch": 0.8245150480118579, + "grad_norm": 0.0033685588571939636, + "learning_rate": 1.9563193698532045e-06, + "loss": 0.0, + "step": 12794 + }, + { + "epoch": 0.8245794934587871, + "grad_norm": 0.0286288462786047, + "learning_rate": 1.955603293949159e-06, + "loss": 0.0, + "step": 12795 + }, + { + "epoch": 0.8246439389057163, + "grad_norm": 0.0021863046623147764, + "learning_rate": 1.9548872180451127e-06, + "loss": 0.0, + "step": 12796 + }, + { + "epoch": 0.8247083843526455, + "grad_norm": 0.002743261369717264, + "learning_rate": 1.9541711421410674e-06, + "loss": 0.0, + "step": 12797 + }, + { + "epoch": 0.8247728297995747, + "grad_norm": 0.030985487605830843, + "learning_rate": 1.9534550662370213e-06, + "loss": 0.0, + "step": 12798 + }, + { + "epoch": 0.8248372752465039, + "grad_norm": 0.011039880199293242, + "learning_rate": 1.9527389903329756e-06, + "loss": 0.0, + "step": 12799 + }, + { + "epoch": 0.824901720693433, + "grad_norm": 0.0006824560142423226, + "learning_rate": 1.9520229144289295e-06, + "loss": 0.0, + "step": 12800 + }, + { + "epoch": 0.8249661661403622, + "grad_norm": 0.010945881791514213, + "learning_rate": 1.9513068385248838e-06, + "loss": 0.0001, + "step": 12801 + }, + { + "epoch": 0.8250306115872914, + "grad_norm": 0.00721237801787959, + "learning_rate": 1.950590762620838e-06, + "loss": 0.0, + "step": 12802 + }, + { + "epoch": 0.8250950570342205, + "grad_norm": 0.0004965975994171999, + "learning_rate": 1.9498746867167923e-06, + "loss": 0.0, + "step": 12803 + }, + { + "epoch": 0.8251595024811497, + "grad_norm": 0.009016010154456617, + "learning_rate": 1.9491586108127462e-06, + "loss": 0.0, + "step": 12804 + }, + { + "epoch": 0.8252239479280788, + "grad_norm": 0.0012284352036761618, + "learning_rate": 1.9484425349087005e-06, + "loss": 0.0, + "step": 12805 + }, + { + "epoch": 0.825288393375008, + "grad_norm": 0.0028087657996823242, + "learning_rate": 1.947726459004655e-06, + "loss": 0.0, + "step": 12806 + }, + { + "epoch": 0.8253528388219372, + "grad_norm": 0.0011335282820437035, + "learning_rate": 1.9470103831006087e-06, + "loss": 0.0, + "step": 12807 + }, + { + "epoch": 0.8254172842688664, + "grad_norm": 0.024974543026714955, + "learning_rate": 1.946294307196563e-06, + "loss": 0.0, + "step": 12808 + }, + { + "epoch": 0.8254817297157956, + "grad_norm": 0.01026302773500995, + "learning_rate": 1.9455782312925173e-06, + "loss": 0.0001, + "step": 12809 + }, + { + "epoch": 0.8255461751627248, + "grad_norm": 0.00013102626201935377, + "learning_rate": 1.9448621553884716e-06, + "loss": 0.0, + "step": 12810 + }, + { + "epoch": 0.825610620609654, + "grad_norm": 0.05074874360207065, + "learning_rate": 1.9441460794844255e-06, + "loss": 0.0002, + "step": 12811 + }, + { + "epoch": 0.8256750660565831, + "grad_norm": 0.16791835295001642, + "learning_rate": 1.9434300035803798e-06, + "loss": 0.0002, + "step": 12812 + }, + { + "epoch": 0.8257395115035123, + "grad_norm": 0.22231346647938868, + "learning_rate": 1.9427139276763336e-06, + "loss": 0.0009, + "step": 12813 + }, + { + "epoch": 0.8258039569504414, + "grad_norm": 0.005087052383364399, + "learning_rate": 1.941997851772288e-06, + "loss": 0.0001, + "step": 12814 + }, + { + "epoch": 0.8258684023973706, + "grad_norm": 0.29658397413916804, + "learning_rate": 1.9412817758682422e-06, + "loss": 0.0004, + "step": 12815 + }, + { + "epoch": 0.8259328478442998, + "grad_norm": 0.00015184382402304126, + "learning_rate": 1.9405656999641965e-06, + "loss": 0.0, + "step": 12816 + }, + { + "epoch": 0.825997293291229, + "grad_norm": 0.00011728335842496472, + "learning_rate": 1.9398496240601504e-06, + "loss": 0.0, + "step": 12817 + }, + { + "epoch": 0.8260617387381581, + "grad_norm": 0.12153047977766473, + "learning_rate": 1.9391335481561047e-06, + "loss": 0.0014, + "step": 12818 + }, + { + "epoch": 0.8261261841850873, + "grad_norm": 0.0024044143837508186, + "learning_rate": 1.9384174722520586e-06, + "loss": 0.0, + "step": 12819 + }, + { + "epoch": 0.8261906296320165, + "grad_norm": 0.26979896131740444, + "learning_rate": 1.9377013963480133e-06, + "loss": 0.0059, + "step": 12820 + }, + { + "epoch": 0.8262550750789457, + "grad_norm": 0.007687804269723589, + "learning_rate": 1.936985320443967e-06, + "loss": 0.0, + "step": 12821 + }, + { + "epoch": 0.8263195205258749, + "grad_norm": 0.001681070516331721, + "learning_rate": 1.9362692445399215e-06, + "loss": 0.0, + "step": 12822 + }, + { + "epoch": 0.8263839659728041, + "grad_norm": 0.010557766168112366, + "learning_rate": 1.9355531686358754e-06, + "loss": 0.0, + "step": 12823 + }, + { + "epoch": 0.8264484114197332, + "grad_norm": 0.0008512911900243197, + "learning_rate": 1.9348370927318297e-06, + "loss": 0.0, + "step": 12824 + }, + { + "epoch": 0.8265128568666623, + "grad_norm": 0.0013926741667203172, + "learning_rate": 1.934121016827784e-06, + "loss": 0.0, + "step": 12825 + }, + { + "epoch": 0.8265773023135915, + "grad_norm": 0.0006454232476995218, + "learning_rate": 1.9334049409237383e-06, + "loss": 0.0, + "step": 12826 + }, + { + "epoch": 0.8266417477605207, + "grad_norm": 0.007012337352928124, + "learning_rate": 1.932688865019692e-06, + "loss": 0.0, + "step": 12827 + }, + { + "epoch": 0.8267061932074499, + "grad_norm": 0.0016290758214399952, + "learning_rate": 1.9319727891156464e-06, + "loss": 0.0, + "step": 12828 + }, + { + "epoch": 0.826770638654379, + "grad_norm": 0.0004002776358242824, + "learning_rate": 1.9312567132116007e-06, + "loss": 0.0, + "step": 12829 + }, + { + "epoch": 0.8268350841013082, + "grad_norm": 0.0006474920133281832, + "learning_rate": 1.9305406373075546e-06, + "loss": 0.0, + "step": 12830 + }, + { + "epoch": 0.8268995295482374, + "grad_norm": 0.0004922729677889804, + "learning_rate": 1.929824561403509e-06, + "loss": 0.0, + "step": 12831 + }, + { + "epoch": 0.8269639749951666, + "grad_norm": 0.015237039801407486, + "learning_rate": 1.929108485499463e-06, + "loss": 0.0, + "step": 12832 + }, + { + "epoch": 0.8270284204420958, + "grad_norm": 0.001385017502838934, + "learning_rate": 1.9283924095954175e-06, + "loss": 0.0, + "step": 12833 + }, + { + "epoch": 0.827092865889025, + "grad_norm": 0.00036743085902484786, + "learning_rate": 1.9276763336913714e-06, + "loss": 0.0, + "step": 12834 + }, + { + "epoch": 0.8271573113359542, + "grad_norm": 0.21470143199397795, + "learning_rate": 1.9269602577873257e-06, + "loss": 0.0052, + "step": 12835 + }, + { + "epoch": 0.8272217567828833, + "grad_norm": 0.20051220260145272, + "learning_rate": 1.9262441818832795e-06, + "loss": 0.0009, + "step": 12836 + }, + { + "epoch": 0.8272862022298124, + "grad_norm": 0.0013799034224737958, + "learning_rate": 1.925528105979234e-06, + "loss": 0.0, + "step": 12837 + }, + { + "epoch": 0.8273506476767416, + "grad_norm": 0.0011499711319856818, + "learning_rate": 1.924812030075188e-06, + "loss": 0.0, + "step": 12838 + }, + { + "epoch": 0.8274150931236708, + "grad_norm": 0.004180721553757159, + "learning_rate": 1.9240959541711424e-06, + "loss": 0.0, + "step": 12839 + }, + { + "epoch": 0.8274795385706, + "grad_norm": 0.00888103511971256, + "learning_rate": 1.9233798782670963e-06, + "loss": 0.0, + "step": 12840 + }, + { + "epoch": 0.8275439840175292, + "grad_norm": 0.0004716433398487072, + "learning_rate": 1.9226638023630506e-06, + "loss": 0.0, + "step": 12841 + }, + { + "epoch": 0.8276084294644583, + "grad_norm": 3.567135691782576e-05, + "learning_rate": 1.921947726459005e-06, + "loss": 0.0, + "step": 12842 + }, + { + "epoch": 0.8276728749113875, + "grad_norm": 0.006151835977891471, + "learning_rate": 1.921231650554959e-06, + "loss": 0.0, + "step": 12843 + }, + { + "epoch": 0.8277373203583167, + "grad_norm": 0.0017068130805905073, + "learning_rate": 1.920515574650913e-06, + "loss": 0.0, + "step": 12844 + }, + { + "epoch": 0.8278017658052459, + "grad_norm": 0.029947653519642226, + "learning_rate": 1.9197994987468674e-06, + "loss": 0.0015, + "step": 12845 + }, + { + "epoch": 0.8278662112521751, + "grad_norm": 0.03704959829540365, + "learning_rate": 1.9190834228428217e-06, + "loss": 0.0001, + "step": 12846 + }, + { + "epoch": 0.8279306566991043, + "grad_norm": 0.0018888023632662365, + "learning_rate": 1.9183673469387756e-06, + "loss": 0.0, + "step": 12847 + }, + { + "epoch": 0.8279951021460333, + "grad_norm": 0.0032775118244479816, + "learning_rate": 1.91765127103473e-06, + "loss": 0.0, + "step": 12848 + }, + { + "epoch": 0.8280595475929625, + "grad_norm": 0.002627906814267534, + "learning_rate": 1.9169351951306837e-06, + "loss": 0.0, + "step": 12849 + }, + { + "epoch": 0.8281239930398917, + "grad_norm": 0.0008013443872099772, + "learning_rate": 1.9162191192266385e-06, + "loss": 0.0, + "step": 12850 + }, + { + "epoch": 0.8281884384868209, + "grad_norm": 0.0009410073850608853, + "learning_rate": 1.9155030433225923e-06, + "loss": 0.0, + "step": 12851 + }, + { + "epoch": 0.8282528839337501, + "grad_norm": 0.0010344266427138322, + "learning_rate": 1.9147869674185466e-06, + "loss": 0.0, + "step": 12852 + }, + { + "epoch": 0.8283173293806793, + "grad_norm": 0.00014077844258373367, + "learning_rate": 1.9140708915145005e-06, + "loss": 0.0, + "step": 12853 + }, + { + "epoch": 0.8283817748276084, + "grad_norm": 0.00022600811992336105, + "learning_rate": 1.913354815610455e-06, + "loss": 0.0, + "step": 12854 + }, + { + "epoch": 0.8284462202745376, + "grad_norm": 0.0003438217147535301, + "learning_rate": 1.912638739706409e-06, + "loss": 0.0, + "step": 12855 + }, + { + "epoch": 0.8285106657214668, + "grad_norm": 1.3295451481395686, + "learning_rate": 1.9119226638023634e-06, + "loss": 0.0073, + "step": 12856 + }, + { + "epoch": 0.828575111168396, + "grad_norm": 0.09529210403966394, + "learning_rate": 1.9112065878983173e-06, + "loss": 0.0002, + "step": 12857 + }, + { + "epoch": 0.8286395566153252, + "grad_norm": 0.00013075154801560885, + "learning_rate": 1.9104905119942716e-06, + "loss": 0.0, + "step": 12858 + }, + { + "epoch": 0.8287040020622543, + "grad_norm": 0.005347229015423188, + "learning_rate": 1.909774436090226e-06, + "loss": 0.0, + "step": 12859 + }, + { + "epoch": 0.8287684475091834, + "grad_norm": 0.011590017739172188, + "learning_rate": 1.9090583601861798e-06, + "loss": 0.0, + "step": 12860 + }, + { + "epoch": 0.8288328929561126, + "grad_norm": 0.002305119796443571, + "learning_rate": 1.908342284282134e-06, + "loss": 0.0, + "step": 12861 + }, + { + "epoch": 0.8288973384030418, + "grad_norm": 0.0032701660571884997, + "learning_rate": 1.9076262083780883e-06, + "loss": 0.0, + "step": 12862 + }, + { + "epoch": 0.828961783849971, + "grad_norm": 0.00032209619860416687, + "learning_rate": 1.9069101324740424e-06, + "loss": 0.0, + "step": 12863 + }, + { + "epoch": 0.8290262292969002, + "grad_norm": 0.0001742985353381191, + "learning_rate": 1.9061940565699965e-06, + "loss": 0.0, + "step": 12864 + }, + { + "epoch": 0.8290906747438294, + "grad_norm": 0.006867729174768813, + "learning_rate": 1.9054779806659506e-06, + "loss": 0.0, + "step": 12865 + }, + { + "epoch": 0.8291551201907585, + "grad_norm": 6.386471846661756e-05, + "learning_rate": 1.904761904761905e-06, + "loss": 0.0, + "step": 12866 + }, + { + "epoch": 0.8292195656376877, + "grad_norm": 8.795452018101676e-05, + "learning_rate": 1.9040458288578592e-06, + "loss": 0.0, + "step": 12867 + }, + { + "epoch": 0.8292840110846169, + "grad_norm": 0.014635279825817802, + "learning_rate": 1.9033297529538133e-06, + "loss": 0.0, + "step": 12868 + }, + { + "epoch": 0.8293484565315461, + "grad_norm": 0.20185402148521048, + "learning_rate": 1.9026136770497674e-06, + "loss": 0.0011, + "step": 12869 + }, + { + "epoch": 0.8294129019784752, + "grad_norm": 3.5018456418513304e-05, + "learning_rate": 1.9018976011457217e-06, + "loss": 0.0, + "step": 12870 + }, + { + "epoch": 0.8294773474254044, + "grad_norm": 0.0011095530381541555, + "learning_rate": 1.9011815252416758e-06, + "loss": 0.0, + "step": 12871 + }, + { + "epoch": 0.8295417928723335, + "grad_norm": 0.06928320343427943, + "learning_rate": 1.9004654493376299e-06, + "loss": 0.0001, + "step": 12872 + }, + { + "epoch": 0.8296062383192627, + "grad_norm": 0.005472200014042761, + "learning_rate": 1.8997493734335842e-06, + "loss": 0.0001, + "step": 12873 + }, + { + "epoch": 0.8296706837661919, + "grad_norm": 0.01115602003143175, + "learning_rate": 1.8990332975295385e-06, + "loss": 0.0001, + "step": 12874 + }, + { + "epoch": 0.8297351292131211, + "grad_norm": 0.0010032057744439192, + "learning_rate": 1.8983172216254925e-06, + "loss": 0.0, + "step": 12875 + }, + { + "epoch": 0.8297995746600503, + "grad_norm": 0.002481002567431664, + "learning_rate": 1.8976011457214466e-06, + "loss": 0.0, + "step": 12876 + }, + { + "epoch": 0.8298640201069795, + "grad_norm": 0.005168957551730069, + "learning_rate": 1.8968850698174007e-06, + "loss": 0.0, + "step": 12877 + }, + { + "epoch": 0.8299284655539086, + "grad_norm": 0.0038538895973451717, + "learning_rate": 1.8961689939133548e-06, + "loss": 0.0, + "step": 12878 + }, + { + "epoch": 0.8299929110008378, + "grad_norm": 0.12830823844414282, + "learning_rate": 1.8954529180093093e-06, + "loss": 0.0001, + "step": 12879 + }, + { + "epoch": 0.830057356447767, + "grad_norm": 0.0022606158190638017, + "learning_rate": 1.8947368421052634e-06, + "loss": 0.0, + "step": 12880 + }, + { + "epoch": 0.8301218018946961, + "grad_norm": 0.1553001860605195, + "learning_rate": 1.8940207662012175e-06, + "loss": 0.0007, + "step": 12881 + }, + { + "epoch": 0.8301862473416253, + "grad_norm": 9.681461373014582e-05, + "learning_rate": 1.8933046902971716e-06, + "loss": 0.0, + "step": 12882 + }, + { + "epoch": 0.8302506927885545, + "grad_norm": 0.006053399325819987, + "learning_rate": 1.8925886143931257e-06, + "loss": 0.0, + "step": 12883 + }, + { + "epoch": 0.8303151382354836, + "grad_norm": 0.0003135291907860067, + "learning_rate": 1.89187253848908e-06, + "loss": 0.0, + "step": 12884 + }, + { + "epoch": 0.8303795836824128, + "grad_norm": 0.0016962123214610799, + "learning_rate": 1.8911564625850343e-06, + "loss": 0.0, + "step": 12885 + }, + { + "epoch": 0.830444029129342, + "grad_norm": 0.0022821039817459542, + "learning_rate": 1.8904403866809883e-06, + "loss": 0.0, + "step": 12886 + }, + { + "epoch": 0.8305084745762712, + "grad_norm": 0.44118902887440714, + "learning_rate": 1.8897243107769424e-06, + "loss": 0.0008, + "step": 12887 + }, + { + "epoch": 0.8305729200232004, + "grad_norm": 0.0026145982784008394, + "learning_rate": 1.8890082348728967e-06, + "loss": 0.0, + "step": 12888 + }, + { + "epoch": 0.8306373654701296, + "grad_norm": 2.009516340008284e-05, + "learning_rate": 1.8882921589688508e-06, + "loss": 0.0, + "step": 12889 + }, + { + "epoch": 0.8307018109170587, + "grad_norm": 0.00039474701415418735, + "learning_rate": 1.887576083064805e-06, + "loss": 0.0, + "step": 12890 + }, + { + "epoch": 0.8307662563639879, + "grad_norm": 0.02382485355526577, + "learning_rate": 1.8868600071607592e-06, + "loss": 0.0001, + "step": 12891 + }, + { + "epoch": 0.830830701810917, + "grad_norm": 0.005398793179076116, + "learning_rate": 1.8861439312567135e-06, + "loss": 0.0, + "step": 12892 + }, + { + "epoch": 0.8308951472578462, + "grad_norm": 0.0768639292053062, + "learning_rate": 1.8854278553526676e-06, + "loss": 0.0002, + "step": 12893 + }, + { + "epoch": 0.8309595927047754, + "grad_norm": 0.008180770152188349, + "learning_rate": 1.8847117794486217e-06, + "loss": 0.0, + "step": 12894 + }, + { + "epoch": 0.8310240381517046, + "grad_norm": 0.003654351436249659, + "learning_rate": 1.8839957035445758e-06, + "loss": 0.0, + "step": 12895 + }, + { + "epoch": 0.8310884835986337, + "grad_norm": 0.0013665234520897544, + "learning_rate": 1.8832796276405303e-06, + "loss": 0.0, + "step": 12896 + }, + { + "epoch": 0.8311529290455629, + "grad_norm": 0.00821381169757801, + "learning_rate": 1.8825635517364844e-06, + "loss": 0.0, + "step": 12897 + }, + { + "epoch": 0.8312173744924921, + "grad_norm": 0.02980132621626116, + "learning_rate": 1.8818474758324384e-06, + "loss": 0.0001, + "step": 12898 + }, + { + "epoch": 0.8312818199394213, + "grad_norm": 0.15304842610763447, + "learning_rate": 1.8811313999283925e-06, + "loss": 0.0002, + "step": 12899 + }, + { + "epoch": 0.8313462653863505, + "grad_norm": 3.2859700974284204e-05, + "learning_rate": 1.8804153240243466e-06, + "loss": 0.0, + "step": 12900 + }, + { + "epoch": 0.8314107108332797, + "grad_norm": 7.330661953553586e-05, + "learning_rate": 1.8796992481203007e-06, + "loss": 0.0, + "step": 12901 + }, + { + "epoch": 0.8314751562802088, + "grad_norm": 0.0016363356131233538, + "learning_rate": 1.8789831722162552e-06, + "loss": 0.0, + "step": 12902 + }, + { + "epoch": 0.8315396017271379, + "grad_norm": 0.0023635623644680886, + "learning_rate": 1.8782670963122093e-06, + "loss": 0.0, + "step": 12903 + }, + { + "epoch": 0.8316040471740671, + "grad_norm": 0.0011234160999025852, + "learning_rate": 1.8775510204081634e-06, + "loss": 0.0, + "step": 12904 + }, + { + "epoch": 0.8316684926209963, + "grad_norm": 0.00569605487531826, + "learning_rate": 1.8768349445041175e-06, + "loss": 0.0, + "step": 12905 + }, + { + "epoch": 0.8317329380679255, + "grad_norm": 0.11860830172496771, + "learning_rate": 1.8761188686000718e-06, + "loss": 0.0001, + "step": 12906 + }, + { + "epoch": 0.8317973835148547, + "grad_norm": 0.0001888324722297905, + "learning_rate": 1.8754027926960259e-06, + "loss": 0.0, + "step": 12907 + }, + { + "epoch": 0.8318618289617838, + "grad_norm": 0.043075241266047205, + "learning_rate": 1.8746867167919802e-06, + "loss": 0.0001, + "step": 12908 + }, + { + "epoch": 0.831926274408713, + "grad_norm": 0.0002445940055020511, + "learning_rate": 1.8739706408879342e-06, + "loss": 0.0, + "step": 12909 + }, + { + "epoch": 0.8319907198556422, + "grad_norm": 0.002135059555199994, + "learning_rate": 1.8732545649838885e-06, + "loss": 0.0, + "step": 12910 + }, + { + "epoch": 0.8320551653025714, + "grad_norm": 3.074306074790163, + "learning_rate": 1.8725384890798426e-06, + "loss": 0.0207, + "step": 12911 + }, + { + "epoch": 0.8321196107495006, + "grad_norm": 0.0009971946495302751, + "learning_rate": 1.8718224131757967e-06, + "loss": 0.0, + "step": 12912 + }, + { + "epoch": 0.8321840561964298, + "grad_norm": 0.0005966592023226857, + "learning_rate": 1.8711063372717508e-06, + "loss": 0.0, + "step": 12913 + }, + { + "epoch": 0.832248501643359, + "grad_norm": 0.0005944522041070304, + "learning_rate": 1.8703902613677053e-06, + "loss": 0.0, + "step": 12914 + }, + { + "epoch": 0.832312947090288, + "grad_norm": 0.00020677188897750951, + "learning_rate": 1.8696741854636594e-06, + "loss": 0.0, + "step": 12915 + }, + { + "epoch": 0.8323773925372172, + "grad_norm": 0.00010503859283321327, + "learning_rate": 1.8689581095596135e-06, + "loss": 0.0, + "step": 12916 + }, + { + "epoch": 0.8324418379841464, + "grad_norm": 0.06789593276378175, + "learning_rate": 1.8682420336555676e-06, + "loss": 0.0002, + "step": 12917 + }, + { + "epoch": 0.8325062834310756, + "grad_norm": 0.00022078347307954933, + "learning_rate": 1.8675259577515217e-06, + "loss": 0.0, + "step": 12918 + }, + { + "epoch": 0.8325707288780048, + "grad_norm": 0.14633415785093218, + "learning_rate": 1.866809881847476e-06, + "loss": 0.0004, + "step": 12919 + }, + { + "epoch": 0.832635174324934, + "grad_norm": 0.26466572272238886, + "learning_rate": 1.8660938059434303e-06, + "loss": 0.0044, + "step": 12920 + }, + { + "epoch": 0.8326996197718631, + "grad_norm": 0.00018119899261728837, + "learning_rate": 1.8653777300393843e-06, + "loss": 0.0, + "step": 12921 + }, + { + "epoch": 0.8327640652187923, + "grad_norm": 0.49176382570365107, + "learning_rate": 1.8646616541353384e-06, + "loss": 0.0004, + "step": 12922 + }, + { + "epoch": 0.8328285106657215, + "grad_norm": 0.17224296823060695, + "learning_rate": 1.8639455782312927e-06, + "loss": 0.0004, + "step": 12923 + }, + { + "epoch": 0.8328929561126507, + "grad_norm": 0.8200194062285915, + "learning_rate": 1.8632295023272468e-06, + "loss": 0.0046, + "step": 12924 + }, + { + "epoch": 0.8329574015595799, + "grad_norm": 0.022811282350755628, + "learning_rate": 1.862513426423201e-06, + "loss": 0.0, + "step": 12925 + }, + { + "epoch": 0.8330218470065089, + "grad_norm": 0.0002876985247923036, + "learning_rate": 1.8617973505191552e-06, + "loss": 0.0, + "step": 12926 + }, + { + "epoch": 0.8330862924534381, + "grad_norm": 0.00026064448045058826, + "learning_rate": 1.8610812746151095e-06, + "loss": 0.0, + "step": 12927 + }, + { + "epoch": 0.8331507379003673, + "grad_norm": 0.00276155562535339, + "learning_rate": 1.8603651987110636e-06, + "loss": 0.0, + "step": 12928 + }, + { + "epoch": 0.8332151833472965, + "grad_norm": 0.0018431929870960982, + "learning_rate": 1.8596491228070177e-06, + "loss": 0.0, + "step": 12929 + }, + { + "epoch": 0.8332796287942257, + "grad_norm": 0.0007927471892093145, + "learning_rate": 1.8589330469029718e-06, + "loss": 0.0, + "step": 12930 + }, + { + "epoch": 0.8333440742411549, + "grad_norm": 0.602887935199894, + "learning_rate": 1.8582169709989263e-06, + "loss": 0.0018, + "step": 12931 + }, + { + "epoch": 0.833408519688084, + "grad_norm": 0.047878668853832275, + "learning_rate": 1.8575008950948804e-06, + "loss": 0.0001, + "step": 12932 + }, + { + "epoch": 0.8334729651350132, + "grad_norm": 0.03237215842096609, + "learning_rate": 1.8567848191908345e-06, + "loss": 0.0, + "step": 12933 + }, + { + "epoch": 0.8335374105819424, + "grad_norm": 0.014911026549980291, + "learning_rate": 1.8560687432867885e-06, + "loss": 0.0, + "step": 12934 + }, + { + "epoch": 0.8336018560288716, + "grad_norm": 0.01923477641968151, + "learning_rate": 1.8553526673827426e-06, + "loss": 0.0001, + "step": 12935 + }, + { + "epoch": 0.8336663014758008, + "grad_norm": 0.2234138849757082, + "learning_rate": 1.8546365914786967e-06, + "loss": 0.0002, + "step": 12936 + }, + { + "epoch": 0.8337307469227299, + "grad_norm": 0.023034146600674356, + "learning_rate": 1.8539205155746512e-06, + "loss": 0.0, + "step": 12937 + }, + { + "epoch": 0.833795192369659, + "grad_norm": 0.004499721841399995, + "learning_rate": 1.8532044396706053e-06, + "loss": 0.0, + "step": 12938 + }, + { + "epoch": 0.8338596378165882, + "grad_norm": 0.027223810403340872, + "learning_rate": 1.8524883637665594e-06, + "loss": 0.0, + "step": 12939 + }, + { + "epoch": 0.8339240832635174, + "grad_norm": 0.0010825319904780766, + "learning_rate": 1.8517722878625135e-06, + "loss": 0.0, + "step": 12940 + }, + { + "epoch": 0.8339885287104466, + "grad_norm": 0.00017965621067447417, + "learning_rate": 1.8510562119584678e-06, + "loss": 0.0, + "step": 12941 + }, + { + "epoch": 0.8340529741573758, + "grad_norm": 0.3216080980251922, + "learning_rate": 1.8503401360544219e-06, + "loss": 0.0006, + "step": 12942 + }, + { + "epoch": 0.834117419604305, + "grad_norm": 0.0010344771420191343, + "learning_rate": 1.8496240601503762e-06, + "loss": 0.0, + "step": 12943 + }, + { + "epoch": 0.8341818650512342, + "grad_norm": 0.0012871061826992497, + "learning_rate": 1.8489079842463303e-06, + "loss": 0.0, + "step": 12944 + }, + { + "epoch": 0.8342463104981633, + "grad_norm": 0.001207658190563529, + "learning_rate": 1.8481919083422846e-06, + "loss": 0.0, + "step": 12945 + }, + { + "epoch": 0.8343107559450925, + "grad_norm": 0.0007330767288757314, + "learning_rate": 1.8474758324382386e-06, + "loss": 0.0, + "step": 12946 + }, + { + "epoch": 0.8343752013920217, + "grad_norm": 0.004926647550943549, + "learning_rate": 1.8467597565341927e-06, + "loss": 0.0, + "step": 12947 + }, + { + "epoch": 0.8344396468389508, + "grad_norm": 0.01605856973953187, + "learning_rate": 1.8460436806301468e-06, + "loss": 0.0, + "step": 12948 + }, + { + "epoch": 0.83450409228588, + "grad_norm": 0.0005230443666051655, + "learning_rate": 1.8453276047261013e-06, + "loss": 0.0, + "step": 12949 + }, + { + "epoch": 0.8345685377328091, + "grad_norm": 0.0008104200669707647, + "learning_rate": 1.8446115288220554e-06, + "loss": 0.0, + "step": 12950 + }, + { + "epoch": 0.8346329831797383, + "grad_norm": 0.011457724398698297, + "learning_rate": 1.8438954529180095e-06, + "loss": 0.0, + "step": 12951 + }, + { + "epoch": 0.8346974286266675, + "grad_norm": 0.024955868465614733, + "learning_rate": 1.8431793770139636e-06, + "loss": 0.0003, + "step": 12952 + }, + { + "epoch": 0.8347618740735967, + "grad_norm": 0.0011789983636083115, + "learning_rate": 1.8424633011099177e-06, + "loss": 0.0, + "step": 12953 + }, + { + "epoch": 0.8348263195205259, + "grad_norm": 0.00127945397321626, + "learning_rate": 1.8417472252058718e-06, + "loss": 0.0, + "step": 12954 + }, + { + "epoch": 0.8348907649674551, + "grad_norm": 0.0005486498215946787, + "learning_rate": 1.8410311493018263e-06, + "loss": 0.0, + "step": 12955 + }, + { + "epoch": 0.8349552104143843, + "grad_norm": 0.0013076929794172173, + "learning_rate": 1.8403150733977804e-06, + "loss": 0.0, + "step": 12956 + }, + { + "epoch": 0.8350196558613134, + "grad_norm": 0.002699150201750239, + "learning_rate": 1.8395989974937344e-06, + "loss": 0.0, + "step": 12957 + }, + { + "epoch": 0.8350841013082426, + "grad_norm": 0.0014782031120580252, + "learning_rate": 1.8388829215896885e-06, + "loss": 0.0, + "step": 12958 + }, + { + "epoch": 0.8351485467551717, + "grad_norm": 0.00013325395203645215, + "learning_rate": 1.8381668456856428e-06, + "loss": 0.0, + "step": 12959 + }, + { + "epoch": 0.8352129922021009, + "grad_norm": 0.00028691027465177585, + "learning_rate": 1.837450769781597e-06, + "loss": 0.0, + "step": 12960 + }, + { + "epoch": 0.8352774376490301, + "grad_norm": 0.015239722260245718, + "learning_rate": 1.8367346938775512e-06, + "loss": 0.0001, + "step": 12961 + }, + { + "epoch": 0.8353418830959592, + "grad_norm": 0.01791622106760161, + "learning_rate": 1.8360186179735053e-06, + "loss": 0.0001, + "step": 12962 + }, + { + "epoch": 0.8354063285428884, + "grad_norm": 0.0010604469781897421, + "learning_rate": 1.8353025420694596e-06, + "loss": 0.0, + "step": 12963 + }, + { + "epoch": 0.8354707739898176, + "grad_norm": 0.0024505637205691246, + "learning_rate": 1.8345864661654137e-06, + "loss": 0.0, + "step": 12964 + }, + { + "epoch": 0.8355352194367468, + "grad_norm": 0.0013904709710636813, + "learning_rate": 1.8338703902613678e-06, + "loss": 0.0, + "step": 12965 + }, + { + "epoch": 0.835599664883676, + "grad_norm": 0.03134726683056867, + "learning_rate": 1.833154314357322e-06, + "loss": 0.0001, + "step": 12966 + }, + { + "epoch": 0.8356641103306052, + "grad_norm": 0.32471975430680294, + "learning_rate": 1.8324382384532764e-06, + "loss": 0.0007, + "step": 12967 + }, + { + "epoch": 0.8357285557775344, + "grad_norm": 0.46926647978812036, + "learning_rate": 1.8317221625492305e-06, + "loss": 0.0024, + "step": 12968 + }, + { + "epoch": 0.8357930012244635, + "grad_norm": 0.0007497435298024752, + "learning_rate": 1.8310060866451845e-06, + "loss": 0.0, + "step": 12969 + }, + { + "epoch": 0.8358574466713926, + "grad_norm": 8.529497298495229e-05, + "learning_rate": 1.8302900107411386e-06, + "loss": 0.0, + "step": 12970 + }, + { + "epoch": 0.8359218921183218, + "grad_norm": 0.010444437001298015, + "learning_rate": 1.8295739348370927e-06, + "loss": 0.0001, + "step": 12971 + }, + { + "epoch": 0.835986337565251, + "grad_norm": 0.0005033715826079014, + "learning_rate": 1.8288578589330472e-06, + "loss": 0.0, + "step": 12972 + }, + { + "epoch": 0.8360507830121802, + "grad_norm": 0.010451150054464193, + "learning_rate": 1.8281417830290013e-06, + "loss": 0.0, + "step": 12973 + }, + { + "epoch": 0.8361152284591093, + "grad_norm": 0.1669388847722097, + "learning_rate": 1.8274257071249554e-06, + "loss": 0.0002, + "step": 12974 + }, + { + "epoch": 0.8361796739060385, + "grad_norm": 3.708374839204555e-05, + "learning_rate": 1.8267096312209095e-06, + "loss": 0.0, + "step": 12975 + }, + { + "epoch": 0.8362441193529677, + "grad_norm": 0.024097830011570242, + "learning_rate": 1.8259935553168636e-06, + "loss": 0.0, + "step": 12976 + }, + { + "epoch": 0.8363085647998969, + "grad_norm": 0.00035244492069838926, + "learning_rate": 1.8252774794128179e-06, + "loss": 0.0, + "step": 12977 + }, + { + "epoch": 0.8363730102468261, + "grad_norm": 0.0006009073649762303, + "learning_rate": 1.8245614035087722e-06, + "loss": 0.0, + "step": 12978 + }, + { + "epoch": 0.8364374556937553, + "grad_norm": 0.002436471679603375, + "learning_rate": 1.8238453276047263e-06, + "loss": 0.0, + "step": 12979 + }, + { + "epoch": 0.8365019011406845, + "grad_norm": 0.0019328984019827203, + "learning_rate": 1.8231292517006803e-06, + "loss": 0.0, + "step": 12980 + }, + { + "epoch": 0.8365663465876135, + "grad_norm": 0.00047925150121626243, + "learning_rate": 1.8224131757966346e-06, + "loss": 0.0, + "step": 12981 + }, + { + "epoch": 0.8366307920345427, + "grad_norm": 0.0023810597907460897, + "learning_rate": 1.8216970998925887e-06, + "loss": 0.0, + "step": 12982 + }, + { + "epoch": 0.8366952374814719, + "grad_norm": 0.005150080251448669, + "learning_rate": 1.8209810239885428e-06, + "loss": 0.0, + "step": 12983 + }, + { + "epoch": 0.8367596829284011, + "grad_norm": 0.0005138275505945283, + "learning_rate": 1.8202649480844971e-06, + "loss": 0.0, + "step": 12984 + }, + { + "epoch": 0.8368241283753303, + "grad_norm": 0.0001395393045962735, + "learning_rate": 1.8195488721804514e-06, + "loss": 0.0, + "step": 12985 + }, + { + "epoch": 0.8368885738222595, + "grad_norm": 0.015921765880894, + "learning_rate": 1.8188327962764055e-06, + "loss": 0.0001, + "step": 12986 + }, + { + "epoch": 0.8369530192691886, + "grad_norm": 0.10171368698405213, + "learning_rate": 1.8181167203723596e-06, + "loss": 0.0009, + "step": 12987 + }, + { + "epoch": 0.8370174647161178, + "grad_norm": 0.003001050790157522, + "learning_rate": 1.8174006444683137e-06, + "loss": 0.0, + "step": 12988 + }, + { + "epoch": 0.837081910163047, + "grad_norm": 0.0007591269273822723, + "learning_rate": 1.8166845685642678e-06, + "loss": 0.0, + "step": 12989 + }, + { + "epoch": 0.8371463556099762, + "grad_norm": 0.0001454134195656195, + "learning_rate": 1.8159684926602223e-06, + "loss": 0.0, + "step": 12990 + }, + { + "epoch": 0.8372108010569054, + "grad_norm": 0.10966075031034136, + "learning_rate": 1.8152524167561764e-06, + "loss": 0.0001, + "step": 12991 + }, + { + "epoch": 0.8372752465038346, + "grad_norm": 0.004862647588603843, + "learning_rate": 1.8145363408521305e-06, + "loss": 0.0, + "step": 12992 + }, + { + "epoch": 0.8373396919507636, + "grad_norm": 0.19124268335483594, + "learning_rate": 1.8138202649480845e-06, + "loss": 0.0006, + "step": 12993 + }, + { + "epoch": 0.8374041373976928, + "grad_norm": 0.00014524860075802834, + "learning_rate": 1.8131041890440388e-06, + "loss": 0.0, + "step": 12994 + }, + { + "epoch": 0.837468582844622, + "grad_norm": 0.04439166994184201, + "learning_rate": 1.812388113139993e-06, + "loss": 0.0001, + "step": 12995 + }, + { + "epoch": 0.8375330282915512, + "grad_norm": 6.69389849704551e-05, + "learning_rate": 1.8116720372359472e-06, + "loss": 0.0, + "step": 12996 + }, + { + "epoch": 0.8375974737384804, + "grad_norm": 0.001974699603729132, + "learning_rate": 1.8109559613319013e-06, + "loss": 0.0, + "step": 12997 + }, + { + "epoch": 0.8376619191854096, + "grad_norm": 0.00018389771702319728, + "learning_rate": 1.8102398854278556e-06, + "loss": 0.0, + "step": 12998 + }, + { + "epoch": 0.8377263646323387, + "grad_norm": 0.12907701798589907, + "learning_rate": 1.8095238095238097e-06, + "loss": 0.0002, + "step": 12999 + }, + { + "epoch": 0.8377908100792679, + "grad_norm": 0.23746540765057983, + "learning_rate": 1.8088077336197638e-06, + "loss": 0.0008, + "step": 13000 + }, + { + "epoch": 0.8378552555261971, + "grad_norm": 0.05998519156857347, + "learning_rate": 1.808091657715718e-06, + "loss": 0.0001, + "step": 13001 + }, + { + "epoch": 0.8379197009731263, + "grad_norm": 0.001119847624552485, + "learning_rate": 1.8073755818116724e-06, + "loss": 0.0, + "step": 13002 + }, + { + "epoch": 0.8379841464200555, + "grad_norm": 0.46324393715573475, + "learning_rate": 1.8066595059076265e-06, + "loss": 0.0016, + "step": 13003 + }, + { + "epoch": 0.8380485918669845, + "grad_norm": 0.002904501204279146, + "learning_rate": 1.8059434300035806e-06, + "loss": 0.0, + "step": 13004 + }, + { + "epoch": 0.8381130373139137, + "grad_norm": 0.0032653766814766654, + "learning_rate": 1.8052273540995346e-06, + "loss": 0.0, + "step": 13005 + }, + { + "epoch": 0.8381774827608429, + "grad_norm": 0.000118351637666897, + "learning_rate": 1.8045112781954887e-06, + "loss": 0.0, + "step": 13006 + }, + { + "epoch": 0.8382419282077721, + "grad_norm": 0.09376592798824523, + "learning_rate": 1.8037952022914432e-06, + "loss": 0.0002, + "step": 13007 + }, + { + "epoch": 0.8383063736547013, + "grad_norm": 0.0040541966063241675, + "learning_rate": 1.8030791263873973e-06, + "loss": 0.0, + "step": 13008 + }, + { + "epoch": 0.8383708191016305, + "grad_norm": 0.013738120997783352, + "learning_rate": 1.8023630504833514e-06, + "loss": 0.0001, + "step": 13009 + }, + { + "epoch": 0.8384352645485597, + "grad_norm": 0.0031340080052982603, + "learning_rate": 1.8016469745793055e-06, + "loss": 0.0, + "step": 13010 + }, + { + "epoch": 0.8384997099954888, + "grad_norm": 0.0009455405513584888, + "learning_rate": 1.8009308986752596e-06, + "loss": 0.0, + "step": 13011 + }, + { + "epoch": 0.838564155442418, + "grad_norm": 0.0036418512175713817, + "learning_rate": 1.8002148227712139e-06, + "loss": 0.0, + "step": 13012 + }, + { + "epoch": 0.8386286008893472, + "grad_norm": 0.0006205554728098501, + "learning_rate": 1.7994987468671682e-06, + "loss": 0.0, + "step": 13013 + }, + { + "epoch": 0.8386930463362764, + "grad_norm": 0.0025184925414780623, + "learning_rate": 1.7987826709631223e-06, + "loss": 0.0, + "step": 13014 + }, + { + "epoch": 0.8387574917832055, + "grad_norm": 9.977119185934306e-05, + "learning_rate": 1.7980665950590764e-06, + "loss": 0.0, + "step": 13015 + }, + { + "epoch": 0.8388219372301347, + "grad_norm": 0.001322791226674588, + "learning_rate": 1.7973505191550307e-06, + "loss": 0.0, + "step": 13016 + }, + { + "epoch": 0.8388863826770638, + "grad_norm": 9.753431868816942e-05, + "learning_rate": 1.7966344432509847e-06, + "loss": 0.0, + "step": 13017 + }, + { + "epoch": 0.838950828123993, + "grad_norm": 0.024187093953887785, + "learning_rate": 1.7959183673469388e-06, + "loss": 0.0, + "step": 13018 + }, + { + "epoch": 0.8390152735709222, + "grad_norm": 0.0008915200755770839, + "learning_rate": 1.7952022914428931e-06, + "loss": 0.0, + "step": 13019 + }, + { + "epoch": 0.8390797190178514, + "grad_norm": 0.1700030878168758, + "learning_rate": 1.7944862155388474e-06, + "loss": 0.0021, + "step": 13020 + }, + { + "epoch": 0.8391441644647806, + "grad_norm": 0.009521015167901947, + "learning_rate": 1.7937701396348015e-06, + "loss": 0.0001, + "step": 13021 + }, + { + "epoch": 0.8392086099117098, + "grad_norm": 0.0009619655795215219, + "learning_rate": 1.7930540637307556e-06, + "loss": 0.0, + "step": 13022 + }, + { + "epoch": 0.8392730553586389, + "grad_norm": 0.04029302332851765, + "learning_rate": 1.7923379878267097e-06, + "loss": 0.0, + "step": 13023 + }, + { + "epoch": 0.8393375008055681, + "grad_norm": 0.0386175096813077, + "learning_rate": 1.7916219119226638e-06, + "loss": 0.0, + "step": 13024 + }, + { + "epoch": 0.8394019462524973, + "grad_norm": 4.674073385948886e-05, + "learning_rate": 1.7909058360186183e-06, + "loss": 0.0, + "step": 13025 + }, + { + "epoch": 0.8394663916994264, + "grad_norm": 0.13500024579167066, + "learning_rate": 1.7901897601145724e-06, + "loss": 0.0003, + "step": 13026 + }, + { + "epoch": 0.8395308371463556, + "grad_norm": 0.0024757972109323823, + "learning_rate": 1.7894736842105265e-06, + "loss": 0.0, + "step": 13027 + }, + { + "epoch": 0.8395952825932848, + "grad_norm": 0.0021501952153607775, + "learning_rate": 1.7887576083064805e-06, + "loss": 0.0, + "step": 13028 + }, + { + "epoch": 0.8396597280402139, + "grad_norm": 0.0008697595381524505, + "learning_rate": 1.7880415324024346e-06, + "loss": 0.0, + "step": 13029 + }, + { + "epoch": 0.8397241734871431, + "grad_norm": 0.4445862117639593, + "learning_rate": 1.787325456498389e-06, + "loss": 0.0035, + "step": 13030 + }, + { + "epoch": 0.8397886189340723, + "grad_norm": 0.36612350437383684, + "learning_rate": 1.7866093805943432e-06, + "loss": 0.0016, + "step": 13031 + }, + { + "epoch": 0.8398530643810015, + "grad_norm": 0.012165564798806148, + "learning_rate": 1.7858933046902973e-06, + "loss": 0.0, + "step": 13032 + }, + { + "epoch": 0.8399175098279307, + "grad_norm": 0.0779756907391507, + "learning_rate": 1.7851772287862514e-06, + "loss": 0.0001, + "step": 13033 + }, + { + "epoch": 0.8399819552748599, + "grad_norm": 0.026000581433871277, + "learning_rate": 1.7844611528822057e-06, + "loss": 0.0015, + "step": 13034 + }, + { + "epoch": 0.840046400721789, + "grad_norm": 0.2525092647996164, + "learning_rate": 1.7837450769781598e-06, + "loss": 0.0005, + "step": 13035 + }, + { + "epoch": 0.8401108461687182, + "grad_norm": 0.0005119127002491398, + "learning_rate": 1.7830290010741139e-06, + "loss": 0.0, + "step": 13036 + }, + { + "epoch": 0.8401752916156473, + "grad_norm": 0.0020882671639366607, + "learning_rate": 1.7823129251700682e-06, + "loss": 0.0, + "step": 13037 + }, + { + "epoch": 0.8402397370625765, + "grad_norm": 0.01590513585309205, + "learning_rate": 1.7815968492660225e-06, + "loss": 0.0, + "step": 13038 + }, + { + "epoch": 0.8403041825095057, + "grad_norm": 0.0003583728461652098, + "learning_rate": 1.7808807733619766e-06, + "loss": 0.0, + "step": 13039 + }, + { + "epoch": 0.8403686279564349, + "grad_norm": 0.0017618107669094471, + "learning_rate": 1.7801646974579306e-06, + "loss": 0.0, + "step": 13040 + }, + { + "epoch": 0.840433073403364, + "grad_norm": 0.0051314915355801785, + "learning_rate": 1.7794486215538847e-06, + "loss": 0.0, + "step": 13041 + }, + { + "epoch": 0.8404975188502932, + "grad_norm": 0.0015903867467092674, + "learning_rate": 1.7787325456498392e-06, + "loss": 0.0, + "step": 13042 + }, + { + "epoch": 0.8405619642972224, + "grad_norm": 0.0004816465431161624, + "learning_rate": 1.7780164697457933e-06, + "loss": 0.0, + "step": 13043 + }, + { + "epoch": 0.8406264097441516, + "grad_norm": 0.006361106917538149, + "learning_rate": 1.7773003938417474e-06, + "loss": 0.0, + "step": 13044 + }, + { + "epoch": 0.8406908551910808, + "grad_norm": 0.035033549990959596, + "learning_rate": 1.7765843179377015e-06, + "loss": 0.0001, + "step": 13045 + }, + { + "epoch": 0.84075530063801, + "grad_norm": 0.005053168074157976, + "learning_rate": 1.7758682420336556e-06, + "loss": 0.0, + "step": 13046 + }, + { + "epoch": 0.8408197460849391, + "grad_norm": 0.1503405115928112, + "learning_rate": 1.7751521661296097e-06, + "loss": 0.0003, + "step": 13047 + }, + { + "epoch": 0.8408841915318682, + "grad_norm": 0.8358501852193341, + "learning_rate": 1.7744360902255642e-06, + "loss": 0.0055, + "step": 13048 + }, + { + "epoch": 0.8409486369787974, + "grad_norm": 0.0012061131465209045, + "learning_rate": 1.7737200143215183e-06, + "loss": 0.0, + "step": 13049 + }, + { + "epoch": 0.8410130824257266, + "grad_norm": 0.030221909655107952, + "learning_rate": 1.7730039384174724e-06, + "loss": 0.0, + "step": 13050 + }, + { + "epoch": 0.8410775278726558, + "grad_norm": 0.0003382566794423338, + "learning_rate": 1.7722878625134265e-06, + "loss": 0.0, + "step": 13051 + }, + { + "epoch": 0.841141973319585, + "grad_norm": 0.001474604475494994, + "learning_rate": 1.7715717866093807e-06, + "loss": 0.0, + "step": 13052 + }, + { + "epoch": 0.8412064187665141, + "grad_norm": 0.0005242323894890739, + "learning_rate": 1.7708557107053348e-06, + "loss": 0.0, + "step": 13053 + }, + { + "epoch": 0.8412708642134433, + "grad_norm": 0.01598698800507151, + "learning_rate": 1.7701396348012891e-06, + "loss": 0.0, + "step": 13054 + }, + { + "epoch": 0.8413353096603725, + "grad_norm": 0.02646729332155176, + "learning_rate": 1.7694235588972432e-06, + "loss": 0.0, + "step": 13055 + }, + { + "epoch": 0.8413997551073017, + "grad_norm": 0.0004705005211530495, + "learning_rate": 1.7687074829931975e-06, + "loss": 0.0, + "step": 13056 + }, + { + "epoch": 0.8414642005542309, + "grad_norm": 0.2572792781261736, + "learning_rate": 1.7679914070891516e-06, + "loss": 0.001, + "step": 13057 + }, + { + "epoch": 0.8415286460011601, + "grad_norm": 0.003987081818030144, + "learning_rate": 1.7672753311851057e-06, + "loss": 0.0, + "step": 13058 + }, + { + "epoch": 0.8415930914480892, + "grad_norm": 0.017189671808628206, + "learning_rate": 1.7665592552810598e-06, + "loss": 0.0, + "step": 13059 + }, + { + "epoch": 0.8416575368950183, + "grad_norm": 0.038183230144691487, + "learning_rate": 1.7658431793770143e-06, + "loss": 0.0, + "step": 13060 + }, + { + "epoch": 0.8417219823419475, + "grad_norm": 0.041134170528374744, + "learning_rate": 1.7651271034729684e-06, + "loss": 0.0001, + "step": 13061 + }, + { + "epoch": 0.8417864277888767, + "grad_norm": 0.08703766028306478, + "learning_rate": 1.7644110275689225e-06, + "loss": 0.0002, + "step": 13062 + }, + { + "epoch": 0.8418508732358059, + "grad_norm": 0.00010780192104593178, + "learning_rate": 1.7636949516648766e-06, + "loss": 0.0, + "step": 13063 + }, + { + "epoch": 0.8419153186827351, + "grad_norm": 0.11928142763613796, + "learning_rate": 1.7629788757608306e-06, + "loss": 0.0004, + "step": 13064 + }, + { + "epoch": 0.8419797641296642, + "grad_norm": 0.5486655215015411, + "learning_rate": 1.762262799856785e-06, + "loss": 0.0035, + "step": 13065 + }, + { + "epoch": 0.8420442095765934, + "grad_norm": 0.005871428197205636, + "learning_rate": 1.7615467239527392e-06, + "loss": 0.0, + "step": 13066 + }, + { + "epoch": 0.8421086550235226, + "grad_norm": 0.0003421205363785894, + "learning_rate": 1.7608306480486933e-06, + "loss": 0.0, + "step": 13067 + }, + { + "epoch": 0.8421731004704518, + "grad_norm": 0.00034174588045342405, + "learning_rate": 1.7601145721446474e-06, + "loss": 0.0, + "step": 13068 + }, + { + "epoch": 0.842237545917381, + "grad_norm": 0.004250460203295504, + "learning_rate": 1.7593984962406017e-06, + "loss": 0.0, + "step": 13069 + }, + { + "epoch": 0.8423019913643102, + "grad_norm": 0.0015205250632764336, + "learning_rate": 1.7586824203365558e-06, + "loss": 0.0, + "step": 13070 + }, + { + "epoch": 0.8423664368112392, + "grad_norm": 0.004258007696294317, + "learning_rate": 1.7579663444325099e-06, + "loss": 0.0, + "step": 13071 + }, + { + "epoch": 0.8424308822581684, + "grad_norm": 0.0034072375699601702, + "learning_rate": 1.7572502685284642e-06, + "loss": 0.0, + "step": 13072 + }, + { + "epoch": 0.8424953277050976, + "grad_norm": 0.0014717670616816948, + "learning_rate": 1.7565341926244185e-06, + "loss": 0.0, + "step": 13073 + }, + { + "epoch": 0.8425597731520268, + "grad_norm": 0.13174503866161089, + "learning_rate": 1.7558181167203726e-06, + "loss": 0.0001, + "step": 13074 + }, + { + "epoch": 0.842624218598956, + "grad_norm": 0.13783244480645623, + "learning_rate": 1.7551020408163267e-06, + "loss": 0.0013, + "step": 13075 + }, + { + "epoch": 0.8426886640458852, + "grad_norm": 0.005166068120200246, + "learning_rate": 1.7543859649122807e-06, + "loss": 0.0, + "step": 13076 + }, + { + "epoch": 0.8427531094928143, + "grad_norm": 0.0006774612637503077, + "learning_rate": 1.7536698890082353e-06, + "loss": 0.0, + "step": 13077 + }, + { + "epoch": 0.8428175549397435, + "grad_norm": 0.17182032842691972, + "learning_rate": 1.7529538131041893e-06, + "loss": 0.0017, + "step": 13078 + }, + { + "epoch": 0.8428820003866727, + "grad_norm": 0.04182167002943656, + "learning_rate": 1.7522377372001434e-06, + "loss": 0.0002, + "step": 13079 + }, + { + "epoch": 0.8429464458336019, + "grad_norm": 0.0244410842129277, + "learning_rate": 1.7515216612960975e-06, + "loss": 0.0001, + "step": 13080 + }, + { + "epoch": 0.8430108912805311, + "grad_norm": 0.010742733410488706, + "learning_rate": 1.7508055853920516e-06, + "loss": 0.0, + "step": 13081 + }, + { + "epoch": 0.8430753367274602, + "grad_norm": 0.010742733410488706, + "learning_rate": 1.7508055853920516e-06, + "loss": 0.0257, + "step": 13082 + }, + { + "epoch": 0.8431397821743893, + "grad_norm": 7.704857180219094e-05, + "learning_rate": 1.7500895094880057e-06, + "loss": 0.0, + "step": 13083 + }, + { + "epoch": 0.8432042276213185, + "grad_norm": 0.06449464703172729, + "learning_rate": 1.7493734335839602e-06, + "loss": 0.0001, + "step": 13084 + }, + { + "epoch": 0.8432686730682477, + "grad_norm": 0.000153695612872603, + "learning_rate": 1.7486573576799143e-06, + "loss": 0.0, + "step": 13085 + }, + { + "epoch": 0.8433331185151769, + "grad_norm": 0.004558501781863454, + "learning_rate": 1.7479412817758684e-06, + "loss": 0.0, + "step": 13086 + }, + { + "epoch": 0.8433975639621061, + "grad_norm": 0.0014270143678160212, + "learning_rate": 1.7472252058718225e-06, + "loss": 0.0, + "step": 13087 + }, + { + "epoch": 0.8434620094090353, + "grad_norm": 0.0021367750367809737, + "learning_rate": 1.7465091299677768e-06, + "loss": 0.0, + "step": 13088 + }, + { + "epoch": 0.8435264548559644, + "grad_norm": 0.06312020112435841, + "learning_rate": 1.7457930540637308e-06, + "loss": 0.0001, + "step": 13089 + }, + { + "epoch": 0.8435909003028936, + "grad_norm": 0.0005994224308608808, + "learning_rate": 1.7450769781596851e-06, + "loss": 0.0, + "step": 13090 + }, + { + "epoch": 0.8436553457498228, + "grad_norm": 0.008210769671738014, + "learning_rate": 1.7443609022556392e-06, + "loss": 0.0, + "step": 13091 + }, + { + "epoch": 0.843719791196752, + "grad_norm": 0.009704245330285514, + "learning_rate": 1.7436448263515935e-06, + "loss": 0.0, + "step": 13092 + }, + { + "epoch": 0.8437842366436811, + "grad_norm": 0.45894062564205756, + "learning_rate": 1.7429287504475476e-06, + "loss": 0.0032, + "step": 13093 + }, + { + "epoch": 0.8438486820906103, + "grad_norm": 0.0023116636587011403, + "learning_rate": 1.7422126745435017e-06, + "loss": 0.0, + "step": 13094 + }, + { + "epoch": 0.8439131275375394, + "grad_norm": 0.002211421761670453, + "learning_rate": 1.7414965986394558e-06, + "loss": 0.0, + "step": 13095 + }, + { + "epoch": 0.8439775729844686, + "grad_norm": 0.01702725153815147, + "learning_rate": 1.7407805227354103e-06, + "loss": 0.0, + "step": 13096 + }, + { + "epoch": 0.8440420184313978, + "grad_norm": 0.0001140036103517741, + "learning_rate": 1.7400644468313644e-06, + "loss": 0.0, + "step": 13097 + }, + { + "epoch": 0.844106463878327, + "grad_norm": 0.10455236751921528, + "learning_rate": 1.7393483709273185e-06, + "loss": 0.0018, + "step": 13098 + }, + { + "epoch": 0.8441709093252562, + "grad_norm": 0.002027961693557616, + "learning_rate": 1.7386322950232726e-06, + "loss": 0.0, + "step": 13099 + }, + { + "epoch": 0.8442353547721854, + "grad_norm": 0.00024180815422684246, + "learning_rate": 1.7379162191192266e-06, + "loss": 0.0, + "step": 13100 + }, + { + "epoch": 0.8442998002191145, + "grad_norm": 0.079310239055661, + "learning_rate": 1.7372001432151807e-06, + "loss": 0.0001, + "step": 13101 + }, + { + "epoch": 0.8443642456660437, + "grad_norm": 0.00905125777422377, + "learning_rate": 1.7364840673111352e-06, + "loss": 0.0, + "step": 13102 + }, + { + "epoch": 0.8444286911129729, + "grad_norm": 0.2213289704661812, + "learning_rate": 1.7357679914070893e-06, + "loss": 0.0009, + "step": 13103 + }, + { + "epoch": 0.844493136559902, + "grad_norm": 0.013132135644655554, + "learning_rate": 1.7350519155030434e-06, + "loss": 0.0, + "step": 13104 + }, + { + "epoch": 0.8445575820068312, + "grad_norm": 0.0019095839276892347, + "learning_rate": 1.7343358395989975e-06, + "loss": 0.0, + "step": 13105 + }, + { + "epoch": 0.8446220274537604, + "grad_norm": 0.003959361517452852, + "learning_rate": 1.7336197636949518e-06, + "loss": 0.0, + "step": 13106 + }, + { + "epoch": 0.8446864729006895, + "grad_norm": 0.0032159310611906385, + "learning_rate": 1.7329036877909059e-06, + "loss": 0.0, + "step": 13107 + }, + { + "epoch": 0.8447509183476187, + "grad_norm": 0.0019923615717410644, + "learning_rate": 1.7321876118868602e-06, + "loss": 0.0, + "step": 13108 + }, + { + "epoch": 0.8448153637945479, + "grad_norm": 0.004360544594844355, + "learning_rate": 1.7314715359828143e-06, + "loss": 0.0, + "step": 13109 + }, + { + "epoch": 0.8448798092414771, + "grad_norm": 0.0004440588758890354, + "learning_rate": 1.7307554600787686e-06, + "loss": 0.0, + "step": 13110 + }, + { + "epoch": 0.8449442546884063, + "grad_norm": 0.05991632583505031, + "learning_rate": 1.7300393841747227e-06, + "loss": 0.0001, + "step": 13111 + }, + { + "epoch": 0.8450087001353355, + "grad_norm": 0.0005124873215020824, + "learning_rate": 1.7293233082706767e-06, + "loss": 0.0, + "step": 13112 + }, + { + "epoch": 0.8450731455822647, + "grad_norm": 0.004131471238295923, + "learning_rate": 1.728607232366631e-06, + "loss": 0.0, + "step": 13113 + }, + { + "epoch": 0.8451375910291938, + "grad_norm": 0.002933325315730856, + "learning_rate": 1.7278911564625853e-06, + "loss": 0.0, + "step": 13114 + }, + { + "epoch": 0.8452020364761229, + "grad_norm": 0.0019792287577856935, + "learning_rate": 1.7271750805585394e-06, + "loss": 0.0, + "step": 13115 + }, + { + "epoch": 0.8452664819230521, + "grad_norm": 0.022908054626963152, + "learning_rate": 1.7264590046544935e-06, + "loss": 0.0, + "step": 13116 + }, + { + "epoch": 0.8453309273699813, + "grad_norm": 0.001399493420350886, + "learning_rate": 1.7257429287504476e-06, + "loss": 0.0, + "step": 13117 + }, + { + "epoch": 0.8453953728169105, + "grad_norm": 0.001018219180645176, + "learning_rate": 1.7250268528464017e-06, + "loss": 0.0, + "step": 13118 + }, + { + "epoch": 0.8454598182638396, + "grad_norm": 0.04616747542702802, + "learning_rate": 1.7243107769423562e-06, + "loss": 0.0, + "step": 13119 + }, + { + "epoch": 0.8455242637107688, + "grad_norm": 0.009493654566610898, + "learning_rate": 1.7235947010383103e-06, + "loss": 0.0, + "step": 13120 + }, + { + "epoch": 0.845588709157698, + "grad_norm": 0.00967814859014821, + "learning_rate": 1.7228786251342644e-06, + "loss": 0.0, + "step": 13121 + }, + { + "epoch": 0.8456531546046272, + "grad_norm": 0.10150213006238015, + "learning_rate": 1.7221625492302185e-06, + "loss": 0.0003, + "step": 13122 + }, + { + "epoch": 0.8457176000515564, + "grad_norm": 0.6385996833154148, + "learning_rate": 1.7214464733261726e-06, + "loss": 0.0013, + "step": 13123 + }, + { + "epoch": 0.8457820454984856, + "grad_norm": 0.027547564191873486, + "learning_rate": 1.7207303974221269e-06, + "loss": 0.0001, + "step": 13124 + }, + { + "epoch": 0.8458464909454148, + "grad_norm": 0.04341508013828472, + "learning_rate": 1.7200143215180811e-06, + "loss": 0.0003, + "step": 13125 + }, + { + "epoch": 0.8459109363923438, + "grad_norm": 0.00016965293402197205, + "learning_rate": 1.7192982456140352e-06, + "loss": 0.0, + "step": 13126 + }, + { + "epoch": 0.845975381839273, + "grad_norm": 0.002586838111347734, + "learning_rate": 1.7185821697099893e-06, + "loss": 0.0, + "step": 13127 + }, + { + "epoch": 0.8460398272862022, + "grad_norm": 0.02806070046606198, + "learning_rate": 1.7178660938059436e-06, + "loss": 0.0003, + "step": 13128 + }, + { + "epoch": 0.8461042727331314, + "grad_norm": 0.00040487585066894285, + "learning_rate": 1.7171500179018977e-06, + "loss": 0.0, + "step": 13129 + }, + { + "epoch": 0.8461687181800606, + "grad_norm": 0.0041082910374663, + "learning_rate": 1.7164339419978518e-06, + "loss": 0.0, + "step": 13130 + }, + { + "epoch": 0.8462331636269897, + "grad_norm": 0.0015562831145490758, + "learning_rate": 1.715717866093806e-06, + "loss": 0.0, + "step": 13131 + }, + { + "epoch": 0.8462976090739189, + "grad_norm": 0.0009302798442001268, + "learning_rate": 1.7150017901897604e-06, + "loss": 0.0, + "step": 13132 + }, + { + "epoch": 0.8463620545208481, + "grad_norm": 0.01062960002645244, + "learning_rate": 1.7142857142857145e-06, + "loss": 0.0, + "step": 13133 + }, + { + "epoch": 0.8464264999677773, + "grad_norm": 0.00834073110916737, + "learning_rate": 1.7135696383816686e-06, + "loss": 0.0, + "step": 13134 + }, + { + "epoch": 0.8464909454147065, + "grad_norm": 0.0009627436988840297, + "learning_rate": 1.7128535624776227e-06, + "loss": 0.0, + "step": 13135 + }, + { + "epoch": 0.8465553908616357, + "grad_norm": 0.0009720207341823486, + "learning_rate": 1.7121374865735767e-06, + "loss": 0.0, + "step": 13136 + }, + { + "epoch": 0.8466198363085649, + "grad_norm": 0.007233971164138382, + "learning_rate": 1.7114214106695313e-06, + "loss": 0.0, + "step": 13137 + }, + { + "epoch": 0.8466842817554939, + "grad_norm": 0.006121266302367881, + "learning_rate": 1.7107053347654853e-06, + "loss": 0.0, + "step": 13138 + }, + { + "epoch": 0.8467487272024231, + "grad_norm": 0.00019090575891950602, + "learning_rate": 1.7099892588614394e-06, + "loss": 0.0, + "step": 13139 + }, + { + "epoch": 0.8468131726493523, + "grad_norm": 0.3189073772494604, + "learning_rate": 1.7092731829573935e-06, + "loss": 0.0069, + "step": 13140 + }, + { + "epoch": 0.8468776180962815, + "grad_norm": 0.00029818719882822894, + "learning_rate": 1.7085571070533478e-06, + "loss": 0.0, + "step": 13141 + }, + { + "epoch": 0.8469420635432107, + "grad_norm": 0.02607671581948451, + "learning_rate": 1.707841031149302e-06, + "loss": 0.0001, + "step": 13142 + }, + { + "epoch": 0.8470065089901399, + "grad_norm": 0.10383467270417568, + "learning_rate": 1.7071249552452562e-06, + "loss": 0.0007, + "step": 13143 + }, + { + "epoch": 0.847070954437069, + "grad_norm": 0.00030382140353013776, + "learning_rate": 1.7064088793412103e-06, + "loss": 0.0, + "step": 13144 + }, + { + "epoch": 0.8471353998839982, + "grad_norm": 0.006279257922826684, + "learning_rate": 1.7056928034371646e-06, + "loss": 0.0, + "step": 13145 + }, + { + "epoch": 0.8471998453309274, + "grad_norm": 0.45831563402449355, + "learning_rate": 1.7049767275331187e-06, + "loss": 0.0036, + "step": 13146 + }, + { + "epoch": 0.8472642907778566, + "grad_norm": 0.004021349795027649, + "learning_rate": 1.7042606516290728e-06, + "loss": 0.0, + "step": 13147 + }, + { + "epoch": 0.8473287362247858, + "grad_norm": 0.03808135836534344, + "learning_rate": 1.703544575725027e-06, + "loss": 0.0, + "step": 13148 + }, + { + "epoch": 0.8473931816717148, + "grad_norm": 0.11625193826279853, + "learning_rate": 1.7028284998209814e-06, + "loss": 0.0017, + "step": 13149 + }, + { + "epoch": 0.847457627118644, + "grad_norm": 0.00826856773484578, + "learning_rate": 1.7021124239169354e-06, + "loss": 0.0001, + "step": 13150 + }, + { + "epoch": 0.8475220725655732, + "grad_norm": 0.06094455670493373, + "learning_rate": 1.7013963480128895e-06, + "loss": 0.0001, + "step": 13151 + }, + { + "epoch": 0.8475865180125024, + "grad_norm": 0.32370913335997803, + "learning_rate": 1.7006802721088436e-06, + "loss": 0.0005, + "step": 13152 + }, + { + "epoch": 0.8476509634594316, + "grad_norm": 0.004277870466133441, + "learning_rate": 1.6999641962047977e-06, + "loss": 0.0, + "step": 13153 + }, + { + "epoch": 0.8477154089063608, + "grad_norm": 0.006180712845803867, + "learning_rate": 1.6992481203007522e-06, + "loss": 0.0001, + "step": 13154 + }, + { + "epoch": 0.84777985435329, + "grad_norm": 0.00029611523936459857, + "learning_rate": 1.6985320443967063e-06, + "loss": 0.0, + "step": 13155 + }, + { + "epoch": 0.8478442998002191, + "grad_norm": 0.0003400751143154014, + "learning_rate": 1.6978159684926604e-06, + "loss": 0.0, + "step": 13156 + }, + { + "epoch": 0.8479087452471483, + "grad_norm": 0.04685887416980266, + "learning_rate": 1.6970998925886145e-06, + "loss": 0.0001, + "step": 13157 + }, + { + "epoch": 0.8479731906940775, + "grad_norm": 0.004478614412734495, + "learning_rate": 1.6963838166845686e-06, + "loss": 0.0, + "step": 13158 + }, + { + "epoch": 0.8480376361410067, + "grad_norm": 0.0872153273947622, + "learning_rate": 1.6956677407805229e-06, + "loss": 0.0016, + "step": 13159 + }, + { + "epoch": 0.8481020815879358, + "grad_norm": 0.0006567281632697598, + "learning_rate": 1.6949516648764772e-06, + "loss": 0.0, + "step": 13160 + }, + { + "epoch": 0.848166527034865, + "grad_norm": 0.000274595862112701, + "learning_rate": 1.6942355889724312e-06, + "loss": 0.0, + "step": 13161 + }, + { + "epoch": 0.8482309724817941, + "grad_norm": 0.002213287427482091, + "learning_rate": 1.6935195130683853e-06, + "loss": 0.0, + "step": 13162 + }, + { + "epoch": 0.8482954179287233, + "grad_norm": 0.0006916792952072503, + "learning_rate": 1.6928034371643396e-06, + "loss": 0.0, + "step": 13163 + }, + { + "epoch": 0.8483598633756525, + "grad_norm": 0.014104434453009731, + "learning_rate": 1.6920873612602937e-06, + "loss": 0.0002, + "step": 13164 + }, + { + "epoch": 0.8484243088225817, + "grad_norm": 0.37413188666834707, + "learning_rate": 1.6913712853562478e-06, + "loss": 0.0051, + "step": 13165 + }, + { + "epoch": 0.8484887542695109, + "grad_norm": 0.22097166148012748, + "learning_rate": 1.690655209452202e-06, + "loss": 0.0032, + "step": 13166 + }, + { + "epoch": 0.84855319971644, + "grad_norm": 0.05122863864429417, + "learning_rate": 1.6899391335481564e-06, + "loss": 0.0001, + "step": 13167 + }, + { + "epoch": 0.8486176451633692, + "grad_norm": 0.0022830902384754453, + "learning_rate": 1.6892230576441105e-06, + "loss": 0.0, + "step": 13168 + }, + { + "epoch": 0.8486820906102984, + "grad_norm": 0.00552262182764459, + "learning_rate": 1.6885069817400646e-06, + "loss": 0.0, + "step": 13169 + }, + { + "epoch": 0.8487465360572276, + "grad_norm": 0.004210165256711199, + "learning_rate": 1.6877909058360187e-06, + "loss": 0.0, + "step": 13170 + }, + { + "epoch": 0.8488109815041567, + "grad_norm": 0.08326636885960183, + "learning_rate": 1.6870748299319727e-06, + "loss": 0.0001, + "step": 13171 + }, + { + "epoch": 0.8488754269510859, + "grad_norm": 0.002165883292149573, + "learning_rate": 1.6863587540279273e-06, + "loss": 0.0, + "step": 13172 + }, + { + "epoch": 0.848939872398015, + "grad_norm": 0.02688358309133841, + "learning_rate": 1.6856426781238813e-06, + "loss": 0.0, + "step": 13173 + }, + { + "epoch": 0.8490043178449442, + "grad_norm": 0.0035068966026358408, + "learning_rate": 1.6849266022198354e-06, + "loss": 0.0015, + "step": 13174 + }, + { + "epoch": 0.8490687632918734, + "grad_norm": 0.08898056050565528, + "learning_rate": 1.6842105263157895e-06, + "loss": 0.0001, + "step": 13175 + }, + { + "epoch": 0.8491332087388026, + "grad_norm": 0.01142288213511176, + "learning_rate": 1.6834944504117436e-06, + "loss": 0.0, + "step": 13176 + }, + { + "epoch": 0.8491976541857318, + "grad_norm": 0.12086776057633856, + "learning_rate": 1.682778374507698e-06, + "loss": 0.0002, + "step": 13177 + }, + { + "epoch": 0.849262099632661, + "grad_norm": 0.016550340064353578, + "learning_rate": 1.6820622986036522e-06, + "loss": 0.0002, + "step": 13178 + }, + { + "epoch": 0.8493265450795902, + "grad_norm": 0.00171017518821912, + "learning_rate": 1.6813462226996063e-06, + "loss": 0.0, + "step": 13179 + }, + { + "epoch": 0.8493909905265193, + "grad_norm": 0.07949205546168953, + "learning_rate": 1.6806301467955604e-06, + "loss": 0.0012, + "step": 13180 + }, + { + "epoch": 0.8494554359734485, + "grad_norm": 0.09155284118646113, + "learning_rate": 1.6799140708915147e-06, + "loss": 0.0008, + "step": 13181 + }, + { + "epoch": 0.8495198814203776, + "grad_norm": 0.0025166251856908886, + "learning_rate": 1.6791979949874688e-06, + "loss": 0.0, + "step": 13182 + }, + { + "epoch": 0.8495843268673068, + "grad_norm": 0.01984616554006101, + "learning_rate": 1.678481919083423e-06, + "loss": 0.0002, + "step": 13183 + }, + { + "epoch": 0.849648772314236, + "grad_norm": 0.0010042060360331753, + "learning_rate": 1.6777658431793771e-06, + "loss": 0.0, + "step": 13184 + }, + { + "epoch": 0.8497132177611652, + "grad_norm": 0.0009949796621702785, + "learning_rate": 1.6770497672753314e-06, + "loss": 0.0, + "step": 13185 + }, + { + "epoch": 0.8497776632080943, + "grad_norm": 0.053973604706768964, + "learning_rate": 1.6763336913712855e-06, + "loss": 0.0001, + "step": 13186 + }, + { + "epoch": 0.8498421086550235, + "grad_norm": 0.4081985619394894, + "learning_rate": 1.6756176154672396e-06, + "loss": 0.0036, + "step": 13187 + }, + { + "epoch": 0.8499065541019527, + "grad_norm": 0.02374758216156314, + "learning_rate": 1.6749015395631937e-06, + "loss": 0.0001, + "step": 13188 + }, + { + "epoch": 0.8499709995488819, + "grad_norm": 0.05115024449824533, + "learning_rate": 1.6741854636591482e-06, + "loss": 0.0002, + "step": 13189 + }, + { + "epoch": 0.8500354449958111, + "grad_norm": 1.4907648784656893, + "learning_rate": 1.6734693877551023e-06, + "loss": 0.006, + "step": 13190 + }, + { + "epoch": 0.8500998904427403, + "grad_norm": 0.000761882675939549, + "learning_rate": 1.6727533118510564e-06, + "loss": 0.0, + "step": 13191 + }, + { + "epoch": 0.8501643358896694, + "grad_norm": 0.0010225074365764744, + "learning_rate": 1.6720372359470105e-06, + "loss": 0.0, + "step": 13192 + }, + { + "epoch": 0.8502287813365985, + "grad_norm": 0.011092580154141158, + "learning_rate": 1.6713211600429646e-06, + "loss": 0.0, + "step": 13193 + }, + { + "epoch": 0.8502932267835277, + "grad_norm": 0.0006417750320247429, + "learning_rate": 1.6706050841389187e-06, + "loss": 0.0, + "step": 13194 + }, + { + "epoch": 0.8503576722304569, + "grad_norm": 0.0035144805856446135, + "learning_rate": 1.6698890082348732e-06, + "loss": 0.0, + "step": 13195 + }, + { + "epoch": 0.8504221176773861, + "grad_norm": 0.0010827521412211539, + "learning_rate": 1.6691729323308273e-06, + "loss": 0.0, + "step": 13196 + }, + { + "epoch": 0.8504865631243153, + "grad_norm": 0.03509750524593469, + "learning_rate": 1.6684568564267813e-06, + "loss": 0.0001, + "step": 13197 + }, + { + "epoch": 0.8505510085712444, + "grad_norm": 0.00036619498825359834, + "learning_rate": 1.6677407805227354e-06, + "loss": 0.0, + "step": 13198 + }, + { + "epoch": 0.8506154540181736, + "grad_norm": 0.009914766960140308, + "learning_rate": 1.6670247046186897e-06, + "loss": 0.0, + "step": 13199 + }, + { + "epoch": 0.8506798994651028, + "grad_norm": 0.0017422356060475125, + "learning_rate": 1.6663086287146438e-06, + "loss": 0.0, + "step": 13200 + }, + { + "epoch": 0.850744344912032, + "grad_norm": 0.5032569191045334, + "learning_rate": 1.6655925528105981e-06, + "loss": 0.0029, + "step": 13201 + }, + { + "epoch": 0.8508087903589612, + "grad_norm": 0.10452537366147462, + "learning_rate": 1.6648764769065522e-06, + "loss": 0.0004, + "step": 13202 + }, + { + "epoch": 0.8508732358058904, + "grad_norm": 0.5053188131076043, + "learning_rate": 1.6641604010025065e-06, + "loss": 0.0021, + "step": 13203 + }, + { + "epoch": 0.8509376812528194, + "grad_norm": 0.0012992472535777047, + "learning_rate": 1.6634443250984606e-06, + "loss": 0.0, + "step": 13204 + }, + { + "epoch": 0.8510021266997486, + "grad_norm": 0.00011059526996946594, + "learning_rate": 1.6627282491944147e-06, + "loss": 0.0, + "step": 13205 + }, + { + "epoch": 0.8510665721466778, + "grad_norm": 0.005765190230397704, + "learning_rate": 1.6620121732903688e-06, + "loss": 0.0, + "step": 13206 + }, + { + "epoch": 0.851131017593607, + "grad_norm": 0.003982361016511815, + "learning_rate": 1.6612960973863233e-06, + "loss": 0.0, + "step": 13207 + }, + { + "epoch": 0.8511954630405362, + "grad_norm": 0.1607033931876445, + "learning_rate": 1.6605800214822774e-06, + "loss": 0.0006, + "step": 13208 + }, + { + "epoch": 0.8512599084874654, + "grad_norm": 0.0023549343502827992, + "learning_rate": 1.6598639455782314e-06, + "loss": 0.0, + "step": 13209 + }, + { + "epoch": 0.8513243539343945, + "grad_norm": 0.004134231289848903, + "learning_rate": 1.6591478696741855e-06, + "loss": 0.0, + "step": 13210 + }, + { + "epoch": 0.8513887993813237, + "grad_norm": 0.0012640425582206376, + "learning_rate": 1.6584317937701396e-06, + "loss": 0.0, + "step": 13211 + }, + { + "epoch": 0.8514532448282529, + "grad_norm": 0.0021418223676945244, + "learning_rate": 1.657715717866094e-06, + "loss": 0.0, + "step": 13212 + }, + { + "epoch": 0.8515176902751821, + "grad_norm": 0.0007885922196547374, + "learning_rate": 1.6569996419620482e-06, + "loss": 0.0, + "step": 13213 + }, + { + "epoch": 0.8515821357221113, + "grad_norm": 0.0014711894637515442, + "learning_rate": 1.6562835660580023e-06, + "loss": 0.0, + "step": 13214 + }, + { + "epoch": 0.8516465811690405, + "grad_norm": 0.008335604533173686, + "learning_rate": 1.6555674901539564e-06, + "loss": 0.0, + "step": 13215 + }, + { + "epoch": 0.8517110266159695, + "grad_norm": 0.011965576789911826, + "learning_rate": 1.6548514142499107e-06, + "loss": 0.0, + "step": 13216 + }, + { + "epoch": 0.8517754720628987, + "grad_norm": 0.0012711725620480775, + "learning_rate": 1.6541353383458648e-06, + "loss": 0.0, + "step": 13217 + }, + { + "epoch": 0.8518399175098279, + "grad_norm": 0.03274330104237303, + "learning_rate": 1.653419262441819e-06, + "loss": 0.0, + "step": 13218 + }, + { + "epoch": 0.8519043629567571, + "grad_norm": 0.013053572989104686, + "learning_rate": 1.6527031865377732e-06, + "loss": 0.0, + "step": 13219 + }, + { + "epoch": 0.8519688084036863, + "grad_norm": 0.0005108394442840666, + "learning_rate": 1.6519871106337275e-06, + "loss": 0.0, + "step": 13220 + }, + { + "epoch": 0.8520332538506155, + "grad_norm": 0.008659800547823577, + "learning_rate": 1.6512710347296815e-06, + "loss": 0.0, + "step": 13221 + }, + { + "epoch": 0.8520976992975446, + "grad_norm": 0.013328921264361386, + "learning_rate": 1.6505549588256356e-06, + "loss": 0.0, + "step": 13222 + }, + { + "epoch": 0.8521621447444738, + "grad_norm": 0.00534133565134591, + "learning_rate": 1.6498388829215897e-06, + "loss": 0.0, + "step": 13223 + }, + { + "epoch": 0.852226590191403, + "grad_norm": 0.004128411945539397, + "learning_rate": 1.6491228070175442e-06, + "loss": 0.0, + "step": 13224 + }, + { + "epoch": 0.8522910356383322, + "grad_norm": 0.0011464066259772201, + "learning_rate": 1.6484067311134983e-06, + "loss": 0.0, + "step": 13225 + }, + { + "epoch": 0.8523554810852614, + "grad_norm": 0.0012449689486442015, + "learning_rate": 1.6476906552094524e-06, + "loss": 0.0, + "step": 13226 + }, + { + "epoch": 0.8524199265321905, + "grad_norm": 0.01839498996198106, + "learning_rate": 1.6469745793054065e-06, + "loss": 0.0, + "step": 13227 + }, + { + "epoch": 0.8524843719791196, + "grad_norm": 0.02975664278763737, + "learning_rate": 1.6462585034013606e-06, + "loss": 0.0, + "step": 13228 + }, + { + "epoch": 0.8525488174260488, + "grad_norm": 0.004641548279767545, + "learning_rate": 1.6455424274973147e-06, + "loss": 0.0, + "step": 13229 + }, + { + "epoch": 0.852613262872978, + "grad_norm": 0.06830025568807072, + "learning_rate": 1.6448263515932692e-06, + "loss": 0.0005, + "step": 13230 + }, + { + "epoch": 0.8526777083199072, + "grad_norm": 1.172192695787183, + "learning_rate": 1.6441102756892233e-06, + "loss": 0.0056, + "step": 13231 + }, + { + "epoch": 0.8527421537668364, + "grad_norm": 0.011497391242717231, + "learning_rate": 1.6433941997851773e-06, + "loss": 0.0, + "step": 13232 + }, + { + "epoch": 0.8528065992137656, + "grad_norm": 0.05892172851319213, + "learning_rate": 1.6426781238811314e-06, + "loss": 0.0, + "step": 13233 + }, + { + "epoch": 0.8528710446606947, + "grad_norm": 0.004463375528091414, + "learning_rate": 1.6419620479770857e-06, + "loss": 0.0, + "step": 13234 + }, + { + "epoch": 0.8529354901076239, + "grad_norm": 0.0018064123459318863, + "learning_rate": 1.6412459720730398e-06, + "loss": 0.0, + "step": 13235 + }, + { + "epoch": 0.8529999355545531, + "grad_norm": 0.06578452059686321, + "learning_rate": 1.6405298961689941e-06, + "loss": 0.0001, + "step": 13236 + }, + { + "epoch": 0.8530643810014823, + "grad_norm": 0.016703103161657937, + "learning_rate": 1.6398138202649482e-06, + "loss": 0.0, + "step": 13237 + }, + { + "epoch": 0.8531288264484114, + "grad_norm": 0.014220012566819788, + "learning_rate": 1.6390977443609025e-06, + "loss": 0.0002, + "step": 13238 + }, + { + "epoch": 0.8531932718953406, + "grad_norm": 0.09662233925181075, + "learning_rate": 1.6383816684568566e-06, + "loss": 0.0001, + "step": 13239 + }, + { + "epoch": 0.8532577173422697, + "grad_norm": 0.19108335351306138, + "learning_rate": 1.6376655925528107e-06, + "loss": 0.0005, + "step": 13240 + }, + { + "epoch": 0.8533221627891989, + "grad_norm": 0.021867105829644136, + "learning_rate": 1.6369495166487648e-06, + "loss": 0.0, + "step": 13241 + }, + { + "epoch": 0.8533866082361281, + "grad_norm": 0.005161472184663322, + "learning_rate": 1.6362334407447193e-06, + "loss": 0.0, + "step": 13242 + }, + { + "epoch": 0.8534510536830573, + "grad_norm": 0.005717583823887771, + "learning_rate": 1.6355173648406734e-06, + "loss": 0.0, + "step": 13243 + }, + { + "epoch": 0.8535154991299865, + "grad_norm": 0.008121971108982103, + "learning_rate": 1.6348012889366274e-06, + "loss": 0.0, + "step": 13244 + }, + { + "epoch": 0.8535799445769157, + "grad_norm": 0.00037337266763354336, + "learning_rate": 1.6340852130325815e-06, + "loss": 0.0, + "step": 13245 + }, + { + "epoch": 0.8536443900238448, + "grad_norm": 0.002726389582061402, + "learning_rate": 1.6333691371285356e-06, + "loss": 0.0, + "step": 13246 + }, + { + "epoch": 0.853708835470774, + "grad_norm": 0.12864904319243925, + "learning_rate": 1.6326530612244897e-06, + "loss": 0.0001, + "step": 13247 + }, + { + "epoch": 0.8537732809177032, + "grad_norm": 0.01956431971786598, + "learning_rate": 1.6319369853204442e-06, + "loss": 0.0, + "step": 13248 + }, + { + "epoch": 0.8538377263646323, + "grad_norm": 0.0007446259199047396, + "learning_rate": 1.6312209094163983e-06, + "loss": 0.0, + "step": 13249 + }, + { + "epoch": 0.8539021718115615, + "grad_norm": 8.923878846050036e-05, + "learning_rate": 1.6305048335123524e-06, + "loss": 0.0, + "step": 13250 + }, + { + "epoch": 0.8539666172584907, + "grad_norm": 0.00614986020674208, + "learning_rate": 1.6297887576083065e-06, + "loss": 0.0, + "step": 13251 + }, + { + "epoch": 0.8540310627054198, + "grad_norm": 0.0002994068452241638, + "learning_rate": 1.6290726817042608e-06, + "loss": 0.0, + "step": 13252 + }, + { + "epoch": 0.854095508152349, + "grad_norm": 0.019766686431275136, + "learning_rate": 1.6283566058002149e-06, + "loss": 0.0001, + "step": 13253 + }, + { + "epoch": 0.8541599535992782, + "grad_norm": 0.08729203685103124, + "learning_rate": 1.6276405298961692e-06, + "loss": 0.0016, + "step": 13254 + }, + { + "epoch": 0.8542243990462074, + "grad_norm": 0.0016916745618347053, + "learning_rate": 1.6269244539921233e-06, + "loss": 0.0, + "step": 13255 + }, + { + "epoch": 0.8542888444931366, + "grad_norm": 0.0019892775296158497, + "learning_rate": 1.6262083780880776e-06, + "loss": 0.0, + "step": 13256 + }, + { + "epoch": 0.8543532899400658, + "grad_norm": 0.0023993870594850473, + "learning_rate": 1.6254923021840316e-06, + "loss": 0.0, + "step": 13257 + }, + { + "epoch": 0.854417735386995, + "grad_norm": 0.00016196712541359675, + "learning_rate": 1.6247762262799857e-06, + "loss": 0.0, + "step": 13258 + }, + { + "epoch": 0.8544821808339241, + "grad_norm": 6.87116925052651e-05, + "learning_rate": 1.62406015037594e-06, + "loss": 0.0, + "step": 13259 + }, + { + "epoch": 0.8545466262808532, + "grad_norm": 0.004721255990723467, + "learning_rate": 1.6233440744718943e-06, + "loss": 0.0, + "step": 13260 + }, + { + "epoch": 0.8546110717277824, + "grad_norm": 0.0018702730203080486, + "learning_rate": 1.6226279985678484e-06, + "loss": 0.0, + "step": 13261 + }, + { + "epoch": 0.8546755171747116, + "grad_norm": 0.04213422495428291, + "learning_rate": 1.6219119226638025e-06, + "loss": 0.0004, + "step": 13262 + }, + { + "epoch": 0.8547399626216408, + "grad_norm": 0.09176947306452253, + "learning_rate": 1.6211958467597566e-06, + "loss": 0.0001, + "step": 13263 + }, + { + "epoch": 0.8548044080685699, + "grad_norm": 0.0002837684054832139, + "learning_rate": 1.6204797708557107e-06, + "loss": 0.0, + "step": 13264 + }, + { + "epoch": 0.8548688535154991, + "grad_norm": 0.00034699342449560114, + "learning_rate": 1.6197636949516652e-06, + "loss": 0.0, + "step": 13265 + }, + { + "epoch": 0.8549332989624283, + "grad_norm": 0.0001257464769171491, + "learning_rate": 1.6190476190476193e-06, + "loss": 0.0, + "step": 13266 + }, + { + "epoch": 0.8549977444093575, + "grad_norm": 0.015559039084421477, + "learning_rate": 1.6183315431435734e-06, + "loss": 0.0, + "step": 13267 + }, + { + "epoch": 0.8550621898562867, + "grad_norm": 0.0008531191131122104, + "learning_rate": 1.6176154672395274e-06, + "loss": 0.0, + "step": 13268 + }, + { + "epoch": 0.8551266353032159, + "grad_norm": 0.003998335045763489, + "learning_rate": 1.6168993913354815e-06, + "loss": 0.0, + "step": 13269 + }, + { + "epoch": 0.855191080750145, + "grad_norm": 0.163619043841412, + "learning_rate": 1.6161833154314358e-06, + "loss": 0.0004, + "step": 13270 + }, + { + "epoch": 0.8552555261970741, + "grad_norm": 0.03945032064933214, + "learning_rate": 1.6154672395273901e-06, + "loss": 0.0002, + "step": 13271 + }, + { + "epoch": 0.8553199716440033, + "grad_norm": 3.905622064627832e-05, + "learning_rate": 1.6147511636233442e-06, + "loss": 0.0, + "step": 13272 + }, + { + "epoch": 0.8553844170909325, + "grad_norm": 0.08700623339778292, + "learning_rate": 1.6140350877192983e-06, + "loss": 0.0001, + "step": 13273 + }, + { + "epoch": 0.8554488625378617, + "grad_norm": 0.0002938337301690031, + "learning_rate": 1.6133190118152526e-06, + "loss": 0.0, + "step": 13274 + }, + { + "epoch": 0.8555133079847909, + "grad_norm": 0.0016490099642151128, + "learning_rate": 1.6126029359112067e-06, + "loss": 0.0, + "step": 13275 + }, + { + "epoch": 0.85557775343172, + "grad_norm": 0.21113241867889743, + "learning_rate": 1.6118868600071608e-06, + "loss": 0.0012, + "step": 13276 + }, + { + "epoch": 0.8556421988786492, + "grad_norm": 0.00034076801657522657, + "learning_rate": 1.611170784103115e-06, + "loss": 0.0, + "step": 13277 + }, + { + "epoch": 0.8557066443255784, + "grad_norm": 0.0007656847467303316, + "learning_rate": 1.6104547081990694e-06, + "loss": 0.0, + "step": 13278 + }, + { + "epoch": 0.8557710897725076, + "grad_norm": 0.004611446967922683, + "learning_rate": 1.6097386322950235e-06, + "loss": 0.0, + "step": 13279 + }, + { + "epoch": 0.8558355352194368, + "grad_norm": 4.522851739194399e-05, + "learning_rate": 1.6090225563909775e-06, + "loss": 0.0, + "step": 13280 + }, + { + "epoch": 0.855899980666366, + "grad_norm": 0.09413581178952365, + "learning_rate": 1.6083064804869316e-06, + "loss": 0.0017, + "step": 13281 + }, + { + "epoch": 0.855964426113295, + "grad_norm": 0.004960847512303381, + "learning_rate": 1.6075904045828857e-06, + "loss": 0.0, + "step": 13282 + }, + { + "epoch": 0.8560288715602242, + "grad_norm": 0.035130821078171386, + "learning_rate": 1.6068743286788402e-06, + "loss": 0.0, + "step": 13283 + }, + { + "epoch": 0.8560933170071534, + "grad_norm": 0.6814632458239104, + "learning_rate": 1.6061582527747943e-06, + "loss": 0.0024, + "step": 13284 + }, + { + "epoch": 0.8561577624540826, + "grad_norm": 0.0006295020560277422, + "learning_rate": 1.6054421768707484e-06, + "loss": 0.0, + "step": 13285 + }, + { + "epoch": 0.8562222079010118, + "grad_norm": 0.000463167640538834, + "learning_rate": 1.6047261009667025e-06, + "loss": 0.0, + "step": 13286 + }, + { + "epoch": 0.856286653347941, + "grad_norm": 0.003007813351736707, + "learning_rate": 1.6040100250626568e-06, + "loss": 0.0, + "step": 13287 + }, + { + "epoch": 0.8563510987948701, + "grad_norm": 0.008826912320212115, + "learning_rate": 1.6032939491586109e-06, + "loss": 0.0, + "step": 13288 + }, + { + "epoch": 0.8564155442417993, + "grad_norm": 0.004008362923994413, + "learning_rate": 1.6025778732545652e-06, + "loss": 0.0, + "step": 13289 + }, + { + "epoch": 0.8564799896887285, + "grad_norm": 0.00025324509025633033, + "learning_rate": 1.6018617973505193e-06, + "loss": 0.0, + "step": 13290 + }, + { + "epoch": 0.8565444351356577, + "grad_norm": 0.009025551561342875, + "learning_rate": 1.6011457214464736e-06, + "loss": 0.0, + "step": 13291 + }, + { + "epoch": 0.8566088805825869, + "grad_norm": 0.15220108443539926, + "learning_rate": 1.6004296455424276e-06, + "loss": 0.0002, + "step": 13292 + }, + { + "epoch": 0.8566733260295161, + "grad_norm": 0.0005597975580254473, + "learning_rate": 1.5997135696383817e-06, + "loss": 0.0, + "step": 13293 + }, + { + "epoch": 0.8567377714764451, + "grad_norm": 0.2044531426959965, + "learning_rate": 1.598997493734336e-06, + "loss": 0.0004, + "step": 13294 + }, + { + "epoch": 0.8568022169233743, + "grad_norm": 0.0065360515775523985, + "learning_rate": 1.5982814178302903e-06, + "loss": 0.0, + "step": 13295 + }, + { + "epoch": 0.8568666623703035, + "grad_norm": 0.00010121851515030464, + "learning_rate": 1.5975653419262444e-06, + "loss": 0.0, + "step": 13296 + }, + { + "epoch": 0.8569311078172327, + "grad_norm": 0.1027933826978185, + "learning_rate": 1.5968492660221985e-06, + "loss": 0.0018, + "step": 13297 + }, + { + "epoch": 0.8569955532641619, + "grad_norm": 0.31212899118972154, + "learning_rate": 1.5961331901181526e-06, + "loss": 0.0011, + "step": 13298 + }, + { + "epoch": 0.8570599987110911, + "grad_norm": 0.011037713981860054, + "learning_rate": 1.5954171142141067e-06, + "loss": 0.0, + "step": 13299 + }, + { + "epoch": 0.8571244441580202, + "grad_norm": 0.17396643847202156, + "learning_rate": 1.5947010383100612e-06, + "loss": 0.0002, + "step": 13300 + }, + { + "epoch": 0.8571888896049494, + "grad_norm": 0.27335187388744736, + "learning_rate": 1.5939849624060153e-06, + "loss": 0.0031, + "step": 13301 + }, + { + "epoch": 0.8572533350518786, + "grad_norm": 0.5506759164068347, + "learning_rate": 1.5932688865019694e-06, + "loss": 0.0027, + "step": 13302 + }, + { + "epoch": 0.8573177804988078, + "grad_norm": 0.038104221039077645, + "learning_rate": 1.5925528105979234e-06, + "loss": 0.0002, + "step": 13303 + }, + { + "epoch": 0.857382225945737, + "grad_norm": 0.03073348441255423, + "learning_rate": 1.5918367346938775e-06, + "loss": 0.0, + "step": 13304 + }, + { + "epoch": 0.8574466713926661, + "grad_norm": 0.003336774922123674, + "learning_rate": 1.5911206587898318e-06, + "loss": 0.0, + "step": 13305 + }, + { + "epoch": 0.8575111168395952, + "grad_norm": 0.019049603577953083, + "learning_rate": 1.5904045828857861e-06, + "loss": 0.0, + "step": 13306 + }, + { + "epoch": 0.8575755622865244, + "grad_norm": 0.006879444816098133, + "learning_rate": 1.5896885069817402e-06, + "loss": 0.0, + "step": 13307 + }, + { + "epoch": 0.8576400077334536, + "grad_norm": 0.0035166293965754702, + "learning_rate": 1.5889724310776943e-06, + "loss": 0.0, + "step": 13308 + }, + { + "epoch": 0.8577044531803828, + "grad_norm": 0.10649776478809318, + "learning_rate": 1.5882563551736486e-06, + "loss": 0.0012, + "step": 13309 + }, + { + "epoch": 0.857768898627312, + "grad_norm": 0.03601981388202897, + "learning_rate": 1.5875402792696027e-06, + "loss": 0.0, + "step": 13310 + }, + { + "epoch": 0.8578333440742412, + "grad_norm": 0.00010030491278225593, + "learning_rate": 1.5868242033655568e-06, + "loss": 0.0, + "step": 13311 + }, + { + "epoch": 0.8578977895211704, + "grad_norm": 0.004006094877726113, + "learning_rate": 1.586108127461511e-06, + "loss": 0.0, + "step": 13312 + }, + { + "epoch": 0.8579622349680995, + "grad_norm": 0.01206481644215532, + "learning_rate": 1.5853920515574654e-06, + "loss": 0.0, + "step": 13313 + }, + { + "epoch": 0.8580266804150287, + "grad_norm": 0.0012709160823947188, + "learning_rate": 1.5846759756534195e-06, + "loss": 0.0, + "step": 13314 + }, + { + "epoch": 0.8580911258619579, + "grad_norm": 0.0005756146179106836, + "learning_rate": 1.5839598997493736e-06, + "loss": 0.0, + "step": 13315 + }, + { + "epoch": 0.858155571308887, + "grad_norm": 0.005792067297186545, + "learning_rate": 1.5832438238453276e-06, + "loss": 0.0, + "step": 13316 + }, + { + "epoch": 0.8582200167558162, + "grad_norm": 0.01710870502387094, + "learning_rate": 1.5825277479412817e-06, + "loss": 0.0001, + "step": 13317 + }, + { + "epoch": 0.8582844622027453, + "grad_norm": 0.002173941342640871, + "learning_rate": 1.5818116720372362e-06, + "loss": 0.0, + "step": 13318 + }, + { + "epoch": 0.8583489076496745, + "grad_norm": 0.0006425608284127846, + "learning_rate": 1.5810955961331903e-06, + "loss": 0.0, + "step": 13319 + }, + { + "epoch": 0.8584133530966037, + "grad_norm": 0.30172473763244206, + "learning_rate": 1.5803795202291444e-06, + "loss": 0.0016, + "step": 13320 + }, + { + "epoch": 0.8584777985435329, + "grad_norm": 0.01861099902398579, + "learning_rate": 1.5796634443250985e-06, + "loss": 0.0, + "step": 13321 + }, + { + "epoch": 0.8585422439904621, + "grad_norm": 0.0012165400924309016, + "learning_rate": 1.5789473684210526e-06, + "loss": 0.0, + "step": 13322 + }, + { + "epoch": 0.8586066894373913, + "grad_norm": 0.0016069345021319553, + "learning_rate": 1.5782312925170069e-06, + "loss": 0.0, + "step": 13323 + }, + { + "epoch": 0.8586711348843205, + "grad_norm": 0.011594244425208303, + "learning_rate": 1.5775152166129612e-06, + "loss": 0.0001, + "step": 13324 + }, + { + "epoch": 0.8587355803312496, + "grad_norm": 0.0014469452087132976, + "learning_rate": 1.5767991407089153e-06, + "loss": 0.0, + "step": 13325 + }, + { + "epoch": 0.8588000257781788, + "grad_norm": 0.4306278862323567, + "learning_rate": 1.5760830648048694e-06, + "loss": 0.0004, + "step": 13326 + }, + { + "epoch": 0.8588644712251079, + "grad_norm": 0.003471658113443766, + "learning_rate": 1.5753669889008237e-06, + "loss": 0.0, + "step": 13327 + }, + { + "epoch": 0.8589289166720371, + "grad_norm": 0.0004689620724818602, + "learning_rate": 1.5746509129967777e-06, + "loss": 0.0, + "step": 13328 + }, + { + "epoch": 0.8589933621189663, + "grad_norm": 0.02153173085345865, + "learning_rate": 1.573934837092732e-06, + "loss": 0.0001, + "step": 13329 + }, + { + "epoch": 0.8590578075658954, + "grad_norm": 0.2610378517842289, + "learning_rate": 1.5732187611886861e-06, + "loss": 0.0006, + "step": 13330 + }, + { + "epoch": 0.8591222530128246, + "grad_norm": 0.02625268560888587, + "learning_rate": 1.5725026852846404e-06, + "loss": 0.0001, + "step": 13331 + }, + { + "epoch": 0.8591866984597538, + "grad_norm": 0.050066121532948815, + "learning_rate": 1.5717866093805945e-06, + "loss": 0.0022, + "step": 13332 + }, + { + "epoch": 0.859251143906683, + "grad_norm": 0.0002840123484040217, + "learning_rate": 1.5710705334765486e-06, + "loss": 0.0, + "step": 13333 + }, + { + "epoch": 0.8593155893536122, + "grad_norm": 0.016482623120749718, + "learning_rate": 1.5703544575725027e-06, + "loss": 0.0, + "step": 13334 + }, + { + "epoch": 0.8593800348005414, + "grad_norm": 0.0861558848682203, + "learning_rate": 1.5696383816684572e-06, + "loss": 0.0016, + "step": 13335 + }, + { + "epoch": 0.8594444802474706, + "grad_norm": 0.00206119523180686, + "learning_rate": 1.5689223057644113e-06, + "loss": 0.0, + "step": 13336 + }, + { + "epoch": 0.8595089256943997, + "grad_norm": 0.0010139368389927733, + "learning_rate": 1.5682062298603654e-06, + "loss": 0.0, + "step": 13337 + }, + { + "epoch": 0.8595733711413288, + "grad_norm": 0.0006872459690220183, + "learning_rate": 1.5674901539563195e-06, + "loss": 0.0, + "step": 13338 + }, + { + "epoch": 0.859637816588258, + "grad_norm": 0.0007045706704706853, + "learning_rate": 1.5667740780522735e-06, + "loss": 0.0, + "step": 13339 + }, + { + "epoch": 0.8597022620351872, + "grad_norm": 0.05906231015101705, + "learning_rate": 1.5660580021482276e-06, + "loss": 0.0, + "step": 13340 + }, + { + "epoch": 0.8597667074821164, + "grad_norm": 0.0011324638062900777, + "learning_rate": 1.5653419262441821e-06, + "loss": 0.0, + "step": 13341 + }, + { + "epoch": 0.8598311529290456, + "grad_norm": 0.010297346909910793, + "learning_rate": 1.5646258503401362e-06, + "loss": 0.0, + "step": 13342 + }, + { + "epoch": 0.8598955983759747, + "grad_norm": 0.29292520835291497, + "learning_rate": 1.5639097744360903e-06, + "loss": 0.0003, + "step": 13343 + }, + { + "epoch": 0.8599600438229039, + "grad_norm": 0.031157416981430704, + "learning_rate": 1.5631936985320444e-06, + "loss": 0.0016, + "step": 13344 + }, + { + "epoch": 0.8600244892698331, + "grad_norm": 0.005336673025525772, + "learning_rate": 1.5624776226279987e-06, + "loss": 0.0, + "step": 13345 + }, + { + "epoch": 0.8600889347167623, + "grad_norm": 0.0007176590913766336, + "learning_rate": 1.5617615467239528e-06, + "loss": 0.0, + "step": 13346 + }, + { + "epoch": 0.8601533801636915, + "grad_norm": 6.578943500745828e-05, + "learning_rate": 1.561045470819907e-06, + "loss": 0.0, + "step": 13347 + }, + { + "epoch": 0.8602178256106207, + "grad_norm": 0.0002624024066516301, + "learning_rate": 1.5603293949158612e-06, + "loss": 0.0, + "step": 13348 + }, + { + "epoch": 0.8602822710575497, + "grad_norm": 0.09216368719245878, + "learning_rate": 1.5596133190118155e-06, + "loss": 0.0006, + "step": 13349 + }, + { + "epoch": 0.8603467165044789, + "grad_norm": 0.003946083988254043, + "learning_rate": 1.5588972431077696e-06, + "loss": 0.0, + "step": 13350 + }, + { + "epoch": 0.8604111619514081, + "grad_norm": 0.000322491336789557, + "learning_rate": 1.5581811672037236e-06, + "loss": 0.0, + "step": 13351 + }, + { + "epoch": 0.8604756073983373, + "grad_norm": 0.00325187811401634, + "learning_rate": 1.5574650912996777e-06, + "loss": 0.0, + "step": 13352 + }, + { + "epoch": 0.8605400528452665, + "grad_norm": 0.02672152168310301, + "learning_rate": 1.5567490153956322e-06, + "loss": 0.0001, + "step": 13353 + }, + { + "epoch": 0.8606044982921957, + "grad_norm": 0.0010806659731611257, + "learning_rate": 1.5560329394915863e-06, + "loss": 0.0, + "step": 13354 + }, + { + "epoch": 0.8606689437391248, + "grad_norm": 0.0012196018515187096, + "learning_rate": 1.5553168635875404e-06, + "loss": 0.0, + "step": 13355 + }, + { + "epoch": 0.860733389186054, + "grad_norm": 0.03056133185486748, + "learning_rate": 1.5546007876834945e-06, + "loss": 0.0001, + "step": 13356 + }, + { + "epoch": 0.8607978346329832, + "grad_norm": 0.003190366588636452, + "learning_rate": 1.5538847117794486e-06, + "loss": 0.0, + "step": 13357 + }, + { + "epoch": 0.8608622800799124, + "grad_norm": 0.0010321709471120919, + "learning_rate": 1.5531686358754029e-06, + "loss": 0.0, + "step": 13358 + }, + { + "epoch": 0.8609267255268416, + "grad_norm": 0.005754727492837118, + "learning_rate": 1.5524525599713572e-06, + "loss": 0.0, + "step": 13359 + }, + { + "epoch": 0.8609911709737708, + "grad_norm": 0.02314667772246618, + "learning_rate": 1.5517364840673113e-06, + "loss": 0.0003, + "step": 13360 + }, + { + "epoch": 0.8610556164206998, + "grad_norm": 0.006225745757706881, + "learning_rate": 1.5510204081632654e-06, + "loss": 0.0, + "step": 13361 + }, + { + "epoch": 0.861120061867629, + "grad_norm": 0.012019083819958221, + "learning_rate": 1.5503043322592197e-06, + "loss": 0.0, + "step": 13362 + }, + { + "epoch": 0.8611845073145582, + "grad_norm": 0.000692814239205552, + "learning_rate": 1.5495882563551737e-06, + "loss": 0.0, + "step": 13363 + }, + { + "epoch": 0.8612489527614874, + "grad_norm": 0.004901376096575237, + "learning_rate": 1.548872180451128e-06, + "loss": 0.0, + "step": 13364 + }, + { + "epoch": 0.8613133982084166, + "grad_norm": 0.02895489313893144, + "learning_rate": 1.5481561045470821e-06, + "loss": 0.0001, + "step": 13365 + }, + { + "epoch": 0.8613778436553458, + "grad_norm": 0.05815220203678352, + "learning_rate": 1.5474400286430364e-06, + "loss": 0.0002, + "step": 13366 + }, + { + "epoch": 0.8614422891022749, + "grad_norm": 0.0003883691743082568, + "learning_rate": 1.5467239527389905e-06, + "loss": 0.0, + "step": 13367 + }, + { + "epoch": 0.8615067345492041, + "grad_norm": 0.0003275088709743321, + "learning_rate": 1.5460078768349446e-06, + "loss": 0.0, + "step": 13368 + }, + { + "epoch": 0.8615711799961333, + "grad_norm": 0.0009924672732805403, + "learning_rate": 1.5452918009308987e-06, + "loss": 0.0, + "step": 13369 + }, + { + "epoch": 0.8616356254430625, + "grad_norm": 0.0034706501414582956, + "learning_rate": 1.5445757250268532e-06, + "loss": 0.0, + "step": 13370 + }, + { + "epoch": 0.8617000708899917, + "grad_norm": 0.08772039352150697, + "learning_rate": 1.5438596491228073e-06, + "loss": 0.0017, + "step": 13371 + }, + { + "epoch": 0.8617645163369207, + "grad_norm": 0.0026765761249290475, + "learning_rate": 1.5431435732187614e-06, + "loss": 0.0, + "step": 13372 + }, + { + "epoch": 0.8618289617838499, + "grad_norm": 0.0022527401314781322, + "learning_rate": 1.5424274973147155e-06, + "loss": 0.0, + "step": 13373 + }, + { + "epoch": 0.8618934072307791, + "grad_norm": 0.012568387233077847, + "learning_rate": 1.5417114214106696e-06, + "loss": 0.0, + "step": 13374 + }, + { + "epoch": 0.8619578526777083, + "grad_norm": 0.030271575278257603, + "learning_rate": 1.5409953455066236e-06, + "loss": 0.0004, + "step": 13375 + }, + { + "epoch": 0.8620222981246375, + "grad_norm": 4.230431264947589e-05, + "learning_rate": 1.5402792696025781e-06, + "loss": 0.0, + "step": 13376 + }, + { + "epoch": 0.8620867435715667, + "grad_norm": 0.0004940029506976715, + "learning_rate": 1.5395631936985322e-06, + "loss": 0.0, + "step": 13377 + }, + { + "epoch": 0.8621511890184959, + "grad_norm": 0.0011218007381096433, + "learning_rate": 1.5388471177944863e-06, + "loss": 0.0, + "step": 13378 + }, + { + "epoch": 0.862215634465425, + "grad_norm": 0.07873246830828655, + "learning_rate": 1.5381310418904404e-06, + "loss": 0.0001, + "step": 13379 + }, + { + "epoch": 0.8622800799123542, + "grad_norm": 0.025534215775952065, + "learning_rate": 1.5374149659863947e-06, + "loss": 0.0, + "step": 13380 + }, + { + "epoch": 0.8623445253592834, + "grad_norm": 0.01992692039099067, + "learning_rate": 1.5366988900823488e-06, + "loss": 0.0002, + "step": 13381 + }, + { + "epoch": 0.8624089708062126, + "grad_norm": 0.30028876324941983, + "learning_rate": 1.535982814178303e-06, + "loss": 0.0015, + "step": 13382 + }, + { + "epoch": 0.8624734162531417, + "grad_norm": 0.0885966816085772, + "learning_rate": 1.5352667382742572e-06, + "loss": 0.0001, + "step": 13383 + }, + { + "epoch": 0.8625378617000709, + "grad_norm": 7.695888165750718e-05, + "learning_rate": 1.5345506623702115e-06, + "loss": 0.0, + "step": 13384 + }, + { + "epoch": 0.862602307147, + "grad_norm": 0.0010478483803299753, + "learning_rate": 1.5338345864661656e-06, + "loss": 0.0, + "step": 13385 + }, + { + "epoch": 0.8626667525939292, + "grad_norm": 0.0009519866972095988, + "learning_rate": 1.5331185105621197e-06, + "loss": 0.0, + "step": 13386 + }, + { + "epoch": 0.8627311980408584, + "grad_norm": 0.05243510078726089, + "learning_rate": 1.5324024346580737e-06, + "loss": 0.0001, + "step": 13387 + }, + { + "epoch": 0.8627956434877876, + "grad_norm": 0.0004253109465269119, + "learning_rate": 1.5316863587540282e-06, + "loss": 0.0, + "step": 13388 + }, + { + "epoch": 0.8628600889347168, + "grad_norm": 0.003103137673456359, + "learning_rate": 1.5309702828499823e-06, + "loss": 0.0, + "step": 13389 + }, + { + "epoch": 0.862924534381646, + "grad_norm": 0.0005662525075994763, + "learning_rate": 1.5302542069459364e-06, + "loss": 0.0, + "step": 13390 + }, + { + "epoch": 0.8629889798285751, + "grad_norm": 0.0021847116480264747, + "learning_rate": 1.5295381310418905e-06, + "loss": 0.0, + "step": 13391 + }, + { + "epoch": 0.8630534252755043, + "grad_norm": 0.1484782827970648, + "learning_rate": 1.5288220551378446e-06, + "loss": 0.0001, + "step": 13392 + }, + { + "epoch": 0.8631178707224335, + "grad_norm": 0.01737780890753743, + "learning_rate": 1.5281059792337987e-06, + "loss": 0.0, + "step": 13393 + }, + { + "epoch": 0.8631823161693626, + "grad_norm": 0.0061111167426056365, + "learning_rate": 1.5273899033297532e-06, + "loss": 0.0001, + "step": 13394 + }, + { + "epoch": 0.8632467616162918, + "grad_norm": 6.793325152705395e-05, + "learning_rate": 1.5266738274257073e-06, + "loss": 0.0, + "step": 13395 + }, + { + "epoch": 0.863311207063221, + "grad_norm": 0.0013361856110366322, + "learning_rate": 1.5259577515216614e-06, + "loss": 0.0, + "step": 13396 + }, + { + "epoch": 0.8633756525101501, + "grad_norm": 0.04722671877927456, + "learning_rate": 1.5252416756176155e-06, + "loss": 0.0, + "step": 13397 + }, + { + "epoch": 0.8634400979570793, + "grad_norm": 0.03648959870640535, + "learning_rate": 1.5245255997135698e-06, + "loss": 0.0001, + "step": 13398 + }, + { + "epoch": 0.8635045434040085, + "grad_norm": 0.0003418445429188015, + "learning_rate": 1.523809523809524e-06, + "loss": 0.0, + "step": 13399 + }, + { + "epoch": 0.8635689888509377, + "grad_norm": 0.0008608845575165467, + "learning_rate": 1.5230934479054781e-06, + "loss": 0.0, + "step": 13400 + }, + { + "epoch": 0.8636334342978669, + "grad_norm": 0.002113036385751762, + "learning_rate": 1.5223773720014322e-06, + "loss": 0.0, + "step": 13401 + }, + { + "epoch": 0.8636978797447961, + "grad_norm": 0.0015854664134770094, + "learning_rate": 1.5216612960973865e-06, + "loss": 0.0, + "step": 13402 + }, + { + "epoch": 0.8637623251917252, + "grad_norm": 0.0069202574918006445, + "learning_rate": 1.5209452201933406e-06, + "loss": 0.0, + "step": 13403 + }, + { + "epoch": 0.8638267706386544, + "grad_norm": 0.0801661461210914, + "learning_rate": 1.5202291442892947e-06, + "loss": 0.0005, + "step": 13404 + }, + { + "epoch": 0.8638912160855835, + "grad_norm": 0.03243276882707622, + "learning_rate": 1.519513068385249e-06, + "loss": 0.0017, + "step": 13405 + }, + { + "epoch": 0.8639556615325127, + "grad_norm": 0.0045153442530477235, + "learning_rate": 1.5187969924812033e-06, + "loss": 0.0, + "step": 13406 + }, + { + "epoch": 0.8640201069794419, + "grad_norm": 0.0018214858012334234, + "learning_rate": 1.5180809165771574e-06, + "loss": 0.0, + "step": 13407 + }, + { + "epoch": 0.864084552426371, + "grad_norm": 0.006574459754307482, + "learning_rate": 1.5173648406731115e-06, + "loss": 0.0, + "step": 13408 + }, + { + "epoch": 0.8641489978733002, + "grad_norm": 0.000748829527636167, + "learning_rate": 1.5166487647690656e-06, + "loss": 0.0, + "step": 13409 + }, + { + "epoch": 0.8642134433202294, + "grad_norm": 0.003037122112591288, + "learning_rate": 1.5159326888650196e-06, + "loss": 0.0, + "step": 13410 + }, + { + "epoch": 0.8642778887671586, + "grad_norm": 0.0004237258940566272, + "learning_rate": 1.5152166129609742e-06, + "loss": 0.0, + "step": 13411 + }, + { + "epoch": 0.8643423342140878, + "grad_norm": 0.03228855131648255, + "learning_rate": 1.5145005370569282e-06, + "loss": 0.0, + "step": 13412 + }, + { + "epoch": 0.864406779661017, + "grad_norm": 0.04334934425745581, + "learning_rate": 1.5137844611528823e-06, + "loss": 0.0001, + "step": 13413 + }, + { + "epoch": 0.8644712251079462, + "grad_norm": 0.003014186912717113, + "learning_rate": 1.5130683852488364e-06, + "loss": 0.0, + "step": 13414 + }, + { + "epoch": 0.8645356705548753, + "grad_norm": 0.0003104792844514378, + "learning_rate": 1.5123523093447905e-06, + "loss": 0.0, + "step": 13415 + }, + { + "epoch": 0.8646001160018044, + "grad_norm": 3.002535634592271e-05, + "learning_rate": 1.5116362334407448e-06, + "loss": 0.0, + "step": 13416 + }, + { + "epoch": 0.8646645614487336, + "grad_norm": 0.011916289380351773, + "learning_rate": 1.510920157536699e-06, + "loss": 0.0001, + "step": 13417 + }, + { + "epoch": 0.8647290068956628, + "grad_norm": 0.00014985182630863873, + "learning_rate": 1.5102040816326532e-06, + "loss": 0.0, + "step": 13418 + }, + { + "epoch": 0.864793452342592, + "grad_norm": 0.0009954287758235075, + "learning_rate": 1.5094880057286073e-06, + "loss": 0.0, + "step": 13419 + }, + { + "epoch": 0.8648578977895212, + "grad_norm": 0.020676748662305275, + "learning_rate": 1.5087719298245616e-06, + "loss": 0.0, + "step": 13420 + }, + { + "epoch": 0.8649223432364503, + "grad_norm": 0.016235645549665126, + "learning_rate": 1.5080558539205157e-06, + "loss": 0.0001, + "step": 13421 + }, + { + "epoch": 0.8649867886833795, + "grad_norm": 0.004837391867327394, + "learning_rate": 1.5073397780164697e-06, + "loss": 0.0, + "step": 13422 + }, + { + "epoch": 0.8650512341303087, + "grad_norm": 0.024349001979618767, + "learning_rate": 1.506623702112424e-06, + "loss": 0.0001, + "step": 13423 + }, + { + "epoch": 0.8651156795772379, + "grad_norm": 0.00068867812212618, + "learning_rate": 1.5059076262083783e-06, + "loss": 0.0, + "step": 13424 + }, + { + "epoch": 0.8651801250241671, + "grad_norm": 0.0016578578833495088, + "learning_rate": 1.5051915503043324e-06, + "loss": 0.0, + "step": 13425 + }, + { + "epoch": 0.8652445704710963, + "grad_norm": 0.03446109256285739, + "learning_rate": 1.5044754744002865e-06, + "loss": 0.0001, + "step": 13426 + }, + { + "epoch": 0.8653090159180253, + "grad_norm": 0.00011652735111931998, + "learning_rate": 1.5037593984962406e-06, + "loss": 0.0, + "step": 13427 + }, + { + "epoch": 0.8653734613649545, + "grad_norm": 0.115670160063929, + "learning_rate": 1.5030433225921947e-06, + "loss": 0.0006, + "step": 13428 + }, + { + "epoch": 0.8654379068118837, + "grad_norm": 0.0024135343454817215, + "learning_rate": 1.5023272466881492e-06, + "loss": 0.0, + "step": 13429 + }, + { + "epoch": 0.8655023522588129, + "grad_norm": 0.017187191238393462, + "learning_rate": 1.5016111707841033e-06, + "loss": 0.0001, + "step": 13430 + }, + { + "epoch": 0.8655667977057421, + "grad_norm": 0.35311210110829644, + "learning_rate": 1.5008950948800574e-06, + "loss": 0.0006, + "step": 13431 + }, + { + "epoch": 0.8656312431526713, + "grad_norm": 0.3191252663884319, + "learning_rate": 1.5001790189760115e-06, + "loss": 0.0019, + "step": 13432 + }, + { + "epoch": 0.8656956885996004, + "grad_norm": 0.04233723504764846, + "learning_rate": 1.4994629430719658e-06, + "loss": 0.0016, + "step": 13433 + }, + { + "epoch": 0.8657601340465296, + "grad_norm": 0.00145089825630913, + "learning_rate": 1.4987468671679198e-06, + "loss": 0.0, + "step": 13434 + }, + { + "epoch": 0.8658245794934588, + "grad_norm": 0.20710145012059114, + "learning_rate": 1.4980307912638741e-06, + "loss": 0.0002, + "step": 13435 + }, + { + "epoch": 0.865889024940388, + "grad_norm": 0.0031344022196231128, + "learning_rate": 1.4973147153598282e-06, + "loss": 0.0, + "step": 13436 + }, + { + "epoch": 0.8659534703873172, + "grad_norm": 0.0118111356298611, + "learning_rate": 1.4965986394557825e-06, + "loss": 0.0, + "step": 13437 + }, + { + "epoch": 0.8660179158342464, + "grad_norm": 0.0065583660622332745, + "learning_rate": 1.4958825635517366e-06, + "loss": 0.0, + "step": 13438 + }, + { + "epoch": 0.8660823612811754, + "grad_norm": 0.2883475069855709, + "learning_rate": 1.4951664876476907e-06, + "loss": 0.0025, + "step": 13439 + }, + { + "epoch": 0.8661468067281046, + "grad_norm": 0.005772183739680817, + "learning_rate": 1.494450411743645e-06, + "loss": 0.0, + "step": 13440 + }, + { + "epoch": 0.8662112521750338, + "grad_norm": 0.0052695852327754155, + "learning_rate": 1.4937343358395993e-06, + "loss": 0.0, + "step": 13441 + }, + { + "epoch": 0.866275697621963, + "grad_norm": 0.0012456353387858528, + "learning_rate": 1.4930182599355534e-06, + "loss": 0.0, + "step": 13442 + }, + { + "epoch": 0.8663401430688922, + "grad_norm": 0.0015878721758347974, + "learning_rate": 1.4923021840315075e-06, + "loss": 0.0, + "step": 13443 + }, + { + "epoch": 0.8664045885158214, + "grad_norm": 0.014455519841065518, + "learning_rate": 1.4915861081274616e-06, + "loss": 0.0, + "step": 13444 + }, + { + "epoch": 0.8664690339627505, + "grad_norm": 0.04454679008845088, + "learning_rate": 1.4908700322234157e-06, + "loss": 0.0001, + "step": 13445 + }, + { + "epoch": 0.8665334794096797, + "grad_norm": 0.45188975503978307, + "learning_rate": 1.4901539563193702e-06, + "loss": 0.003, + "step": 13446 + }, + { + "epoch": 0.8665979248566089, + "grad_norm": 0.010539871515908213, + "learning_rate": 1.4894378804153242e-06, + "loss": 0.0001, + "step": 13447 + }, + { + "epoch": 0.8666623703035381, + "grad_norm": 0.005161306898547368, + "learning_rate": 1.4887218045112783e-06, + "loss": 0.0, + "step": 13448 + }, + { + "epoch": 0.8667268157504673, + "grad_norm": 0.0016819094308091322, + "learning_rate": 1.4880057286072324e-06, + "loss": 0.0, + "step": 13449 + }, + { + "epoch": 0.8667912611973964, + "grad_norm": 0.13813809970298882, + "learning_rate": 1.4872896527031865e-06, + "loss": 0.0018, + "step": 13450 + }, + { + "epoch": 0.8668557066443255, + "grad_norm": 2.5470248661249853e-05, + "learning_rate": 1.4865735767991408e-06, + "loss": 0.0, + "step": 13451 + }, + { + "epoch": 0.8669201520912547, + "grad_norm": 0.001220750155884022, + "learning_rate": 1.4858575008950951e-06, + "loss": 0.0, + "step": 13452 + }, + { + "epoch": 0.8669845975381839, + "grad_norm": 0.09825203355172227, + "learning_rate": 1.4851414249910492e-06, + "loss": 0.001, + "step": 13453 + }, + { + "epoch": 0.8670490429851131, + "grad_norm": 0.0006788445189597282, + "learning_rate": 1.4844253490870033e-06, + "loss": 0.0, + "step": 13454 + }, + { + "epoch": 0.8671134884320423, + "grad_norm": 0.040948573152339476, + "learning_rate": 1.4837092731829576e-06, + "loss": 0.0002, + "step": 13455 + }, + { + "epoch": 0.8671779338789715, + "grad_norm": 1.2300284915043287, + "learning_rate": 1.4829931972789117e-06, + "loss": 0.0033, + "step": 13456 + }, + { + "epoch": 0.8672423793259006, + "grad_norm": 0.0013821472762745976, + "learning_rate": 1.4822771213748658e-06, + "loss": 0.0, + "step": 13457 + }, + { + "epoch": 0.8673068247728298, + "grad_norm": 0.0001129702660427535, + "learning_rate": 1.48156104547082e-06, + "loss": 0.0, + "step": 13458 + }, + { + "epoch": 0.867371270219759, + "grad_norm": 0.05667991960518738, + "learning_rate": 1.4808449695667744e-06, + "loss": 0.0002, + "step": 13459 + }, + { + "epoch": 0.8674357156666882, + "grad_norm": 0.01425209902570669, + "learning_rate": 1.4801288936627284e-06, + "loss": 0.0001, + "step": 13460 + }, + { + "epoch": 0.8675001611136173, + "grad_norm": 0.002137552431334319, + "learning_rate": 1.4794128177586825e-06, + "loss": 0.0, + "step": 13461 + }, + { + "epoch": 0.8675646065605465, + "grad_norm": 0.13578705766458848, + "learning_rate": 1.4786967418546366e-06, + "loss": 0.0001, + "step": 13462 + }, + { + "epoch": 0.8676290520074756, + "grad_norm": 0.0021114395670508357, + "learning_rate": 1.4779806659505907e-06, + "loss": 0.0, + "step": 13463 + }, + { + "epoch": 0.8676934974544048, + "grad_norm": 0.00019131494950189957, + "learning_rate": 1.4772645900465452e-06, + "loss": 0.0, + "step": 13464 + }, + { + "epoch": 0.867757942901334, + "grad_norm": 0.0035857106761646687, + "learning_rate": 1.4765485141424993e-06, + "loss": 0.0, + "step": 13465 + }, + { + "epoch": 0.8678223883482632, + "grad_norm": 0.0001692712253456178, + "learning_rate": 1.4758324382384534e-06, + "loss": 0.0, + "step": 13466 + }, + { + "epoch": 0.8678868337951924, + "grad_norm": 0.0005669241135073661, + "learning_rate": 1.4751163623344075e-06, + "loss": 0.0, + "step": 13467 + }, + { + "epoch": 0.8679512792421216, + "grad_norm": 0.001889031131247042, + "learning_rate": 1.4744002864303616e-06, + "loss": 0.0, + "step": 13468 + }, + { + "epoch": 0.8680157246890507, + "grad_norm": 0.05634150570667161, + "learning_rate": 1.4736842105263159e-06, + "loss": 0.0001, + "step": 13469 + }, + { + "epoch": 0.8680801701359799, + "grad_norm": 0.016770040901093828, + "learning_rate": 1.4729681346222702e-06, + "loss": 0.0, + "step": 13470 + }, + { + "epoch": 0.8681446155829091, + "grad_norm": 0.007393320511604408, + "learning_rate": 1.4722520587182242e-06, + "loss": 0.0, + "step": 13471 + }, + { + "epoch": 0.8682090610298382, + "grad_norm": 0.005720097209988372, + "learning_rate": 1.4715359828141783e-06, + "loss": 0.0, + "step": 13472 + }, + { + "epoch": 0.8682735064767674, + "grad_norm": 0.0014226270995623246, + "learning_rate": 1.4708199069101326e-06, + "loss": 0.0, + "step": 13473 + }, + { + "epoch": 0.8683379519236966, + "grad_norm": 0.03148733109778701, + "learning_rate": 1.4701038310060867e-06, + "loss": 0.0001, + "step": 13474 + }, + { + "epoch": 0.8684023973706257, + "grad_norm": 0.018190387811027366, + "learning_rate": 1.469387755102041e-06, + "loss": 0.0, + "step": 13475 + }, + { + "epoch": 0.8684668428175549, + "grad_norm": 0.00022876699057328286, + "learning_rate": 1.468671679197995e-06, + "loss": 0.0, + "step": 13476 + }, + { + "epoch": 0.8685312882644841, + "grad_norm": 0.007228422026355488, + "learning_rate": 1.4679556032939494e-06, + "loss": 0.0, + "step": 13477 + }, + { + "epoch": 0.8685957337114133, + "grad_norm": 0.022195556988556204, + "learning_rate": 1.4672395273899035e-06, + "loss": 0.0, + "step": 13478 + }, + { + "epoch": 0.8686601791583425, + "grad_norm": 0.0018545337949908754, + "learning_rate": 1.4665234514858576e-06, + "loss": 0.0, + "step": 13479 + }, + { + "epoch": 0.8687246246052717, + "grad_norm": 0.12720693745515196, + "learning_rate": 1.4658073755818117e-06, + "loss": 0.0003, + "step": 13480 + }, + { + "epoch": 0.8687890700522009, + "grad_norm": 0.14163634432106645, + "learning_rate": 1.4650912996777662e-06, + "loss": 0.0018, + "step": 13481 + }, + { + "epoch": 0.86885351549913, + "grad_norm": 0.4481010131587821, + "learning_rate": 1.4643752237737203e-06, + "loss": 0.0029, + "step": 13482 + }, + { + "epoch": 0.8689179609460591, + "grad_norm": 0.00217015626315937, + "learning_rate": 1.4636591478696743e-06, + "loss": 0.0, + "step": 13483 + }, + { + "epoch": 0.8689824063929883, + "grad_norm": 0.0025758466115319723, + "learning_rate": 1.4629430719656284e-06, + "loss": 0.0, + "step": 13484 + }, + { + "epoch": 0.8690468518399175, + "grad_norm": 0.44933947723859485, + "learning_rate": 1.4622269960615825e-06, + "loss": 0.0008, + "step": 13485 + }, + { + "epoch": 0.8691112972868467, + "grad_norm": 0.006622376254887656, + "learning_rate": 1.4615109201575366e-06, + "loss": 0.0, + "step": 13486 + }, + { + "epoch": 0.8691757427337758, + "grad_norm": 0.010118893463182543, + "learning_rate": 1.4607948442534911e-06, + "loss": 0.0, + "step": 13487 + }, + { + "epoch": 0.869240188180705, + "grad_norm": 0.002533162206517466, + "learning_rate": 1.4600787683494452e-06, + "loss": 0.0, + "step": 13488 + }, + { + "epoch": 0.8693046336276342, + "grad_norm": 0.0046699535377350216, + "learning_rate": 1.4593626924453993e-06, + "loss": 0.0, + "step": 13489 + }, + { + "epoch": 0.8693690790745634, + "grad_norm": 0.0006231500933800507, + "learning_rate": 1.4586466165413534e-06, + "loss": 0.0, + "step": 13490 + }, + { + "epoch": 0.8694335245214926, + "grad_norm": 0.0129105570295586, + "learning_rate": 1.4579305406373077e-06, + "loss": 0.0, + "step": 13491 + }, + { + "epoch": 0.8694979699684218, + "grad_norm": 0.6536391411143632, + "learning_rate": 1.4572144647332618e-06, + "loss": 0.003, + "step": 13492 + }, + { + "epoch": 0.869562415415351, + "grad_norm": 0.00645190186822942, + "learning_rate": 1.456498388829216e-06, + "loss": 0.0, + "step": 13493 + }, + { + "epoch": 0.86962686086228, + "grad_norm": 0.021776802677437902, + "learning_rate": 1.4557823129251701e-06, + "loss": 0.0002, + "step": 13494 + }, + { + "epoch": 0.8696913063092092, + "grad_norm": 0.9354631868333679, + "learning_rate": 1.4550662370211244e-06, + "loss": 0.0034, + "step": 13495 + }, + { + "epoch": 0.8697557517561384, + "grad_norm": 0.24174388126058172, + "learning_rate": 1.4543501611170785e-06, + "loss": 0.002, + "step": 13496 + }, + { + "epoch": 0.8698201972030676, + "grad_norm": 0.002051742025662557, + "learning_rate": 1.4536340852130326e-06, + "loss": 0.0, + "step": 13497 + }, + { + "epoch": 0.8698846426499968, + "grad_norm": 0.03131467117702846, + "learning_rate": 1.4529180093089867e-06, + "loss": 0.0, + "step": 13498 + }, + { + "epoch": 0.869949088096926, + "grad_norm": 0.016429974192892383, + "learning_rate": 1.4522019334049412e-06, + "loss": 0.0002, + "step": 13499 + }, + { + "epoch": 0.8700135335438551, + "grad_norm": 0.0035691864897899376, + "learning_rate": 1.4514858575008953e-06, + "loss": 0.0, + "step": 13500 + }, + { + "epoch": 0.8700779789907843, + "grad_norm": 0.014284719551174541, + "learning_rate": 1.4507697815968494e-06, + "loss": 0.0, + "step": 13501 + }, + { + "epoch": 0.8701424244377135, + "grad_norm": 0.0003508945434332547, + "learning_rate": 1.4500537056928035e-06, + "loss": 0.0, + "step": 13502 + }, + { + "epoch": 0.8702068698846427, + "grad_norm": 0.005536226193423074, + "learning_rate": 1.4493376297887576e-06, + "loss": 0.0, + "step": 13503 + }, + { + "epoch": 0.8702713153315719, + "grad_norm": 0.013431267923642803, + "learning_rate": 1.4486215538847119e-06, + "loss": 0.0, + "step": 13504 + }, + { + "epoch": 0.8703357607785009, + "grad_norm": 0.03924292381700065, + "learning_rate": 1.4479054779806662e-06, + "loss": 0.0, + "step": 13505 + }, + { + "epoch": 0.8704002062254301, + "grad_norm": 0.014671159050005588, + "learning_rate": 1.4471894020766202e-06, + "loss": 0.0, + "step": 13506 + }, + { + "epoch": 0.8704646516723593, + "grad_norm": 0.00044016681878419526, + "learning_rate": 1.4464733261725743e-06, + "loss": 0.0, + "step": 13507 + }, + { + "epoch": 0.8705290971192885, + "grad_norm": 0.0027930283003449025, + "learning_rate": 1.4457572502685286e-06, + "loss": 0.0, + "step": 13508 + }, + { + "epoch": 0.8705935425662177, + "grad_norm": 0.005349489845537028, + "learning_rate": 1.4450411743644827e-06, + "loss": 0.0, + "step": 13509 + }, + { + "epoch": 0.8706579880131469, + "grad_norm": 0.0025331927243357815, + "learning_rate": 1.444325098460437e-06, + "loss": 0.0, + "step": 13510 + }, + { + "epoch": 0.870722433460076, + "grad_norm": 0.1705458464266017, + "learning_rate": 1.4436090225563911e-06, + "loss": 0.0003, + "step": 13511 + }, + { + "epoch": 0.8707868789070052, + "grad_norm": 0.017371828112596428, + "learning_rate": 1.4428929466523454e-06, + "loss": 0.0, + "step": 13512 + }, + { + "epoch": 0.8708513243539344, + "grad_norm": 0.0038565979744293725, + "learning_rate": 1.4421768707482995e-06, + "loss": 0.0, + "step": 13513 + }, + { + "epoch": 0.8709157698008636, + "grad_norm": 0.0015431336010864626, + "learning_rate": 1.4414607948442536e-06, + "loss": 0.0, + "step": 13514 + }, + { + "epoch": 0.8709802152477928, + "grad_norm": 0.38062584393036586, + "learning_rate": 1.4407447189402077e-06, + "loss": 0.0007, + "step": 13515 + }, + { + "epoch": 0.871044660694722, + "grad_norm": 0.002684096329343692, + "learning_rate": 1.4400286430361622e-06, + "loss": 0.0, + "step": 13516 + }, + { + "epoch": 0.871109106141651, + "grad_norm": 0.0004909211941232381, + "learning_rate": 1.4393125671321163e-06, + "loss": 0.0, + "step": 13517 + }, + { + "epoch": 0.8711735515885802, + "grad_norm": 0.001157450722865243, + "learning_rate": 1.4385964912280704e-06, + "loss": 0.0, + "step": 13518 + }, + { + "epoch": 0.8712379970355094, + "grad_norm": 0.0035391587568699614, + "learning_rate": 1.4378804153240244e-06, + "loss": 0.0, + "step": 13519 + }, + { + "epoch": 0.8713024424824386, + "grad_norm": 0.00045686885179854926, + "learning_rate": 1.4371643394199785e-06, + "loss": 0.0, + "step": 13520 + }, + { + "epoch": 0.8713668879293678, + "grad_norm": 0.0021138812403863616, + "learning_rate": 1.4364482635159326e-06, + "loss": 0.0, + "step": 13521 + }, + { + "epoch": 0.871431333376297, + "grad_norm": 0.2128194987620841, + "learning_rate": 1.4357321876118871e-06, + "loss": 0.0006, + "step": 13522 + }, + { + "epoch": 0.8714957788232262, + "grad_norm": 0.01931788608564039, + "learning_rate": 1.4350161117078412e-06, + "loss": 0.0, + "step": 13523 + }, + { + "epoch": 0.8715602242701553, + "grad_norm": 0.0003648647198489776, + "learning_rate": 1.4343000358037953e-06, + "loss": 0.0, + "step": 13524 + }, + { + "epoch": 0.8716246697170845, + "grad_norm": 0.004620281109182782, + "learning_rate": 1.4335839598997494e-06, + "loss": 0.0, + "step": 13525 + }, + { + "epoch": 0.8716891151640137, + "grad_norm": 0.017122646764027347, + "learning_rate": 1.4328678839957037e-06, + "loss": 0.0, + "step": 13526 + }, + { + "epoch": 0.8717535606109429, + "grad_norm": 0.0012365848463775104, + "learning_rate": 1.4321518080916578e-06, + "loss": 0.0, + "step": 13527 + }, + { + "epoch": 0.871818006057872, + "grad_norm": 0.0016554455275155562, + "learning_rate": 1.431435732187612e-06, + "loss": 0.0, + "step": 13528 + }, + { + "epoch": 0.8718824515048011, + "grad_norm": 0.018926993280560703, + "learning_rate": 1.4307196562835662e-06, + "loss": 0.0015, + "step": 13529 + }, + { + "epoch": 0.8719468969517303, + "grad_norm": 0.014421366017851302, + "learning_rate": 1.4300035803795205e-06, + "loss": 0.0001, + "step": 13530 + }, + { + "epoch": 0.8720113423986595, + "grad_norm": 0.0012043756470092263, + "learning_rate": 1.4292875044754745e-06, + "loss": 0.0, + "step": 13531 + }, + { + "epoch": 0.8720757878455887, + "grad_norm": 0.06249889730434415, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.0001, + "step": 13532 + }, + { + "epoch": 0.8721402332925179, + "grad_norm": 0.00483204331224003, + "learning_rate": 1.4278553526673827e-06, + "loss": 0.0, + "step": 13533 + }, + { + "epoch": 0.8722046787394471, + "grad_norm": 0.016188550479828548, + "learning_rate": 1.4271392767633372e-06, + "loss": 0.0001, + "step": 13534 + }, + { + "epoch": 0.8722691241863763, + "grad_norm": 0.2588630282665265, + "learning_rate": 1.4264232008592913e-06, + "loss": 0.0022, + "step": 13535 + }, + { + "epoch": 0.8723335696333054, + "grad_norm": 0.017522219661073486, + "learning_rate": 1.4257071249552454e-06, + "loss": 0.0, + "step": 13536 + }, + { + "epoch": 0.8723980150802346, + "grad_norm": 4.777473903327223, + "learning_rate": 1.4249910490511995e-06, + "loss": 0.0184, + "step": 13537 + }, + { + "epoch": 0.8724624605271638, + "grad_norm": 0.1122914928480347, + "learning_rate": 1.4242749731471536e-06, + "loss": 0.0, + "step": 13538 + }, + { + "epoch": 0.8725269059740929, + "grad_norm": 0.0003457079673620274, + "learning_rate": 1.4235588972431077e-06, + "loss": 0.0, + "step": 13539 + }, + { + "epoch": 0.8725913514210221, + "grad_norm": 0.008842368697230442, + "learning_rate": 1.4228428213390622e-06, + "loss": 0.0, + "step": 13540 + }, + { + "epoch": 0.8726557968679513, + "grad_norm": 0.16889078991419842, + "learning_rate": 1.4221267454350163e-06, + "loss": 0.0005, + "step": 13541 + }, + { + "epoch": 0.8727202423148804, + "grad_norm": 0.09180406278511782, + "learning_rate": 1.4214106695309703e-06, + "loss": 0.0001, + "step": 13542 + }, + { + "epoch": 0.8727846877618096, + "grad_norm": 0.0002542434149114111, + "learning_rate": 1.4206945936269244e-06, + "loss": 0.0, + "step": 13543 + }, + { + "epoch": 0.8728491332087388, + "grad_norm": 0.00021139095458569033, + "learning_rate": 1.4199785177228787e-06, + "loss": 0.0, + "step": 13544 + }, + { + "epoch": 0.872913578655668, + "grad_norm": 0.006701556482451194, + "learning_rate": 1.419262441818833e-06, + "loss": 0.0, + "step": 13545 + }, + { + "epoch": 0.8729780241025972, + "grad_norm": 0.0005409555739583791, + "learning_rate": 1.4185463659147871e-06, + "loss": 0.0, + "step": 13546 + }, + { + "epoch": 0.8730424695495264, + "grad_norm": 0.29029280227176923, + "learning_rate": 1.4178302900107412e-06, + "loss": 0.0005, + "step": 13547 + }, + { + "epoch": 0.8731069149964555, + "grad_norm": 0.003792905193337104, + "learning_rate": 1.4171142141066955e-06, + "loss": 0.0, + "step": 13548 + }, + { + "epoch": 0.8731713604433847, + "grad_norm": 0.027987387467200055, + "learning_rate": 1.4163981382026496e-06, + "loss": 0.0, + "step": 13549 + }, + { + "epoch": 0.8732358058903138, + "grad_norm": 0.004768562503807217, + "learning_rate": 1.4156820622986037e-06, + "loss": 0.0, + "step": 13550 + }, + { + "epoch": 0.873300251337243, + "grad_norm": 0.0008558681731003965, + "learning_rate": 1.414965986394558e-06, + "loss": 0.0, + "step": 13551 + }, + { + "epoch": 0.8733646967841722, + "grad_norm": 0.09874931857320618, + "learning_rate": 1.4142499104905123e-06, + "loss": 0.0001, + "step": 13552 + }, + { + "epoch": 0.8734291422311014, + "grad_norm": 0.5997927258018302, + "learning_rate": 1.4135338345864664e-06, + "loss": 0.0015, + "step": 13553 + }, + { + "epoch": 0.8734935876780305, + "grad_norm": 0.06590779190624765, + "learning_rate": 1.4128177586824204e-06, + "loss": 0.0001, + "step": 13554 + }, + { + "epoch": 0.8735580331249597, + "grad_norm": 0.010813035118637227, + "learning_rate": 1.4121016827783745e-06, + "loss": 0.0001, + "step": 13555 + }, + { + "epoch": 0.8736224785718889, + "grad_norm": 0.0007776535748360822, + "learning_rate": 1.4113856068743286e-06, + "loss": 0.0, + "step": 13556 + }, + { + "epoch": 0.8736869240188181, + "grad_norm": 0.514541151528957, + "learning_rate": 1.4106695309702831e-06, + "loss": 0.0216, + "step": 13557 + }, + { + "epoch": 0.8737513694657473, + "grad_norm": 0.0011680135680204743, + "learning_rate": 1.4099534550662372e-06, + "loss": 0.0, + "step": 13558 + }, + { + "epoch": 0.8738158149126765, + "grad_norm": 0.001274273368860219, + "learning_rate": 1.4092373791621913e-06, + "loss": 0.0, + "step": 13559 + }, + { + "epoch": 0.8738802603596056, + "grad_norm": 0.0016573716185651368, + "learning_rate": 1.4085213032581454e-06, + "loss": 0.0, + "step": 13560 + }, + { + "epoch": 0.8739447058065347, + "grad_norm": 0.0005385912905206142, + "learning_rate": 1.4078052273540995e-06, + "loss": 0.0, + "step": 13561 + }, + { + "epoch": 0.8740091512534639, + "grad_norm": 0.09338146530950199, + "learning_rate": 1.4070891514500538e-06, + "loss": 0.0002, + "step": 13562 + }, + { + "epoch": 0.8740735967003931, + "grad_norm": 0.0030035474103590263, + "learning_rate": 1.406373075546008e-06, + "loss": 0.0, + "step": 13563 + }, + { + "epoch": 0.8741380421473223, + "grad_norm": 0.0005251321157146958, + "learning_rate": 1.4056569996419622e-06, + "loss": 0.0, + "step": 13564 + }, + { + "epoch": 0.8742024875942515, + "grad_norm": 0.0021644242133924342, + "learning_rate": 1.4049409237379162e-06, + "loss": 0.0, + "step": 13565 + }, + { + "epoch": 0.8742669330411806, + "grad_norm": 0.0002113496909306623, + "learning_rate": 1.4042248478338705e-06, + "loss": 0.0, + "step": 13566 + }, + { + "epoch": 0.8743313784881098, + "grad_norm": 0.002869868115987156, + "learning_rate": 1.4035087719298246e-06, + "loss": 0.0, + "step": 13567 + }, + { + "epoch": 0.874395823935039, + "grad_norm": 3.127468659924674, + "learning_rate": 1.4027926960257787e-06, + "loss": 0.0123, + "step": 13568 + }, + { + "epoch": 0.8744602693819682, + "grad_norm": 0.08067815140353134, + "learning_rate": 1.402076620121733e-06, + "loss": 0.0002, + "step": 13569 + }, + { + "epoch": 0.8745247148288974, + "grad_norm": 0.017830586287385566, + "learning_rate": 1.4013605442176873e-06, + "loss": 0.0, + "step": 13570 + }, + { + "epoch": 0.8745891602758266, + "grad_norm": 0.006552162045926843, + "learning_rate": 1.4006444683136414e-06, + "loss": 0.0, + "step": 13571 + }, + { + "epoch": 0.8746536057227556, + "grad_norm": 8.130109325365869e-05, + "learning_rate": 1.3999283924095955e-06, + "loss": 0.0, + "step": 13572 + }, + { + "epoch": 0.8747180511696848, + "grad_norm": 0.00481453068839515, + "learning_rate": 1.3992123165055496e-06, + "loss": 0.0, + "step": 13573 + }, + { + "epoch": 0.874782496616614, + "grad_norm": 0.0016966316384423679, + "learning_rate": 1.3984962406015037e-06, + "loss": 0.0, + "step": 13574 + }, + { + "epoch": 0.8748469420635432, + "grad_norm": 6.507700313948872e-05, + "learning_rate": 1.3977801646974582e-06, + "loss": 0.0, + "step": 13575 + }, + { + "epoch": 0.8749113875104724, + "grad_norm": 0.0010351706398880871, + "learning_rate": 1.3970640887934123e-06, + "loss": 0.0, + "step": 13576 + }, + { + "epoch": 0.8749758329574016, + "grad_norm": 0.002042297500884413, + "learning_rate": 1.3963480128893664e-06, + "loss": 0.0, + "step": 13577 + }, + { + "epoch": 0.8750402784043307, + "grad_norm": 0.00035755874045150096, + "learning_rate": 1.3956319369853204e-06, + "loss": 0.0, + "step": 13578 + }, + { + "epoch": 0.8751047238512599, + "grad_norm": 0.0007519831593113182, + "learning_rate": 1.3949158610812747e-06, + "loss": 0.0, + "step": 13579 + }, + { + "epoch": 0.8751691692981891, + "grad_norm": 0.01997057158629694, + "learning_rate": 1.394199785177229e-06, + "loss": 0.0015, + "step": 13580 + }, + { + "epoch": 0.8752336147451183, + "grad_norm": 0.00042799543401737973, + "learning_rate": 1.3934837092731831e-06, + "loss": 0.0, + "step": 13581 + }, + { + "epoch": 0.8752980601920475, + "grad_norm": 0.003192598232773263, + "learning_rate": 1.3927676333691372e-06, + "loss": 0.0, + "step": 13582 + }, + { + "epoch": 0.8753625056389766, + "grad_norm": 0.00041092294582336747, + "learning_rate": 1.3920515574650915e-06, + "loss": 0.0, + "step": 13583 + }, + { + "epoch": 0.8754269510859057, + "grad_norm": 0.00015048739275314575, + "learning_rate": 1.3913354815610456e-06, + "loss": 0.0, + "step": 13584 + }, + { + "epoch": 0.8754913965328349, + "grad_norm": 0.00040192569485888335, + "learning_rate": 1.3906194056569997e-06, + "loss": 0.0, + "step": 13585 + }, + { + "epoch": 0.8755558419797641, + "grad_norm": 0.003118728490399756, + "learning_rate": 1.389903329752954e-06, + "loss": 0.0, + "step": 13586 + }, + { + "epoch": 0.8756202874266933, + "grad_norm": 0.028921511163939883, + "learning_rate": 1.3891872538489083e-06, + "loss": 0.0003, + "step": 13587 + }, + { + "epoch": 0.8756847328736225, + "grad_norm": 0.0007635765470389747, + "learning_rate": 1.3884711779448624e-06, + "loss": 0.0, + "step": 13588 + }, + { + "epoch": 0.8757491783205517, + "grad_norm": 0.019386178158575473, + "learning_rate": 1.3877551020408165e-06, + "loss": 0.0001, + "step": 13589 + }, + { + "epoch": 0.8758136237674808, + "grad_norm": 0.008952326877814495, + "learning_rate": 1.3870390261367705e-06, + "loss": 0.0, + "step": 13590 + }, + { + "epoch": 0.87587806921441, + "grad_norm": 0.0064969913757071, + "learning_rate": 1.3863229502327246e-06, + "loss": 0.0, + "step": 13591 + }, + { + "epoch": 0.8759425146613392, + "grad_norm": 0.24652262056953084, + "learning_rate": 1.3856068743286791e-06, + "loss": 0.0098, + "step": 13592 + }, + { + "epoch": 0.8760069601082684, + "grad_norm": 0.008717637446990048, + "learning_rate": 1.3848907984246332e-06, + "loss": 0.0, + "step": 13593 + }, + { + "epoch": 0.8760714055551976, + "grad_norm": 0.1055407587350373, + "learning_rate": 1.3841747225205873e-06, + "loss": 0.0001, + "step": 13594 + }, + { + "epoch": 0.8761358510021267, + "grad_norm": 0.013464757423937554, + "learning_rate": 1.3834586466165414e-06, + "loss": 0.0, + "step": 13595 + }, + { + "epoch": 0.8762002964490558, + "grad_norm": 0.0014337143804371206, + "learning_rate": 1.3827425707124955e-06, + "loss": 0.0, + "step": 13596 + }, + { + "epoch": 0.876264741895985, + "grad_norm": 0.4674874787515268, + "learning_rate": 1.3820264948084498e-06, + "loss": 0.0015, + "step": 13597 + }, + { + "epoch": 0.8763291873429142, + "grad_norm": 0.00098483335952236, + "learning_rate": 1.381310418904404e-06, + "loss": 0.0, + "step": 13598 + }, + { + "epoch": 0.8763936327898434, + "grad_norm": 0.09651554507659374, + "learning_rate": 1.3805943430003582e-06, + "loss": 0.0002, + "step": 13599 + }, + { + "epoch": 0.8764580782367726, + "grad_norm": 0.05975753060859266, + "learning_rate": 1.3798782670963123e-06, + "loss": 0.0006, + "step": 13600 + }, + { + "epoch": 0.8765225236837018, + "grad_norm": 0.0008181424472536412, + "learning_rate": 1.3791621911922666e-06, + "loss": 0.0, + "step": 13601 + }, + { + "epoch": 0.8765869691306309, + "grad_norm": 0.0007309612300930143, + "learning_rate": 1.3784461152882206e-06, + "loss": 0.0, + "step": 13602 + }, + { + "epoch": 0.8766514145775601, + "grad_norm": 0.0004453427397565943, + "learning_rate": 1.3777300393841747e-06, + "loss": 0.0, + "step": 13603 + }, + { + "epoch": 0.8767158600244893, + "grad_norm": 0.0013161070099909815, + "learning_rate": 1.377013963480129e-06, + "loss": 0.0, + "step": 13604 + }, + { + "epoch": 0.8767803054714185, + "grad_norm": 8.2985330607004e-05, + "learning_rate": 1.3762978875760833e-06, + "loss": 0.0, + "step": 13605 + }, + { + "epoch": 0.8768447509183476, + "grad_norm": 0.0014163201861122309, + "learning_rate": 1.3755818116720374e-06, + "loss": 0.0, + "step": 13606 + }, + { + "epoch": 0.8769091963652768, + "grad_norm": 0.006504134348321924, + "learning_rate": 1.3748657357679915e-06, + "loss": 0.0, + "step": 13607 + }, + { + "epoch": 0.8769736418122059, + "grad_norm": 0.004427846073773377, + "learning_rate": 1.3741496598639456e-06, + "loss": 0.0, + "step": 13608 + }, + { + "epoch": 0.8770380872591351, + "grad_norm": 8.069661349110574e-05, + "learning_rate": 1.3734335839598997e-06, + "loss": 0.0, + "step": 13609 + }, + { + "epoch": 0.8771025327060643, + "grad_norm": 0.00013724066858743323, + "learning_rate": 1.3727175080558542e-06, + "loss": 0.0, + "step": 13610 + }, + { + "epoch": 0.8771669781529935, + "grad_norm": 1.0793487189499922e-05, + "learning_rate": 1.3720014321518083e-06, + "loss": 0.0, + "step": 13611 + }, + { + "epoch": 0.8772314235999227, + "grad_norm": 0.016116090551314, + "learning_rate": 1.3712853562477624e-06, + "loss": 0.0001, + "step": 13612 + }, + { + "epoch": 0.8772958690468519, + "grad_norm": 0.0044309911209259294, + "learning_rate": 1.3705692803437164e-06, + "loss": 0.0, + "step": 13613 + }, + { + "epoch": 0.877360314493781, + "grad_norm": 0.00038663612199369644, + "learning_rate": 1.3698532044396705e-06, + "loss": 0.0, + "step": 13614 + }, + { + "epoch": 0.8774247599407102, + "grad_norm": 0.00034108201632122604, + "learning_rate": 1.369137128535625e-06, + "loss": 0.0, + "step": 13615 + }, + { + "epoch": 0.8774892053876394, + "grad_norm": 0.0007516695032004047, + "learning_rate": 1.3684210526315791e-06, + "loss": 0.0, + "step": 13616 + }, + { + "epoch": 0.8775536508345685, + "grad_norm": 0.000810677722890353, + "learning_rate": 1.3677049767275332e-06, + "loss": 0.0, + "step": 13617 + }, + { + "epoch": 0.8776180962814977, + "grad_norm": 0.0004293978015281151, + "learning_rate": 1.3669889008234873e-06, + "loss": 0.0, + "step": 13618 + }, + { + "epoch": 0.8776825417284269, + "grad_norm": 0.09996970768416442, + "learning_rate": 1.3662728249194416e-06, + "loss": 0.0018, + "step": 13619 + }, + { + "epoch": 0.877746987175356, + "grad_norm": 0.009210777517721869, + "learning_rate": 1.3655567490153957e-06, + "loss": 0.0, + "step": 13620 + }, + { + "epoch": 0.8778114326222852, + "grad_norm": 2.0031004060201308e-05, + "learning_rate": 1.36484067311135e-06, + "loss": 0.0, + "step": 13621 + }, + { + "epoch": 0.8778758780692144, + "grad_norm": 0.0002399283084197288, + "learning_rate": 1.364124597207304e-06, + "loss": 0.0, + "step": 13622 + }, + { + "epoch": 0.8779403235161436, + "grad_norm": 0.0014824010634681267, + "learning_rate": 1.3634085213032584e-06, + "loss": 0.0, + "step": 13623 + }, + { + "epoch": 0.8780047689630728, + "grad_norm": 0.0006106176764788257, + "learning_rate": 1.3626924453992125e-06, + "loss": 0.0, + "step": 13624 + }, + { + "epoch": 0.878069214410002, + "grad_norm": 0.009545374588239326, + "learning_rate": 1.3619763694951665e-06, + "loss": 0.0, + "step": 13625 + }, + { + "epoch": 0.8781336598569311, + "grad_norm": 0.00035692053622342487, + "learning_rate": 1.3612602935911206e-06, + "loss": 0.0, + "step": 13626 + }, + { + "epoch": 0.8781981053038603, + "grad_norm": 0.0086509276995505, + "learning_rate": 1.3605442176870751e-06, + "loss": 0.0, + "step": 13627 + }, + { + "epoch": 0.8782625507507894, + "grad_norm": 0.00010314129543442811, + "learning_rate": 1.3598281417830292e-06, + "loss": 0.0, + "step": 13628 + }, + { + "epoch": 0.8783269961977186, + "grad_norm": 0.01060045892036667, + "learning_rate": 1.3591120658789833e-06, + "loss": 0.0, + "step": 13629 + }, + { + "epoch": 0.8783914416446478, + "grad_norm": 0.0026332486628218815, + "learning_rate": 1.3583959899749374e-06, + "loss": 0.0, + "step": 13630 + }, + { + "epoch": 0.878455887091577, + "grad_norm": 0.004919190051721374, + "learning_rate": 1.3576799140708915e-06, + "loss": 0.0, + "step": 13631 + }, + { + "epoch": 0.8785203325385061, + "grad_norm": 0.005739669680137454, + "learning_rate": 1.3569638381668456e-06, + "loss": 0.0, + "step": 13632 + }, + { + "epoch": 0.8785847779854353, + "grad_norm": 0.12541555085615214, + "learning_rate": 1.3562477622628e-06, + "loss": 0.0016, + "step": 13633 + }, + { + "epoch": 0.8786492234323645, + "grad_norm": 0.009859135451052567, + "learning_rate": 1.3555316863587542e-06, + "loss": 0.0001, + "step": 13634 + }, + { + "epoch": 0.8787136688792937, + "grad_norm": 0.28341660181746003, + "learning_rate": 1.3548156104547083e-06, + "loss": 0.0006, + "step": 13635 + }, + { + "epoch": 0.8787781143262229, + "grad_norm": 0.0016875942051804806, + "learning_rate": 1.3540995345506624e-06, + "loss": 0.0, + "step": 13636 + }, + { + "epoch": 0.8788425597731521, + "grad_norm": 0.0005612762262681204, + "learning_rate": 1.3533834586466167e-06, + "loss": 0.0, + "step": 13637 + }, + { + "epoch": 0.8789070052200812, + "grad_norm": 0.01833501293870889, + "learning_rate": 1.3526673827425707e-06, + "loss": 0.0, + "step": 13638 + }, + { + "epoch": 0.8789714506670103, + "grad_norm": 0.0016657878041175568, + "learning_rate": 1.351951306838525e-06, + "loss": 0.0, + "step": 13639 + }, + { + "epoch": 0.8790358961139395, + "grad_norm": 0.008593055425421249, + "learning_rate": 1.3512352309344791e-06, + "loss": 0.0, + "step": 13640 + }, + { + "epoch": 0.8791003415608687, + "grad_norm": 0.022741383577491645, + "learning_rate": 1.3505191550304334e-06, + "loss": 0.0002, + "step": 13641 + }, + { + "epoch": 0.8791647870077979, + "grad_norm": 0.00012513328344494333, + "learning_rate": 1.3498030791263875e-06, + "loss": 0.0, + "step": 13642 + }, + { + "epoch": 0.8792292324547271, + "grad_norm": 0.0015158990746431459, + "learning_rate": 1.3490870032223416e-06, + "loss": 0.0, + "step": 13643 + }, + { + "epoch": 0.8792936779016562, + "grad_norm": 0.0001142028649185917, + "learning_rate": 1.3483709273182957e-06, + "loss": 0.0, + "step": 13644 + }, + { + "epoch": 0.8793581233485854, + "grad_norm": 0.00018615067818089466, + "learning_rate": 1.3476548514142502e-06, + "loss": 0.0, + "step": 13645 + }, + { + "epoch": 0.8794225687955146, + "grad_norm": 0.0016702490090621004, + "learning_rate": 1.3469387755102043e-06, + "loss": 0.0, + "step": 13646 + }, + { + "epoch": 0.8794870142424438, + "grad_norm": 0.014081349773854846, + "learning_rate": 1.3462226996061584e-06, + "loss": 0.0, + "step": 13647 + }, + { + "epoch": 0.879551459689373, + "grad_norm": 0.35911500897104526, + "learning_rate": 1.3455066237021125e-06, + "loss": 0.0003, + "step": 13648 + }, + { + "epoch": 0.8796159051363022, + "grad_norm": 0.0004700586450096311, + "learning_rate": 1.3447905477980665e-06, + "loss": 0.0, + "step": 13649 + }, + { + "epoch": 0.8796803505832312, + "grad_norm": 0.007744483732701911, + "learning_rate": 1.3440744718940208e-06, + "loss": 0.0001, + "step": 13650 + }, + { + "epoch": 0.8797447960301604, + "grad_norm": 0.0002823123017943778, + "learning_rate": 1.3433583959899751e-06, + "loss": 0.0, + "step": 13651 + }, + { + "epoch": 0.8798092414770896, + "grad_norm": 0.0020454451113791103, + "learning_rate": 1.3426423200859292e-06, + "loss": 0.0, + "step": 13652 + }, + { + "epoch": 0.8798736869240188, + "grad_norm": 0.011770034371449568, + "learning_rate": 1.3419262441818833e-06, + "loss": 0.0, + "step": 13653 + }, + { + "epoch": 0.879938132370948, + "grad_norm": 0.0009106569832223424, + "learning_rate": 1.3412101682778376e-06, + "loss": 0.0, + "step": 13654 + }, + { + "epoch": 0.8800025778178772, + "grad_norm": 0.00044701932244403723, + "learning_rate": 1.3404940923737917e-06, + "loss": 0.0, + "step": 13655 + }, + { + "epoch": 0.8800670232648063, + "grad_norm": 0.0013901423332051905, + "learning_rate": 1.339778016469746e-06, + "loss": 0.0, + "step": 13656 + }, + { + "epoch": 0.8801314687117355, + "grad_norm": 0.024266204133080428, + "learning_rate": 1.3390619405657e-06, + "loss": 0.0, + "step": 13657 + }, + { + "epoch": 0.8801959141586647, + "grad_norm": 0.003448365666168964, + "learning_rate": 1.3383458646616544e-06, + "loss": 0.0, + "step": 13658 + }, + { + "epoch": 0.8802603596055939, + "grad_norm": 0.25593555568473547, + "learning_rate": 1.3376297887576085e-06, + "loss": 0.0009, + "step": 13659 + }, + { + "epoch": 0.8803248050525231, + "grad_norm": 0.00023793433604057834, + "learning_rate": 1.3369137128535626e-06, + "loss": 0.0, + "step": 13660 + }, + { + "epoch": 0.8803892504994522, + "grad_norm": 0.024291282911824568, + "learning_rate": 1.3361976369495166e-06, + "loss": 0.0, + "step": 13661 + }, + { + "epoch": 0.8804536959463813, + "grad_norm": 0.18787491587994204, + "learning_rate": 1.3354815610454712e-06, + "loss": 0.0014, + "step": 13662 + }, + { + "epoch": 0.8805181413933105, + "grad_norm": 0.00011337929585352012, + "learning_rate": 1.3347654851414252e-06, + "loss": 0.0, + "step": 13663 + }, + { + "epoch": 0.8805825868402397, + "grad_norm": 0.00015175064550529223, + "learning_rate": 1.3340494092373793e-06, + "loss": 0.0, + "step": 13664 + }, + { + "epoch": 0.8806470322871689, + "grad_norm": 0.012094931872168792, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.0, + "step": 13665 + }, + { + "epoch": 0.8807114777340981, + "grad_norm": 0.008657847704660484, + "learning_rate": 1.3326172574292875e-06, + "loss": 0.0001, + "step": 13666 + }, + { + "epoch": 0.8807759231810273, + "grad_norm": 0.0029345671366418132, + "learning_rate": 1.3319011815252416e-06, + "loss": 0.0, + "step": 13667 + }, + { + "epoch": 0.8808403686279564, + "grad_norm": 0.00023515681443448538, + "learning_rate": 1.331185105621196e-06, + "loss": 0.0, + "step": 13668 + }, + { + "epoch": 0.8809048140748856, + "grad_norm": 0.0001054609110638124, + "learning_rate": 1.3304690297171502e-06, + "loss": 0.0, + "step": 13669 + }, + { + "epoch": 0.8809692595218148, + "grad_norm": 0.0027009243656194505, + "learning_rate": 1.3297529538131043e-06, + "loss": 0.0, + "step": 13670 + }, + { + "epoch": 0.881033704968744, + "grad_norm": 0.00012685416549973138, + "learning_rate": 1.3290368779090584e-06, + "loss": 0.0, + "step": 13671 + }, + { + "epoch": 0.8810981504156732, + "grad_norm": 0.3780272996016023, + "learning_rate": 1.3283208020050127e-06, + "loss": 0.0023, + "step": 13672 + }, + { + "epoch": 0.8811625958626023, + "grad_norm": 0.10550351364688096, + "learning_rate": 1.3276047261009667e-06, + "loss": 0.0001, + "step": 13673 + }, + { + "epoch": 0.8812270413095314, + "grad_norm": 0.0005343506373210069, + "learning_rate": 1.326888650196921e-06, + "loss": 0.0, + "step": 13674 + }, + { + "epoch": 0.8812914867564606, + "grad_norm": 0.006842115762950535, + "learning_rate": 1.3261725742928751e-06, + "loss": 0.0, + "step": 13675 + }, + { + "epoch": 0.8813559322033898, + "grad_norm": 0.0013787853414366827, + "learning_rate": 1.3254564983888294e-06, + "loss": 0.0, + "step": 13676 + }, + { + "epoch": 0.881420377650319, + "grad_norm": 0.6943399292392509, + "learning_rate": 1.3247404224847835e-06, + "loss": 0.0052, + "step": 13677 + }, + { + "epoch": 0.8814848230972482, + "grad_norm": 0.001786099160118304, + "learning_rate": 1.3240243465807376e-06, + "loss": 0.0, + "step": 13678 + }, + { + "epoch": 0.8815492685441774, + "grad_norm": 0.0029717481777851596, + "learning_rate": 1.3233082706766917e-06, + "loss": 0.0, + "step": 13679 + }, + { + "epoch": 0.8816137139911066, + "grad_norm": 6.38256307935207e-05, + "learning_rate": 1.3225921947726462e-06, + "loss": 0.0, + "step": 13680 + }, + { + "epoch": 0.8816781594380357, + "grad_norm": 0.0002884480060583603, + "learning_rate": 1.3218761188686003e-06, + "loss": 0.0, + "step": 13681 + }, + { + "epoch": 0.8817426048849649, + "grad_norm": 0.1623689515082662, + "learning_rate": 1.3211600429645544e-06, + "loss": 0.0002, + "step": 13682 + }, + { + "epoch": 0.8818070503318941, + "grad_norm": 0.04362992253845935, + "learning_rate": 1.3204439670605085e-06, + "loss": 0.0001, + "step": 13683 + }, + { + "epoch": 0.8818714957788232, + "grad_norm": 0.22108830118373307, + "learning_rate": 1.3197278911564625e-06, + "loss": 0.0004, + "step": 13684 + }, + { + "epoch": 0.8819359412257524, + "grad_norm": 0.0003036379952213748, + "learning_rate": 1.3190118152524166e-06, + "loss": 0.0, + "step": 13685 + }, + { + "epoch": 0.8820003866726815, + "grad_norm": 0.008215158033441384, + "learning_rate": 1.3182957393483711e-06, + "loss": 0.0001, + "step": 13686 + }, + { + "epoch": 0.8820648321196107, + "grad_norm": 0.000746369885476597, + "learning_rate": 1.3175796634443252e-06, + "loss": 0.0, + "step": 13687 + }, + { + "epoch": 0.8821292775665399, + "grad_norm": 0.2331175370722083, + "learning_rate": 1.3168635875402793e-06, + "loss": 0.0001, + "step": 13688 + }, + { + "epoch": 0.8821937230134691, + "grad_norm": 3.3127325739645425e-05, + "learning_rate": 1.3161475116362334e-06, + "loss": 0.0, + "step": 13689 + }, + { + "epoch": 0.8822581684603983, + "grad_norm": 0.00046292881256141657, + "learning_rate": 1.3154314357321877e-06, + "loss": 0.0, + "step": 13690 + }, + { + "epoch": 0.8823226139073275, + "grad_norm": 2.4879745552462693e-05, + "learning_rate": 1.314715359828142e-06, + "loss": 0.0, + "step": 13691 + }, + { + "epoch": 0.8823870593542567, + "grad_norm": 0.024843685636272067, + "learning_rate": 1.313999283924096e-06, + "loss": 0.0, + "step": 13692 + }, + { + "epoch": 0.8824515048011858, + "grad_norm": 0.0033880595142095345, + "learning_rate": 1.3132832080200502e-06, + "loss": 0.0, + "step": 13693 + }, + { + "epoch": 0.882515950248115, + "grad_norm": 0.005841345105160331, + "learning_rate": 1.3125671321160045e-06, + "loss": 0.0, + "step": 13694 + }, + { + "epoch": 0.8825803956950441, + "grad_norm": 0.0034708352770504813, + "learning_rate": 1.3118510562119586e-06, + "loss": 0.0, + "step": 13695 + }, + { + "epoch": 0.8826448411419733, + "grad_norm": 0.0011531345218125334, + "learning_rate": 1.3111349803079127e-06, + "loss": 0.0, + "step": 13696 + }, + { + "epoch": 0.8827092865889025, + "grad_norm": 0.0008902594119790399, + "learning_rate": 1.310418904403867e-06, + "loss": 0.0, + "step": 13697 + }, + { + "epoch": 0.8827737320358316, + "grad_norm": 0.02761833022301556, + "learning_rate": 1.3097028284998212e-06, + "loss": 0.0, + "step": 13698 + }, + { + "epoch": 0.8828381774827608, + "grad_norm": 7.322041241513758e-05, + "learning_rate": 1.3089867525957753e-06, + "loss": 0.0, + "step": 13699 + }, + { + "epoch": 0.88290262292969, + "grad_norm": 0.0003351661766753556, + "learning_rate": 1.3082706766917294e-06, + "loss": 0.0, + "step": 13700 + }, + { + "epoch": 0.8829670683766192, + "grad_norm": 0.13306850415458327, + "learning_rate": 1.3075546007876835e-06, + "loss": 0.0003, + "step": 13701 + }, + { + "epoch": 0.8830315138235484, + "grad_norm": 0.0009763088853932016, + "learning_rate": 1.3068385248836376e-06, + "loss": 0.0, + "step": 13702 + }, + { + "epoch": 0.8830959592704776, + "grad_norm": 0.030336426954044204, + "learning_rate": 1.3061224489795921e-06, + "loss": 0.0002, + "step": 13703 + }, + { + "epoch": 0.8831604047174068, + "grad_norm": 0.3518987319355953, + "learning_rate": 1.3054063730755462e-06, + "loss": 0.0013, + "step": 13704 + }, + { + "epoch": 0.8832248501643359, + "grad_norm": 0.00021259481191901802, + "learning_rate": 1.3046902971715003e-06, + "loss": 0.0, + "step": 13705 + }, + { + "epoch": 0.883289295611265, + "grad_norm": 0.0012099330641390034, + "learning_rate": 1.3039742212674544e-06, + "loss": 0.0, + "step": 13706 + }, + { + "epoch": 0.8833537410581942, + "grad_norm": 0.01886221954292911, + "learning_rate": 1.3032581453634085e-06, + "loss": 0.0, + "step": 13707 + }, + { + "epoch": 0.8834181865051234, + "grad_norm": 0.09828483459776614, + "learning_rate": 1.3025420694593628e-06, + "loss": 0.0001, + "step": 13708 + }, + { + "epoch": 0.8834826319520526, + "grad_norm": 0.0005948211234448418, + "learning_rate": 1.301825993555317e-06, + "loss": 0.0, + "step": 13709 + }, + { + "epoch": 0.8835470773989818, + "grad_norm": 0.00020818992887561486, + "learning_rate": 1.3011099176512711e-06, + "loss": 0.0, + "step": 13710 + }, + { + "epoch": 0.8836115228459109, + "grad_norm": 0.1842981683540968, + "learning_rate": 1.3003938417472252e-06, + "loss": 0.0011, + "step": 13711 + }, + { + "epoch": 0.8836759682928401, + "grad_norm": 0.008536273547773218, + "learning_rate": 1.2996777658431795e-06, + "loss": 0.0001, + "step": 13712 + }, + { + "epoch": 0.8837404137397693, + "grad_norm": 0.00032051570706314344, + "learning_rate": 1.2989616899391336e-06, + "loss": 0.0, + "step": 13713 + }, + { + "epoch": 0.8838048591866985, + "grad_norm": 0.17510439577339187, + "learning_rate": 1.2982456140350877e-06, + "loss": 0.0005, + "step": 13714 + }, + { + "epoch": 0.8838693046336277, + "grad_norm": 0.01028719993330639, + "learning_rate": 1.297529538131042e-06, + "loss": 0.0, + "step": 13715 + }, + { + "epoch": 0.8839337500805569, + "grad_norm": 0.08046575674952673, + "learning_rate": 1.2968134622269963e-06, + "loss": 0.0017, + "step": 13716 + }, + { + "epoch": 0.8839981955274859, + "grad_norm": 0.0006212313252662225, + "learning_rate": 1.2960973863229504e-06, + "loss": 0.0, + "step": 13717 + }, + { + "epoch": 0.8840626409744151, + "grad_norm": 0.00033610867341460235, + "learning_rate": 1.2953813104189045e-06, + "loss": 0.0, + "step": 13718 + }, + { + "epoch": 0.8841270864213443, + "grad_norm": 0.00023716963033959892, + "learning_rate": 1.2946652345148586e-06, + "loss": 0.0, + "step": 13719 + }, + { + "epoch": 0.8841915318682735, + "grad_norm": 0.0016286595639973542, + "learning_rate": 1.2939491586108126e-06, + "loss": 0.0, + "step": 13720 + }, + { + "epoch": 0.8842559773152027, + "grad_norm": 0.031614340354932574, + "learning_rate": 1.2932330827067672e-06, + "loss": 0.0001, + "step": 13721 + }, + { + "epoch": 0.8843204227621319, + "grad_norm": 0.00017104715739226403, + "learning_rate": 1.2925170068027212e-06, + "loss": 0.0, + "step": 13722 + }, + { + "epoch": 0.884384868209061, + "grad_norm": 9.70356569071254e-05, + "learning_rate": 1.2918009308986753e-06, + "loss": 0.0, + "step": 13723 + }, + { + "epoch": 0.8844493136559902, + "grad_norm": 0.3967907666004509, + "learning_rate": 1.2910848549946294e-06, + "loss": 0.0011, + "step": 13724 + }, + { + "epoch": 0.8845137591029194, + "grad_norm": 0.0008351767774307439, + "learning_rate": 1.2903687790905837e-06, + "loss": 0.0, + "step": 13725 + }, + { + "epoch": 0.8845782045498486, + "grad_norm": 0.0010768701346120296, + "learning_rate": 1.289652703186538e-06, + "loss": 0.0, + "step": 13726 + }, + { + "epoch": 0.8846426499967778, + "grad_norm": 0.00035160258067471443, + "learning_rate": 1.288936627282492e-06, + "loss": 0.0, + "step": 13727 + }, + { + "epoch": 0.8847070954437068, + "grad_norm": 0.21524670698834755, + "learning_rate": 1.2882205513784462e-06, + "loss": 0.0003, + "step": 13728 + }, + { + "epoch": 0.884771540890636, + "grad_norm": 7.928518740038473e-05, + "learning_rate": 1.2875044754744005e-06, + "loss": 0.0, + "step": 13729 + }, + { + "epoch": 0.8848359863375652, + "grad_norm": 0.7751929273734448, + "learning_rate": 1.2867883995703546e-06, + "loss": 0.0052, + "step": 13730 + }, + { + "epoch": 0.8849004317844944, + "grad_norm": 0.000505047569173897, + "learning_rate": 1.2860723236663087e-06, + "loss": 0.0, + "step": 13731 + }, + { + "epoch": 0.8849648772314236, + "grad_norm": 0.0017112831956432482, + "learning_rate": 1.285356247762263e-06, + "loss": 0.0, + "step": 13732 + }, + { + "epoch": 0.8850293226783528, + "grad_norm": 2.923113262558036e-05, + "learning_rate": 1.2846401718582173e-06, + "loss": 0.0, + "step": 13733 + }, + { + "epoch": 0.885093768125282, + "grad_norm": 0.08377971496993508, + "learning_rate": 1.2839240959541713e-06, + "loss": 0.0017, + "step": 13734 + }, + { + "epoch": 0.8851582135722111, + "grad_norm": 0.0018758007745659337, + "learning_rate": 1.2832080200501254e-06, + "loss": 0.0, + "step": 13735 + }, + { + "epoch": 0.8852226590191403, + "grad_norm": 0.005519941912064853, + "learning_rate": 1.2824919441460795e-06, + "loss": 0.0, + "step": 13736 + }, + { + "epoch": 0.8852871044660695, + "grad_norm": 0.007801686846673793, + "learning_rate": 1.2817758682420336e-06, + "loss": 0.0001, + "step": 13737 + }, + { + "epoch": 0.8853515499129987, + "grad_norm": 0.3482728911432587, + "learning_rate": 1.2810597923379881e-06, + "loss": 0.0008, + "step": 13738 + }, + { + "epoch": 0.8854159953599279, + "grad_norm": 0.0009555235755133371, + "learning_rate": 1.2803437164339422e-06, + "loss": 0.0, + "step": 13739 + }, + { + "epoch": 0.885480440806857, + "grad_norm": 0.002469717287478994, + "learning_rate": 1.2796276405298963e-06, + "loss": 0.0, + "step": 13740 + }, + { + "epoch": 0.8855448862537861, + "grad_norm": 0.0026660388959545405, + "learning_rate": 1.2789115646258504e-06, + "loss": 0.0, + "step": 13741 + }, + { + "epoch": 0.8856093317007153, + "grad_norm": 0.005242530312289297, + "learning_rate": 1.2781954887218045e-06, + "loss": 0.0, + "step": 13742 + }, + { + "epoch": 0.8856737771476445, + "grad_norm": 9.516788258618955e-05, + "learning_rate": 1.2774794128177588e-06, + "loss": 0.0, + "step": 13743 + }, + { + "epoch": 0.8857382225945737, + "grad_norm": 2.566322068723762e-05, + "learning_rate": 1.276763336913713e-06, + "loss": 0.0, + "step": 13744 + }, + { + "epoch": 0.8858026680415029, + "grad_norm": 0.00032851554042659717, + "learning_rate": 1.2760472610096671e-06, + "loss": 0.0, + "step": 13745 + }, + { + "epoch": 0.8858671134884321, + "grad_norm": 0.2881616395959894, + "learning_rate": 1.2753311851056212e-06, + "loss": 0.0024, + "step": 13746 + }, + { + "epoch": 0.8859315589353612, + "grad_norm": 0.0005407381204699677, + "learning_rate": 1.2746151092015755e-06, + "loss": 0.0, + "step": 13747 + }, + { + "epoch": 0.8859960043822904, + "grad_norm": 0.00013134327426654014, + "learning_rate": 1.2738990332975296e-06, + "loss": 0.0, + "step": 13748 + }, + { + "epoch": 0.8860604498292196, + "grad_norm": 1.0938324603394522e-05, + "learning_rate": 1.2731829573934837e-06, + "loss": 0.0, + "step": 13749 + }, + { + "epoch": 0.8861248952761488, + "grad_norm": 0.004271410915227554, + "learning_rate": 1.272466881489438e-06, + "loss": 0.0, + "step": 13750 + }, + { + "epoch": 0.8861893407230779, + "grad_norm": 0.0026957620892503614, + "learning_rate": 1.2717508055853923e-06, + "loss": 0.0, + "step": 13751 + }, + { + "epoch": 0.886253786170007, + "grad_norm": 0.0006152756950276613, + "learning_rate": 1.2710347296813464e-06, + "loss": 0.0, + "step": 13752 + }, + { + "epoch": 0.8863182316169362, + "grad_norm": 2.2124446079129158e-05, + "learning_rate": 1.2703186537773005e-06, + "loss": 0.0, + "step": 13753 + }, + { + "epoch": 0.8863826770638654, + "grad_norm": 0.05126629319637686, + "learning_rate": 1.2696025778732546e-06, + "loss": 0.0001, + "step": 13754 + }, + { + "epoch": 0.8864471225107946, + "grad_norm": 0.017536264153307488, + "learning_rate": 1.2688865019692087e-06, + "loss": 0.0001, + "step": 13755 + }, + { + "epoch": 0.8865115679577238, + "grad_norm": 0.04960957108481395, + "learning_rate": 1.2681704260651632e-06, + "loss": 0.0, + "step": 13756 + }, + { + "epoch": 0.886576013404653, + "grad_norm": 0.005953873003306027, + "learning_rate": 1.2674543501611172e-06, + "loss": 0.0001, + "step": 13757 + }, + { + "epoch": 0.8866404588515822, + "grad_norm": 0.0004576293244010673, + "learning_rate": 1.2667382742570713e-06, + "loss": 0.0, + "step": 13758 + }, + { + "epoch": 0.8867049042985113, + "grad_norm": 0.002728465719264111, + "learning_rate": 1.2660221983530254e-06, + "loss": 0.0, + "step": 13759 + }, + { + "epoch": 0.8867693497454405, + "grad_norm": 0.0020225571803567754, + "learning_rate": 1.2653061224489795e-06, + "loss": 0.0, + "step": 13760 + }, + { + "epoch": 0.8868337951923697, + "grad_norm": 0.14241467212327003, + "learning_rate": 1.264590046544934e-06, + "loss": 0.0004, + "step": 13761 + }, + { + "epoch": 0.8868982406392988, + "grad_norm": 0.0014042617102855625, + "learning_rate": 1.2638739706408881e-06, + "loss": 0.0, + "step": 13762 + }, + { + "epoch": 0.886962686086228, + "grad_norm": 0.029573956164921412, + "learning_rate": 1.2631578947368422e-06, + "loss": 0.0, + "step": 13763 + }, + { + "epoch": 0.8870271315331572, + "grad_norm": 0.0030826232614705414, + "learning_rate": 1.2624418188327963e-06, + "loss": 0.0, + "step": 13764 + }, + { + "epoch": 0.8870915769800863, + "grad_norm": 0.000188945426545367, + "learning_rate": 1.2617257429287506e-06, + "loss": 0.0, + "step": 13765 + }, + { + "epoch": 0.8871560224270155, + "grad_norm": 0.008708895375226261, + "learning_rate": 1.2610096670247047e-06, + "loss": 0.0, + "step": 13766 + }, + { + "epoch": 0.8872204678739447, + "grad_norm": 0.17166571446342943, + "learning_rate": 1.260293591120659e-06, + "loss": 0.0007, + "step": 13767 + }, + { + "epoch": 0.8872849133208739, + "grad_norm": 0.0806280824807589, + "learning_rate": 1.259577515216613e-06, + "loss": 0.0008, + "step": 13768 + }, + { + "epoch": 0.8873493587678031, + "grad_norm": 0.00011126534919590591, + "learning_rate": 1.2588614393125673e-06, + "loss": 0.0, + "step": 13769 + }, + { + "epoch": 0.8874138042147323, + "grad_norm": 0.0013592761131405947, + "learning_rate": 1.2581453634085214e-06, + "loss": 0.0, + "step": 13770 + }, + { + "epoch": 0.8874782496616614, + "grad_norm": 0.011567165436922357, + "learning_rate": 1.2574292875044755e-06, + "loss": 0.0, + "step": 13771 + }, + { + "epoch": 0.8875426951085906, + "grad_norm": 0.0010506584973813378, + "learning_rate": 1.2567132116004296e-06, + "loss": 0.0, + "step": 13772 + }, + { + "epoch": 0.8876071405555197, + "grad_norm": 0.001634937844128216, + "learning_rate": 1.2559971356963841e-06, + "loss": 0.0015, + "step": 13773 + }, + { + "epoch": 0.8876715860024489, + "grad_norm": 0.0007561086547375215, + "learning_rate": 1.2552810597923382e-06, + "loss": 0.0, + "step": 13774 + }, + { + "epoch": 0.8877360314493781, + "grad_norm": 0.00021426457144976387, + "learning_rate": 1.2545649838882923e-06, + "loss": 0.0, + "step": 13775 + }, + { + "epoch": 0.8878004768963073, + "grad_norm": 0.0017384501229145124, + "learning_rate": 1.2538489079842464e-06, + "loss": 0.0, + "step": 13776 + }, + { + "epoch": 0.8878649223432364, + "grad_norm": 0.46608758452350224, + "learning_rate": 1.2531328320802005e-06, + "loss": 0.003, + "step": 13777 + }, + { + "epoch": 0.8879293677901656, + "grad_norm": 0.003180161390498234, + "learning_rate": 1.2524167561761546e-06, + "loss": 0.0, + "step": 13778 + }, + { + "epoch": 0.8879938132370948, + "grad_norm": 0.0005952020863398482, + "learning_rate": 1.251700680272109e-06, + "loss": 0.0, + "step": 13779 + }, + { + "epoch": 0.888058258684024, + "grad_norm": 0.0008731909170846803, + "learning_rate": 1.2509846043680632e-06, + "loss": 0.0, + "step": 13780 + }, + { + "epoch": 0.8881227041309532, + "grad_norm": 0.003438820389549209, + "learning_rate": 1.2502685284640172e-06, + "loss": 0.0, + "step": 13781 + }, + { + "epoch": 0.8881871495778824, + "grad_norm": 0.00016075516900061808, + "learning_rate": 1.2495524525599713e-06, + "loss": 0.0, + "step": 13782 + }, + { + "epoch": 0.8882515950248115, + "grad_norm": 0.005191481164772671, + "learning_rate": 1.2488363766559256e-06, + "loss": 0.0, + "step": 13783 + }, + { + "epoch": 0.8883160404717406, + "grad_norm": 0.0009777725279008903, + "learning_rate": 1.2481203007518797e-06, + "loss": 0.0, + "step": 13784 + }, + { + "epoch": 0.8883804859186698, + "grad_norm": 0.18591865636404056, + "learning_rate": 1.247404224847834e-06, + "loss": 0.0002, + "step": 13785 + }, + { + "epoch": 0.888444931365599, + "grad_norm": 0.016696817939353084, + "learning_rate": 1.246688148943788e-06, + "loss": 0.0001, + "step": 13786 + }, + { + "epoch": 0.8885093768125282, + "grad_norm": 0.005742868049428506, + "learning_rate": 1.2459720730397424e-06, + "loss": 0.0001, + "step": 13787 + }, + { + "epoch": 0.8885738222594574, + "grad_norm": 0.0033753578995078114, + "learning_rate": 1.2452559971356965e-06, + "loss": 0.0, + "step": 13788 + }, + { + "epoch": 0.8886382677063865, + "grad_norm": 0.001299590697601426, + "learning_rate": 1.2445399212316508e-06, + "loss": 0.0, + "step": 13789 + }, + { + "epoch": 0.8887027131533157, + "grad_norm": 0.01598992051312426, + "learning_rate": 1.2438238453276049e-06, + "loss": 0.0002, + "step": 13790 + }, + { + "epoch": 0.8887671586002449, + "grad_norm": 0.7328409038968464, + "learning_rate": 1.243107769423559e-06, + "loss": 0.0008, + "step": 13791 + }, + { + "epoch": 0.8888316040471741, + "grad_norm": 0.00030574028092213986, + "learning_rate": 1.2423916935195133e-06, + "loss": 0.0, + "step": 13792 + }, + { + "epoch": 0.8888960494941033, + "grad_norm": 0.0005879745614522931, + "learning_rate": 1.2416756176154673e-06, + "loss": 0.0, + "step": 13793 + }, + { + "epoch": 0.8889604949410325, + "grad_norm": 0.010634271394679871, + "learning_rate": 1.2409595417114214e-06, + "loss": 0.0, + "step": 13794 + }, + { + "epoch": 0.8890249403879615, + "grad_norm": 0.0029319469486168174, + "learning_rate": 1.2402434658073757e-06, + "loss": 0.0, + "step": 13795 + }, + { + "epoch": 0.8890893858348907, + "grad_norm": 0.00044063952636015224, + "learning_rate": 1.2395273899033298e-06, + "loss": 0.0, + "step": 13796 + }, + { + "epoch": 0.8891538312818199, + "grad_norm": 0.03374082286006838, + "learning_rate": 1.238811313999284e-06, + "loss": 0.0, + "step": 13797 + }, + { + "epoch": 0.8892182767287491, + "grad_norm": 0.0031583981479104645, + "learning_rate": 1.2380952380952382e-06, + "loss": 0.0, + "step": 13798 + }, + { + "epoch": 0.8892827221756783, + "grad_norm": 0.0029201196111990247, + "learning_rate": 1.2373791621911923e-06, + "loss": 0.0, + "step": 13799 + }, + { + "epoch": 0.8893471676226075, + "grad_norm": 0.0002927000163405691, + "learning_rate": 1.2366630862871466e-06, + "loss": 0.0, + "step": 13800 + }, + { + "epoch": 0.8894116130695366, + "grad_norm": 0.00024142619848897449, + "learning_rate": 1.2359470103831007e-06, + "loss": 0.0, + "step": 13801 + }, + { + "epoch": 0.8894760585164658, + "grad_norm": 0.7436967061286708, + "learning_rate": 1.235230934479055e-06, + "loss": 0.0079, + "step": 13802 + }, + { + "epoch": 0.889540503963395, + "grad_norm": 0.0003735531843330027, + "learning_rate": 1.234514858575009e-06, + "loss": 0.0, + "step": 13803 + }, + { + "epoch": 0.8896049494103242, + "grad_norm": 0.004015970444965455, + "learning_rate": 1.2337987826709634e-06, + "loss": 0.0, + "step": 13804 + }, + { + "epoch": 0.8896693948572534, + "grad_norm": 0.00976236445414446, + "learning_rate": 1.2330827067669174e-06, + "loss": 0.0001, + "step": 13805 + }, + { + "epoch": 0.8897338403041825, + "grad_norm": 0.2136054593067004, + "learning_rate": 1.2323666308628715e-06, + "loss": 0.0032, + "step": 13806 + }, + { + "epoch": 0.8897982857511116, + "grad_norm": 0.0038618296947997015, + "learning_rate": 1.2316505549588258e-06, + "loss": 0.0, + "step": 13807 + }, + { + "epoch": 0.8898627311980408, + "grad_norm": 0.025397306718503647, + "learning_rate": 1.23093447905478e-06, + "loss": 0.0001, + "step": 13808 + }, + { + "epoch": 0.88992717664497, + "grad_norm": 0.009758540280719333, + "learning_rate": 1.230218403150734e-06, + "loss": 0.0, + "step": 13809 + }, + { + "epoch": 0.8899916220918992, + "grad_norm": 0.00016259548961602632, + "learning_rate": 1.2295023272466883e-06, + "loss": 0.0, + "step": 13810 + }, + { + "epoch": 0.8900560675388284, + "grad_norm": 0.0006507345876399534, + "learning_rate": 1.2287862513426424e-06, + "loss": 0.0, + "step": 13811 + }, + { + "epoch": 0.8901205129857576, + "grad_norm": 0.1792454153276647, + "learning_rate": 1.2280701754385965e-06, + "loss": 0.0003, + "step": 13812 + }, + { + "epoch": 0.8901849584326867, + "grad_norm": 0.00969342986091718, + "learning_rate": 1.2273540995345508e-06, + "loss": 0.0, + "step": 13813 + }, + { + "epoch": 0.8902494038796159, + "grad_norm": 0.0008400377772548953, + "learning_rate": 1.2266380236305049e-06, + "loss": 0.0, + "step": 13814 + }, + { + "epoch": 0.8903138493265451, + "grad_norm": 0.00017475221910045208, + "learning_rate": 1.2259219477264592e-06, + "loss": 0.0, + "step": 13815 + }, + { + "epoch": 0.8903782947734743, + "grad_norm": 0.00044403956214980725, + "learning_rate": 1.2252058718224132e-06, + "loss": 0.0, + "step": 13816 + }, + { + "epoch": 0.8904427402204035, + "grad_norm": 0.0698061899771694, + "learning_rate": 1.2244897959183673e-06, + "loss": 0.0001, + "step": 13817 + }, + { + "epoch": 0.8905071856673326, + "grad_norm": 0.025812009727729505, + "learning_rate": 1.2237737200143216e-06, + "loss": 0.0, + "step": 13818 + }, + { + "epoch": 0.8905716311142617, + "grad_norm": 0.36327571249402313, + "learning_rate": 1.2230576441102757e-06, + "loss": 0.0034, + "step": 13819 + }, + { + "epoch": 0.8906360765611909, + "grad_norm": 0.2055305667179693, + "learning_rate": 1.22234156820623e-06, + "loss": 0.0007, + "step": 13820 + }, + { + "epoch": 0.8907005220081201, + "grad_norm": 0.01355179297404914, + "learning_rate": 1.2216254923021841e-06, + "loss": 0.0, + "step": 13821 + }, + { + "epoch": 0.8907649674550493, + "grad_norm": 0.02007575542711268, + "learning_rate": 1.2209094163981384e-06, + "loss": 0.0002, + "step": 13822 + }, + { + "epoch": 0.8908294129019785, + "grad_norm": 0.07990338578217246, + "learning_rate": 1.2201933404940925e-06, + "loss": 0.0002, + "step": 13823 + }, + { + "epoch": 0.8908938583489077, + "grad_norm": 0.0023076275997369897, + "learning_rate": 1.2194772645900468e-06, + "loss": 0.0, + "step": 13824 + }, + { + "epoch": 0.8909583037958368, + "grad_norm": 0.009210954462485512, + "learning_rate": 1.2187611886860009e-06, + "loss": 0.0, + "step": 13825 + }, + { + "epoch": 0.891022749242766, + "grad_norm": 0.004081220907520122, + "learning_rate": 1.218045112781955e-06, + "loss": 0.0, + "step": 13826 + }, + { + "epoch": 0.8910871946896952, + "grad_norm": 0.008870903584263718, + "learning_rate": 1.2173290368779093e-06, + "loss": 0.0001, + "step": 13827 + }, + { + "epoch": 0.8911516401366244, + "grad_norm": 2.2225392181746675, + "learning_rate": 1.2166129609738633e-06, + "loss": 0.0042, + "step": 13828 + }, + { + "epoch": 0.8912160855835535, + "grad_norm": 7.365705023488865e-05, + "learning_rate": 1.2158968850698174e-06, + "loss": 0.0, + "step": 13829 + }, + { + "epoch": 0.8912805310304827, + "grad_norm": 8.679196143845854e-05, + "learning_rate": 1.2151808091657717e-06, + "loss": 0.0, + "step": 13830 + }, + { + "epoch": 0.8913449764774118, + "grad_norm": 0.0025323594558665664, + "learning_rate": 1.2144647332617258e-06, + "loss": 0.0, + "step": 13831 + }, + { + "epoch": 0.891409421924341, + "grad_norm": 0.12459235478699489, + "learning_rate": 1.21374865735768e-06, + "loss": 0.0001, + "step": 13832 + }, + { + "epoch": 0.8914738673712702, + "grad_norm": 0.011818195017482032, + "learning_rate": 1.2130325814536342e-06, + "loss": 0.0, + "step": 13833 + }, + { + "epoch": 0.8915383128181994, + "grad_norm": 0.14417296540437205, + "learning_rate": 1.2123165055495883e-06, + "loss": 0.0003, + "step": 13834 + }, + { + "epoch": 0.8916027582651286, + "grad_norm": 0.010910567690735996, + "learning_rate": 1.2116004296455424e-06, + "loss": 0.0001, + "step": 13835 + }, + { + "epoch": 0.8916672037120578, + "grad_norm": 7.052803181361057e-05, + "learning_rate": 1.2108843537414967e-06, + "loss": 0.0, + "step": 13836 + }, + { + "epoch": 0.891731649158987, + "grad_norm": 0.17583592412288143, + "learning_rate": 1.2101682778374508e-06, + "loss": 0.0003, + "step": 13837 + }, + { + "epoch": 0.8917960946059161, + "grad_norm": 0.023835514292793722, + "learning_rate": 1.209452201933405e-06, + "loss": 0.0, + "step": 13838 + }, + { + "epoch": 0.8918605400528453, + "grad_norm": 0.0015865956615176357, + "learning_rate": 1.2087361260293592e-06, + "loss": 0.0, + "step": 13839 + }, + { + "epoch": 0.8919249854997744, + "grad_norm": 0.2847122236501794, + "learning_rate": 1.2080200501253135e-06, + "loss": 0.0024, + "step": 13840 + }, + { + "epoch": 0.8919894309467036, + "grad_norm": 0.00039617673521535684, + "learning_rate": 1.2073039742212675e-06, + "loss": 0.0, + "step": 13841 + }, + { + "epoch": 0.8920538763936328, + "grad_norm": 0.0015060300582539626, + "learning_rate": 1.2065878983172218e-06, + "loss": 0.0, + "step": 13842 + }, + { + "epoch": 0.892118321840562, + "grad_norm": 7.080614102933089e-05, + "learning_rate": 1.205871822413176e-06, + "loss": 0.0, + "step": 13843 + }, + { + "epoch": 0.8921827672874911, + "grad_norm": 0.087443366093727, + "learning_rate": 1.20515574650913e-06, + "loss": 0.0004, + "step": 13844 + }, + { + "epoch": 0.8922472127344203, + "grad_norm": 0.0008844615606733612, + "learning_rate": 1.2044396706050843e-06, + "loss": 0.0, + "step": 13845 + }, + { + "epoch": 0.8923116581813495, + "grad_norm": 0.012319154787187002, + "learning_rate": 1.2037235947010384e-06, + "loss": 0.0, + "step": 13846 + }, + { + "epoch": 0.8923761036282787, + "grad_norm": 0.010120784738954119, + "learning_rate": 1.2030075187969925e-06, + "loss": 0.0, + "step": 13847 + }, + { + "epoch": 0.8924405490752079, + "grad_norm": 0.015798697562626713, + "learning_rate": 1.2022914428929468e-06, + "loss": 0.0001, + "step": 13848 + }, + { + "epoch": 0.892504994522137, + "grad_norm": 0.0012742288041931434, + "learning_rate": 1.2015753669889009e-06, + "loss": 0.0, + "step": 13849 + }, + { + "epoch": 0.8925694399690662, + "grad_norm": 0.020043164353019335, + "learning_rate": 1.2008592910848552e-06, + "loss": 0.0, + "step": 13850 + }, + { + "epoch": 0.8926338854159953, + "grad_norm": 0.03441415644112191, + "learning_rate": 1.2001432151808093e-06, + "loss": 0.0, + "step": 13851 + }, + { + "epoch": 0.8926983308629245, + "grad_norm": 0.00010035991690649003, + "learning_rate": 1.1994271392767633e-06, + "loss": 0.0, + "step": 13852 + }, + { + "epoch": 0.8927627763098537, + "grad_norm": 0.0003660068627871594, + "learning_rate": 1.1987110633727176e-06, + "loss": 0.0, + "step": 13853 + }, + { + "epoch": 0.8928272217567829, + "grad_norm": 0.013529318495661974, + "learning_rate": 1.1979949874686717e-06, + "loss": 0.0, + "step": 13854 + }, + { + "epoch": 0.892891667203712, + "grad_norm": 0.00016863076810777927, + "learning_rate": 1.1972789115646258e-06, + "loss": 0.0, + "step": 13855 + }, + { + "epoch": 0.8929561126506412, + "grad_norm": 0.004578985770439536, + "learning_rate": 1.1965628356605801e-06, + "loss": 0.0, + "step": 13856 + }, + { + "epoch": 0.8930205580975704, + "grad_norm": 0.6975465310207425, + "learning_rate": 1.1958467597565342e-06, + "loss": 0.0011, + "step": 13857 + }, + { + "epoch": 0.8930850035444996, + "grad_norm": 0.000828732726699813, + "learning_rate": 1.1951306838524885e-06, + "loss": 0.0, + "step": 13858 + }, + { + "epoch": 0.8931494489914288, + "grad_norm": 0.0008080449898315412, + "learning_rate": 1.1944146079484426e-06, + "loss": 0.0, + "step": 13859 + }, + { + "epoch": 0.893213894438358, + "grad_norm": 0.00027180681347506474, + "learning_rate": 1.1936985320443969e-06, + "loss": 0.0, + "step": 13860 + }, + { + "epoch": 0.8932783398852872, + "grad_norm": 0.018442260354453323, + "learning_rate": 1.192982456140351e-06, + "loss": 0.0001, + "step": 13861 + }, + { + "epoch": 0.8933427853322162, + "grad_norm": 0.4737637997431658, + "learning_rate": 1.1922663802363053e-06, + "loss": 0.0008, + "step": 13862 + }, + { + "epoch": 0.8934072307791454, + "grad_norm": 0.0038302988254236064, + "learning_rate": 1.1915503043322594e-06, + "loss": 0.0, + "step": 13863 + }, + { + "epoch": 0.8934716762260746, + "grad_norm": 0.0001033771169610538, + "learning_rate": 1.1908342284282134e-06, + "loss": 0.0, + "step": 13864 + }, + { + "epoch": 0.8935361216730038, + "grad_norm": 0.00024783354701006966, + "learning_rate": 1.1901181525241677e-06, + "loss": 0.0, + "step": 13865 + }, + { + "epoch": 0.893600567119933, + "grad_norm": 0.004368677639749437, + "learning_rate": 1.1894020766201218e-06, + "loss": 0.0, + "step": 13866 + }, + { + "epoch": 0.8936650125668621, + "grad_norm": 0.0018244756781653027, + "learning_rate": 1.188686000716076e-06, + "loss": 0.0, + "step": 13867 + }, + { + "epoch": 0.8937294580137913, + "grad_norm": 0.00023026815644767366, + "learning_rate": 1.1879699248120302e-06, + "loss": 0.0, + "step": 13868 + }, + { + "epoch": 0.8937939034607205, + "grad_norm": 0.003097842611027994, + "learning_rate": 1.1872538489079843e-06, + "loss": 0.0, + "step": 13869 + }, + { + "epoch": 0.8938583489076497, + "grad_norm": 0.03308828563419499, + "learning_rate": 1.1865377730039384e-06, + "loss": 0.0001, + "step": 13870 + }, + { + "epoch": 0.8939227943545789, + "grad_norm": 0.0006239056166978975, + "learning_rate": 1.1858216970998927e-06, + "loss": 0.0, + "step": 13871 + }, + { + "epoch": 0.8939872398015081, + "grad_norm": 1.2440699958126985e-05, + "learning_rate": 1.1851056211958468e-06, + "loss": 0.0, + "step": 13872 + }, + { + "epoch": 0.8940516852484371, + "grad_norm": 5.976930241618748e-05, + "learning_rate": 1.184389545291801e-06, + "loss": 0.0, + "step": 13873 + }, + { + "epoch": 0.8941161306953663, + "grad_norm": 0.0028886406204160628, + "learning_rate": 1.1836734693877552e-06, + "loss": 0.0, + "step": 13874 + }, + { + "epoch": 0.8941805761422955, + "grad_norm": 0.004454665917691752, + "learning_rate": 1.1829573934837095e-06, + "loss": 0.0, + "step": 13875 + }, + { + "epoch": 0.8942450215892247, + "grad_norm": 0.5158161040383212, + "learning_rate": 1.1822413175796635e-06, + "loss": 0.0029, + "step": 13876 + }, + { + "epoch": 0.8943094670361539, + "grad_norm": 0.02061518097300911, + "learning_rate": 1.1815252416756178e-06, + "loss": 0.0015, + "step": 13877 + }, + { + "epoch": 0.8943739124830831, + "grad_norm": 1.7711709279277926e-05, + "learning_rate": 1.180809165771572e-06, + "loss": 0.0, + "step": 13878 + }, + { + "epoch": 0.8944383579300123, + "grad_norm": 0.18675871185998966, + "learning_rate": 1.180093089867526e-06, + "loss": 0.0006, + "step": 13879 + }, + { + "epoch": 0.8945028033769414, + "grad_norm": 0.00019593830987744068, + "learning_rate": 1.1793770139634803e-06, + "loss": 0.0, + "step": 13880 + }, + { + "epoch": 0.8945672488238706, + "grad_norm": 0.00010785606641666989, + "learning_rate": 1.1786609380594344e-06, + "loss": 0.0, + "step": 13881 + }, + { + "epoch": 0.8946316942707998, + "grad_norm": 0.00014090590342756955, + "learning_rate": 1.1779448621553885e-06, + "loss": 0.0, + "step": 13882 + }, + { + "epoch": 0.894696139717729, + "grad_norm": 0.015730361982695604, + "learning_rate": 1.1772287862513428e-06, + "loss": 0.0001, + "step": 13883 + }, + { + "epoch": 0.8947605851646581, + "grad_norm": 0.165150147923511, + "learning_rate": 1.1765127103472969e-06, + "loss": 0.0006, + "step": 13884 + }, + { + "epoch": 0.8948250306115872, + "grad_norm": 0.001520522969766945, + "learning_rate": 1.175796634443251e-06, + "loss": 0.0, + "step": 13885 + }, + { + "epoch": 0.8948894760585164, + "grad_norm": 0.03972055794668295, + "learning_rate": 1.1750805585392053e-06, + "loss": 0.0001, + "step": 13886 + }, + { + "epoch": 0.8949539215054456, + "grad_norm": 0.0003771624240822517, + "learning_rate": 1.1743644826351593e-06, + "loss": 0.0, + "step": 13887 + }, + { + "epoch": 0.8950183669523748, + "grad_norm": 0.11441051028315816, + "learning_rate": 1.1736484067311136e-06, + "loss": 0.0001, + "step": 13888 + }, + { + "epoch": 0.895082812399304, + "grad_norm": 0.007310869051213783, + "learning_rate": 1.1729323308270677e-06, + "loss": 0.0, + "step": 13889 + }, + { + "epoch": 0.8951472578462332, + "grad_norm": 0.0001329414092162519, + "learning_rate": 1.1722162549230218e-06, + "loss": 0.0, + "step": 13890 + }, + { + "epoch": 0.8952117032931624, + "grad_norm": 0.0014470808931624586, + "learning_rate": 1.1715001790189761e-06, + "loss": 0.0, + "step": 13891 + }, + { + "epoch": 0.8952761487400915, + "grad_norm": 0.0004422742882779398, + "learning_rate": 1.1707841031149302e-06, + "loss": 0.0, + "step": 13892 + }, + { + "epoch": 0.8953405941870207, + "grad_norm": 0.0030581721309221434, + "learning_rate": 1.1700680272108845e-06, + "loss": 0.0, + "step": 13893 + }, + { + "epoch": 0.8954050396339499, + "grad_norm": 2.8451644614638217e-05, + "learning_rate": 1.1693519513068386e-06, + "loss": 0.0, + "step": 13894 + }, + { + "epoch": 0.8954694850808791, + "grad_norm": 0.0007229926952766978, + "learning_rate": 1.1686358754027929e-06, + "loss": 0.0, + "step": 13895 + }, + { + "epoch": 0.8955339305278082, + "grad_norm": 0.0029705727247215812, + "learning_rate": 1.167919799498747e-06, + "loss": 0.0, + "step": 13896 + }, + { + "epoch": 0.8955983759747373, + "grad_norm": 0.019346182931325447, + "learning_rate": 1.1672037235947013e-06, + "loss": 0.0001, + "step": 13897 + }, + { + "epoch": 0.8956628214216665, + "grad_norm": 0.0002895578797201431, + "learning_rate": 1.1664876476906554e-06, + "loss": 0.0, + "step": 13898 + }, + { + "epoch": 0.8957272668685957, + "grad_norm": 0.0006637398536619145, + "learning_rate": 1.1657715717866095e-06, + "loss": 0.0, + "step": 13899 + }, + { + "epoch": 0.8957917123155249, + "grad_norm": 0.00019846648889674596, + "learning_rate": 1.1650554958825638e-06, + "loss": 0.0, + "step": 13900 + }, + { + "epoch": 0.8958561577624541, + "grad_norm": 0.00031634464297703157, + "learning_rate": 1.1643394199785178e-06, + "loss": 0.0, + "step": 13901 + }, + { + "epoch": 0.8959206032093833, + "grad_norm": 0.004311220159345927, + "learning_rate": 1.163623344074472e-06, + "loss": 0.0, + "step": 13902 + }, + { + "epoch": 0.8959850486563125, + "grad_norm": 1.0598536802465255, + "learning_rate": 1.1629072681704262e-06, + "loss": 0.0031, + "step": 13903 + }, + { + "epoch": 0.8960494941032416, + "grad_norm": 0.0008670161043206829, + "learning_rate": 1.1621911922663803e-06, + "loss": 0.0, + "step": 13904 + }, + { + "epoch": 0.8961139395501708, + "grad_norm": 0.0011511655880350254, + "learning_rate": 1.1614751163623344e-06, + "loss": 0.0, + "step": 13905 + }, + { + "epoch": 0.8961783849971, + "grad_norm": 0.003973540317480649, + "learning_rate": 1.1607590404582887e-06, + "loss": 0.0, + "step": 13906 + }, + { + "epoch": 0.8962428304440291, + "grad_norm": 0.14830795080849515, + "learning_rate": 1.1600429645542428e-06, + "loss": 0.0018, + "step": 13907 + }, + { + "epoch": 0.8963072758909583, + "grad_norm": 0.0007323064595173414, + "learning_rate": 1.1593268886501969e-06, + "loss": 0.0, + "step": 13908 + }, + { + "epoch": 0.8963717213378875, + "grad_norm": 0.00023526852555263698, + "learning_rate": 1.1586108127461512e-06, + "loss": 0.0, + "step": 13909 + }, + { + "epoch": 0.8964361667848166, + "grad_norm": 0.002325935800464206, + "learning_rate": 1.1578947368421053e-06, + "loss": 0.0, + "step": 13910 + }, + { + "epoch": 0.8965006122317458, + "grad_norm": 0.4110362286054091, + "learning_rate": 1.1571786609380596e-06, + "loss": 0.0021, + "step": 13911 + }, + { + "epoch": 0.896565057678675, + "grad_norm": 0.021074825949622938, + "learning_rate": 1.1564625850340136e-06, + "loss": 0.0, + "step": 13912 + }, + { + "epoch": 0.8966295031256042, + "grad_norm": 0.0045007280964542195, + "learning_rate": 1.155746509129968e-06, + "loss": 0.0, + "step": 13913 + }, + { + "epoch": 0.8966939485725334, + "grad_norm": 0.0007198015241596646, + "learning_rate": 1.155030433225922e-06, + "loss": 0.0, + "step": 13914 + }, + { + "epoch": 0.8967583940194626, + "grad_norm": 0.019581690119028813, + "learning_rate": 1.1543143573218763e-06, + "loss": 0.0, + "step": 13915 + }, + { + "epoch": 0.8968228394663917, + "grad_norm": 0.00037558322132740917, + "learning_rate": 1.1535982814178304e-06, + "loss": 0.0, + "step": 13916 + }, + { + "epoch": 0.8968872849133209, + "grad_norm": 0.01912842353742819, + "learning_rate": 1.1528822055137845e-06, + "loss": 0.0001, + "step": 13917 + }, + { + "epoch": 0.89695173036025, + "grad_norm": 0.00896517446981582, + "learning_rate": 1.1521661296097388e-06, + "loss": 0.0, + "step": 13918 + }, + { + "epoch": 0.8970161758071792, + "grad_norm": 0.002951599669487491, + "learning_rate": 1.1514500537056929e-06, + "loss": 0.0, + "step": 13919 + }, + { + "epoch": 0.8970806212541084, + "grad_norm": 0.0033833929402165733, + "learning_rate": 1.150733977801647e-06, + "loss": 0.0, + "step": 13920 + }, + { + "epoch": 0.8971450667010376, + "grad_norm": 0.0063744704204274924, + "learning_rate": 1.1500179018976013e-06, + "loss": 0.0, + "step": 13921 + }, + { + "epoch": 0.8972095121479667, + "grad_norm": 0.0013080568903183537, + "learning_rate": 1.1493018259935554e-06, + "loss": 0.0, + "step": 13922 + }, + { + "epoch": 0.8972739575948959, + "grad_norm": 0.005177324218002255, + "learning_rate": 1.1485857500895097e-06, + "loss": 0.0, + "step": 13923 + }, + { + "epoch": 0.8973384030418251, + "grad_norm": 0.00016433341066363132, + "learning_rate": 1.1478696741854637e-06, + "loss": 0.0, + "step": 13924 + }, + { + "epoch": 0.8974028484887543, + "grad_norm": 0.005040815180212994, + "learning_rate": 1.1471535982814178e-06, + "loss": 0.0, + "step": 13925 + }, + { + "epoch": 0.8974672939356835, + "grad_norm": 0.19144006352602805, + "learning_rate": 1.1464375223773721e-06, + "loss": 0.0004, + "step": 13926 + }, + { + "epoch": 0.8975317393826127, + "grad_norm": 0.005144720003540155, + "learning_rate": 1.1457214464733262e-06, + "loss": 0.0, + "step": 13927 + }, + { + "epoch": 0.8975961848295418, + "grad_norm": 0.005620512736102895, + "learning_rate": 1.1450053705692803e-06, + "loss": 0.0, + "step": 13928 + }, + { + "epoch": 0.8976606302764709, + "grad_norm": 0.02314239599743946, + "learning_rate": 1.1442892946652346e-06, + "loss": 0.0001, + "step": 13929 + }, + { + "epoch": 0.8977250757234001, + "grad_norm": 3.417519190465539e-05, + "learning_rate": 1.1435732187611887e-06, + "loss": 0.0, + "step": 13930 + }, + { + "epoch": 0.8977895211703293, + "grad_norm": 0.15925896314589388, + "learning_rate": 1.142857142857143e-06, + "loss": 0.0025, + "step": 13931 + }, + { + "epoch": 0.8978539666172585, + "grad_norm": 0.002229307091129153, + "learning_rate": 1.142141066953097e-06, + "loss": 0.0, + "step": 13932 + }, + { + "epoch": 0.8979184120641877, + "grad_norm": 0.0027272175086743445, + "learning_rate": 1.1414249910490514e-06, + "loss": 0.0, + "step": 13933 + }, + { + "epoch": 0.8979828575111168, + "grad_norm": 0.03168294190225496, + "learning_rate": 1.1407089151450055e-06, + "loss": 0.0, + "step": 13934 + }, + { + "epoch": 0.898047302958046, + "grad_norm": 0.0009569713106729391, + "learning_rate": 1.1399928392409598e-06, + "loss": 0.0, + "step": 13935 + }, + { + "epoch": 0.8981117484049752, + "grad_norm": 0.0008837372743535399, + "learning_rate": 1.1392767633369138e-06, + "loss": 0.0, + "step": 13936 + }, + { + "epoch": 0.8981761938519044, + "grad_norm": 0.00016719703676270603, + "learning_rate": 1.138560687432868e-06, + "loss": 0.0, + "step": 13937 + }, + { + "epoch": 0.8982406392988336, + "grad_norm": 0.0015464604635017866, + "learning_rate": 1.1378446115288222e-06, + "loss": 0.0, + "step": 13938 + }, + { + "epoch": 0.8983050847457628, + "grad_norm": 0.005463581930813619, + "learning_rate": 1.1371285356247763e-06, + "loss": 0.0, + "step": 13939 + }, + { + "epoch": 0.8983695301926918, + "grad_norm": 0.004126363956869999, + "learning_rate": 1.1364124597207304e-06, + "loss": 0.0, + "step": 13940 + }, + { + "epoch": 0.898433975639621, + "grad_norm": 0.00044599899445592567, + "learning_rate": 1.1356963838166847e-06, + "loss": 0.0, + "step": 13941 + }, + { + "epoch": 0.8984984210865502, + "grad_norm": 0.0006417580259393078, + "learning_rate": 1.1349803079126388e-06, + "loss": 0.0, + "step": 13942 + }, + { + "epoch": 0.8985628665334794, + "grad_norm": 0.0028110095254215103, + "learning_rate": 1.1342642320085929e-06, + "loss": 0.0, + "step": 13943 + }, + { + "epoch": 0.8986273119804086, + "grad_norm": 0.0008243069210426579, + "learning_rate": 1.1335481561045472e-06, + "loss": 0.0, + "step": 13944 + }, + { + "epoch": 0.8986917574273378, + "grad_norm": 0.015608134890901078, + "learning_rate": 1.1328320802005013e-06, + "loss": 0.0001, + "step": 13945 + }, + { + "epoch": 0.8987562028742669, + "grad_norm": 0.00016418725029251164, + "learning_rate": 1.1321160042964556e-06, + "loss": 0.0, + "step": 13946 + }, + { + "epoch": 0.8988206483211961, + "grad_norm": 0.0017409146369892678, + "learning_rate": 1.1313999283924096e-06, + "loss": 0.0, + "step": 13947 + }, + { + "epoch": 0.8988850937681253, + "grad_norm": 5.412282878661023e-05, + "learning_rate": 1.130683852488364e-06, + "loss": 0.0, + "step": 13948 + }, + { + "epoch": 0.8989495392150545, + "grad_norm": 0.0004645559859257255, + "learning_rate": 1.129967776584318e-06, + "loss": 0.0, + "step": 13949 + }, + { + "epoch": 0.8990139846619837, + "grad_norm": 0.002543894375187707, + "learning_rate": 1.1292517006802723e-06, + "loss": 0.0, + "step": 13950 + }, + { + "epoch": 0.8990784301089128, + "grad_norm": 0.00021699801752490995, + "learning_rate": 1.1285356247762264e-06, + "loss": 0.0, + "step": 13951 + }, + { + "epoch": 0.8991428755558419, + "grad_norm": 0.00015121543904727036, + "learning_rate": 1.1278195488721805e-06, + "loss": 0.0, + "step": 13952 + }, + { + "epoch": 0.8992073210027711, + "grad_norm": 0.010239032263425, + "learning_rate": 1.1271034729681348e-06, + "loss": 0.0, + "step": 13953 + }, + { + "epoch": 0.8992717664497003, + "grad_norm": 0.018982114515024993, + "learning_rate": 1.1263873970640889e-06, + "loss": 0.0, + "step": 13954 + }, + { + "epoch": 0.8993362118966295, + "grad_norm": 0.0005564598211045715, + "learning_rate": 1.125671321160043e-06, + "loss": 0.0, + "step": 13955 + }, + { + "epoch": 0.8994006573435587, + "grad_norm": 0.0008152348454253998, + "learning_rate": 1.1249552452559973e-06, + "loss": 0.0, + "step": 13956 + }, + { + "epoch": 0.8994651027904879, + "grad_norm": 0.142068940030953, + "learning_rate": 1.1242391693519514e-06, + "loss": 0.0002, + "step": 13957 + }, + { + "epoch": 0.899529548237417, + "grad_norm": 0.017551017927371366, + "learning_rate": 1.1235230934479057e-06, + "loss": 0.0001, + "step": 13958 + }, + { + "epoch": 0.8995939936843462, + "grad_norm": 0.13314984757949708, + "learning_rate": 1.1228070175438598e-06, + "loss": 0.0002, + "step": 13959 + }, + { + "epoch": 0.8996584391312754, + "grad_norm": 0.003780466996403599, + "learning_rate": 1.1220909416398138e-06, + "loss": 0.0, + "step": 13960 + }, + { + "epoch": 0.8997228845782046, + "grad_norm": 3.8431651826954105e-05, + "learning_rate": 1.1213748657357681e-06, + "loss": 0.0, + "step": 13961 + }, + { + "epoch": 0.8997873300251337, + "grad_norm": 0.0007425784620714796, + "learning_rate": 1.1206587898317222e-06, + "loss": 0.0, + "step": 13962 + }, + { + "epoch": 0.8998517754720629, + "grad_norm": 0.0002841375235135086, + "learning_rate": 1.1199427139276763e-06, + "loss": 0.0, + "step": 13963 + }, + { + "epoch": 0.899916220918992, + "grad_norm": 0.0030655431826020446, + "learning_rate": 1.1192266380236306e-06, + "loss": 0.0, + "step": 13964 + }, + { + "epoch": 0.8999806663659212, + "grad_norm": 0.0006356895919749247, + "learning_rate": 1.1185105621195847e-06, + "loss": 0.0, + "step": 13965 + }, + { + "epoch": 0.9000451118128504, + "grad_norm": 0.020512951337609962, + "learning_rate": 1.117794486215539e-06, + "loss": 0.0001, + "step": 13966 + }, + { + "epoch": 0.9001095572597796, + "grad_norm": 0.04083187946678933, + "learning_rate": 1.117078410311493e-06, + "loss": 0.0003, + "step": 13967 + }, + { + "epoch": 0.9001740027067088, + "grad_norm": 0.0001550509178908707, + "learning_rate": 1.1163623344074474e-06, + "loss": 0.0, + "step": 13968 + }, + { + "epoch": 0.900238448153638, + "grad_norm": 0.0004273088370013827, + "learning_rate": 1.1156462585034015e-06, + "loss": 0.0, + "step": 13969 + }, + { + "epoch": 0.9003028936005671, + "grad_norm": 0.0003448674989961782, + "learning_rate": 1.1149301825993558e-06, + "loss": 0.0, + "step": 13970 + }, + { + "epoch": 0.9003673390474963, + "grad_norm": 0.0008768537809582048, + "learning_rate": 1.1142141066953099e-06, + "loss": 0.0, + "step": 13971 + }, + { + "epoch": 0.9004317844944255, + "grad_norm": 5.592022632896799e-05, + "learning_rate": 1.113498030791264e-06, + "loss": 0.0, + "step": 13972 + }, + { + "epoch": 0.9004962299413547, + "grad_norm": 0.6882594855306482, + "learning_rate": 1.1127819548872182e-06, + "loss": 0.0027, + "step": 13973 + }, + { + "epoch": 0.9005606753882838, + "grad_norm": 0.009598311999210143, + "learning_rate": 1.1120658789831723e-06, + "loss": 0.0, + "step": 13974 + }, + { + "epoch": 0.900625120835213, + "grad_norm": 0.02725082065057163, + "learning_rate": 1.1113498030791264e-06, + "loss": 0.0, + "step": 13975 + }, + { + "epoch": 0.9006895662821421, + "grad_norm": 0.00868479275831414, + "learning_rate": 1.1106337271750807e-06, + "loss": 0.0, + "step": 13976 + }, + { + "epoch": 0.9007540117290713, + "grad_norm": 0.007083911823828566, + "learning_rate": 1.1099176512710348e-06, + "loss": 0.0001, + "step": 13977 + }, + { + "epoch": 0.9008184571760005, + "grad_norm": 0.002803608504870315, + "learning_rate": 1.1092015753669889e-06, + "loss": 0.0, + "step": 13978 + }, + { + "epoch": 0.9008829026229297, + "grad_norm": 0.0006188193145472269, + "learning_rate": 1.1084854994629432e-06, + "loss": 0.0, + "step": 13979 + }, + { + "epoch": 0.9009473480698589, + "grad_norm": 0.024428739904172637, + "learning_rate": 1.1077694235588973e-06, + "loss": 0.0002, + "step": 13980 + }, + { + "epoch": 0.9010117935167881, + "grad_norm": 0.0003124864897644293, + "learning_rate": 1.1070533476548514e-06, + "loss": 0.0, + "step": 13981 + }, + { + "epoch": 0.9010762389637172, + "grad_norm": 0.10956738785973091, + "learning_rate": 1.1063372717508057e-06, + "loss": 0.0001, + "step": 13982 + }, + { + "epoch": 0.9011406844106464, + "grad_norm": 0.02323807997631617, + "learning_rate": 1.1056211958467597e-06, + "loss": 0.0, + "step": 13983 + }, + { + "epoch": 0.9012051298575756, + "grad_norm": 7.911839137377666e-05, + "learning_rate": 1.104905119942714e-06, + "loss": 0.0, + "step": 13984 + }, + { + "epoch": 0.9012695753045047, + "grad_norm": 1.153643374127301e-05, + "learning_rate": 1.1041890440386681e-06, + "loss": 0.0, + "step": 13985 + }, + { + "epoch": 0.9013340207514339, + "grad_norm": 0.03433700359423184, + "learning_rate": 1.1034729681346224e-06, + "loss": 0.0, + "step": 13986 + }, + { + "epoch": 0.9013984661983631, + "grad_norm": 0.0001273740311295764, + "learning_rate": 1.1027568922305765e-06, + "loss": 0.0, + "step": 13987 + }, + { + "epoch": 0.9014629116452922, + "grad_norm": 0.00029643403967102475, + "learning_rate": 1.1020408163265308e-06, + "loss": 0.0, + "step": 13988 + }, + { + "epoch": 0.9015273570922214, + "grad_norm": 0.00167223813672472, + "learning_rate": 1.101324740422485e-06, + "loss": 0.0, + "step": 13989 + }, + { + "epoch": 0.9015918025391506, + "grad_norm": 0.049248185514050316, + "learning_rate": 1.100608664518439e-06, + "loss": 0.0, + "step": 13990 + }, + { + "epoch": 0.9016562479860798, + "grad_norm": 0.002362216149147042, + "learning_rate": 1.0998925886143933e-06, + "loss": 0.0, + "step": 13991 + }, + { + "epoch": 0.901720693433009, + "grad_norm": 0.006003178554491009, + "learning_rate": 1.0991765127103474e-06, + "loss": 0.0001, + "step": 13992 + }, + { + "epoch": 0.9017851388799382, + "grad_norm": 0.22652499118620095, + "learning_rate": 1.0984604368063015e-06, + "loss": 0.0002, + "step": 13993 + }, + { + "epoch": 0.9018495843268673, + "grad_norm": 0.02536765650686301, + "learning_rate": 1.0977443609022558e-06, + "loss": 0.0002, + "step": 13994 + }, + { + "epoch": 0.9019140297737965, + "grad_norm": 0.04242994603591199, + "learning_rate": 1.0970282849982098e-06, + "loss": 0.0001, + "step": 13995 + }, + { + "epoch": 0.9019784752207256, + "grad_norm": 0.007177177779843645, + "learning_rate": 1.0963122090941641e-06, + "loss": 0.0, + "step": 13996 + }, + { + "epoch": 0.9020429206676548, + "grad_norm": 0.180822076514872, + "learning_rate": 1.0955961331901182e-06, + "loss": 0.002, + "step": 13997 + }, + { + "epoch": 0.902107366114584, + "grad_norm": 0.0034121207623937933, + "learning_rate": 1.0948800572860723e-06, + "loss": 0.0, + "step": 13998 + }, + { + "epoch": 0.9021718115615132, + "grad_norm": 0.004040904780187532, + "learning_rate": 1.0941639813820266e-06, + "loss": 0.0, + "step": 13999 + }, + { + "epoch": 0.9022362570084423, + "grad_norm": 0.000310484319187804, + "learning_rate": 1.0934479054779807e-06, + "loss": 0.0, + "step": 14000 + }, + { + "epoch": 0.9023007024553715, + "grad_norm": 0.01932705216887116, + "learning_rate": 1.0927318295739348e-06, + "loss": 0.0001, + "step": 14001 + }, + { + "epoch": 0.9023651479023007, + "grad_norm": 0.0008629699703404948, + "learning_rate": 1.092015753669889e-06, + "loss": 0.0, + "step": 14002 + }, + { + "epoch": 0.9024295933492299, + "grad_norm": 0.0003779540630065977, + "learning_rate": 1.0912996777658432e-06, + "loss": 0.0, + "step": 14003 + }, + { + "epoch": 0.9024940387961591, + "grad_norm": 0.004033788037230716, + "learning_rate": 1.0905836018617975e-06, + "loss": 0.0, + "step": 14004 + }, + { + "epoch": 0.9025584842430883, + "grad_norm": 0.00012087015154678254, + "learning_rate": 1.0898675259577516e-06, + "loss": 0.0, + "step": 14005 + }, + { + "epoch": 0.9026229296900175, + "grad_norm": 0.0031684532813541473, + "learning_rate": 1.0891514500537059e-06, + "loss": 0.0, + "step": 14006 + }, + { + "epoch": 0.9026873751369465, + "grad_norm": 0.7884746287853464, + "learning_rate": 1.08843537414966e-06, + "loss": 0.0014, + "step": 14007 + }, + { + "epoch": 0.9027518205838757, + "grad_norm": 2.669712599619591e-05, + "learning_rate": 1.0877192982456142e-06, + "loss": 0.0, + "step": 14008 + }, + { + "epoch": 0.9028162660308049, + "grad_norm": 0.0009535648759989387, + "learning_rate": 1.0870032223415683e-06, + "loss": 0.0, + "step": 14009 + }, + { + "epoch": 0.9028807114777341, + "grad_norm": 0.002809913133161104, + "learning_rate": 1.0862871464375224e-06, + "loss": 0.0, + "step": 14010 + }, + { + "epoch": 0.9029451569246633, + "grad_norm": 0.0012233205019003342, + "learning_rate": 1.0855710705334767e-06, + "loss": 0.0, + "step": 14011 + }, + { + "epoch": 0.9030096023715924, + "grad_norm": 0.005354862179131867, + "learning_rate": 1.0848549946294308e-06, + "loss": 0.0, + "step": 14012 + }, + { + "epoch": 0.9030740478185216, + "grad_norm": 0.0001383134804233665, + "learning_rate": 1.0841389187253849e-06, + "loss": 0.0, + "step": 14013 + }, + { + "epoch": 0.9031384932654508, + "grad_norm": 0.0012410408689786621, + "learning_rate": 1.0834228428213392e-06, + "loss": 0.0, + "step": 14014 + }, + { + "epoch": 0.90320293871238, + "grad_norm": 0.00015145828171334654, + "learning_rate": 1.0827067669172933e-06, + "loss": 0.0, + "step": 14015 + }, + { + "epoch": 0.9032673841593092, + "grad_norm": 0.0029233848246608997, + "learning_rate": 1.0819906910132474e-06, + "loss": 0.0, + "step": 14016 + }, + { + "epoch": 0.9033318296062384, + "grad_norm": 6.762058916841936e-06, + "learning_rate": 1.0812746151092017e-06, + "loss": 0.0, + "step": 14017 + }, + { + "epoch": 0.9033962750531674, + "grad_norm": 0.14695861977500713, + "learning_rate": 1.0805585392051558e-06, + "loss": 0.0018, + "step": 14018 + }, + { + "epoch": 0.9034607205000966, + "grad_norm": 0.0027973229930890954, + "learning_rate": 1.07984246330111e-06, + "loss": 0.0, + "step": 14019 + }, + { + "epoch": 0.9035251659470258, + "grad_norm": 0.004426821608431362, + "learning_rate": 1.0791263873970641e-06, + "loss": 0.0, + "step": 14020 + }, + { + "epoch": 0.903589611393955, + "grad_norm": 0.9546233422531074, + "learning_rate": 1.0784103114930184e-06, + "loss": 0.0039, + "step": 14021 + }, + { + "epoch": 0.9036540568408842, + "grad_norm": 0.0015999861394946403, + "learning_rate": 1.0776942355889725e-06, + "loss": 0.0, + "step": 14022 + }, + { + "epoch": 0.9037185022878134, + "grad_norm": 0.8040267676053892, + "learning_rate": 1.0769781596849268e-06, + "loss": 0.0073, + "step": 14023 + }, + { + "epoch": 0.9037829477347425, + "grad_norm": 0.1496658198797121, + "learning_rate": 1.076262083780881e-06, + "loss": 0.0009, + "step": 14024 + }, + { + "epoch": 0.9038473931816717, + "grad_norm": 0.00028597161914835925, + "learning_rate": 1.075546007876835e-06, + "loss": 0.0, + "step": 14025 + }, + { + "epoch": 0.9039118386286009, + "grad_norm": 0.0010954895000369828, + "learning_rate": 1.0748299319727893e-06, + "loss": 0.0, + "step": 14026 + }, + { + "epoch": 0.9039762840755301, + "grad_norm": 0.00033146946288850447, + "learning_rate": 1.0741138560687434e-06, + "loss": 0.0, + "step": 14027 + }, + { + "epoch": 0.9040407295224593, + "grad_norm": 0.0071175550409161665, + "learning_rate": 1.0733977801646975e-06, + "loss": 0.0, + "step": 14028 + }, + { + "epoch": 0.9041051749693884, + "grad_norm": 0.01369357303133858, + "learning_rate": 1.0726817042606518e-06, + "loss": 0.0, + "step": 14029 + }, + { + "epoch": 0.9041696204163175, + "grad_norm": 0.009619704462363344, + "learning_rate": 1.0719656283566059e-06, + "loss": 0.0, + "step": 14030 + }, + { + "epoch": 0.9042340658632467, + "grad_norm": 0.0007094879850284002, + "learning_rate": 1.0712495524525602e-06, + "loss": 0.0, + "step": 14031 + }, + { + "epoch": 0.9042985113101759, + "grad_norm": 0.013750855588743177, + "learning_rate": 1.0705334765485142e-06, + "loss": 0.0001, + "step": 14032 + }, + { + "epoch": 0.9043629567571051, + "grad_norm": 0.00027600823917411867, + "learning_rate": 1.0698174006444683e-06, + "loss": 0.0, + "step": 14033 + }, + { + "epoch": 0.9044274022040343, + "grad_norm": 0.01503473339922653, + "learning_rate": 1.0691013247404226e-06, + "loss": 0.0, + "step": 14034 + }, + { + "epoch": 0.9044918476509635, + "grad_norm": 2.9273438306325014e-05, + "learning_rate": 1.0683852488363767e-06, + "loss": 0.0, + "step": 14035 + }, + { + "epoch": 0.9045562930978926, + "grad_norm": 0.001225609437622927, + "learning_rate": 1.0676691729323308e-06, + "loss": 0.0, + "step": 14036 + }, + { + "epoch": 0.9046207385448218, + "grad_norm": 0.000198674450168846, + "learning_rate": 1.066953097028285e-06, + "loss": 0.0, + "step": 14037 + }, + { + "epoch": 0.904685183991751, + "grad_norm": 0.0008732649309423083, + "learning_rate": 1.0662370211242392e-06, + "loss": 0.0, + "step": 14038 + }, + { + "epoch": 0.9047496294386802, + "grad_norm": 8.002756539824641e-05, + "learning_rate": 1.0655209452201935e-06, + "loss": 0.0, + "step": 14039 + }, + { + "epoch": 0.9048140748856093, + "grad_norm": 0.01401901919825385, + "learning_rate": 1.0648048693161476e-06, + "loss": 0.0, + "step": 14040 + }, + { + "epoch": 0.9048785203325385, + "grad_norm": 0.014786355131894208, + "learning_rate": 1.0640887934121019e-06, + "loss": 0.0, + "step": 14041 + }, + { + "epoch": 0.9049429657794676, + "grad_norm": 0.004576884247605246, + "learning_rate": 1.063372717508056e-06, + "loss": 0.0, + "step": 14042 + }, + { + "epoch": 0.9050074112263968, + "grad_norm": 0.05576671370867512, + "learning_rate": 1.0626566416040103e-06, + "loss": 0.0016, + "step": 14043 + }, + { + "epoch": 0.905071856673326, + "grad_norm": 0.49583190856322845, + "learning_rate": 1.0619405656999643e-06, + "loss": 0.0008, + "step": 14044 + }, + { + "epoch": 0.9051363021202552, + "grad_norm": 0.003267573240087532, + "learning_rate": 1.0612244897959184e-06, + "loss": 0.0, + "step": 14045 + }, + { + "epoch": 0.9052007475671844, + "grad_norm": 0.000656877303059541, + "learning_rate": 1.0605084138918727e-06, + "loss": 0.0, + "step": 14046 + }, + { + "epoch": 0.9052651930141136, + "grad_norm": 0.39314117668237164, + "learning_rate": 1.0597923379878268e-06, + "loss": 0.0009, + "step": 14047 + }, + { + "epoch": 0.9053296384610428, + "grad_norm": 0.000852336283270037, + "learning_rate": 1.059076262083781e-06, + "loss": 0.0, + "step": 14048 + }, + { + "epoch": 0.9053940839079719, + "grad_norm": 0.00025226461633294403, + "learning_rate": 1.0583601861797352e-06, + "loss": 0.0, + "step": 14049 + }, + { + "epoch": 0.9054585293549011, + "grad_norm": 9.980164147839169e-05, + "learning_rate": 1.0576441102756893e-06, + "loss": 0.0, + "step": 14050 + }, + { + "epoch": 0.9055229748018303, + "grad_norm": 0.0008063489059367108, + "learning_rate": 1.0569280343716434e-06, + "loss": 0.0, + "step": 14051 + }, + { + "epoch": 0.9055874202487594, + "grad_norm": 0.006412100452888351, + "learning_rate": 1.0562119584675977e-06, + "loss": 0.0001, + "step": 14052 + }, + { + "epoch": 0.9056518656956886, + "grad_norm": 0.0010209184892873981, + "learning_rate": 1.0554958825635518e-06, + "loss": 0.0, + "step": 14053 + }, + { + "epoch": 0.9057163111426177, + "grad_norm": 1.1038661163874564, + "learning_rate": 1.0547798066595058e-06, + "loss": 0.0033, + "step": 14054 + }, + { + "epoch": 0.9057807565895469, + "grad_norm": 0.005365736820451497, + "learning_rate": 1.0540637307554601e-06, + "loss": 0.0, + "step": 14055 + }, + { + "epoch": 0.9058452020364761, + "grad_norm": 0.0005929740637682495, + "learning_rate": 1.0533476548514142e-06, + "loss": 0.0, + "step": 14056 + }, + { + "epoch": 0.9059096474834053, + "grad_norm": 0.0018838975597551804, + "learning_rate": 1.0526315789473685e-06, + "loss": 0.0, + "step": 14057 + }, + { + "epoch": 0.9059740929303345, + "grad_norm": 0.00012933569279299136, + "learning_rate": 1.0519155030433226e-06, + "loss": 0.0, + "step": 14058 + }, + { + "epoch": 0.9060385383772637, + "grad_norm": 0.0006639789205916543, + "learning_rate": 1.051199427139277e-06, + "loss": 0.0, + "step": 14059 + }, + { + "epoch": 0.9061029838241929, + "grad_norm": 0.0002403311090842733, + "learning_rate": 1.050483351235231e-06, + "loss": 0.0, + "step": 14060 + }, + { + "epoch": 0.906167429271122, + "grad_norm": 0.0005676848754295945, + "learning_rate": 1.0497672753311853e-06, + "loss": 0.0, + "step": 14061 + }, + { + "epoch": 0.9062318747180512, + "grad_norm": 0.0001549777960638785, + "learning_rate": 1.0490511994271394e-06, + "loss": 0.0, + "step": 14062 + }, + { + "epoch": 0.9062963201649803, + "grad_norm": 0.008584136048833318, + "learning_rate": 1.0483351235230935e-06, + "loss": 0.0001, + "step": 14063 + }, + { + "epoch": 0.9063607656119095, + "grad_norm": 0.05943516934136947, + "learning_rate": 1.0476190476190478e-06, + "loss": 0.0001, + "step": 14064 + }, + { + "epoch": 0.9064252110588387, + "grad_norm": 0.003992473211187499, + "learning_rate": 1.0469029717150019e-06, + "loss": 0.0, + "step": 14065 + }, + { + "epoch": 0.9064896565057678, + "grad_norm": 0.0011531323035489564, + "learning_rate": 1.0461868958109562e-06, + "loss": 0.0, + "step": 14066 + }, + { + "epoch": 0.906554101952697, + "grad_norm": 0.15580797256069312, + "learning_rate": 1.0454708199069102e-06, + "loss": 0.0003, + "step": 14067 + }, + { + "epoch": 0.9066185473996262, + "grad_norm": 0.000671488923623265, + "learning_rate": 1.0447547440028643e-06, + "loss": 0.0, + "step": 14068 + }, + { + "epoch": 0.9066829928465554, + "grad_norm": 0.144156905549645, + "learning_rate": 1.0440386680988186e-06, + "loss": 0.0005, + "step": 14069 + }, + { + "epoch": 0.9067474382934846, + "grad_norm": 0.018324477936150364, + "learning_rate": 1.0433225921947727e-06, + "loss": 0.0001, + "step": 14070 + }, + { + "epoch": 0.9068118837404138, + "grad_norm": 0.004034000423565145, + "learning_rate": 1.0426065162907268e-06, + "loss": 0.0, + "step": 14071 + }, + { + "epoch": 0.906876329187343, + "grad_norm": 0.0016369275846041295, + "learning_rate": 1.041890440386681e-06, + "loss": 0.0, + "step": 14072 + }, + { + "epoch": 0.9069407746342721, + "grad_norm": 0.02112292707345774, + "learning_rate": 1.0411743644826352e-06, + "loss": 0.0001, + "step": 14073 + }, + { + "epoch": 0.9070052200812012, + "grad_norm": 0.0006955568561556969, + "learning_rate": 1.0404582885785893e-06, + "loss": 0.0, + "step": 14074 + }, + { + "epoch": 0.9070696655281304, + "grad_norm": 0.4678265376205017, + "learning_rate": 1.0397422126745436e-06, + "loss": 0.0008, + "step": 14075 + }, + { + "epoch": 0.9071341109750596, + "grad_norm": 1.7344620141548626, + "learning_rate": 1.0390261367704977e-06, + "loss": 0.0136, + "step": 14076 + }, + { + "epoch": 0.9071985564219888, + "grad_norm": 0.020784053798210578, + "learning_rate": 1.038310060866452e-06, + "loss": 0.0002, + "step": 14077 + }, + { + "epoch": 0.907263001868918, + "grad_norm": 0.3764581147815863, + "learning_rate": 1.037593984962406e-06, + "loss": 0.0031, + "step": 14078 + }, + { + "epoch": 0.9073274473158471, + "grad_norm": 0.015363784519826553, + "learning_rate": 1.0368779090583603e-06, + "loss": 0.0, + "step": 14079 + }, + { + "epoch": 0.9073918927627763, + "grad_norm": 9.048272690266486e-05, + "learning_rate": 1.0361618331543144e-06, + "loss": 0.0, + "step": 14080 + }, + { + "epoch": 0.9074563382097055, + "grad_norm": 0.4679583858813428, + "learning_rate": 1.0354457572502687e-06, + "loss": 0.0033, + "step": 14081 + }, + { + "epoch": 0.9075207836566347, + "grad_norm": 0.6642780402944453, + "learning_rate": 1.0347296813462228e-06, + "loss": 0.0019, + "step": 14082 + }, + { + "epoch": 0.9075852291035639, + "grad_norm": 0.0001042014709199235, + "learning_rate": 1.034013605442177e-06, + "loss": 0.0, + "step": 14083 + }, + { + "epoch": 0.9076496745504931, + "grad_norm": 0.001710960297502053, + "learning_rate": 1.0332975295381312e-06, + "loss": 0.0, + "step": 14084 + }, + { + "epoch": 0.9077141199974221, + "grad_norm": 0.008412103343040999, + "learning_rate": 1.0325814536340853e-06, + "loss": 0.0, + "step": 14085 + }, + { + "epoch": 0.9077785654443513, + "grad_norm": 0.0004960663945197895, + "learning_rate": 1.0318653777300394e-06, + "loss": 0.0, + "step": 14086 + }, + { + "epoch": 0.9078430108912805, + "grad_norm": 0.0005570506348725557, + "learning_rate": 1.0311493018259937e-06, + "loss": 0.0, + "step": 14087 + }, + { + "epoch": 0.9079074563382097, + "grad_norm": 6.0842231279498473e-05, + "learning_rate": 1.0304332259219478e-06, + "loss": 0.0, + "step": 14088 + }, + { + "epoch": 0.9079719017851389, + "grad_norm": 0.005005108687200161, + "learning_rate": 1.0297171500179019e-06, + "loss": 0.0, + "step": 14089 + }, + { + "epoch": 0.908036347232068, + "grad_norm": 0.006442428525242369, + "learning_rate": 1.0290010741138562e-06, + "loss": 0.0, + "step": 14090 + }, + { + "epoch": 0.9081007926789972, + "grad_norm": 0.1994985317478551, + "learning_rate": 1.0282849982098102e-06, + "loss": 0.0003, + "step": 14091 + }, + { + "epoch": 0.9081652381259264, + "grad_norm": 0.000303360637107461, + "learning_rate": 1.0275689223057645e-06, + "loss": 0.0, + "step": 14092 + }, + { + "epoch": 0.9082296835728556, + "grad_norm": 0.08736249878582208, + "learning_rate": 1.0268528464017186e-06, + "loss": 0.0003, + "step": 14093 + }, + { + "epoch": 0.9082941290197848, + "grad_norm": 0.0004485341202632126, + "learning_rate": 1.026136770497673e-06, + "loss": 0.0, + "step": 14094 + }, + { + "epoch": 0.908358574466714, + "grad_norm": 0.008703583972100429, + "learning_rate": 1.025420694593627e-06, + "loss": 0.0, + "step": 14095 + }, + { + "epoch": 0.908423019913643, + "grad_norm": 0.0042350023156911215, + "learning_rate": 1.0247046186895813e-06, + "loss": 0.0, + "step": 14096 + }, + { + "epoch": 0.9084874653605722, + "grad_norm": 0.0005834744678831924, + "learning_rate": 1.0239885427855354e-06, + "loss": 0.0, + "step": 14097 + }, + { + "epoch": 0.9085519108075014, + "grad_norm": 0.00024838361865375634, + "learning_rate": 1.0232724668814895e-06, + "loss": 0.0, + "step": 14098 + }, + { + "epoch": 0.9086163562544306, + "grad_norm": 0.0010090785760577745, + "learning_rate": 1.0225563909774438e-06, + "loss": 0.0, + "step": 14099 + }, + { + "epoch": 0.9086808017013598, + "grad_norm": 0.00846511578975414, + "learning_rate": 1.0218403150733979e-06, + "loss": 0.0, + "step": 14100 + }, + { + "epoch": 0.908745247148289, + "grad_norm": 0.003092826092203375, + "learning_rate": 1.021124239169352e-06, + "loss": 0.0, + "step": 14101 + }, + { + "epoch": 0.9088096925952182, + "grad_norm": 0.004059035298200513, + "learning_rate": 1.0204081632653063e-06, + "loss": 0.0, + "step": 14102 + }, + { + "epoch": 0.9088741380421473, + "grad_norm": 0.031057954187540666, + "learning_rate": 1.0196920873612603e-06, + "loss": 0.0002, + "step": 14103 + }, + { + "epoch": 0.9089385834890765, + "grad_norm": 0.007398061002103076, + "learning_rate": 1.0189760114572146e-06, + "loss": 0.0, + "step": 14104 + }, + { + "epoch": 0.9090030289360057, + "grad_norm": 0.0037488336929485094, + "learning_rate": 1.0182599355531687e-06, + "loss": 0.0, + "step": 14105 + }, + { + "epoch": 0.9090674743829349, + "grad_norm": 0.0013617477461019352, + "learning_rate": 1.0175438596491228e-06, + "loss": 0.0, + "step": 14106 + }, + { + "epoch": 0.909131919829864, + "grad_norm": 0.0011576876094899014, + "learning_rate": 1.0168277837450771e-06, + "loss": 0.0, + "step": 14107 + }, + { + "epoch": 0.9091963652767932, + "grad_norm": 0.012976751798772565, + "learning_rate": 1.0161117078410312e-06, + "loss": 0.0, + "step": 14108 + }, + { + "epoch": 0.9092608107237223, + "grad_norm": 0.0016032709037401551, + "learning_rate": 1.0153956319369853e-06, + "loss": 0.0, + "step": 14109 + }, + { + "epoch": 0.9093252561706515, + "grad_norm": 0.0405984077396907, + "learning_rate": 1.0146795560329396e-06, + "loss": 0.0, + "step": 14110 + }, + { + "epoch": 0.9093897016175807, + "grad_norm": 9.815901939114581e-05, + "learning_rate": 1.0139634801288937e-06, + "loss": 0.0, + "step": 14111 + }, + { + "epoch": 0.9094541470645099, + "grad_norm": 0.0007421945859217288, + "learning_rate": 1.013247404224848e-06, + "loss": 0.0, + "step": 14112 + }, + { + "epoch": 0.9095185925114391, + "grad_norm": 0.0044882259964135664, + "learning_rate": 1.012531328320802e-06, + "loss": 0.0, + "step": 14113 + }, + { + "epoch": 0.9095830379583683, + "grad_norm": 0.37177406992972634, + "learning_rate": 1.0118152524167564e-06, + "loss": 0.0012, + "step": 14114 + }, + { + "epoch": 0.9096474834052974, + "grad_norm": 0.0072924378929102186, + "learning_rate": 1.0110991765127104e-06, + "loss": 0.0, + "step": 14115 + }, + { + "epoch": 0.9097119288522266, + "grad_norm": 0.0006270749704708487, + "learning_rate": 1.0103831006086647e-06, + "loss": 0.0, + "step": 14116 + }, + { + "epoch": 0.9097763742991558, + "grad_norm": 0.006042327240725415, + "learning_rate": 1.0096670247046188e-06, + "loss": 0.0, + "step": 14117 + }, + { + "epoch": 0.909840819746085, + "grad_norm": 0.00024216543431723143, + "learning_rate": 1.008950948800573e-06, + "loss": 0.0, + "step": 14118 + }, + { + "epoch": 0.9099052651930141, + "grad_norm": 0.11918004743883662, + "learning_rate": 1.0082348728965272e-06, + "loss": 0.0003, + "step": 14119 + }, + { + "epoch": 0.9099697106399433, + "grad_norm": 0.007700771472075251, + "learning_rate": 1.0075187969924813e-06, + "loss": 0.0, + "step": 14120 + }, + { + "epoch": 0.9100341560868724, + "grad_norm": 0.001777663954785876, + "learning_rate": 1.0068027210884354e-06, + "loss": 0.0, + "step": 14121 + }, + { + "epoch": 0.9100986015338016, + "grad_norm": 0.004718985009600268, + "learning_rate": 1.0060866451843897e-06, + "loss": 0.0, + "step": 14122 + }, + { + "epoch": 0.9101630469807308, + "grad_norm": 0.006394187944082488, + "learning_rate": 1.0053705692803438e-06, + "loss": 0.0, + "step": 14123 + }, + { + "epoch": 0.91022749242766, + "grad_norm": 0.04192140346511436, + "learning_rate": 1.0046544933762979e-06, + "loss": 0.0001, + "step": 14124 + }, + { + "epoch": 0.9102919378745892, + "grad_norm": 0.020396504587853657, + "learning_rate": 1.0039384174722522e-06, + "loss": 0.0001, + "step": 14125 + }, + { + "epoch": 0.9103563833215184, + "grad_norm": 9.335500306033411e-05, + "learning_rate": 1.0032223415682062e-06, + "loss": 0.0, + "step": 14126 + }, + { + "epoch": 0.9104208287684475, + "grad_norm": 0.001052229329982989, + "learning_rate": 1.0025062656641603e-06, + "loss": 0.0, + "step": 14127 + }, + { + "epoch": 0.9104852742153767, + "grad_norm": 0.00044013625982259485, + "learning_rate": 1.0017901897601146e-06, + "loss": 0.0, + "step": 14128 + }, + { + "epoch": 0.9105497196623059, + "grad_norm": 0.07613235756810091, + "learning_rate": 1.0010741138560687e-06, + "loss": 0.0008, + "step": 14129 + }, + { + "epoch": 0.910614165109235, + "grad_norm": 0.0015174921341569256, + "learning_rate": 1.000358037952023e-06, + "loss": 0.0, + "step": 14130 + }, + { + "epoch": 0.9106786105561642, + "grad_norm": 8.869528142380125e-05, + "learning_rate": 9.99641962047977e-07, + "loss": 0.0, + "step": 14131 + }, + { + "epoch": 0.9107430560030934, + "grad_norm": 0.0018528149454641407, + "learning_rate": 9.989258861439314e-07, + "loss": 0.0, + "step": 14132 + }, + { + "epoch": 0.9108075014500225, + "grad_norm": 0.0013100437281134071, + "learning_rate": 9.982098102398855e-07, + "loss": 0.0, + "step": 14133 + }, + { + "epoch": 0.9108719468969517, + "grad_norm": 0.0038834947242926132, + "learning_rate": 9.974937343358398e-07, + "loss": 0.0, + "step": 14134 + }, + { + "epoch": 0.9109363923438809, + "grad_norm": 0.003056712747435566, + "learning_rate": 9.967776584317939e-07, + "loss": 0.0, + "step": 14135 + }, + { + "epoch": 0.9110008377908101, + "grad_norm": 0.022727158280140002, + "learning_rate": 9.96061582527748e-07, + "loss": 0.0001, + "step": 14136 + }, + { + "epoch": 0.9110652832377393, + "grad_norm": 0.000252373752242693, + "learning_rate": 9.953455066237023e-07, + "loss": 0.0, + "step": 14137 + }, + { + "epoch": 0.9111297286846685, + "grad_norm": 0.005239760950976765, + "learning_rate": 9.946294307196563e-07, + "loss": 0.0, + "step": 14138 + }, + { + "epoch": 0.9111941741315976, + "grad_norm": 0.07205721082201468, + "learning_rate": 9.939133548156106e-07, + "loss": 0.0001, + "step": 14139 + }, + { + "epoch": 0.9112586195785268, + "grad_norm": 0.001012468350364244, + "learning_rate": 9.931972789115647e-07, + "loss": 0.0, + "step": 14140 + }, + { + "epoch": 0.9113230650254559, + "grad_norm": 0.020371513483527344, + "learning_rate": 9.924812030075188e-07, + "loss": 0.0001, + "step": 14141 + }, + { + "epoch": 0.9113875104723851, + "grad_norm": 0.5710255593894316, + "learning_rate": 9.917651271034731e-07, + "loss": 0.0038, + "step": 14142 + }, + { + "epoch": 0.9114519559193143, + "grad_norm": 0.003678925592998401, + "learning_rate": 9.910490511994272e-07, + "loss": 0.0, + "step": 14143 + }, + { + "epoch": 0.9115164013662435, + "grad_norm": 0.0010209924279294914, + "learning_rate": 9.903329752953813e-07, + "loss": 0.0, + "step": 14144 + }, + { + "epoch": 0.9115808468131726, + "grad_norm": 0.002210604517047804, + "learning_rate": 9.896168993913356e-07, + "loss": 0.0, + "step": 14145 + }, + { + "epoch": 0.9116452922601018, + "grad_norm": 0.004980856990466322, + "learning_rate": 9.889008234872897e-07, + "loss": 0.0, + "step": 14146 + }, + { + "epoch": 0.911709737707031, + "grad_norm": 0.005197732728780156, + "learning_rate": 9.881847475832438e-07, + "loss": 0.0, + "step": 14147 + }, + { + "epoch": 0.9117741831539602, + "grad_norm": 0.0006288855310315486, + "learning_rate": 9.87468671679198e-07, + "loss": 0.0, + "step": 14148 + }, + { + "epoch": 0.9118386286008894, + "grad_norm": 0.2870331747440102, + "learning_rate": 9.867525957751522e-07, + "loss": 0.0006, + "step": 14149 + }, + { + "epoch": 0.9119030740478186, + "grad_norm": 0.009226923949112864, + "learning_rate": 9.860365198711065e-07, + "loss": 0.0, + "step": 14150 + }, + { + "epoch": 0.9119675194947477, + "grad_norm": 0.00021664613314001927, + "learning_rate": 9.853204439670605e-07, + "loss": 0.0, + "step": 14151 + }, + { + "epoch": 0.9120319649416768, + "grad_norm": 0.07574482256106105, + "learning_rate": 9.846043680630148e-07, + "loss": 0.0001, + "step": 14152 + }, + { + "epoch": 0.912096410388606, + "grad_norm": 0.0005642558123564372, + "learning_rate": 9.83888292158969e-07, + "loss": 0.0, + "step": 14153 + }, + { + "epoch": 0.9121608558355352, + "grad_norm": 0.00046067530354494283, + "learning_rate": 9.831722162549232e-07, + "loss": 0.0, + "step": 14154 + }, + { + "epoch": 0.9122253012824644, + "grad_norm": 0.0019799729249998076, + "learning_rate": 9.824561403508773e-07, + "loss": 0.0, + "step": 14155 + }, + { + "epoch": 0.9122897467293936, + "grad_norm": 0.0037062705460312938, + "learning_rate": 9.817400644468314e-07, + "loss": 0.0, + "step": 14156 + }, + { + "epoch": 0.9123541921763227, + "grad_norm": 0.21694178506613537, + "learning_rate": 9.810239885427857e-07, + "loss": 0.0012, + "step": 14157 + }, + { + "epoch": 0.9124186376232519, + "grad_norm": 2.7329962563106694e-05, + "learning_rate": 9.803079126387398e-07, + "loss": 0.0, + "step": 14158 + }, + { + "epoch": 0.9124830830701811, + "grad_norm": 0.017096042360411064, + "learning_rate": 9.795918367346939e-07, + "loss": 0.0001, + "step": 14159 + }, + { + "epoch": 0.9125475285171103, + "grad_norm": 0.0022227801850019, + "learning_rate": 9.788757608306482e-07, + "loss": 0.0, + "step": 14160 + }, + { + "epoch": 0.9126119739640395, + "grad_norm": 0.00022310574290482324, + "learning_rate": 9.781596849266023e-07, + "loss": 0.0, + "step": 14161 + }, + { + "epoch": 0.9126764194109687, + "grad_norm": 0.0001928504205817919, + "learning_rate": 9.774436090225563e-07, + "loss": 0.0, + "step": 14162 + }, + { + "epoch": 0.9127408648578977, + "grad_norm": 0.0003620854373529105, + "learning_rate": 9.767275331185106e-07, + "loss": 0.0, + "step": 14163 + }, + { + "epoch": 0.9128053103048269, + "grad_norm": 0.0003246124005665155, + "learning_rate": 9.760114572144647e-07, + "loss": 0.0, + "step": 14164 + }, + { + "epoch": 0.9128697557517561, + "grad_norm": 0.00037746760821441995, + "learning_rate": 9.75295381310419e-07, + "loss": 0.0, + "step": 14165 + }, + { + "epoch": 0.9129342011986853, + "grad_norm": 0.0021465206942596557, + "learning_rate": 9.745793054063731e-07, + "loss": 0.0, + "step": 14166 + }, + { + "epoch": 0.9129986466456145, + "grad_norm": 0.009832061962162885, + "learning_rate": 9.738632295023274e-07, + "loss": 0.0, + "step": 14167 + }, + { + "epoch": 0.9130630920925437, + "grad_norm": 0.011101602739312185, + "learning_rate": 9.731471535982815e-07, + "loss": 0.0001, + "step": 14168 + }, + { + "epoch": 0.9131275375394728, + "grad_norm": 0.002187157704356501, + "learning_rate": 9.724310776942358e-07, + "loss": 0.0, + "step": 14169 + }, + { + "epoch": 0.913191982986402, + "grad_norm": 0.01536422494149394, + "learning_rate": 9.717150017901899e-07, + "loss": 0.0001, + "step": 14170 + }, + { + "epoch": 0.9132564284333312, + "grad_norm": 0.14876547142920768, + "learning_rate": 9.70998925886144e-07, + "loss": 0.0003, + "step": 14171 + }, + { + "epoch": 0.9133208738802604, + "grad_norm": 0.0063150968108142535, + "learning_rate": 9.702828499820983e-07, + "loss": 0.0001, + "step": 14172 + }, + { + "epoch": 0.9133853193271896, + "grad_norm": 0.0011484692990875677, + "learning_rate": 9.695667740780524e-07, + "loss": 0.0, + "step": 14173 + }, + { + "epoch": 0.9134497647741187, + "grad_norm": 0.00021408494086622607, + "learning_rate": 9.688506981740067e-07, + "loss": 0.0, + "step": 14174 + }, + { + "epoch": 0.9135142102210478, + "grad_norm": 0.005474376006443933, + "learning_rate": 9.681346222699607e-07, + "loss": 0.0, + "step": 14175 + }, + { + "epoch": 0.913578655667977, + "grad_norm": 0.0009916894050175195, + "learning_rate": 9.674185463659148e-07, + "loss": 0.0, + "step": 14176 + }, + { + "epoch": 0.9136431011149062, + "grad_norm": 0.004285597191006188, + "learning_rate": 9.667024704618691e-07, + "loss": 0.0, + "step": 14177 + }, + { + "epoch": 0.9137075465618354, + "grad_norm": 6.016757867062615e-05, + "learning_rate": 9.659863945578232e-07, + "loss": 0.0, + "step": 14178 + }, + { + "epoch": 0.9137719920087646, + "grad_norm": 0.00013869091499201224, + "learning_rate": 9.652703186537773e-07, + "loss": 0.0, + "step": 14179 + }, + { + "epoch": 0.9138364374556938, + "grad_norm": 0.008156751396045343, + "learning_rate": 9.645542427497316e-07, + "loss": 0.0001, + "step": 14180 + }, + { + "epoch": 0.913900882902623, + "grad_norm": 0.0010459882012239957, + "learning_rate": 9.638381668456857e-07, + "loss": 0.0, + "step": 14181 + }, + { + "epoch": 0.9139653283495521, + "grad_norm": 0.05334048614408453, + "learning_rate": 9.631220909416398e-07, + "loss": 0.0001, + "step": 14182 + }, + { + "epoch": 0.9140297737964813, + "grad_norm": 0.0015840045943365014, + "learning_rate": 9.62406015037594e-07, + "loss": 0.0, + "step": 14183 + }, + { + "epoch": 0.9140942192434105, + "grad_norm": 0.7855814830833372, + "learning_rate": 9.616899391335482e-07, + "loss": 0.0021, + "step": 14184 + }, + { + "epoch": 0.9141586646903396, + "grad_norm": 0.00020580164141785254, + "learning_rate": 9.609738632295025e-07, + "loss": 0.0, + "step": 14185 + }, + { + "epoch": 0.9142231101372688, + "grad_norm": 0.0011450973071098205, + "learning_rate": 9.602577873254565e-07, + "loss": 0.0, + "step": 14186 + }, + { + "epoch": 0.9142875555841979, + "grad_norm": 0.008593922428411569, + "learning_rate": 9.595417114214108e-07, + "loss": 0.0, + "step": 14187 + }, + { + "epoch": 0.9143520010311271, + "grad_norm": 0.0016867673517670938, + "learning_rate": 9.58825635517365e-07, + "loss": 0.0, + "step": 14188 + }, + { + "epoch": 0.9144164464780563, + "grad_norm": 0.0015443399622426112, + "learning_rate": 9.581095596133192e-07, + "loss": 0.0, + "step": 14189 + }, + { + "epoch": 0.9144808919249855, + "grad_norm": 0.02063164477752474, + "learning_rate": 9.573934837092733e-07, + "loss": 0.0001, + "step": 14190 + }, + { + "epoch": 0.9145453373719147, + "grad_norm": 0.0024049854891011677, + "learning_rate": 9.566774078052274e-07, + "loss": 0.0, + "step": 14191 + }, + { + "epoch": 0.9146097828188439, + "grad_norm": 0.01716273295228107, + "learning_rate": 9.559613319011817e-07, + "loss": 0.0002, + "step": 14192 + }, + { + "epoch": 0.914674228265773, + "grad_norm": 0.12328758990372396, + "learning_rate": 9.552452559971358e-07, + "loss": 0.0012, + "step": 14193 + }, + { + "epoch": 0.9147386737127022, + "grad_norm": 0.04222494037999026, + "learning_rate": 9.545291800930899e-07, + "loss": 0.0, + "step": 14194 + }, + { + "epoch": 0.9148031191596314, + "grad_norm": 0.0006967093892120433, + "learning_rate": 9.538131041890442e-07, + "loss": 0.0, + "step": 14195 + }, + { + "epoch": 0.9148675646065606, + "grad_norm": 0.10183633970320935, + "learning_rate": 9.530970282849983e-07, + "loss": 0.0014, + "step": 14196 + }, + { + "epoch": 0.9149320100534897, + "grad_norm": 0.04194055060462203, + "learning_rate": 9.523809523809525e-07, + "loss": 0.0017, + "step": 14197 + }, + { + "epoch": 0.9149964555004189, + "grad_norm": 0.2690406148430233, + "learning_rate": 9.516648764769066e-07, + "loss": 0.0005, + "step": 14198 + }, + { + "epoch": 0.915060900947348, + "grad_norm": 0.00021367249271819308, + "learning_rate": 9.509488005728608e-07, + "loss": 0.0, + "step": 14199 + }, + { + "epoch": 0.9151253463942772, + "grad_norm": 0.0014708756146450935, + "learning_rate": 9.502327246688149e-07, + "loss": 0.0, + "step": 14200 + }, + { + "epoch": 0.9151897918412064, + "grad_norm": 0.0002496282942443766, + "learning_rate": 9.495166487647692e-07, + "loss": 0.0, + "step": 14201 + }, + { + "epoch": 0.9152542372881356, + "grad_norm": 0.016152459797736374, + "learning_rate": 9.488005728607233e-07, + "loss": 0.0, + "step": 14202 + }, + { + "epoch": 0.9153186827350648, + "grad_norm": 0.010979395241874716, + "learning_rate": 9.480844969566774e-07, + "loss": 0.0, + "step": 14203 + }, + { + "epoch": 0.915383128181994, + "grad_norm": 0.0009375031145412428, + "learning_rate": 9.473684210526317e-07, + "loss": 0.0, + "step": 14204 + }, + { + "epoch": 0.9154475736289232, + "grad_norm": 0.01797304484362447, + "learning_rate": 9.466523451485858e-07, + "loss": 0.0001, + "step": 14205 + }, + { + "epoch": 0.9155120190758523, + "grad_norm": 0.0002960165775091309, + "learning_rate": 9.4593626924454e-07, + "loss": 0.0, + "step": 14206 + }, + { + "epoch": 0.9155764645227815, + "grad_norm": 0.00013216728878975067, + "learning_rate": 9.452201933404942e-07, + "loss": 0.0, + "step": 14207 + }, + { + "epoch": 0.9156409099697106, + "grad_norm": 0.0016359036208272025, + "learning_rate": 9.445041174364484e-07, + "loss": 0.0, + "step": 14208 + }, + { + "epoch": 0.9157053554166398, + "grad_norm": 0.002744799936497066, + "learning_rate": 9.437880415324025e-07, + "loss": 0.0, + "step": 14209 + }, + { + "epoch": 0.915769800863569, + "grad_norm": 0.00023160867892980746, + "learning_rate": 9.430719656283567e-07, + "loss": 0.0, + "step": 14210 + }, + { + "epoch": 0.9158342463104981, + "grad_norm": 0.009411741817506578, + "learning_rate": 9.423558897243108e-07, + "loss": 0.0, + "step": 14211 + }, + { + "epoch": 0.9158986917574273, + "grad_norm": 0.001378941707229188, + "learning_rate": 9.416398138202651e-07, + "loss": 0.0, + "step": 14212 + }, + { + "epoch": 0.9159631372043565, + "grad_norm": 0.025205632894264283, + "learning_rate": 9.409237379162192e-07, + "loss": 0.0001, + "step": 14213 + }, + { + "epoch": 0.9160275826512857, + "grad_norm": 0.39096405573360754, + "learning_rate": 9.402076620121733e-07, + "loss": 0.001, + "step": 14214 + }, + { + "epoch": 0.9160920280982149, + "grad_norm": 0.00014380302746555912, + "learning_rate": 9.394915861081276e-07, + "loss": 0.0, + "step": 14215 + }, + { + "epoch": 0.9161564735451441, + "grad_norm": 0.002670300688032658, + "learning_rate": 9.387755102040817e-07, + "loss": 0.0, + "step": 14216 + }, + { + "epoch": 0.9162209189920733, + "grad_norm": 0.11628580086246942, + "learning_rate": 9.380594343000359e-07, + "loss": 0.0004, + "step": 14217 + }, + { + "epoch": 0.9162853644390024, + "grad_norm": 0.0002583038235821294, + "learning_rate": 9.373433583959901e-07, + "loss": 0.0, + "step": 14218 + }, + { + "epoch": 0.9163498098859315, + "grad_norm": 0.022131033830474392, + "learning_rate": 9.366272824919443e-07, + "loss": 0.0, + "step": 14219 + }, + { + "epoch": 0.9164142553328607, + "grad_norm": 0.0015146300558742055, + "learning_rate": 9.359112065878984e-07, + "loss": 0.0, + "step": 14220 + }, + { + "epoch": 0.9164787007797899, + "grad_norm": 4.8640373473175795e-05, + "learning_rate": 9.351951306838527e-07, + "loss": 0.0, + "step": 14221 + }, + { + "epoch": 0.9165431462267191, + "grad_norm": 0.4697828516869718, + "learning_rate": 9.344790547798067e-07, + "loss": 0.003, + "step": 14222 + }, + { + "epoch": 0.9166075916736482, + "grad_norm": 0.057128720813026404, + "learning_rate": 9.337629788757608e-07, + "loss": 0.0016, + "step": 14223 + }, + { + "epoch": 0.9166720371205774, + "grad_norm": 0.03367085797727787, + "learning_rate": 9.330469029717151e-07, + "loss": 0.0001, + "step": 14224 + }, + { + "epoch": 0.9167364825675066, + "grad_norm": 0.0030701623211639023, + "learning_rate": 9.323308270676692e-07, + "loss": 0.0, + "step": 14225 + }, + { + "epoch": 0.9168009280144358, + "grad_norm": 0.0019137412471503506, + "learning_rate": 9.316147511636234e-07, + "loss": 0.0, + "step": 14226 + }, + { + "epoch": 0.916865373461365, + "grad_norm": 0.0006687030619872437, + "learning_rate": 9.308986752595776e-07, + "loss": 0.0, + "step": 14227 + }, + { + "epoch": 0.9169298189082942, + "grad_norm": 1.7368992487315262, + "learning_rate": 9.301825993555318e-07, + "loss": 0.0119, + "step": 14228 + }, + { + "epoch": 0.9169942643552234, + "grad_norm": 0.0003233422494308578, + "learning_rate": 9.294665234514859e-07, + "loss": 0.0, + "step": 14229 + }, + { + "epoch": 0.9170587098021524, + "grad_norm": 0.0069097439182549385, + "learning_rate": 9.287504475474402e-07, + "loss": 0.0, + "step": 14230 + }, + { + "epoch": 0.9171231552490816, + "grad_norm": 0.0029392214582604566, + "learning_rate": 9.280343716433943e-07, + "loss": 0.0, + "step": 14231 + }, + { + "epoch": 0.9171876006960108, + "grad_norm": 0.0067482743978787415, + "learning_rate": 9.273182957393484e-07, + "loss": 0.0, + "step": 14232 + }, + { + "epoch": 0.91725204614294, + "grad_norm": 0.0020425040118576547, + "learning_rate": 9.266022198353027e-07, + "loss": 0.0, + "step": 14233 + }, + { + "epoch": 0.9173164915898692, + "grad_norm": 0.043513331215841396, + "learning_rate": 9.258861439312567e-07, + "loss": 0.0001, + "step": 14234 + }, + { + "epoch": 0.9173809370367983, + "grad_norm": 0.0005674751602963122, + "learning_rate": 9.251700680272109e-07, + "loss": 0.0, + "step": 14235 + }, + { + "epoch": 0.9174453824837275, + "grad_norm": 0.00017620281295303855, + "learning_rate": 9.244539921231651e-07, + "loss": 0.0, + "step": 14236 + }, + { + "epoch": 0.9175098279306567, + "grad_norm": 0.001052507181908841, + "learning_rate": 9.237379162191193e-07, + "loss": 0.0, + "step": 14237 + }, + { + "epoch": 0.9175742733775859, + "grad_norm": 0.14251254807613536, + "learning_rate": 9.230218403150734e-07, + "loss": 0.0019, + "step": 14238 + }, + { + "epoch": 0.9176387188245151, + "grad_norm": 0.0016286015558987282, + "learning_rate": 9.223057644110277e-07, + "loss": 0.0, + "step": 14239 + }, + { + "epoch": 0.9177031642714443, + "grad_norm": 0.00010581374894291006, + "learning_rate": 9.215896885069818e-07, + "loss": 0.0, + "step": 14240 + }, + { + "epoch": 0.9177676097183733, + "grad_norm": 0.0015797488393140531, + "learning_rate": 9.208736126029359e-07, + "loss": 0.0, + "step": 14241 + }, + { + "epoch": 0.9178320551653025, + "grad_norm": 0.0008123762794186068, + "learning_rate": 9.201575366988902e-07, + "loss": 0.0, + "step": 14242 + }, + { + "epoch": 0.9178965006122317, + "grad_norm": 0.008421636058342962, + "learning_rate": 9.194414607948443e-07, + "loss": 0.0, + "step": 14243 + }, + { + "epoch": 0.9179609460591609, + "grad_norm": 0.000672241911371534, + "learning_rate": 9.187253848907985e-07, + "loss": 0.0, + "step": 14244 + }, + { + "epoch": 0.9180253915060901, + "grad_norm": 0.0005130752034765722, + "learning_rate": 9.180093089867527e-07, + "loss": 0.0, + "step": 14245 + }, + { + "epoch": 0.9180898369530193, + "grad_norm": 6.957164338496054e-05, + "learning_rate": 9.172932330827068e-07, + "loss": 0.0, + "step": 14246 + }, + { + "epoch": 0.9181542823999485, + "grad_norm": 0.004817899785118754, + "learning_rate": 9.16577157178661e-07, + "loss": 0.0, + "step": 14247 + }, + { + "epoch": 0.9182187278468776, + "grad_norm": 0.0004698135349077357, + "learning_rate": 9.158610812746152e-07, + "loss": 0.0, + "step": 14248 + }, + { + "epoch": 0.9182831732938068, + "grad_norm": 0.022730519540926336, + "learning_rate": 9.151450053705693e-07, + "loss": 0.0002, + "step": 14249 + }, + { + "epoch": 0.918347618740736, + "grad_norm": 0.0039667025926992784, + "learning_rate": 9.144289294665236e-07, + "loss": 0.0, + "step": 14250 + }, + { + "epoch": 0.9184120641876652, + "grad_norm": 0.002694283624502539, + "learning_rate": 9.137128535624777e-07, + "loss": 0.0, + "step": 14251 + }, + { + "epoch": 0.9184765096345943, + "grad_norm": 0.012668603875843228, + "learning_rate": 9.129967776584318e-07, + "loss": 0.0001, + "step": 14252 + }, + { + "epoch": 0.9185409550815234, + "grad_norm": 0.00016267248176073663, + "learning_rate": 9.122807017543861e-07, + "loss": 0.0, + "step": 14253 + }, + { + "epoch": 0.9186054005284526, + "grad_norm": 0.16982769822718005, + "learning_rate": 9.115646258503402e-07, + "loss": 0.0005, + "step": 14254 + }, + { + "epoch": 0.9186698459753818, + "grad_norm": 0.0004709426465504047, + "learning_rate": 9.108485499462944e-07, + "loss": 0.0, + "step": 14255 + }, + { + "epoch": 0.918734291422311, + "grad_norm": 0.0028317174475590155, + "learning_rate": 9.101324740422486e-07, + "loss": 0.0, + "step": 14256 + }, + { + "epoch": 0.9187987368692402, + "grad_norm": 0.00039515144590099115, + "learning_rate": 9.094163981382028e-07, + "loss": 0.0, + "step": 14257 + }, + { + "epoch": 0.9188631823161694, + "grad_norm": 0.034502345692150146, + "learning_rate": 9.087003222341568e-07, + "loss": 0.0, + "step": 14258 + }, + { + "epoch": 0.9189276277630986, + "grad_norm": 0.013624407437962447, + "learning_rate": 9.079842463301111e-07, + "loss": 0.0001, + "step": 14259 + }, + { + "epoch": 0.9189920732100277, + "grad_norm": 0.31261428888863013, + "learning_rate": 9.072681704260652e-07, + "loss": 0.0011, + "step": 14260 + }, + { + "epoch": 0.9190565186569569, + "grad_norm": 0.27199733765961076, + "learning_rate": 9.065520945220194e-07, + "loss": 0.0005, + "step": 14261 + }, + { + "epoch": 0.9191209641038861, + "grad_norm": 0.04721813969246109, + "learning_rate": 9.058360186179736e-07, + "loss": 0.0016, + "step": 14262 + }, + { + "epoch": 0.9191854095508152, + "grad_norm": 0.0005272377698462465, + "learning_rate": 9.051199427139278e-07, + "loss": 0.0, + "step": 14263 + }, + { + "epoch": 0.9192498549977444, + "grad_norm": 0.003632628093650156, + "learning_rate": 9.044038668098819e-07, + "loss": 0.0, + "step": 14264 + }, + { + "epoch": 0.9193143004446735, + "grad_norm": 0.08249185312389225, + "learning_rate": 9.036877909058362e-07, + "loss": 0.0001, + "step": 14265 + }, + { + "epoch": 0.9193787458916027, + "grad_norm": 0.01769209298130543, + "learning_rate": 9.029717150017903e-07, + "loss": 0.0001, + "step": 14266 + }, + { + "epoch": 0.9194431913385319, + "grad_norm": 0.007572932169365242, + "learning_rate": 9.022556390977444e-07, + "loss": 0.0, + "step": 14267 + }, + { + "epoch": 0.9195076367854611, + "grad_norm": 0.0006129075108329983, + "learning_rate": 9.015395631936987e-07, + "loss": 0.0, + "step": 14268 + }, + { + "epoch": 0.9195720822323903, + "grad_norm": 0.15367907565903965, + "learning_rate": 9.008234872896527e-07, + "loss": 0.0019, + "step": 14269 + }, + { + "epoch": 0.9196365276793195, + "grad_norm": 0.00012896780691753785, + "learning_rate": 9.001074113856069e-07, + "loss": 0.0, + "step": 14270 + }, + { + "epoch": 0.9197009731262487, + "grad_norm": 0.04012686418183218, + "learning_rate": 8.993913354815611e-07, + "loss": 0.0001, + "step": 14271 + }, + { + "epoch": 0.9197654185731778, + "grad_norm": 0.009376539846461099, + "learning_rate": 8.986752595775153e-07, + "loss": 0.0, + "step": 14272 + }, + { + "epoch": 0.919829864020107, + "grad_norm": 0.10376941411654185, + "learning_rate": 8.979591836734694e-07, + "loss": 0.0001, + "step": 14273 + }, + { + "epoch": 0.9198943094670362, + "grad_norm": 0.004417011550164127, + "learning_rate": 8.972431077694237e-07, + "loss": 0.0, + "step": 14274 + }, + { + "epoch": 0.9199587549139653, + "grad_norm": 0.0010578230684077303, + "learning_rate": 8.965270318653778e-07, + "loss": 0.0, + "step": 14275 + }, + { + "epoch": 0.9200232003608945, + "grad_norm": 0.005991646645697451, + "learning_rate": 8.958109559613319e-07, + "loss": 0.0, + "step": 14276 + }, + { + "epoch": 0.9200876458078237, + "grad_norm": 0.0013921188764913148, + "learning_rate": 8.950948800572862e-07, + "loss": 0.0, + "step": 14277 + }, + { + "epoch": 0.9201520912547528, + "grad_norm": 0.003431294612354495, + "learning_rate": 8.943788041532403e-07, + "loss": 0.0, + "step": 14278 + }, + { + "epoch": 0.920216536701682, + "grad_norm": 0.0009395657251499541, + "learning_rate": 8.936627282491945e-07, + "loss": 0.0, + "step": 14279 + }, + { + "epoch": 0.9202809821486112, + "grad_norm": 0.21383604058028396, + "learning_rate": 8.929466523451487e-07, + "loss": 0.0007, + "step": 14280 + }, + { + "epoch": 0.9203454275955404, + "grad_norm": 0.07478550921576256, + "learning_rate": 8.922305764411029e-07, + "loss": 0.0002, + "step": 14281 + }, + { + "epoch": 0.9204098730424696, + "grad_norm": 0.0018806258634827169, + "learning_rate": 8.915145005370569e-07, + "loss": 0.0, + "step": 14282 + }, + { + "epoch": 0.9204743184893988, + "grad_norm": 0.0006642417777200569, + "learning_rate": 8.907984246330112e-07, + "loss": 0.0, + "step": 14283 + }, + { + "epoch": 0.9205387639363279, + "grad_norm": 0.033905118569600774, + "learning_rate": 8.900823487289653e-07, + "loss": 0.0001, + "step": 14284 + }, + { + "epoch": 0.9206032093832571, + "grad_norm": 0.00029521834576314433, + "learning_rate": 8.893662728249196e-07, + "loss": 0.0, + "step": 14285 + }, + { + "epoch": 0.9206676548301862, + "grad_norm": 0.0026862544176196293, + "learning_rate": 8.886501969208737e-07, + "loss": 0.0, + "step": 14286 + }, + { + "epoch": 0.9207321002771154, + "grad_norm": 0.029072616489818775, + "learning_rate": 8.879341210168278e-07, + "loss": 0.0015, + "step": 14287 + }, + { + "epoch": 0.9207965457240446, + "grad_norm": 0.005206056469858584, + "learning_rate": 8.872180451127821e-07, + "loss": 0.0, + "step": 14288 + }, + { + "epoch": 0.9208609911709738, + "grad_norm": 0.004492865880830228, + "learning_rate": 8.865019692087362e-07, + "loss": 0.0, + "step": 14289 + }, + { + "epoch": 0.9209254366179029, + "grad_norm": 0.04546572249122386, + "learning_rate": 8.857858933046904e-07, + "loss": 0.0, + "step": 14290 + }, + { + "epoch": 0.9209898820648321, + "grad_norm": 0.005094889517667441, + "learning_rate": 8.850698174006446e-07, + "loss": 0.0001, + "step": 14291 + }, + { + "epoch": 0.9210543275117613, + "grad_norm": 0.6241890175700587, + "learning_rate": 8.843537414965988e-07, + "loss": 0.0136, + "step": 14292 + }, + { + "epoch": 0.9211187729586905, + "grad_norm": 0.489125871665835, + "learning_rate": 8.836376655925528e-07, + "loss": 0.0015, + "step": 14293 + }, + { + "epoch": 0.9211832184056197, + "grad_norm": 0.09846547066195413, + "learning_rate": 8.829215896885071e-07, + "loss": 0.0002, + "step": 14294 + }, + { + "epoch": 0.9212476638525489, + "grad_norm": 0.0005087791308881729, + "learning_rate": 8.822055137844612e-07, + "loss": 0.0, + "step": 14295 + }, + { + "epoch": 0.921312109299478, + "grad_norm": 0.0017957632932818469, + "learning_rate": 8.814894378804153e-07, + "loss": 0.0, + "step": 14296 + }, + { + "epoch": 0.9213765547464071, + "grad_norm": 0.04921608342410101, + "learning_rate": 8.807733619763696e-07, + "loss": 0.0004, + "step": 14297 + }, + { + "epoch": 0.9214410001933363, + "grad_norm": 0.0911446236348484, + "learning_rate": 8.800572860723237e-07, + "loss": 0.0016, + "step": 14298 + }, + { + "epoch": 0.9215054456402655, + "grad_norm": 0.0002452915588548585, + "learning_rate": 8.793412101682779e-07, + "loss": 0.0, + "step": 14299 + }, + { + "epoch": 0.9215698910871947, + "grad_norm": 0.0007380774980053137, + "learning_rate": 8.786251342642321e-07, + "loss": 0.0, + "step": 14300 + }, + { + "epoch": 0.9216343365341239, + "grad_norm": 0.000311741694295826, + "learning_rate": 8.779090583601863e-07, + "loss": 0.0, + "step": 14301 + }, + { + "epoch": 0.921698781981053, + "grad_norm": 0.0879874067442999, + "learning_rate": 8.771929824561404e-07, + "loss": 0.001, + "step": 14302 + }, + { + "epoch": 0.9217632274279822, + "grad_norm": 0.005910103099318323, + "learning_rate": 8.764769065520947e-07, + "loss": 0.0, + "step": 14303 + }, + { + "epoch": 0.9218276728749114, + "grad_norm": 0.00033256577001777654, + "learning_rate": 8.757608306480488e-07, + "loss": 0.0, + "step": 14304 + }, + { + "epoch": 0.9218921183218406, + "grad_norm": 0.009101280903348382, + "learning_rate": 8.750447547440028e-07, + "loss": 0.0, + "step": 14305 + }, + { + "epoch": 0.9219565637687698, + "grad_norm": 0.0008341859623354918, + "learning_rate": 8.743286788399571e-07, + "loss": 0.0, + "step": 14306 + }, + { + "epoch": 0.922021009215699, + "grad_norm": 0.0005060169378157814, + "learning_rate": 8.736126029359112e-07, + "loss": 0.0, + "step": 14307 + }, + { + "epoch": 0.922085454662628, + "grad_norm": 0.2939196223695395, + "learning_rate": 8.728965270318654e-07, + "loss": 0.001, + "step": 14308 + }, + { + "epoch": 0.9221499001095572, + "grad_norm": 0.00019054155313982495, + "learning_rate": 8.721804511278196e-07, + "loss": 0.0, + "step": 14309 + }, + { + "epoch": 0.9222143455564864, + "grad_norm": 0.011363912084689533, + "learning_rate": 8.714643752237738e-07, + "loss": 0.0001, + "step": 14310 + }, + { + "epoch": 0.9222787910034156, + "grad_norm": 0.0007240625442157176, + "learning_rate": 8.707482993197279e-07, + "loss": 0.0, + "step": 14311 + }, + { + "epoch": 0.9223432364503448, + "grad_norm": 0.22208887725504373, + "learning_rate": 8.700322234156822e-07, + "loss": 0.0003, + "step": 14312 + }, + { + "epoch": 0.922407681897274, + "grad_norm": 0.013504279937124145, + "learning_rate": 8.693161475116363e-07, + "loss": 0.0, + "step": 14313 + }, + { + "epoch": 0.9224721273442031, + "grad_norm": 0.0020501837505711128, + "learning_rate": 8.686000716075904e-07, + "loss": 0.0, + "step": 14314 + }, + { + "epoch": 0.9225365727911323, + "grad_norm": 0.28737357044486633, + "learning_rate": 8.678839957035447e-07, + "loss": 0.0003, + "step": 14315 + }, + { + "epoch": 0.9226010182380615, + "grad_norm": 0.0010953903883233731, + "learning_rate": 8.671679197994988e-07, + "loss": 0.0, + "step": 14316 + }, + { + "epoch": 0.9226654636849907, + "grad_norm": 0.009895230589149193, + "learning_rate": 8.664518438954529e-07, + "loss": 0.0001, + "step": 14317 + }, + { + "epoch": 0.9227299091319199, + "grad_norm": 0.04075174082050103, + "learning_rate": 8.657357679914071e-07, + "loss": 0.0, + "step": 14318 + }, + { + "epoch": 0.922794354578849, + "grad_norm": 0.0002692212550240471, + "learning_rate": 8.650196920873613e-07, + "loss": 0.0, + "step": 14319 + }, + { + "epoch": 0.9228588000257781, + "grad_norm": 0.00048252134601037153, + "learning_rate": 8.643036161833155e-07, + "loss": 0.0, + "step": 14320 + }, + { + "epoch": 0.9229232454727073, + "grad_norm": 3.825583155027952e-05, + "learning_rate": 8.635875402792697e-07, + "loss": 0.0, + "step": 14321 + }, + { + "epoch": 0.9229876909196365, + "grad_norm": 0.0018811696074475584, + "learning_rate": 8.628714643752238e-07, + "loss": 0.0, + "step": 14322 + }, + { + "epoch": 0.9230521363665657, + "grad_norm": 0.07775464787976358, + "learning_rate": 8.621553884711781e-07, + "loss": 0.0051, + "step": 14323 + }, + { + "epoch": 0.9231165818134949, + "grad_norm": 0.058226357454030764, + "learning_rate": 8.614393125671322e-07, + "loss": 0.0001, + "step": 14324 + }, + { + "epoch": 0.9231810272604241, + "grad_norm": 0.012174789048439801, + "learning_rate": 8.607232366630863e-07, + "loss": 0.0, + "step": 14325 + }, + { + "epoch": 0.9232454727073532, + "grad_norm": 0.0017584853190584743, + "learning_rate": 8.600071607590406e-07, + "loss": 0.0, + "step": 14326 + }, + { + "epoch": 0.9233099181542824, + "grad_norm": 0.000461639247452941, + "learning_rate": 8.592910848549947e-07, + "loss": 0.0, + "step": 14327 + }, + { + "epoch": 0.9233743636012116, + "grad_norm": 0.07013433453961926, + "learning_rate": 8.585750089509489e-07, + "loss": 0.0001, + "step": 14328 + }, + { + "epoch": 0.9234388090481408, + "grad_norm": 0.00018168184062102873, + "learning_rate": 8.57858933046903e-07, + "loss": 0.0, + "step": 14329 + }, + { + "epoch": 0.9235032544950699, + "grad_norm": 0.009711667844548244, + "learning_rate": 8.571428571428572e-07, + "loss": 0.0, + "step": 14330 + }, + { + "epoch": 0.923567699941999, + "grad_norm": 0.0006406341603395895, + "learning_rate": 8.564267812388113e-07, + "loss": 0.0, + "step": 14331 + }, + { + "epoch": 0.9236321453889282, + "grad_norm": 8.876367947043951e-05, + "learning_rate": 8.557107053347656e-07, + "loss": 0.0, + "step": 14332 + }, + { + "epoch": 0.9236965908358574, + "grad_norm": 0.6359557512368327, + "learning_rate": 8.549946294307197e-07, + "loss": 0.0067, + "step": 14333 + }, + { + "epoch": 0.9237610362827866, + "grad_norm": 0.00493237461485779, + "learning_rate": 8.542785535266739e-07, + "loss": 0.0, + "step": 14334 + }, + { + "epoch": 0.9238254817297158, + "grad_norm": 0.0052610262877635415, + "learning_rate": 8.535624776226281e-07, + "loss": 0.0, + "step": 14335 + }, + { + "epoch": 0.923889927176645, + "grad_norm": 0.00011238967086745099, + "learning_rate": 8.528464017185823e-07, + "loss": 0.0, + "step": 14336 + }, + { + "epoch": 0.9239543726235742, + "grad_norm": 2.3785792728021552e-05, + "learning_rate": 8.521303258145364e-07, + "loss": 0.0, + "step": 14337 + }, + { + "epoch": 0.9240188180705033, + "grad_norm": 0.000555459237647623, + "learning_rate": 8.514142499104907e-07, + "loss": 0.0, + "step": 14338 + }, + { + "epoch": 0.9240832635174325, + "grad_norm": 0.010765654901809371, + "learning_rate": 8.506981740064448e-07, + "loss": 0.0, + "step": 14339 + }, + { + "epoch": 0.9241477089643617, + "grad_norm": 0.004778807092981911, + "learning_rate": 8.499820981023989e-07, + "loss": 0.0, + "step": 14340 + }, + { + "epoch": 0.9242121544112908, + "grad_norm": 0.005520193338135902, + "learning_rate": 8.492660221983531e-07, + "loss": 0.0, + "step": 14341 + }, + { + "epoch": 0.92427659985822, + "grad_norm": 0.3407595779824848, + "learning_rate": 8.485499462943072e-07, + "loss": 0.0006, + "step": 14342 + }, + { + "epoch": 0.9243410453051492, + "grad_norm": 4.109653675093666e-05, + "learning_rate": 8.478338703902614e-07, + "loss": 0.0, + "step": 14343 + }, + { + "epoch": 0.9244054907520783, + "grad_norm": 0.05114493215860188, + "learning_rate": 8.471177944862156e-07, + "loss": 0.0001, + "step": 14344 + }, + { + "epoch": 0.9244699361990075, + "grad_norm": 0.004551564714074149, + "learning_rate": 8.464017185821698e-07, + "loss": 0.0, + "step": 14345 + }, + { + "epoch": 0.9245343816459367, + "grad_norm": 0.0021971180571567555, + "learning_rate": 8.456856426781239e-07, + "loss": 0.0, + "step": 14346 + }, + { + "epoch": 0.9245988270928659, + "grad_norm": 0.0005614679583030613, + "learning_rate": 8.449695667740782e-07, + "loss": 0.0, + "step": 14347 + }, + { + "epoch": 0.9246632725397951, + "grad_norm": 0.3467760408850226, + "learning_rate": 8.442534908700323e-07, + "loss": 0.0017, + "step": 14348 + }, + { + "epoch": 0.9247277179867243, + "grad_norm": 0.08151521610210832, + "learning_rate": 8.435374149659864e-07, + "loss": 0.0008, + "step": 14349 + }, + { + "epoch": 0.9247921634336534, + "grad_norm": 0.004019518333800433, + "learning_rate": 8.428213390619407e-07, + "loss": 0.0, + "step": 14350 + }, + { + "epoch": 0.9248566088805826, + "grad_norm": 0.07447157949814238, + "learning_rate": 8.421052631578948e-07, + "loss": 0.0001, + "step": 14351 + }, + { + "epoch": 0.9249210543275118, + "grad_norm": 0.014641115226128458, + "learning_rate": 8.41389187253849e-07, + "loss": 0.0, + "step": 14352 + }, + { + "epoch": 0.9249854997744409, + "grad_norm": 0.3754926862321877, + "learning_rate": 8.406731113498031e-07, + "loss": 0.0031, + "step": 14353 + }, + { + "epoch": 0.9250499452213701, + "grad_norm": 0.0006381091658297029, + "learning_rate": 8.399570354457573e-07, + "loss": 0.0, + "step": 14354 + }, + { + "epoch": 0.9251143906682993, + "grad_norm": 0.0007651448783783858, + "learning_rate": 8.392409595417115e-07, + "loss": 0.0, + "step": 14355 + }, + { + "epoch": 0.9251788361152284, + "grad_norm": 0.010261725668583554, + "learning_rate": 8.385248836376657e-07, + "loss": 0.0, + "step": 14356 + }, + { + "epoch": 0.9252432815621576, + "grad_norm": 0.004215161724131631, + "learning_rate": 8.378088077336198e-07, + "loss": 0.0, + "step": 14357 + }, + { + "epoch": 0.9253077270090868, + "grad_norm": 0.010846641072357954, + "learning_rate": 8.370927318295741e-07, + "loss": 0.0, + "step": 14358 + }, + { + "epoch": 0.925372172456016, + "grad_norm": 0.0036661152239055466, + "learning_rate": 8.363766559255282e-07, + "loss": 0.0, + "step": 14359 + }, + { + "epoch": 0.9254366179029452, + "grad_norm": 0.2835533081727429, + "learning_rate": 8.356605800214823e-07, + "loss": 0.0023, + "step": 14360 + }, + { + "epoch": 0.9255010633498744, + "grad_norm": 0.0005097153361995352, + "learning_rate": 8.349445041174366e-07, + "loss": 0.0, + "step": 14361 + }, + { + "epoch": 0.9255655087968035, + "grad_norm": 0.0035745185335236363, + "learning_rate": 8.342284282133907e-07, + "loss": 0.0, + "step": 14362 + }, + { + "epoch": 0.9256299542437327, + "grad_norm": 0.0022912225414757144, + "learning_rate": 8.335123523093449e-07, + "loss": 0.0, + "step": 14363 + }, + { + "epoch": 0.9256943996906618, + "grad_norm": 0.006479848061025811, + "learning_rate": 8.327962764052991e-07, + "loss": 0.0, + "step": 14364 + }, + { + "epoch": 0.925758845137591, + "grad_norm": 0.04159580182992835, + "learning_rate": 8.320802005012532e-07, + "loss": 0.0001, + "step": 14365 + }, + { + "epoch": 0.9258232905845202, + "grad_norm": 0.003863711422317911, + "learning_rate": 8.313641245972073e-07, + "loss": 0.0, + "step": 14366 + }, + { + "epoch": 0.9258877360314494, + "grad_norm": 0.00010527621767379967, + "learning_rate": 8.306480486931616e-07, + "loss": 0.0, + "step": 14367 + }, + { + "epoch": 0.9259521814783785, + "grad_norm": 0.0008107817368530498, + "learning_rate": 8.299319727891157e-07, + "loss": 0.0, + "step": 14368 + }, + { + "epoch": 0.9260166269253077, + "grad_norm": 0.39974754607215024, + "learning_rate": 8.292158968850698e-07, + "loss": 0.0026, + "step": 14369 + }, + { + "epoch": 0.9260810723722369, + "grad_norm": 0.0003916170677837437, + "learning_rate": 8.284998209810241e-07, + "loss": 0.0, + "step": 14370 + }, + { + "epoch": 0.9261455178191661, + "grad_norm": 0.00032176926961688867, + "learning_rate": 8.277837450769782e-07, + "loss": 0.0, + "step": 14371 + }, + { + "epoch": 0.9262099632660953, + "grad_norm": 0.004748618602019491, + "learning_rate": 8.270676691729324e-07, + "loss": 0.0, + "step": 14372 + }, + { + "epoch": 0.9262744087130245, + "grad_norm": 0.0005727854899369785, + "learning_rate": 8.263515932688866e-07, + "loss": 0.0, + "step": 14373 + }, + { + "epoch": 0.9263388541599537, + "grad_norm": 9.207616447138975e-05, + "learning_rate": 8.256355173648408e-07, + "loss": 0.0, + "step": 14374 + }, + { + "epoch": 0.9264032996068827, + "grad_norm": 0.00040550139835061086, + "learning_rate": 8.249194414607949e-07, + "loss": 0.0, + "step": 14375 + }, + { + "epoch": 0.9264677450538119, + "grad_norm": 0.0011182196861263627, + "learning_rate": 8.242033655567492e-07, + "loss": 0.0, + "step": 14376 + }, + { + "epoch": 0.9265321905007411, + "grad_norm": 0.002358272684080987, + "learning_rate": 8.234872896527032e-07, + "loss": 0.0, + "step": 14377 + }, + { + "epoch": 0.9265966359476703, + "grad_norm": 0.12104582607336277, + "learning_rate": 8.227712137486573e-07, + "loss": 0.0001, + "step": 14378 + }, + { + "epoch": 0.9266610813945995, + "grad_norm": 0.007726797377316758, + "learning_rate": 8.220551378446116e-07, + "loss": 0.0, + "step": 14379 + }, + { + "epoch": 0.9267255268415286, + "grad_norm": 0.008248464069637097, + "learning_rate": 8.213390619405657e-07, + "loss": 0.0, + "step": 14380 + }, + { + "epoch": 0.9267899722884578, + "grad_norm": 0.0001702704616484478, + "learning_rate": 8.206229860365199e-07, + "loss": 0.0, + "step": 14381 + }, + { + "epoch": 0.926854417735387, + "grad_norm": 0.000260795053944009, + "learning_rate": 8.199069101324741e-07, + "loss": 0.0, + "step": 14382 + }, + { + "epoch": 0.9269188631823162, + "grad_norm": 0.0007138246285888987, + "learning_rate": 8.191908342284283e-07, + "loss": 0.0, + "step": 14383 + }, + { + "epoch": 0.9269833086292454, + "grad_norm": 0.00015103320664588784, + "learning_rate": 8.184747583243824e-07, + "loss": 0.0, + "step": 14384 + }, + { + "epoch": 0.9270477540761746, + "grad_norm": 0.6980205264079195, + "learning_rate": 8.177586824203367e-07, + "loss": 0.0055, + "step": 14385 + }, + { + "epoch": 0.9271121995231036, + "grad_norm": 0.06006724016241198, + "learning_rate": 8.170426065162908e-07, + "loss": 0.0001, + "step": 14386 + }, + { + "epoch": 0.9271766449700328, + "grad_norm": 0.1601703742242303, + "learning_rate": 8.163265306122449e-07, + "loss": 0.0037, + "step": 14387 + }, + { + "epoch": 0.927241090416962, + "grad_norm": 0.0006902069417974614, + "learning_rate": 8.156104547081992e-07, + "loss": 0.0, + "step": 14388 + }, + { + "epoch": 0.9273055358638912, + "grad_norm": 0.001034304148302098, + "learning_rate": 8.148943788041532e-07, + "loss": 0.0, + "step": 14389 + }, + { + "epoch": 0.9273699813108204, + "grad_norm": 0.0006792645530657716, + "learning_rate": 8.141783029001074e-07, + "loss": 0.0, + "step": 14390 + }, + { + "epoch": 0.9274344267577496, + "grad_norm": 0.006263223817885295, + "learning_rate": 8.134622269960616e-07, + "loss": 0.0, + "step": 14391 + }, + { + "epoch": 0.9274988722046787, + "grad_norm": 0.0018722316210985034, + "learning_rate": 8.127461510920158e-07, + "loss": 0.0, + "step": 14392 + }, + { + "epoch": 0.9275633176516079, + "grad_norm": 0.000974320736434331, + "learning_rate": 8.1203007518797e-07, + "loss": 0.0, + "step": 14393 + }, + { + "epoch": 0.9276277630985371, + "grad_norm": 0.0003239978115912819, + "learning_rate": 8.113139992839242e-07, + "loss": 0.0, + "step": 14394 + }, + { + "epoch": 0.9276922085454663, + "grad_norm": 0.0013150495822469358, + "learning_rate": 8.105979233798783e-07, + "loss": 0.0, + "step": 14395 + }, + { + "epoch": 0.9277566539923955, + "grad_norm": 0.015568682946537292, + "learning_rate": 8.098818474758326e-07, + "loss": 0.0, + "step": 14396 + }, + { + "epoch": 0.9278210994393246, + "grad_norm": 0.059350822962951, + "learning_rate": 8.091657715717867e-07, + "loss": 0.0, + "step": 14397 + }, + { + "epoch": 0.9278855448862537, + "grad_norm": 0.019161179937591782, + "learning_rate": 8.084496956677408e-07, + "loss": 0.0, + "step": 14398 + }, + { + "epoch": 0.9279499903331829, + "grad_norm": 0.0008154856476937402, + "learning_rate": 8.077336197636951e-07, + "loss": 0.0, + "step": 14399 + }, + { + "epoch": 0.9280144357801121, + "grad_norm": 0.0017092046185970635, + "learning_rate": 8.070175438596491e-07, + "loss": 0.0, + "step": 14400 + }, + { + "epoch": 0.9280788812270413, + "grad_norm": 0.0223845070364754, + "learning_rate": 8.063014679556033e-07, + "loss": 0.0001, + "step": 14401 + }, + { + "epoch": 0.9281433266739705, + "grad_norm": 0.08140399362746532, + "learning_rate": 8.055853920515575e-07, + "loss": 0.0007, + "step": 14402 + }, + { + "epoch": 0.9282077721208997, + "grad_norm": 0.007940254101373572, + "learning_rate": 8.048693161475117e-07, + "loss": 0.0, + "step": 14403 + }, + { + "epoch": 0.9282722175678289, + "grad_norm": 0.03842277464554137, + "learning_rate": 8.041532402434658e-07, + "loss": 0.0, + "step": 14404 + }, + { + "epoch": 0.928336663014758, + "grad_norm": 0.0015349968048687594, + "learning_rate": 8.034371643394201e-07, + "loss": 0.0, + "step": 14405 + }, + { + "epoch": 0.9284011084616872, + "grad_norm": 0.7215145680998309, + "learning_rate": 8.027210884353742e-07, + "loss": 0.0047, + "step": 14406 + }, + { + "epoch": 0.9284655539086164, + "grad_norm": 0.14605131986601827, + "learning_rate": 8.020050125313284e-07, + "loss": 0.0, + "step": 14407 + }, + { + "epoch": 0.9285299993555455, + "grad_norm": 0.045908688354199964, + "learning_rate": 8.012889366272826e-07, + "loss": 0.0001, + "step": 14408 + }, + { + "epoch": 0.9285944448024747, + "grad_norm": 0.002024083815106119, + "learning_rate": 8.005728607232368e-07, + "loss": 0.0, + "step": 14409 + }, + { + "epoch": 0.9286588902494038, + "grad_norm": 0.002112488711979832, + "learning_rate": 7.998567848191909e-07, + "loss": 0.0, + "step": 14410 + }, + { + "epoch": 0.928723335696333, + "grad_norm": 0.008628272653972625, + "learning_rate": 7.991407089151452e-07, + "loss": 0.0, + "step": 14411 + }, + { + "epoch": 0.9287877811432622, + "grad_norm": 0.09060448977613139, + "learning_rate": 7.984246330110993e-07, + "loss": 0.0002, + "step": 14412 + }, + { + "epoch": 0.9288522265901914, + "grad_norm": 0.0005132101275217304, + "learning_rate": 7.977085571070533e-07, + "loss": 0.0, + "step": 14413 + }, + { + "epoch": 0.9289166720371206, + "grad_norm": 0.1643300826197386, + "learning_rate": 7.969924812030076e-07, + "loss": 0.0003, + "step": 14414 + }, + { + "epoch": 0.9289811174840498, + "grad_norm": 0.0016093713965732953, + "learning_rate": 7.962764052989617e-07, + "loss": 0.0015, + "step": 14415 + }, + { + "epoch": 0.929045562930979, + "grad_norm": 0.21922135659460637, + "learning_rate": 7.955603293949159e-07, + "loss": 0.0008, + "step": 14416 + }, + { + "epoch": 0.9291100083779081, + "grad_norm": 0.0008585809892495993, + "learning_rate": 7.948442534908701e-07, + "loss": 0.0, + "step": 14417 + }, + { + "epoch": 0.9291744538248373, + "grad_norm": 0.009192170655711953, + "learning_rate": 7.941281775868243e-07, + "loss": 0.0, + "step": 14418 + }, + { + "epoch": 0.9292388992717664, + "grad_norm": 0.25046316098969706, + "learning_rate": 7.934121016827784e-07, + "loss": 0.0013, + "step": 14419 + }, + { + "epoch": 0.9293033447186956, + "grad_norm": 0.0051350095650668295, + "learning_rate": 7.926960257787327e-07, + "loss": 0.0, + "step": 14420 + }, + { + "epoch": 0.9293677901656248, + "grad_norm": 0.0026750066483972193, + "learning_rate": 7.919799498746868e-07, + "loss": 0.0, + "step": 14421 + }, + { + "epoch": 0.929432235612554, + "grad_norm": 0.004276538716577033, + "learning_rate": 7.912638739706409e-07, + "loss": 0.0, + "step": 14422 + }, + { + "epoch": 0.9294966810594831, + "grad_norm": 0.3635859288520741, + "learning_rate": 7.905477980665952e-07, + "loss": 0.0028, + "step": 14423 + }, + { + "epoch": 0.9295611265064123, + "grad_norm": 0.01937078856667039, + "learning_rate": 7.898317221625492e-07, + "loss": 0.0, + "step": 14424 + }, + { + "epoch": 0.9296255719533415, + "grad_norm": 0.001685193181962873, + "learning_rate": 7.891156462585034e-07, + "loss": 0.0, + "step": 14425 + }, + { + "epoch": 0.9296900174002707, + "grad_norm": 0.0005174008097750863, + "learning_rate": 7.883995703544576e-07, + "loss": 0.0, + "step": 14426 + }, + { + "epoch": 0.9297544628471999, + "grad_norm": 3.9323004013979845e-05, + "learning_rate": 7.876834944504118e-07, + "loss": 0.0, + "step": 14427 + }, + { + "epoch": 0.929818908294129, + "grad_norm": 0.23817010538682193, + "learning_rate": 7.86967418546366e-07, + "loss": 0.0003, + "step": 14428 + }, + { + "epoch": 0.9298833537410582, + "grad_norm": 0.014149205828395605, + "learning_rate": 7.862513426423202e-07, + "loss": 0.0, + "step": 14429 + }, + { + "epoch": 0.9299477991879874, + "grad_norm": 0.0005149379627849445, + "learning_rate": 7.855352667382743e-07, + "loss": 0.0, + "step": 14430 + }, + { + "epoch": 0.9300122446349165, + "grad_norm": 0.31164079805275147, + "learning_rate": 7.848191908342286e-07, + "loss": 0.001, + "step": 14431 + }, + { + "epoch": 0.9300766900818457, + "grad_norm": 0.027360878841657936, + "learning_rate": 7.841031149301827e-07, + "loss": 0.0, + "step": 14432 + }, + { + "epoch": 0.9301411355287749, + "grad_norm": 0.0019418651000683484, + "learning_rate": 7.833870390261368e-07, + "loss": 0.0, + "step": 14433 + }, + { + "epoch": 0.930205580975704, + "grad_norm": 0.00013151979960604563, + "learning_rate": 7.826709631220911e-07, + "loss": 0.0, + "step": 14434 + }, + { + "epoch": 0.9302700264226332, + "grad_norm": 0.0005116016903126348, + "learning_rate": 7.819548872180452e-07, + "loss": 0.0, + "step": 14435 + }, + { + "epoch": 0.9303344718695624, + "grad_norm": 0.003360318785380491, + "learning_rate": 7.812388113139993e-07, + "loss": 0.0, + "step": 14436 + }, + { + "epoch": 0.9303989173164916, + "grad_norm": 0.005443713674886183, + "learning_rate": 7.805227354099535e-07, + "loss": 0.0, + "step": 14437 + }, + { + "epoch": 0.9304633627634208, + "grad_norm": 0.006804981826999122, + "learning_rate": 7.798066595059077e-07, + "loss": 0.0, + "step": 14438 + }, + { + "epoch": 0.93052780821035, + "grad_norm": 0.004543146095431098, + "learning_rate": 7.790905836018618e-07, + "loss": 0.0, + "step": 14439 + }, + { + "epoch": 0.9305922536572792, + "grad_norm": 0.004907974032332147, + "learning_rate": 7.783745076978161e-07, + "loss": 0.0, + "step": 14440 + }, + { + "epoch": 0.9306566991042083, + "grad_norm": 0.007826499670387036, + "learning_rate": 7.776584317937702e-07, + "loss": 0.0, + "step": 14441 + }, + { + "epoch": 0.9307211445511374, + "grad_norm": 0.0004948724082326526, + "learning_rate": 7.769423558897243e-07, + "loss": 0.0, + "step": 14442 + }, + { + "epoch": 0.9307855899980666, + "grad_norm": 0.008689859857442889, + "learning_rate": 7.762262799856786e-07, + "loss": 0.0, + "step": 14443 + }, + { + "epoch": 0.9308500354449958, + "grad_norm": 0.003751375962738023, + "learning_rate": 7.755102040816327e-07, + "loss": 0.0, + "step": 14444 + }, + { + "epoch": 0.930914480891925, + "grad_norm": 0.00013363970816859745, + "learning_rate": 7.747941281775869e-07, + "loss": 0.0, + "step": 14445 + }, + { + "epoch": 0.9309789263388542, + "grad_norm": 0.0018097887452774059, + "learning_rate": 7.740780522735411e-07, + "loss": 0.0, + "step": 14446 + }, + { + "epoch": 0.9310433717857833, + "grad_norm": 0.0018097887452774059, + "learning_rate": 7.740780522735411e-07, + "loss": 0.0306, + "step": 14447 + }, + { + "epoch": 0.9311078172327125, + "grad_norm": 0.743557799372613, + "learning_rate": 7.733619763694953e-07, + "loss": 0.0037, + "step": 14448 + }, + { + "epoch": 0.9311722626796417, + "grad_norm": 0.003228825487809219, + "learning_rate": 7.726459004654493e-07, + "loss": 0.0, + "step": 14449 + }, + { + "epoch": 0.9312367081265709, + "grad_norm": 0.0332350349660026, + "learning_rate": 7.719298245614036e-07, + "loss": 0.0001, + "step": 14450 + }, + { + "epoch": 0.9313011535735001, + "grad_norm": 0.004896196076213735, + "learning_rate": 7.712137486573577e-07, + "loss": 0.0, + "step": 14451 + }, + { + "epoch": 0.9313655990204293, + "grad_norm": 6.021259437749865e-05, + "learning_rate": 7.704976727533118e-07, + "loss": 0.0, + "step": 14452 + }, + { + "epoch": 0.9314300444673583, + "grad_norm": 0.003628188229278082, + "learning_rate": 7.697815968492661e-07, + "loss": 0.0, + "step": 14453 + }, + { + "epoch": 0.9314944899142875, + "grad_norm": 0.0017509759768460378, + "learning_rate": 7.690655209452202e-07, + "loss": 0.0, + "step": 14454 + }, + { + "epoch": 0.9315589353612167, + "grad_norm": 0.6015600476896165, + "learning_rate": 7.683494450411744e-07, + "loss": 0.0004, + "step": 14455 + }, + { + "epoch": 0.9316233808081459, + "grad_norm": 0.0020188174805083393, + "learning_rate": 7.676333691371286e-07, + "loss": 0.0, + "step": 14456 + }, + { + "epoch": 0.9316878262550751, + "grad_norm": 0.014562413827033343, + "learning_rate": 7.669172932330828e-07, + "loss": 0.0, + "step": 14457 + }, + { + "epoch": 0.9317522717020043, + "grad_norm": 0.0032687377442210427, + "learning_rate": 7.662012173290369e-07, + "loss": 0.0, + "step": 14458 + }, + { + "epoch": 0.9318167171489334, + "grad_norm": 3.5950292092738695e-05, + "learning_rate": 7.654851414249912e-07, + "loss": 0.0, + "step": 14459 + }, + { + "epoch": 0.9318811625958626, + "grad_norm": 0.007673081489013223, + "learning_rate": 7.647690655209453e-07, + "loss": 0.0, + "step": 14460 + }, + { + "epoch": 0.9319456080427918, + "grad_norm": 0.2343050216474507, + "learning_rate": 7.640529896168993e-07, + "loss": 0.0024, + "step": 14461 + }, + { + "epoch": 0.932010053489721, + "grad_norm": 0.0435925417565156, + "learning_rate": 7.633369137128536e-07, + "loss": 0.0005, + "step": 14462 + }, + { + "epoch": 0.9320744989366502, + "grad_norm": 0.20216371414031317, + "learning_rate": 7.626208378088077e-07, + "loss": 0.0003, + "step": 14463 + }, + { + "epoch": 0.9321389443835792, + "grad_norm": 0.0002489122222914264, + "learning_rate": 7.61904761904762e-07, + "loss": 0.0, + "step": 14464 + }, + { + "epoch": 0.9322033898305084, + "grad_norm": 0.00018979941206031793, + "learning_rate": 7.611886860007161e-07, + "loss": 0.0, + "step": 14465 + }, + { + "epoch": 0.9322678352774376, + "grad_norm": 0.0036174353999676827, + "learning_rate": 7.604726100966703e-07, + "loss": 0.0, + "step": 14466 + }, + { + "epoch": 0.9323322807243668, + "grad_norm": 0.00010448488517973264, + "learning_rate": 7.597565341926245e-07, + "loss": 0.0, + "step": 14467 + }, + { + "epoch": 0.932396726171296, + "grad_norm": 0.06316045030658364, + "learning_rate": 7.590404582885787e-07, + "loss": 0.0004, + "step": 14468 + }, + { + "epoch": 0.9324611716182252, + "grad_norm": 0.5535720570292619, + "learning_rate": 7.583243823845328e-07, + "loss": 0.0008, + "step": 14469 + }, + { + "epoch": 0.9325256170651544, + "grad_norm": 0.009089972948693839, + "learning_rate": 7.576083064804871e-07, + "loss": 0.0, + "step": 14470 + }, + { + "epoch": 0.9325900625120835, + "grad_norm": 0.009528753366977356, + "learning_rate": 7.568922305764412e-07, + "loss": 0.0, + "step": 14471 + }, + { + "epoch": 0.9326545079590127, + "grad_norm": 0.08273048072921844, + "learning_rate": 7.561761546723953e-07, + "loss": 0.0001, + "step": 14472 + }, + { + "epoch": 0.9327189534059419, + "grad_norm": 0.0024652483846048165, + "learning_rate": 7.554600787683496e-07, + "loss": 0.0, + "step": 14473 + }, + { + "epoch": 0.9327833988528711, + "grad_norm": 0.04121828109839269, + "learning_rate": 7.547440028643036e-07, + "loss": 0.0, + "step": 14474 + }, + { + "epoch": 0.9328478442998002, + "grad_norm": 0.14005019012037867, + "learning_rate": 7.540279269602578e-07, + "loss": 0.0002, + "step": 14475 + }, + { + "epoch": 0.9329122897467294, + "grad_norm": 0.020674897633846747, + "learning_rate": 7.53311851056212e-07, + "loss": 0.0, + "step": 14476 + }, + { + "epoch": 0.9329767351936585, + "grad_norm": 0.39202941200795505, + "learning_rate": 7.525957751521662e-07, + "loss": 0.0017, + "step": 14477 + }, + { + "epoch": 0.9330411806405877, + "grad_norm": 0.0019212015864760914, + "learning_rate": 7.518796992481203e-07, + "loss": 0.0, + "step": 14478 + }, + { + "epoch": 0.9331056260875169, + "grad_norm": 0.058421093156673494, + "learning_rate": 7.511636233440746e-07, + "loss": 0.0001, + "step": 14479 + }, + { + "epoch": 0.9331700715344461, + "grad_norm": 2.9589337738041364, + "learning_rate": 7.504475474400287e-07, + "loss": 0.019, + "step": 14480 + }, + { + "epoch": 0.9332345169813753, + "grad_norm": 0.0026238409074990647, + "learning_rate": 7.497314715359829e-07, + "loss": 0.0, + "step": 14481 + }, + { + "epoch": 0.9332989624283045, + "grad_norm": 0.00039909083865957056, + "learning_rate": 7.490153956319371e-07, + "loss": 0.0, + "step": 14482 + }, + { + "epoch": 0.9333634078752336, + "grad_norm": 0.061526487668434125, + "learning_rate": 7.482993197278913e-07, + "loss": 0.0002, + "step": 14483 + }, + { + "epoch": 0.9334278533221628, + "grad_norm": 0.0009454930574369729, + "learning_rate": 7.475832438238454e-07, + "loss": 0.0, + "step": 14484 + }, + { + "epoch": 0.933492298769092, + "grad_norm": 0.008536048096211434, + "learning_rate": 7.468671679197997e-07, + "loss": 0.0001, + "step": 14485 + }, + { + "epoch": 0.9335567442160211, + "grad_norm": 0.01352383029196929, + "learning_rate": 7.461510920157537e-07, + "loss": 0.0, + "step": 14486 + }, + { + "epoch": 0.9336211896629503, + "grad_norm": 5.9316654650585815e-05, + "learning_rate": 7.454350161117078e-07, + "loss": 0.0, + "step": 14487 + }, + { + "epoch": 0.9336856351098795, + "grad_norm": 0.00803302243530583, + "learning_rate": 7.447189402076621e-07, + "loss": 0.0, + "step": 14488 + }, + { + "epoch": 0.9337500805568086, + "grad_norm": 0.0003945896452044587, + "learning_rate": 7.440028643036162e-07, + "loss": 0.0, + "step": 14489 + }, + { + "epoch": 0.9338145260037378, + "grad_norm": 0.030083509488064174, + "learning_rate": 7.432867883995704e-07, + "loss": 0.0, + "step": 14490 + }, + { + "epoch": 0.933878971450667, + "grad_norm": 0.00690686336390923, + "learning_rate": 7.425707124955246e-07, + "loss": 0.0, + "step": 14491 + }, + { + "epoch": 0.9339434168975962, + "grad_norm": 0.005939841642474873, + "learning_rate": 7.418546365914788e-07, + "loss": 0.0, + "step": 14492 + }, + { + "epoch": 0.9340078623445254, + "grad_norm": 0.007773213962894371, + "learning_rate": 7.411385606874329e-07, + "loss": 0.0, + "step": 14493 + }, + { + "epoch": 0.9340723077914546, + "grad_norm": 0.015595200460516845, + "learning_rate": 7.404224847833872e-07, + "loss": 0.0, + "step": 14494 + }, + { + "epoch": 0.9341367532383837, + "grad_norm": 0.0009196707970691353, + "learning_rate": 7.397064088793413e-07, + "loss": 0.0, + "step": 14495 + }, + { + "epoch": 0.9342011986853129, + "grad_norm": 0.25542377927708176, + "learning_rate": 7.389903329752953e-07, + "loss": 0.0011, + "step": 14496 + }, + { + "epoch": 0.9342656441322421, + "grad_norm": 0.006353587293436875, + "learning_rate": 7.382742570712496e-07, + "loss": 0.0, + "step": 14497 + }, + { + "epoch": 0.9343300895791712, + "grad_norm": 0.13289643888858305, + "learning_rate": 7.375581811672037e-07, + "loss": 0.0002, + "step": 14498 + }, + { + "epoch": 0.9343945350261004, + "grad_norm": 0.002705438309291519, + "learning_rate": 7.368421052631579e-07, + "loss": 0.0, + "step": 14499 + }, + { + "epoch": 0.9344589804730296, + "grad_norm": 0.009522138093246023, + "learning_rate": 7.361260293591121e-07, + "loss": 0.0, + "step": 14500 + }, + { + "epoch": 0.9345234259199587, + "grad_norm": 9.089710282497509e-05, + "learning_rate": 7.354099534550663e-07, + "loss": 0.0, + "step": 14501 + }, + { + "epoch": 0.9345878713668879, + "grad_norm": 0.0021077175463139405, + "learning_rate": 7.346938775510205e-07, + "loss": 0.0, + "step": 14502 + }, + { + "epoch": 0.9346523168138171, + "grad_norm": 0.0025995208788616095, + "learning_rate": 7.339778016469747e-07, + "loss": 0.0, + "step": 14503 + }, + { + "epoch": 0.9347167622607463, + "grad_norm": 0.004934568402119453, + "learning_rate": 7.332617257429288e-07, + "loss": 0.0001, + "step": 14504 + }, + { + "epoch": 0.9347812077076755, + "grad_norm": 0.0359308569018603, + "learning_rate": 7.325456498388831e-07, + "loss": 0.0001, + "step": 14505 + }, + { + "epoch": 0.9348456531546047, + "grad_norm": 0.00332544182917945, + "learning_rate": 7.318295739348372e-07, + "loss": 0.0, + "step": 14506 + }, + { + "epoch": 0.9349100986015338, + "grad_norm": 0.02133744018917271, + "learning_rate": 7.311134980307913e-07, + "loss": 0.0002, + "step": 14507 + }, + { + "epoch": 0.934974544048463, + "grad_norm": 0.0002502341843614646, + "learning_rate": 7.303974221267456e-07, + "loss": 0.0, + "step": 14508 + }, + { + "epoch": 0.9350389894953921, + "grad_norm": 0.14950297708202429, + "learning_rate": 7.296813462226996e-07, + "loss": 0.0018, + "step": 14509 + }, + { + "epoch": 0.9351034349423213, + "grad_norm": 0.009830572551912385, + "learning_rate": 7.289652703186538e-07, + "loss": 0.0, + "step": 14510 + }, + { + "epoch": 0.9351678803892505, + "grad_norm": 0.006458090662713216, + "learning_rate": 7.28249194414608e-07, + "loss": 0.0, + "step": 14511 + }, + { + "epoch": 0.9352323258361797, + "grad_norm": 0.03246659776362317, + "learning_rate": 7.275331185105622e-07, + "loss": 0.0, + "step": 14512 + }, + { + "epoch": 0.9352967712831088, + "grad_norm": 0.0006368826106888853, + "learning_rate": 7.268170426065163e-07, + "loss": 0.0, + "step": 14513 + }, + { + "epoch": 0.935361216730038, + "grad_norm": 0.19962360346299066, + "learning_rate": 7.261009667024706e-07, + "loss": 0.0023, + "step": 14514 + }, + { + "epoch": 0.9354256621769672, + "grad_norm": 0.0033006617114257246, + "learning_rate": 7.253848907984247e-07, + "loss": 0.0, + "step": 14515 + }, + { + "epoch": 0.9354901076238964, + "grad_norm": 1.7566595243507924e-05, + "learning_rate": 7.246688148943788e-07, + "loss": 0.0, + "step": 14516 + }, + { + "epoch": 0.9355545530708256, + "grad_norm": 0.00010102185496019302, + "learning_rate": 7.239527389903331e-07, + "loss": 0.0, + "step": 14517 + }, + { + "epoch": 0.9356189985177548, + "grad_norm": 0.012268373618383493, + "learning_rate": 7.232366630862872e-07, + "loss": 0.0, + "step": 14518 + }, + { + "epoch": 0.935683443964684, + "grad_norm": 0.005793788340712795, + "learning_rate": 7.225205871822414e-07, + "loss": 0.0, + "step": 14519 + }, + { + "epoch": 0.935747889411613, + "grad_norm": 0.0018801939476720873, + "learning_rate": 7.218045112781956e-07, + "loss": 0.0, + "step": 14520 + }, + { + "epoch": 0.9358123348585422, + "grad_norm": 0.0018678091905745316, + "learning_rate": 7.210884353741497e-07, + "loss": 0.0, + "step": 14521 + }, + { + "epoch": 0.9358767803054714, + "grad_norm": 0.03396311333269752, + "learning_rate": 7.203723594701038e-07, + "loss": 0.0, + "step": 14522 + }, + { + "epoch": 0.9359412257524006, + "grad_norm": 0.003039178851842719, + "learning_rate": 7.196562835660581e-07, + "loss": 0.0, + "step": 14523 + }, + { + "epoch": 0.9360056711993298, + "grad_norm": 0.0005585103175504581, + "learning_rate": 7.189402076620122e-07, + "loss": 0.0, + "step": 14524 + }, + { + "epoch": 0.9360701166462589, + "grad_norm": 0.001817373108679384, + "learning_rate": 7.182241317579663e-07, + "loss": 0.0, + "step": 14525 + }, + { + "epoch": 0.9361345620931881, + "grad_norm": 0.0021193474744381205, + "learning_rate": 7.175080558539206e-07, + "loss": 0.0, + "step": 14526 + }, + { + "epoch": 0.9361990075401173, + "grad_norm": 0.002530882383435495, + "learning_rate": 7.167919799498747e-07, + "loss": 0.0, + "step": 14527 + }, + { + "epoch": 0.9362634529870465, + "grad_norm": 0.000818128099790117, + "learning_rate": 7.160759040458289e-07, + "loss": 0.0, + "step": 14528 + }, + { + "epoch": 0.9363278984339757, + "grad_norm": 0.009652635970240627, + "learning_rate": 7.153598281417831e-07, + "loss": 0.0, + "step": 14529 + }, + { + "epoch": 0.9363923438809049, + "grad_norm": 3.3612020853590016e-05, + "learning_rate": 7.146437522377373e-07, + "loss": 0.0, + "step": 14530 + }, + { + "epoch": 0.9364567893278339, + "grad_norm": 0.0013493935115245926, + "learning_rate": 7.139276763336914e-07, + "loss": 0.0, + "step": 14531 + }, + { + "epoch": 0.9365212347747631, + "grad_norm": 0.0024142204573161376, + "learning_rate": 7.132116004296457e-07, + "loss": 0.0, + "step": 14532 + }, + { + "epoch": 0.9365856802216923, + "grad_norm": 0.2993254916879797, + "learning_rate": 7.124955245255997e-07, + "loss": 0.001, + "step": 14533 + }, + { + "epoch": 0.9366501256686215, + "grad_norm": 0.009092237905382923, + "learning_rate": 7.117794486215538e-07, + "loss": 0.0, + "step": 14534 + }, + { + "epoch": 0.9367145711155507, + "grad_norm": 0.006500430507406041, + "learning_rate": 7.110633727175081e-07, + "loss": 0.0, + "step": 14535 + }, + { + "epoch": 0.9367790165624799, + "grad_norm": 0.0011474112037967268, + "learning_rate": 7.103472968134622e-07, + "loss": 0.0, + "step": 14536 + }, + { + "epoch": 0.936843462009409, + "grad_norm": 0.0018082425170928157, + "learning_rate": 7.096312209094165e-07, + "loss": 0.0, + "step": 14537 + }, + { + "epoch": 0.9369079074563382, + "grad_norm": 0.04529145066339704, + "learning_rate": 7.089151450053706e-07, + "loss": 0.0001, + "step": 14538 + }, + { + "epoch": 0.9369723529032674, + "grad_norm": 0.00024786370618296017, + "learning_rate": 7.081990691013248e-07, + "loss": 0.0, + "step": 14539 + }, + { + "epoch": 0.9370367983501966, + "grad_norm": 0.0014020936697642379, + "learning_rate": 7.07482993197279e-07, + "loss": 0.0, + "step": 14540 + }, + { + "epoch": 0.9371012437971258, + "grad_norm": 0.0005209691458706825, + "learning_rate": 7.067669172932332e-07, + "loss": 0.0, + "step": 14541 + }, + { + "epoch": 0.9371656892440549, + "grad_norm": 0.0019354748003275726, + "learning_rate": 7.060508413891873e-07, + "loss": 0.0, + "step": 14542 + }, + { + "epoch": 0.937230134690984, + "grad_norm": 0.0009135498107519807, + "learning_rate": 7.053347654851416e-07, + "loss": 0.0, + "step": 14543 + }, + { + "epoch": 0.9372945801379132, + "grad_norm": 0.005087708128836168, + "learning_rate": 7.046186895810957e-07, + "loss": 0.0, + "step": 14544 + }, + { + "epoch": 0.9373590255848424, + "grad_norm": 0.002904169890841453, + "learning_rate": 7.039026136770497e-07, + "loss": 0.0, + "step": 14545 + }, + { + "epoch": 0.9374234710317716, + "grad_norm": 0.0005245864079457482, + "learning_rate": 7.03186537773004e-07, + "loss": 0.0, + "step": 14546 + }, + { + "epoch": 0.9374879164787008, + "grad_norm": 0.00027773400644070097, + "learning_rate": 7.024704618689581e-07, + "loss": 0.0, + "step": 14547 + }, + { + "epoch": 0.93755236192563, + "grad_norm": 0.01755280790804899, + "learning_rate": 7.017543859649123e-07, + "loss": 0.0, + "step": 14548 + }, + { + "epoch": 0.9376168073725591, + "grad_norm": 0.00025611269007827297, + "learning_rate": 7.010383100608665e-07, + "loss": 0.0, + "step": 14549 + }, + { + "epoch": 0.9376812528194883, + "grad_norm": 0.6315640272738314, + "learning_rate": 7.003222341568207e-07, + "loss": 0.0037, + "step": 14550 + }, + { + "epoch": 0.9377456982664175, + "grad_norm": 0.021922899573825504, + "learning_rate": 6.996061582527748e-07, + "loss": 0.0002, + "step": 14551 + }, + { + "epoch": 0.9378101437133467, + "grad_norm": 0.09865185582099384, + "learning_rate": 6.988900823487291e-07, + "loss": 0.0001, + "step": 14552 + }, + { + "epoch": 0.9378745891602758, + "grad_norm": 0.012986664406985956, + "learning_rate": 6.981740064446832e-07, + "loss": 0.0, + "step": 14553 + }, + { + "epoch": 0.937939034607205, + "grad_norm": 0.011523680815714164, + "learning_rate": 6.974579305406374e-07, + "loss": 0.0, + "step": 14554 + }, + { + "epoch": 0.9380034800541341, + "grad_norm": 0.0031393640358416347, + "learning_rate": 6.967418546365916e-07, + "loss": 0.0, + "step": 14555 + }, + { + "epoch": 0.9380679255010633, + "grad_norm": 0.019008252551270352, + "learning_rate": 6.960257787325458e-07, + "loss": 0.0001, + "step": 14556 + }, + { + "epoch": 0.9381323709479925, + "grad_norm": 0.0023275455744178716, + "learning_rate": 6.953097028284998e-07, + "loss": 0.0, + "step": 14557 + }, + { + "epoch": 0.9381968163949217, + "grad_norm": 0.005539228100833343, + "learning_rate": 6.945936269244541e-07, + "loss": 0.0, + "step": 14558 + }, + { + "epoch": 0.9382612618418509, + "grad_norm": 0.03702531724548152, + "learning_rate": 6.938775510204082e-07, + "loss": 0.0001, + "step": 14559 + }, + { + "epoch": 0.9383257072887801, + "grad_norm": 0.0015204518635009605, + "learning_rate": 6.931614751163623e-07, + "loss": 0.0, + "step": 14560 + }, + { + "epoch": 0.9383901527357092, + "grad_norm": 0.02489419878730623, + "learning_rate": 6.924453992123166e-07, + "loss": 0.0001, + "step": 14561 + }, + { + "epoch": 0.9384545981826384, + "grad_norm": 0.001340772269739388, + "learning_rate": 6.917293233082707e-07, + "loss": 0.0, + "step": 14562 + }, + { + "epoch": 0.9385190436295676, + "grad_norm": 0.00010016378777309323, + "learning_rate": 6.910132474042249e-07, + "loss": 0.0, + "step": 14563 + }, + { + "epoch": 0.9385834890764967, + "grad_norm": 0.0013275941617353507, + "learning_rate": 6.902971715001791e-07, + "loss": 0.0, + "step": 14564 + }, + { + "epoch": 0.9386479345234259, + "grad_norm": 0.22654219240400406, + "learning_rate": 6.895810955961333e-07, + "loss": 0.001, + "step": 14565 + }, + { + "epoch": 0.9387123799703551, + "grad_norm": 0.0022122313042215727, + "learning_rate": 6.888650196920874e-07, + "loss": 0.0, + "step": 14566 + }, + { + "epoch": 0.9387768254172842, + "grad_norm": 0.004470017713398727, + "learning_rate": 6.881489437880417e-07, + "loss": 0.0, + "step": 14567 + }, + { + "epoch": 0.9388412708642134, + "grad_norm": 0.0007068921499948373, + "learning_rate": 6.874328678839958e-07, + "loss": 0.0, + "step": 14568 + }, + { + "epoch": 0.9389057163111426, + "grad_norm": 0.12361918294424723, + "learning_rate": 6.867167919799498e-07, + "loss": 0.0, + "step": 14569 + }, + { + "epoch": 0.9389701617580718, + "grad_norm": 0.0011432207910996807, + "learning_rate": 6.860007160759041e-07, + "loss": 0.0, + "step": 14570 + }, + { + "epoch": 0.939034607205001, + "grad_norm": 9.127357167783924e-05, + "learning_rate": 6.852846401718582e-07, + "loss": 0.0, + "step": 14571 + }, + { + "epoch": 0.9390990526519302, + "grad_norm": 0.02928841687336939, + "learning_rate": 6.845685642678125e-07, + "loss": 0.0001, + "step": 14572 + }, + { + "epoch": 0.9391634980988594, + "grad_norm": 0.002151597050746372, + "learning_rate": 6.838524883637666e-07, + "loss": 0.0, + "step": 14573 + }, + { + "epoch": 0.9392279435457885, + "grad_norm": 0.0027989000811481177, + "learning_rate": 6.831364124597208e-07, + "loss": 0.0, + "step": 14574 + }, + { + "epoch": 0.9392923889927177, + "grad_norm": 0.000564584986700068, + "learning_rate": 6.82420336555675e-07, + "loss": 0.0, + "step": 14575 + }, + { + "epoch": 0.9393568344396468, + "grad_norm": 0.001180073223414386, + "learning_rate": 6.817042606516292e-07, + "loss": 0.0, + "step": 14576 + }, + { + "epoch": 0.939421279886576, + "grad_norm": 0.005913909066126716, + "learning_rate": 6.809881847475833e-07, + "loss": 0.0, + "step": 14577 + }, + { + "epoch": 0.9394857253335052, + "grad_norm": 0.0006904970453639949, + "learning_rate": 6.802721088435376e-07, + "loss": 0.0, + "step": 14578 + }, + { + "epoch": 0.9395501707804343, + "grad_norm": 0.0004785256442391764, + "learning_rate": 6.795560329394917e-07, + "loss": 0.0, + "step": 14579 + }, + { + "epoch": 0.9396146162273635, + "grad_norm": 0.0065965709683497, + "learning_rate": 6.788399570354457e-07, + "loss": 0.0, + "step": 14580 + }, + { + "epoch": 0.9396790616742927, + "grad_norm": 0.03483474602814386, + "learning_rate": 6.781238811314e-07, + "loss": 0.0001, + "step": 14581 + }, + { + "epoch": 0.9397435071212219, + "grad_norm": 0.0013426242906063755, + "learning_rate": 6.774078052273541e-07, + "loss": 0.0, + "step": 14582 + }, + { + "epoch": 0.9398079525681511, + "grad_norm": 0.11016212508584611, + "learning_rate": 6.766917293233083e-07, + "loss": 0.0003, + "step": 14583 + }, + { + "epoch": 0.9398723980150803, + "grad_norm": 0.0025457227936089575, + "learning_rate": 6.759756534192625e-07, + "loss": 0.0, + "step": 14584 + }, + { + "epoch": 0.9399368434620095, + "grad_norm": 0.0024229772528888424, + "learning_rate": 6.752595775152167e-07, + "loss": 0.0, + "step": 14585 + }, + { + "epoch": 0.9400012889089386, + "grad_norm": 0.030600970769373386, + "learning_rate": 6.745435016111708e-07, + "loss": 0.0, + "step": 14586 + }, + { + "epoch": 0.9400657343558677, + "grad_norm": 0.0005166095727222362, + "learning_rate": 6.738274257071251e-07, + "loss": 0.0, + "step": 14587 + }, + { + "epoch": 0.9401301798027969, + "grad_norm": 0.003762972511217016, + "learning_rate": 6.731113498030792e-07, + "loss": 0.0, + "step": 14588 + }, + { + "epoch": 0.9401946252497261, + "grad_norm": 0.09675340539245941, + "learning_rate": 6.723952738990333e-07, + "loss": 0.0001, + "step": 14589 + }, + { + "epoch": 0.9402590706966553, + "grad_norm": 0.4073272873244558, + "learning_rate": 6.716791979949876e-07, + "loss": 0.0027, + "step": 14590 + }, + { + "epoch": 0.9403235161435844, + "grad_norm": 0.00020267804633695348, + "learning_rate": 6.709631220909417e-07, + "loss": 0.0, + "step": 14591 + }, + { + "epoch": 0.9403879615905136, + "grad_norm": 0.004281621504404462, + "learning_rate": 6.702470461868958e-07, + "loss": 0.0, + "step": 14592 + }, + { + "epoch": 0.9404524070374428, + "grad_norm": 0.0028956385487686137, + "learning_rate": 6.6953097028285e-07, + "loss": 0.0, + "step": 14593 + }, + { + "epoch": 0.940516852484372, + "grad_norm": 0.2428985202888916, + "learning_rate": 6.688148943788042e-07, + "loss": 0.0008, + "step": 14594 + }, + { + "epoch": 0.9405812979313012, + "grad_norm": 0.008918584360090404, + "learning_rate": 6.680988184747583e-07, + "loss": 0.0, + "step": 14595 + }, + { + "epoch": 0.9406457433782304, + "grad_norm": 0.0010786200917876955, + "learning_rate": 6.673827425707126e-07, + "loss": 0.0, + "step": 14596 + }, + { + "epoch": 0.9407101888251596, + "grad_norm": 0.001239924009437483, + "learning_rate": 6.666666666666667e-07, + "loss": 0.0, + "step": 14597 + }, + { + "epoch": 0.9407746342720886, + "grad_norm": 0.17303675628424575, + "learning_rate": 6.659505907626208e-07, + "loss": 0.0006, + "step": 14598 + }, + { + "epoch": 0.9408390797190178, + "grad_norm": 0.0022172739171691714, + "learning_rate": 6.652345148585751e-07, + "loss": 0.0, + "step": 14599 + }, + { + "epoch": 0.940903525165947, + "grad_norm": 0.0009180732993219411, + "learning_rate": 6.645184389545292e-07, + "loss": 0.0, + "step": 14600 + }, + { + "epoch": 0.9409679706128762, + "grad_norm": 0.218649577182511, + "learning_rate": 6.638023630504834e-07, + "loss": 0.0006, + "step": 14601 + }, + { + "epoch": 0.9410324160598054, + "grad_norm": 0.001048798207710388, + "learning_rate": 6.630862871464376e-07, + "loss": 0.0, + "step": 14602 + }, + { + "epoch": 0.9410968615067345, + "grad_norm": 0.0015205476430884322, + "learning_rate": 6.623702112423918e-07, + "loss": 0.0, + "step": 14603 + }, + { + "epoch": 0.9411613069536637, + "grad_norm": 0.022077528704297078, + "learning_rate": 6.616541353383458e-07, + "loss": 0.0002, + "step": 14604 + }, + { + "epoch": 0.9412257524005929, + "grad_norm": 0.0004027494800950605, + "learning_rate": 6.609380594343001e-07, + "loss": 0.0, + "step": 14605 + }, + { + "epoch": 0.9412901978475221, + "grad_norm": 0.2511364171702047, + "learning_rate": 6.602219835302542e-07, + "loss": 0.0009, + "step": 14606 + }, + { + "epoch": 0.9413546432944513, + "grad_norm": 0.7542887295599734, + "learning_rate": 6.595059076262083e-07, + "loss": 0.0064, + "step": 14607 + }, + { + "epoch": 0.9414190887413805, + "grad_norm": 0.004123371457966273, + "learning_rate": 6.587898317221626e-07, + "loss": 0.0, + "step": 14608 + }, + { + "epoch": 0.9414835341883095, + "grad_norm": 0.002666736267158426, + "learning_rate": 6.580737558181167e-07, + "loss": 0.0, + "step": 14609 + }, + { + "epoch": 0.9415479796352387, + "grad_norm": 0.3278604758169207, + "learning_rate": 6.57357679914071e-07, + "loss": 0.0023, + "step": 14610 + }, + { + "epoch": 0.9416124250821679, + "grad_norm": 0.0035760959578561863, + "learning_rate": 6.566416040100251e-07, + "loss": 0.0, + "step": 14611 + }, + { + "epoch": 0.9416768705290971, + "grad_norm": 0.11170839315038376, + "learning_rate": 6.559255281059793e-07, + "loss": 0.0017, + "step": 14612 + }, + { + "epoch": 0.9417413159760263, + "grad_norm": 0.15507088651507142, + "learning_rate": 6.552094522019335e-07, + "loss": 0.0003, + "step": 14613 + }, + { + "epoch": 0.9418057614229555, + "grad_norm": 0.016109021664184563, + "learning_rate": 6.544933762978877e-07, + "loss": 0.0001, + "step": 14614 + }, + { + "epoch": 0.9418702068698847, + "grad_norm": 0.027611918240110323, + "learning_rate": 6.537773003938418e-07, + "loss": 0.0002, + "step": 14615 + }, + { + "epoch": 0.9419346523168138, + "grad_norm": 0.047448985239203184, + "learning_rate": 6.530612244897961e-07, + "loss": 0.0005, + "step": 14616 + }, + { + "epoch": 0.941999097763743, + "grad_norm": 0.0013488067911044182, + "learning_rate": 6.523451485857501e-07, + "loss": 0.0, + "step": 14617 + }, + { + "epoch": 0.9420635432106722, + "grad_norm": 0.011817455589975698, + "learning_rate": 6.516290726817042e-07, + "loss": 0.0, + "step": 14618 + }, + { + "epoch": 0.9421279886576014, + "grad_norm": 0.0021414563002524058, + "learning_rate": 6.509129967776585e-07, + "loss": 0.0, + "step": 14619 + }, + { + "epoch": 0.9421924341045305, + "grad_norm": 0.021960025706933405, + "learning_rate": 6.501969208736126e-07, + "loss": 0.0, + "step": 14620 + }, + { + "epoch": 0.9422568795514596, + "grad_norm": 0.03452706750454334, + "learning_rate": 6.494808449695668e-07, + "loss": 0.0001, + "step": 14621 + }, + { + "epoch": 0.9423213249983888, + "grad_norm": 0.012501871096559888, + "learning_rate": 6.48764769065521e-07, + "loss": 0.0, + "step": 14622 + }, + { + "epoch": 0.942385770445318, + "grad_norm": 0.0062451286698885266, + "learning_rate": 6.480486931614752e-07, + "loss": 0.0, + "step": 14623 + }, + { + "epoch": 0.9424502158922472, + "grad_norm": 0.052520878296810417, + "learning_rate": 6.473326172574293e-07, + "loss": 0.0001, + "step": 14624 + }, + { + "epoch": 0.9425146613391764, + "grad_norm": 0.002352564833617126, + "learning_rate": 6.466165413533836e-07, + "loss": 0.0, + "step": 14625 + }, + { + "epoch": 0.9425791067861056, + "grad_norm": 0.0006536874626899899, + "learning_rate": 6.459004654493377e-07, + "loss": 0.0, + "step": 14626 + }, + { + "epoch": 0.9426435522330348, + "grad_norm": 0.0061842458013730735, + "learning_rate": 6.451843895452919e-07, + "loss": 0.0, + "step": 14627 + }, + { + "epoch": 0.9427079976799639, + "grad_norm": 0.16732236779005036, + "learning_rate": 6.44468313641246e-07, + "loss": 0.0007, + "step": 14628 + }, + { + "epoch": 0.9427724431268931, + "grad_norm": 0.0007157079141550446, + "learning_rate": 6.437522377372002e-07, + "loss": 0.0, + "step": 14629 + }, + { + "epoch": 0.9428368885738223, + "grad_norm": 0.0006728257843449776, + "learning_rate": 6.430361618331543e-07, + "loss": 0.0, + "step": 14630 + }, + { + "epoch": 0.9429013340207514, + "grad_norm": 0.0019405160432858116, + "learning_rate": 6.423200859291086e-07, + "loss": 0.0, + "step": 14631 + }, + { + "epoch": 0.9429657794676806, + "grad_norm": 0.016156358466427594, + "learning_rate": 6.416040100250627e-07, + "loss": 0.0001, + "step": 14632 + }, + { + "epoch": 0.9430302249146097, + "grad_norm": 0.0034454949082682366, + "learning_rate": 6.408879341210168e-07, + "loss": 0.0, + "step": 14633 + }, + { + "epoch": 0.9430946703615389, + "grad_norm": 0.0012120870879107008, + "learning_rate": 6.401718582169711e-07, + "loss": 0.0, + "step": 14634 + }, + { + "epoch": 0.9431591158084681, + "grad_norm": 0.0002194363515000686, + "learning_rate": 6.394557823129252e-07, + "loss": 0.0, + "step": 14635 + }, + { + "epoch": 0.9432235612553973, + "grad_norm": 0.29232680880760875, + "learning_rate": 6.387397064088794e-07, + "loss": 0.0026, + "step": 14636 + }, + { + "epoch": 0.9432880067023265, + "grad_norm": 0.0003396572970585941, + "learning_rate": 6.380236305048336e-07, + "loss": 0.0, + "step": 14637 + }, + { + "epoch": 0.9433524521492557, + "grad_norm": 1.5725421922903065, + "learning_rate": 6.373075546007878e-07, + "loss": 0.0148, + "step": 14638 + }, + { + "epoch": 0.9434168975961849, + "grad_norm": 0.006618061930661491, + "learning_rate": 6.365914786967419e-07, + "loss": 0.0, + "step": 14639 + }, + { + "epoch": 0.943481343043114, + "grad_norm": 1.0823258091984502, + "learning_rate": 6.358754027926962e-07, + "loss": 0.0037, + "step": 14640 + }, + { + "epoch": 0.9435457884900432, + "grad_norm": 0.037367184014392985, + "learning_rate": 6.351593268886502e-07, + "loss": 0.0003, + "step": 14641 + }, + { + "epoch": 0.9436102339369723, + "grad_norm": 0.00023618925437843773, + "learning_rate": 6.344432509846043e-07, + "loss": 0.0, + "step": 14642 + }, + { + "epoch": 0.9436746793839015, + "grad_norm": 0.019535478491815153, + "learning_rate": 6.337271750805586e-07, + "loss": 0.0001, + "step": 14643 + }, + { + "epoch": 0.9437391248308307, + "grad_norm": 0.23517341519437462, + "learning_rate": 6.330110991765127e-07, + "loss": 0.0003, + "step": 14644 + }, + { + "epoch": 0.9438035702777599, + "grad_norm": 0.0012437920009850324, + "learning_rate": 6.32295023272467e-07, + "loss": 0.0, + "step": 14645 + }, + { + "epoch": 0.943868015724689, + "grad_norm": 0.005780556663777061, + "learning_rate": 6.315789473684211e-07, + "loss": 0.0, + "step": 14646 + }, + { + "epoch": 0.9439324611716182, + "grad_norm": 0.027663328196446874, + "learning_rate": 6.308628714643753e-07, + "loss": 0.0001, + "step": 14647 + }, + { + "epoch": 0.9439969066185474, + "grad_norm": 0.03173487718373573, + "learning_rate": 6.301467955603295e-07, + "loss": 0.0003, + "step": 14648 + }, + { + "epoch": 0.9440613520654766, + "grad_norm": 0.0038316525115960707, + "learning_rate": 6.294307196562837e-07, + "loss": 0.0, + "step": 14649 + }, + { + "epoch": 0.9441257975124058, + "grad_norm": 0.007783604901538253, + "learning_rate": 6.287146437522378e-07, + "loss": 0.0, + "step": 14650 + }, + { + "epoch": 0.944190242959335, + "grad_norm": 0.2807085202444718, + "learning_rate": 6.279985678481921e-07, + "loss": 0.0013, + "step": 14651 + }, + { + "epoch": 0.9442546884062641, + "grad_norm": 3.8764283243383565e-05, + "learning_rate": 6.272824919441461e-07, + "loss": 0.0, + "step": 14652 + }, + { + "epoch": 0.9443191338531933, + "grad_norm": 0.33581027127862345, + "learning_rate": 6.265664160401002e-07, + "loss": 0.0022, + "step": 14653 + }, + { + "epoch": 0.9443835793001224, + "grad_norm": 0.011455286908056026, + "learning_rate": 6.258503401360545e-07, + "loss": 0.0, + "step": 14654 + }, + { + "epoch": 0.9444480247470516, + "grad_norm": 0.01142539022155588, + "learning_rate": 6.251342642320086e-07, + "loss": 0.0, + "step": 14655 + }, + { + "epoch": 0.9445124701939808, + "grad_norm": 0.004852478793097276, + "learning_rate": 6.244181883279628e-07, + "loss": 0.0, + "step": 14656 + }, + { + "epoch": 0.94457691564091, + "grad_norm": 0.0019989295768203233, + "learning_rate": 6.23702112423917e-07, + "loss": 0.0, + "step": 14657 + }, + { + "epoch": 0.9446413610878391, + "grad_norm": 0.000900061966794239, + "learning_rate": 6.229860365198712e-07, + "loss": 0.0, + "step": 14658 + }, + { + "epoch": 0.9447058065347683, + "grad_norm": 0.0002101536116015678, + "learning_rate": 6.222699606158254e-07, + "loss": 0.0, + "step": 14659 + }, + { + "epoch": 0.9447702519816975, + "grad_norm": 0.008588863535994664, + "learning_rate": 6.215538847117795e-07, + "loss": 0.0, + "step": 14660 + }, + { + "epoch": 0.9448346974286267, + "grad_norm": 0.04302587423745892, + "learning_rate": 6.208378088077337e-07, + "loss": 0.0001, + "step": 14661 + }, + { + "epoch": 0.9448991428755559, + "grad_norm": 0.005371981311745458, + "learning_rate": 6.201217329036879e-07, + "loss": 0.0001, + "step": 14662 + }, + { + "epoch": 0.9449635883224851, + "grad_norm": 0.018151983512062528, + "learning_rate": 6.19405656999642e-07, + "loss": 0.0, + "step": 14663 + }, + { + "epoch": 0.9450280337694142, + "grad_norm": 0.0012138588332065569, + "learning_rate": 6.186895810955961e-07, + "loss": 0.0, + "step": 14664 + }, + { + "epoch": 0.9450924792163433, + "grad_norm": 0.001995413386031814, + "learning_rate": 6.179735051915503e-07, + "loss": 0.0, + "step": 14665 + }, + { + "epoch": 0.9451569246632725, + "grad_norm": 0.0007131550059482363, + "learning_rate": 6.172574292875045e-07, + "loss": 0.0, + "step": 14666 + }, + { + "epoch": 0.9452213701102017, + "grad_norm": 0.44967737421083515, + "learning_rate": 6.165413533834587e-07, + "loss": 0.0004, + "step": 14667 + }, + { + "epoch": 0.9452858155571309, + "grad_norm": 0.007368867849144508, + "learning_rate": 6.158252774794129e-07, + "loss": 0.0, + "step": 14668 + }, + { + "epoch": 0.94535026100406, + "grad_norm": 0.0074857147827597816, + "learning_rate": 6.15109201575367e-07, + "loss": 0.0, + "step": 14669 + }, + { + "epoch": 0.9454147064509892, + "grad_norm": 0.00023285440752826664, + "learning_rate": 6.143931256713212e-07, + "loss": 0.0, + "step": 14670 + }, + { + "epoch": 0.9454791518979184, + "grad_norm": 0.000904988311691927, + "learning_rate": 6.136770497672754e-07, + "loss": 0.0, + "step": 14671 + }, + { + "epoch": 0.9455435973448476, + "grad_norm": 0.12923123113742113, + "learning_rate": 6.129609738632296e-07, + "loss": 0.0003, + "step": 14672 + }, + { + "epoch": 0.9456080427917768, + "grad_norm": 0.0025449127476049856, + "learning_rate": 6.122448979591837e-07, + "loss": 0.0, + "step": 14673 + }, + { + "epoch": 0.945672488238706, + "grad_norm": 0.0006296391375895605, + "learning_rate": 6.115288220551379e-07, + "loss": 0.0, + "step": 14674 + }, + { + "epoch": 0.9457369336856352, + "grad_norm": 0.1292198729783402, + "learning_rate": 6.108127461510921e-07, + "loss": 0.0004, + "step": 14675 + }, + { + "epoch": 0.9458013791325642, + "grad_norm": 0.001215412270731588, + "learning_rate": 6.100966702470462e-07, + "loss": 0.0, + "step": 14676 + }, + { + "epoch": 0.9458658245794934, + "grad_norm": 0.001039974299197542, + "learning_rate": 6.093805943430004e-07, + "loss": 0.0, + "step": 14677 + }, + { + "epoch": 0.9459302700264226, + "grad_norm": 0.6429765156199316, + "learning_rate": 6.086645184389546e-07, + "loss": 0.0003, + "step": 14678 + }, + { + "epoch": 0.9459947154733518, + "grad_norm": 0.0008230507368414166, + "learning_rate": 6.079484425349087e-07, + "loss": 0.0, + "step": 14679 + }, + { + "epoch": 0.946059160920281, + "grad_norm": 0.00024397614750560574, + "learning_rate": 6.072323666308629e-07, + "loss": 0.0, + "step": 14680 + }, + { + "epoch": 0.9461236063672102, + "grad_norm": 0.007197154311858535, + "learning_rate": 6.065162907268171e-07, + "loss": 0.0, + "step": 14681 + }, + { + "epoch": 0.9461880518141393, + "grad_norm": 0.1378045088749925, + "learning_rate": 6.058002148227712e-07, + "loss": 0.0002, + "step": 14682 + }, + { + "epoch": 0.9462524972610685, + "grad_norm": 0.0013014266775007563, + "learning_rate": 6.050841389187254e-07, + "loss": 0.0, + "step": 14683 + }, + { + "epoch": 0.9463169427079977, + "grad_norm": 0.000253774893202994, + "learning_rate": 6.043680630146796e-07, + "loss": 0.0, + "step": 14684 + }, + { + "epoch": 0.9463813881549269, + "grad_norm": 0.0038829678671018237, + "learning_rate": 6.036519871106338e-07, + "loss": 0.0, + "step": 14685 + }, + { + "epoch": 0.9464458336018561, + "grad_norm": 0.00037332670641363337, + "learning_rate": 6.02935911206588e-07, + "loss": 0.0, + "step": 14686 + }, + { + "epoch": 0.9465102790487852, + "grad_norm": 0.00011434537095249858, + "learning_rate": 6.022198353025422e-07, + "loss": 0.0, + "step": 14687 + }, + { + "epoch": 0.9465747244957143, + "grad_norm": 0.022522121263546153, + "learning_rate": 6.015037593984962e-07, + "loss": 0.0001, + "step": 14688 + }, + { + "epoch": 0.9466391699426435, + "grad_norm": 0.08534634801181562, + "learning_rate": 6.007876834944504e-07, + "loss": 0.0002, + "step": 14689 + }, + { + "epoch": 0.9467036153895727, + "grad_norm": 0.008271676467209783, + "learning_rate": 6.000716075904046e-07, + "loss": 0.0, + "step": 14690 + }, + { + "epoch": 0.9467680608365019, + "grad_norm": 0.01073189738361365, + "learning_rate": 5.993555316863588e-07, + "loss": 0.0, + "step": 14691 + }, + { + "epoch": 0.9468325062834311, + "grad_norm": 0.004863051197559781, + "learning_rate": 5.986394557823129e-07, + "loss": 0.0, + "step": 14692 + }, + { + "epoch": 0.9468969517303603, + "grad_norm": 0.010054763393330567, + "learning_rate": 5.979233798782671e-07, + "loss": 0.0, + "step": 14693 + }, + { + "epoch": 0.9469613971772894, + "grad_norm": 0.002437897019058872, + "learning_rate": 5.972073039742213e-07, + "loss": 0.0, + "step": 14694 + }, + { + "epoch": 0.9470258426242186, + "grad_norm": 0.013852156443718828, + "learning_rate": 5.964912280701755e-07, + "loss": 0.0001, + "step": 14695 + }, + { + "epoch": 0.9470902880711478, + "grad_norm": 0.0003029941248519201, + "learning_rate": 5.957751521661297e-07, + "loss": 0.0, + "step": 14696 + }, + { + "epoch": 0.947154733518077, + "grad_norm": 0.002594353409038197, + "learning_rate": 5.950590762620839e-07, + "loss": 0.0, + "step": 14697 + }, + { + "epoch": 0.9472191789650061, + "grad_norm": 0.2996835992179366, + "learning_rate": 5.94343000358038e-07, + "loss": 0.0019, + "step": 14698 + }, + { + "epoch": 0.9472836244119353, + "grad_norm": 0.011269491880908408, + "learning_rate": 5.936269244539922e-07, + "loss": 0.0, + "step": 14699 + }, + { + "epoch": 0.9473480698588644, + "grad_norm": 0.0009420079780451361, + "learning_rate": 5.929108485499463e-07, + "loss": 0.0, + "step": 14700 + }, + { + "epoch": 0.9474125153057936, + "grad_norm": 0.0017608986153713367, + "learning_rate": 5.921947726459005e-07, + "loss": 0.0, + "step": 14701 + }, + { + "epoch": 0.9474769607527228, + "grad_norm": 0.0013094455840437949, + "learning_rate": 5.914786967418547e-07, + "loss": 0.0, + "step": 14702 + }, + { + "epoch": 0.947541406199652, + "grad_norm": 0.002251141911154188, + "learning_rate": 5.907626208378089e-07, + "loss": 0.0, + "step": 14703 + }, + { + "epoch": 0.9476058516465812, + "grad_norm": 0.011790262267177658, + "learning_rate": 5.90046544933763e-07, + "loss": 0.0, + "step": 14704 + }, + { + "epoch": 0.9476702970935104, + "grad_norm": 0.002285259579575094, + "learning_rate": 5.893304690297172e-07, + "loss": 0.0, + "step": 14705 + }, + { + "epoch": 0.9477347425404395, + "grad_norm": 0.0006253211658536153, + "learning_rate": 5.886143931256714e-07, + "loss": 0.0, + "step": 14706 + }, + { + "epoch": 0.9477991879873687, + "grad_norm": 0.005398306974137369, + "learning_rate": 5.878983172216255e-07, + "loss": 0.0, + "step": 14707 + }, + { + "epoch": 0.9478636334342979, + "grad_norm": 0.0002294010027379783, + "learning_rate": 5.871822413175797e-07, + "loss": 0.0, + "step": 14708 + }, + { + "epoch": 0.947928078881227, + "grad_norm": 0.00025866017801789347, + "learning_rate": 5.864661654135339e-07, + "loss": 0.0, + "step": 14709 + }, + { + "epoch": 0.9479925243281562, + "grad_norm": 0.0005619206255567925, + "learning_rate": 5.857500895094881e-07, + "loss": 0.0, + "step": 14710 + }, + { + "epoch": 0.9480569697750854, + "grad_norm": 0.19647921181904326, + "learning_rate": 5.850340136054423e-07, + "loss": 0.0021, + "step": 14711 + }, + { + "epoch": 0.9481214152220145, + "grad_norm": 0.08082751242960239, + "learning_rate": 5.843179377013964e-07, + "loss": 0.0001, + "step": 14712 + }, + { + "epoch": 0.9481858606689437, + "grad_norm": 0.0014319050676462684, + "learning_rate": 5.836018617973506e-07, + "loss": 0.0, + "step": 14713 + }, + { + "epoch": 0.9482503061158729, + "grad_norm": 0.0021555817053756725, + "learning_rate": 5.828857858933047e-07, + "loss": 0.0, + "step": 14714 + }, + { + "epoch": 0.9483147515628021, + "grad_norm": 0.0019083600342925365, + "learning_rate": 5.821697099892589e-07, + "loss": 0.0, + "step": 14715 + }, + { + "epoch": 0.9483791970097313, + "grad_norm": 0.41414290673102844, + "learning_rate": 5.814536340852131e-07, + "loss": 0.003, + "step": 14716 + }, + { + "epoch": 0.9484436424566605, + "grad_norm": 0.008211531723705947, + "learning_rate": 5.807375581811672e-07, + "loss": 0.0, + "step": 14717 + }, + { + "epoch": 0.9485080879035896, + "grad_norm": 0.00016811693723598264, + "learning_rate": 5.800214822771214e-07, + "loss": 0.0, + "step": 14718 + }, + { + "epoch": 0.9485725333505188, + "grad_norm": 0.0008967912893646619, + "learning_rate": 5.793054063730756e-07, + "loss": 0.0, + "step": 14719 + }, + { + "epoch": 0.9486369787974479, + "grad_norm": 0.006830654593558417, + "learning_rate": 5.785893304690298e-07, + "loss": 0.0, + "step": 14720 + }, + { + "epoch": 0.9487014242443771, + "grad_norm": 0.02299987371644241, + "learning_rate": 5.77873254564984e-07, + "loss": 0.0001, + "step": 14721 + }, + { + "epoch": 0.9487658696913063, + "grad_norm": 0.023379648106420153, + "learning_rate": 5.771571786609382e-07, + "loss": 0.0002, + "step": 14722 + }, + { + "epoch": 0.9488303151382355, + "grad_norm": 0.011996984555252728, + "learning_rate": 5.764411027568922e-07, + "loss": 0.0001, + "step": 14723 + }, + { + "epoch": 0.9488947605851646, + "grad_norm": 0.0010316172728567184, + "learning_rate": 5.757250268528464e-07, + "loss": 0.0, + "step": 14724 + }, + { + "epoch": 0.9489592060320938, + "grad_norm": 0.0006875478425085163, + "learning_rate": 5.750089509488006e-07, + "loss": 0.0, + "step": 14725 + }, + { + "epoch": 0.949023651479023, + "grad_norm": 0.0007850438860483339, + "learning_rate": 5.742928750447548e-07, + "loss": 0.0, + "step": 14726 + }, + { + "epoch": 0.9490880969259522, + "grad_norm": 0.1185001488226447, + "learning_rate": 5.735767991407089e-07, + "loss": 0.0001, + "step": 14727 + }, + { + "epoch": 0.9491525423728814, + "grad_norm": 0.00042052385974219097, + "learning_rate": 5.728607232366631e-07, + "loss": 0.0, + "step": 14728 + }, + { + "epoch": 0.9492169878198106, + "grad_norm": 0.023423001652901322, + "learning_rate": 5.721446473326173e-07, + "loss": 0.0002, + "step": 14729 + }, + { + "epoch": 0.9492814332667397, + "grad_norm": 0.00019013561235615476, + "learning_rate": 5.714285714285715e-07, + "loss": 0.0, + "step": 14730 + }, + { + "epoch": 0.9493458787136689, + "grad_norm": 0.008218830548370379, + "learning_rate": 5.707124955245257e-07, + "loss": 0.0, + "step": 14731 + }, + { + "epoch": 0.949410324160598, + "grad_norm": 0.006414075984919605, + "learning_rate": 5.699964196204799e-07, + "loss": 0.0, + "step": 14732 + }, + { + "epoch": 0.9494747696075272, + "grad_norm": 0.0007078383412863962, + "learning_rate": 5.69280343716434e-07, + "loss": 0.0, + "step": 14733 + }, + { + "epoch": 0.9495392150544564, + "grad_norm": 0.030856944468306394, + "learning_rate": 5.685642678123882e-07, + "loss": 0.0, + "step": 14734 + }, + { + "epoch": 0.9496036605013856, + "grad_norm": 0.0069689116245061295, + "learning_rate": 5.678481919083424e-07, + "loss": 0.0, + "step": 14735 + }, + { + "epoch": 0.9496681059483147, + "grad_norm": 0.005919522472079522, + "learning_rate": 5.671321160042964e-07, + "loss": 0.0, + "step": 14736 + }, + { + "epoch": 0.9497325513952439, + "grad_norm": 0.0028393379796273445, + "learning_rate": 5.664160401002506e-07, + "loss": 0.0, + "step": 14737 + }, + { + "epoch": 0.9497969968421731, + "grad_norm": 0.036288034329051345, + "learning_rate": 5.656999641962048e-07, + "loss": 0.0001, + "step": 14738 + }, + { + "epoch": 0.9498614422891023, + "grad_norm": 0.0012907005085677104, + "learning_rate": 5.64983888292159e-07, + "loss": 0.0, + "step": 14739 + }, + { + "epoch": 0.9499258877360315, + "grad_norm": 0.01761313683527187, + "learning_rate": 5.642678123881132e-07, + "loss": 0.0001, + "step": 14740 + }, + { + "epoch": 0.9499903331829607, + "grad_norm": 0.009967518907129018, + "learning_rate": 5.635517364840674e-07, + "loss": 0.0, + "step": 14741 + }, + { + "epoch": 0.9500547786298899, + "grad_norm": 0.0018596338611846243, + "learning_rate": 5.628356605800215e-07, + "loss": 0.0, + "step": 14742 + }, + { + "epoch": 0.9501192240768189, + "grad_norm": 0.004828228626011528, + "learning_rate": 5.621195846759757e-07, + "loss": 0.0, + "step": 14743 + }, + { + "epoch": 0.9501836695237481, + "grad_norm": 0.006116660387896934, + "learning_rate": 5.614035087719299e-07, + "loss": 0.0, + "step": 14744 + }, + { + "epoch": 0.9502481149706773, + "grad_norm": 0.00060880213351492, + "learning_rate": 5.606874328678841e-07, + "loss": 0.0, + "step": 14745 + }, + { + "epoch": 0.9503125604176065, + "grad_norm": 0.14036646230097022, + "learning_rate": 5.599713569638382e-07, + "loss": 0.0002, + "step": 14746 + }, + { + "epoch": 0.9503770058645357, + "grad_norm": 0.3058433494413237, + "learning_rate": 5.592552810597923e-07, + "loss": 0.0008, + "step": 14747 + }, + { + "epoch": 0.9504414513114648, + "grad_norm": 0.024473252072277532, + "learning_rate": 5.585392051557465e-07, + "loss": 0.0001, + "step": 14748 + }, + { + "epoch": 0.950505896758394, + "grad_norm": 0.0001964074795710888, + "learning_rate": 5.578231292517007e-07, + "loss": 0.0, + "step": 14749 + }, + { + "epoch": 0.9505703422053232, + "grad_norm": 0.0008308660348116496, + "learning_rate": 5.571070533476549e-07, + "loss": 0.0, + "step": 14750 + }, + { + "epoch": 0.9506347876522524, + "grad_norm": 0.0006085072018791508, + "learning_rate": 5.563909774436091e-07, + "loss": 0.0, + "step": 14751 + }, + { + "epoch": 0.9506992330991816, + "grad_norm": 0.0014048571143467556, + "learning_rate": 5.556749015395632e-07, + "loss": 0.0, + "step": 14752 + }, + { + "epoch": 0.9507636785461108, + "grad_norm": 0.000805794097092772, + "learning_rate": 5.549588256355174e-07, + "loss": 0.0, + "step": 14753 + }, + { + "epoch": 0.9508281239930398, + "grad_norm": 0.00016851595053399371, + "learning_rate": 5.542427497314716e-07, + "loss": 0.0, + "step": 14754 + }, + { + "epoch": 0.950892569439969, + "grad_norm": 0.30476000730045005, + "learning_rate": 5.535266738274257e-07, + "loss": 0.001, + "step": 14755 + }, + { + "epoch": 0.9509570148868982, + "grad_norm": 0.017287191174224937, + "learning_rate": 5.528105979233799e-07, + "loss": 0.0, + "step": 14756 + }, + { + "epoch": 0.9510214603338274, + "grad_norm": 0.0008960561301208798, + "learning_rate": 5.520945220193341e-07, + "loss": 0.0, + "step": 14757 + }, + { + "epoch": 0.9510859057807566, + "grad_norm": 0.022833389816133105, + "learning_rate": 5.513784461152883e-07, + "loss": 0.0001, + "step": 14758 + }, + { + "epoch": 0.9511503512276858, + "grad_norm": 0.0007505371049380001, + "learning_rate": 5.506623702112424e-07, + "loss": 0.0, + "step": 14759 + }, + { + "epoch": 0.951214796674615, + "grad_norm": 0.004165736964287361, + "learning_rate": 5.499462943071966e-07, + "loss": 0.0, + "step": 14760 + }, + { + "epoch": 0.9512792421215441, + "grad_norm": 0.01478945535523023, + "learning_rate": 5.492302184031507e-07, + "loss": 0.0, + "step": 14761 + }, + { + "epoch": 0.9513436875684733, + "grad_norm": 0.0018664110528120968, + "learning_rate": 5.485141424991049e-07, + "loss": 0.0, + "step": 14762 + }, + { + "epoch": 0.9514081330154025, + "grad_norm": 0.5059164891348179, + "learning_rate": 5.477980665950591e-07, + "loss": 0.0014, + "step": 14763 + }, + { + "epoch": 0.9514725784623317, + "grad_norm": 0.0008552185764550522, + "learning_rate": 5.470819906910133e-07, + "loss": 0.0, + "step": 14764 + }, + { + "epoch": 0.9515370239092608, + "grad_norm": 0.003477670911935671, + "learning_rate": 5.463659147869674e-07, + "loss": 0.0, + "step": 14765 + }, + { + "epoch": 0.9516014693561899, + "grad_norm": 0.5282372710846787, + "learning_rate": 5.456498388829216e-07, + "loss": 0.0007, + "step": 14766 + }, + { + "epoch": 0.9516659148031191, + "grad_norm": 0.0669501936225196, + "learning_rate": 5.449337629788758e-07, + "loss": 0.0001, + "step": 14767 + }, + { + "epoch": 0.9517303602500483, + "grad_norm": 0.001082596470456628, + "learning_rate": 5.4421768707483e-07, + "loss": 0.0, + "step": 14768 + }, + { + "epoch": 0.9517948056969775, + "grad_norm": 0.013068741403885711, + "learning_rate": 5.435016111707842e-07, + "loss": 0.0, + "step": 14769 + }, + { + "epoch": 0.9518592511439067, + "grad_norm": 0.0013497502223646179, + "learning_rate": 5.427855352667384e-07, + "loss": 0.0, + "step": 14770 + }, + { + "epoch": 0.9519236965908359, + "grad_norm": 0.06839671137127833, + "learning_rate": 5.420694593626924e-07, + "loss": 0.0001, + "step": 14771 + }, + { + "epoch": 0.951988142037765, + "grad_norm": 0.00470835857793575, + "learning_rate": 5.413533834586466e-07, + "loss": 0.0, + "step": 14772 + }, + { + "epoch": 0.9520525874846942, + "grad_norm": 0.008123316991707919, + "learning_rate": 5.406373075546008e-07, + "loss": 0.0, + "step": 14773 + }, + { + "epoch": 0.9521170329316234, + "grad_norm": 0.1347828798719565, + "learning_rate": 5.39921231650555e-07, + "loss": 0.0002, + "step": 14774 + }, + { + "epoch": 0.9521814783785526, + "grad_norm": 0.0013437443289455824, + "learning_rate": 5.392051557465092e-07, + "loss": 0.0, + "step": 14775 + }, + { + "epoch": 0.9522459238254817, + "grad_norm": 0.03295035955820533, + "learning_rate": 5.384890798424634e-07, + "loss": 0.0, + "step": 14776 + }, + { + "epoch": 0.9523103692724109, + "grad_norm": 0.002494679766104263, + "learning_rate": 5.377730039384175e-07, + "loss": 0.0, + "step": 14777 + }, + { + "epoch": 0.95237481471934, + "grad_norm": 0.0004908145695510208, + "learning_rate": 5.370569280343717e-07, + "loss": 0.0, + "step": 14778 + }, + { + "epoch": 0.9524392601662692, + "grad_norm": 0.07464586197099465, + "learning_rate": 5.363408521303259e-07, + "loss": 0.0001, + "step": 14779 + }, + { + "epoch": 0.9525037056131984, + "grad_norm": 9.668897209007286e-05, + "learning_rate": 5.356247762262801e-07, + "loss": 0.0, + "step": 14780 + }, + { + "epoch": 0.9525681510601276, + "grad_norm": 0.0045286485902072664, + "learning_rate": 5.349087003222342e-07, + "loss": 0.0, + "step": 14781 + }, + { + "epoch": 0.9526325965070568, + "grad_norm": 0.0027660902273609613, + "learning_rate": 5.341926244181884e-07, + "loss": 0.0, + "step": 14782 + }, + { + "epoch": 0.952697041953986, + "grad_norm": 0.005766246153932424, + "learning_rate": 5.334765485141425e-07, + "loss": 0.0, + "step": 14783 + }, + { + "epoch": 0.9527614874009152, + "grad_norm": 0.001195186415794037, + "learning_rate": 5.327604726100967e-07, + "loss": 0.0, + "step": 14784 + }, + { + "epoch": 0.9528259328478443, + "grad_norm": 0.0010986515422721234, + "learning_rate": 5.320443967060509e-07, + "loss": 0.0, + "step": 14785 + }, + { + "epoch": 0.9528903782947735, + "grad_norm": 0.02101568137284521, + "learning_rate": 5.313283208020051e-07, + "loss": 0.0, + "step": 14786 + }, + { + "epoch": 0.9529548237417026, + "grad_norm": 0.004531570145919255, + "learning_rate": 5.306122448979592e-07, + "loss": 0.0, + "step": 14787 + }, + { + "epoch": 0.9530192691886318, + "grad_norm": 0.00046395947667538586, + "learning_rate": 5.298961689939134e-07, + "loss": 0.0, + "step": 14788 + }, + { + "epoch": 0.953083714635561, + "grad_norm": 0.07514254155018177, + "learning_rate": 5.291800930898676e-07, + "loss": 0.0017, + "step": 14789 + }, + { + "epoch": 0.9531481600824901, + "grad_norm": 0.00022843025780618325, + "learning_rate": 5.284640171858217e-07, + "loss": 0.0, + "step": 14790 + }, + { + "epoch": 0.9532126055294193, + "grad_norm": 0.004491149951619666, + "learning_rate": 5.277479412817759e-07, + "loss": 0.0015, + "step": 14791 + }, + { + "epoch": 0.9532770509763485, + "grad_norm": 0.0005985131719744827, + "learning_rate": 5.270318653777301e-07, + "loss": 0.0, + "step": 14792 + }, + { + "epoch": 0.9533414964232777, + "grad_norm": 0.0005078170219091677, + "learning_rate": 5.263157894736843e-07, + "loss": 0.0, + "step": 14793 + }, + { + "epoch": 0.9534059418702069, + "grad_norm": 0.002439488648787511, + "learning_rate": 5.255997135696385e-07, + "loss": 0.0015, + "step": 14794 + }, + { + "epoch": 0.9534703873171361, + "grad_norm": 0.001817523298432707, + "learning_rate": 5.248836376655927e-07, + "loss": 0.0, + "step": 14795 + }, + { + "epoch": 0.9535348327640653, + "grad_norm": 0.004825207351911492, + "learning_rate": 5.241675617615467e-07, + "loss": 0.0, + "step": 14796 + }, + { + "epoch": 0.9535992782109944, + "grad_norm": 0.0004920440792501377, + "learning_rate": 5.234514858575009e-07, + "loss": 0.0, + "step": 14797 + }, + { + "epoch": 0.9536637236579236, + "grad_norm": 0.003268970344869564, + "learning_rate": 5.227354099534551e-07, + "loss": 0.0, + "step": 14798 + }, + { + "epoch": 0.9537281691048527, + "grad_norm": 6.151258179910864e-05, + "learning_rate": 5.220193340494093e-07, + "loss": 0.0, + "step": 14799 + }, + { + "epoch": 0.9537926145517819, + "grad_norm": 0.0668628749985501, + "learning_rate": 5.213032581453634e-07, + "loss": 0.0002, + "step": 14800 + }, + { + "epoch": 0.9538570599987111, + "grad_norm": 0.000984831570237332, + "learning_rate": 5.205871822413176e-07, + "loss": 0.0, + "step": 14801 + }, + { + "epoch": 0.9539215054456402, + "grad_norm": 0.060623120937525234, + "learning_rate": 5.198711063372718e-07, + "loss": 0.0001, + "step": 14802 + }, + { + "epoch": 0.9539859508925694, + "grad_norm": 0.0007038617180894285, + "learning_rate": 5.19155030433226e-07, + "loss": 0.0, + "step": 14803 + }, + { + "epoch": 0.9540503963394986, + "grad_norm": 0.0028088622921580444, + "learning_rate": 5.184389545291802e-07, + "loss": 0.0, + "step": 14804 + }, + { + "epoch": 0.9541148417864278, + "grad_norm": 0.010322705780409896, + "learning_rate": 5.177228786251344e-07, + "loss": 0.0, + "step": 14805 + }, + { + "epoch": 0.954179287233357, + "grad_norm": 0.004806938039548968, + "learning_rate": 5.170068027210885e-07, + "loss": 0.0, + "step": 14806 + }, + { + "epoch": 0.9542437326802862, + "grad_norm": 0.00021441404646725413, + "learning_rate": 5.162907268170426e-07, + "loss": 0.0, + "step": 14807 + }, + { + "epoch": 0.9543081781272154, + "grad_norm": 0.04337507862455308, + "learning_rate": 5.155746509129968e-07, + "loss": 0.0001, + "step": 14808 + }, + { + "epoch": 0.9543726235741445, + "grad_norm": 0.16570335639962636, + "learning_rate": 5.148585750089509e-07, + "loss": 0.0002, + "step": 14809 + }, + { + "epoch": 0.9544370690210736, + "grad_norm": 0.006217551071894887, + "learning_rate": 5.141424991049051e-07, + "loss": 0.0, + "step": 14810 + }, + { + "epoch": 0.9545015144680028, + "grad_norm": 0.0012870383012455599, + "learning_rate": 5.134264232008593e-07, + "loss": 0.0, + "step": 14811 + }, + { + "epoch": 0.954565959914932, + "grad_norm": 0.016881525437275658, + "learning_rate": 5.127103472968135e-07, + "loss": 0.0, + "step": 14812 + }, + { + "epoch": 0.9546304053618612, + "grad_norm": 0.001555288436966095, + "learning_rate": 5.119942713927677e-07, + "loss": 0.0, + "step": 14813 + }, + { + "epoch": 0.9546948508087904, + "grad_norm": 0.008399473534627302, + "learning_rate": 5.112781954887219e-07, + "loss": 0.0, + "step": 14814 + }, + { + "epoch": 0.9547592962557195, + "grad_norm": 0.0026212772132492594, + "learning_rate": 5.10562119584676e-07, + "loss": 0.0, + "step": 14815 + }, + { + "epoch": 0.9548237417026487, + "grad_norm": 0.005733009960411951, + "learning_rate": 5.098460436806302e-07, + "loss": 0.0, + "step": 14816 + }, + { + "epoch": 0.9548881871495779, + "grad_norm": 0.030370706949806717, + "learning_rate": 5.091299677765844e-07, + "loss": 0.0001, + "step": 14817 + }, + { + "epoch": 0.9549526325965071, + "grad_norm": 0.0005157755746085372, + "learning_rate": 5.084138918725386e-07, + "loss": 0.0, + "step": 14818 + }, + { + "epoch": 0.9550170780434363, + "grad_norm": 2.917667148725611e-05, + "learning_rate": 5.076978159684926e-07, + "loss": 0.0, + "step": 14819 + }, + { + "epoch": 0.9550815234903655, + "grad_norm": 0.09262644516760862, + "learning_rate": 5.069817400644468e-07, + "loss": 0.0008, + "step": 14820 + }, + { + "epoch": 0.9551459689372945, + "grad_norm": 0.003610028407824278, + "learning_rate": 5.06265664160401e-07, + "loss": 0.0, + "step": 14821 + }, + { + "epoch": 0.9552104143842237, + "grad_norm": 0.0028029187425091767, + "learning_rate": 5.055495882563552e-07, + "loss": 0.0, + "step": 14822 + }, + { + "epoch": 0.9552748598311529, + "grad_norm": 0.00040773365753766214, + "learning_rate": 5.048335123523094e-07, + "loss": 0.0, + "step": 14823 + }, + { + "epoch": 0.9553393052780821, + "grad_norm": 0.00031257184924321485, + "learning_rate": 5.041174364482636e-07, + "loss": 0.0, + "step": 14824 + }, + { + "epoch": 0.9554037507250113, + "grad_norm": 0.00040114875345770496, + "learning_rate": 5.034013605442177e-07, + "loss": 0.0, + "step": 14825 + }, + { + "epoch": 0.9554681961719405, + "grad_norm": 0.008520543711840797, + "learning_rate": 5.026852846401719e-07, + "loss": 0.0, + "step": 14826 + }, + { + "epoch": 0.9555326416188696, + "grad_norm": 0.00017712328294400463, + "learning_rate": 5.019692087361261e-07, + "loss": 0.0, + "step": 14827 + }, + { + "epoch": 0.9555970870657988, + "grad_norm": 0.00448170861329047, + "learning_rate": 5.012531328320802e-07, + "loss": 0.0, + "step": 14828 + }, + { + "epoch": 0.955661532512728, + "grad_norm": 0.002184216611653189, + "learning_rate": 5.005370569280344e-07, + "loss": 0.0, + "step": 14829 + }, + { + "epoch": 0.9557259779596572, + "grad_norm": 0.2183928895592596, + "learning_rate": 4.998209810239886e-07, + "loss": 0.001, + "step": 14830 + }, + { + "epoch": 0.9557904234065864, + "grad_norm": 0.012881565613289223, + "learning_rate": 4.991049051199427e-07, + "loss": 0.0001, + "step": 14831 + }, + { + "epoch": 0.9558548688535154, + "grad_norm": 0.09858547151000312, + "learning_rate": 4.983888292158969e-07, + "loss": 0.0007, + "step": 14832 + }, + { + "epoch": 0.9559193143004446, + "grad_norm": 0.001604564122585394, + "learning_rate": 4.976727533118511e-07, + "loss": 0.0, + "step": 14833 + }, + { + "epoch": 0.9559837597473738, + "grad_norm": 0.002502730599271993, + "learning_rate": 4.969566774078053e-07, + "loss": 0.0, + "step": 14834 + }, + { + "epoch": 0.956048205194303, + "grad_norm": 0.039794989158713906, + "learning_rate": 4.962406015037594e-07, + "loss": 0.0, + "step": 14835 + }, + { + "epoch": 0.9561126506412322, + "grad_norm": 0.05801081561332805, + "learning_rate": 4.955245255997136e-07, + "loss": 0.0004, + "step": 14836 + }, + { + "epoch": 0.9561770960881614, + "grad_norm": 0.5109591488232742, + "learning_rate": 4.948084496956678e-07, + "loss": 0.0038, + "step": 14837 + }, + { + "epoch": 0.9562415415350906, + "grad_norm": 0.0008859893281252492, + "learning_rate": 4.940923737916219e-07, + "loss": 0.0, + "step": 14838 + }, + { + "epoch": 0.9563059869820197, + "grad_norm": 0.0004626148219854151, + "learning_rate": 4.933762978875761e-07, + "loss": 0.0, + "step": 14839 + }, + { + "epoch": 0.9563704324289489, + "grad_norm": 0.13976812235699776, + "learning_rate": 4.926602219835303e-07, + "loss": 0.0006, + "step": 14840 + }, + { + "epoch": 0.9564348778758781, + "grad_norm": 0.0003939018396175427, + "learning_rate": 4.919441460794845e-07, + "loss": 0.0, + "step": 14841 + }, + { + "epoch": 0.9564993233228073, + "grad_norm": 0.004048120224935769, + "learning_rate": 4.912280701754387e-07, + "loss": 0.0, + "step": 14842 + }, + { + "epoch": 0.9565637687697364, + "grad_norm": 0.2624320373657076, + "learning_rate": 4.905119942713928e-07, + "loss": 0.0012, + "step": 14843 + }, + { + "epoch": 0.9566282142166656, + "grad_norm": 0.014094358110339713, + "learning_rate": 4.897959183673469e-07, + "loss": 0.0, + "step": 14844 + }, + { + "epoch": 0.9566926596635947, + "grad_norm": 0.0006931803598242944, + "learning_rate": 4.890798424633011e-07, + "loss": 0.0, + "step": 14845 + }, + { + "epoch": 0.9567571051105239, + "grad_norm": 0.026155114545255122, + "learning_rate": 4.883637665592553e-07, + "loss": 0.0001, + "step": 14846 + }, + { + "epoch": 0.9568215505574531, + "grad_norm": 0.0004005979354409368, + "learning_rate": 4.876476906552095e-07, + "loss": 0.0, + "step": 14847 + }, + { + "epoch": 0.9568859960043823, + "grad_norm": 0.05163703402803826, + "learning_rate": 4.869316147511637e-07, + "loss": 0.0002, + "step": 14848 + }, + { + "epoch": 0.9569504414513115, + "grad_norm": 0.00026671949687704445, + "learning_rate": 4.862155388471179e-07, + "loss": 0.0, + "step": 14849 + }, + { + "epoch": 0.9570148868982407, + "grad_norm": 0.17085980114842259, + "learning_rate": 4.85499462943072e-07, + "loss": 0.0001, + "step": 14850 + }, + { + "epoch": 0.9570793323451698, + "grad_norm": 0.023131071071852638, + "learning_rate": 4.847833870390262e-07, + "loss": 0.0, + "step": 14851 + }, + { + "epoch": 0.957143777792099, + "grad_norm": 0.0025030747163234074, + "learning_rate": 4.840673111349804e-07, + "loss": 0.0, + "step": 14852 + }, + { + "epoch": 0.9572082232390282, + "grad_norm": 0.0014296599078724075, + "learning_rate": 4.833512352309346e-07, + "loss": 0.0, + "step": 14853 + }, + { + "epoch": 0.9572726686859573, + "grad_norm": 0.00019132692676034923, + "learning_rate": 4.826351593268887e-07, + "loss": 0.0, + "step": 14854 + }, + { + "epoch": 0.9573371141328865, + "grad_norm": 0.000852706506334407, + "learning_rate": 4.819190834228428e-07, + "loss": 0.0, + "step": 14855 + }, + { + "epoch": 0.9574015595798157, + "grad_norm": 0.002841819849834939, + "learning_rate": 4.81203007518797e-07, + "loss": 0.0, + "step": 14856 + }, + { + "epoch": 0.9574660050267448, + "grad_norm": 0.2582747621177174, + "learning_rate": 4.804869316147512e-07, + "loss": 0.0062, + "step": 14857 + }, + { + "epoch": 0.957530450473674, + "grad_norm": 0.0001664016880520021, + "learning_rate": 4.797708557107054e-07, + "loss": 0.0, + "step": 14858 + }, + { + "epoch": 0.9575948959206032, + "grad_norm": 0.03976250129414592, + "learning_rate": 4.790547798066596e-07, + "loss": 0.0, + "step": 14859 + }, + { + "epoch": 0.9576593413675324, + "grad_norm": 0.007408347848849625, + "learning_rate": 4.783387039026137e-07, + "loss": 0.0, + "step": 14860 + }, + { + "epoch": 0.9577237868144616, + "grad_norm": 0.0006574807862907933, + "learning_rate": 4.776226279985679e-07, + "loss": 0.0, + "step": 14861 + }, + { + "epoch": 0.9577882322613908, + "grad_norm": 0.0028955546851931594, + "learning_rate": 4.769065520945221e-07, + "loss": 0.0, + "step": 14862 + }, + { + "epoch": 0.9578526777083199, + "grad_norm": 0.007108678630948101, + "learning_rate": 4.7619047619047623e-07, + "loss": 0.0001, + "step": 14863 + }, + { + "epoch": 0.9579171231552491, + "grad_norm": 0.27302020792699677, + "learning_rate": 4.754744002864304e-07, + "loss": 0.002, + "step": 14864 + }, + { + "epoch": 0.9579815686021782, + "grad_norm": 0.00322635562029426, + "learning_rate": 4.747583243823846e-07, + "loss": 0.0, + "step": 14865 + }, + { + "epoch": 0.9580460140491074, + "grad_norm": 0.001582561611280091, + "learning_rate": 4.740422484783387e-07, + "loss": 0.0, + "step": 14866 + }, + { + "epoch": 0.9581104594960366, + "grad_norm": 0.03473567228204857, + "learning_rate": 4.733261725742929e-07, + "loss": 0.0001, + "step": 14867 + }, + { + "epoch": 0.9581749049429658, + "grad_norm": 0.004032275081067416, + "learning_rate": 4.726100966702471e-07, + "loss": 0.0, + "step": 14868 + }, + { + "epoch": 0.9582393503898949, + "grad_norm": 0.005672604264508217, + "learning_rate": 4.718940207662012e-07, + "loss": 0.0, + "step": 14869 + }, + { + "epoch": 0.9583037958368241, + "grad_norm": 0.0024886796876244506, + "learning_rate": 4.711779448621554e-07, + "loss": 0.0, + "step": 14870 + }, + { + "epoch": 0.9583682412837533, + "grad_norm": 0.38062694010373216, + "learning_rate": 4.704618689581096e-07, + "loss": 0.0015, + "step": 14871 + }, + { + "epoch": 0.9584326867306825, + "grad_norm": 0.0013407293218750376, + "learning_rate": 4.697457930540638e-07, + "loss": 0.0, + "step": 14872 + }, + { + "epoch": 0.9584971321776117, + "grad_norm": 0.12025046514185207, + "learning_rate": 4.6902971715001794e-07, + "loss": 0.0003, + "step": 14873 + }, + { + "epoch": 0.9585615776245409, + "grad_norm": 0.013953009693871022, + "learning_rate": 4.6831364124597214e-07, + "loss": 0.0, + "step": 14874 + }, + { + "epoch": 0.95862602307147, + "grad_norm": 0.000177093544672877, + "learning_rate": 4.6759756534192633e-07, + "loss": 0.0, + "step": 14875 + }, + { + "epoch": 0.9586904685183992, + "grad_norm": 0.011330444616391867, + "learning_rate": 4.668814894378804e-07, + "loss": 0.0, + "step": 14876 + }, + { + "epoch": 0.9587549139653283, + "grad_norm": 0.0004049873983944743, + "learning_rate": 4.661654135338346e-07, + "loss": 0.0, + "step": 14877 + }, + { + "epoch": 0.9588193594122575, + "grad_norm": 0.0850805202870303, + "learning_rate": 4.654493376297888e-07, + "loss": 0.0016, + "step": 14878 + }, + { + "epoch": 0.9588838048591867, + "grad_norm": 0.0004410744841034204, + "learning_rate": 4.6473326172574294e-07, + "loss": 0.0, + "step": 14879 + }, + { + "epoch": 0.9589482503061159, + "grad_norm": 0.011639213360409064, + "learning_rate": 4.6401718582169713e-07, + "loss": 0.0, + "step": 14880 + }, + { + "epoch": 0.959012695753045, + "grad_norm": 0.008735612177839278, + "learning_rate": 4.6330110991765133e-07, + "loss": 0.0, + "step": 14881 + }, + { + "epoch": 0.9590771411999742, + "grad_norm": 0.020911397296875626, + "learning_rate": 4.6258503401360547e-07, + "loss": 0.0, + "step": 14882 + }, + { + "epoch": 0.9591415866469034, + "grad_norm": 0.001564329687710305, + "learning_rate": 4.6186895810955966e-07, + "loss": 0.0, + "step": 14883 + }, + { + "epoch": 0.9592060320938326, + "grad_norm": 0.0016710818587629625, + "learning_rate": 4.6115288220551385e-07, + "loss": 0.0, + "step": 14884 + }, + { + "epoch": 0.9592704775407618, + "grad_norm": 0.0039500020524210425, + "learning_rate": 4.6043680630146794e-07, + "loss": 0.0, + "step": 14885 + }, + { + "epoch": 0.959334922987691, + "grad_norm": 0.00036652025819384343, + "learning_rate": 4.5972073039742213e-07, + "loss": 0.0, + "step": 14886 + }, + { + "epoch": 0.9593993684346201, + "grad_norm": 0.06222436145498517, + "learning_rate": 4.590046544933763e-07, + "loss": 0.0001, + "step": 14887 + }, + { + "epoch": 0.9594638138815492, + "grad_norm": 0.0022996133312288224, + "learning_rate": 4.582885785893305e-07, + "loss": 0.0, + "step": 14888 + }, + { + "epoch": 0.9595282593284784, + "grad_norm": 0.018717318945592407, + "learning_rate": 4.5757250268528466e-07, + "loss": 0.0001, + "step": 14889 + }, + { + "epoch": 0.9595927047754076, + "grad_norm": 0.0005461212142547301, + "learning_rate": 4.5685642678123885e-07, + "loss": 0.0, + "step": 14890 + }, + { + "epoch": 0.9596571502223368, + "grad_norm": 0.0004114048163026691, + "learning_rate": 4.5614035087719304e-07, + "loss": 0.0, + "step": 14891 + }, + { + "epoch": 0.959721595669266, + "grad_norm": 0.0038923848919269173, + "learning_rate": 4.554242749731472e-07, + "loss": 0.0, + "step": 14892 + }, + { + "epoch": 0.9597860411161951, + "grad_norm": 0.005338129096024907, + "learning_rate": 4.547081990691014e-07, + "loss": 0.0, + "step": 14893 + }, + { + "epoch": 0.9598504865631243, + "grad_norm": 0.04265827342489956, + "learning_rate": 4.5399212316505557e-07, + "loss": 0.0, + "step": 14894 + }, + { + "epoch": 0.9599149320100535, + "grad_norm": 0.00013671547659063613, + "learning_rate": 4.532760472610097e-07, + "loss": 0.0, + "step": 14895 + }, + { + "epoch": 0.9599793774569827, + "grad_norm": 0.00960232425371068, + "learning_rate": 4.525599713569639e-07, + "loss": 0.0, + "step": 14896 + }, + { + "epoch": 0.9600438229039119, + "grad_norm": 0.00965865226960607, + "learning_rate": 4.518438954529181e-07, + "loss": 0.0, + "step": 14897 + }, + { + "epoch": 0.9601082683508411, + "grad_norm": 9.192379356709644e-05, + "learning_rate": 4.511278195488722e-07, + "loss": 0.0, + "step": 14898 + }, + { + "epoch": 0.9601727137977701, + "grad_norm": 0.00015305449739958055, + "learning_rate": 4.504117436448264e-07, + "loss": 0.0, + "step": 14899 + }, + { + "epoch": 0.9602371592446993, + "grad_norm": 0.14304743566147973, + "learning_rate": 4.4969566774078057e-07, + "loss": 0.0003, + "step": 14900 + }, + { + "epoch": 0.9603016046916285, + "grad_norm": 0.009295935886398593, + "learning_rate": 4.489795918367347e-07, + "loss": 0.0, + "step": 14901 + }, + { + "epoch": 0.9603660501385577, + "grad_norm": 0.0002162139983364177, + "learning_rate": 4.482635159326889e-07, + "loss": 0.0, + "step": 14902 + }, + { + "epoch": 0.9604304955854869, + "grad_norm": 0.03242333980491538, + "learning_rate": 4.475474400286431e-07, + "loss": 0.0001, + "step": 14903 + }, + { + "epoch": 0.9604949410324161, + "grad_norm": 0.002406562227087017, + "learning_rate": 4.4683136412459723e-07, + "loss": 0.0, + "step": 14904 + }, + { + "epoch": 0.9605593864793452, + "grad_norm": 0.0056715677897925445, + "learning_rate": 4.461152882205514e-07, + "loss": 0.0, + "step": 14905 + }, + { + "epoch": 0.9606238319262744, + "grad_norm": 0.00045505831911909236, + "learning_rate": 4.453992123165056e-07, + "loss": 0.0, + "step": 14906 + }, + { + "epoch": 0.9606882773732036, + "grad_norm": 0.0007268449933700111, + "learning_rate": 4.446831364124598e-07, + "loss": 0.0, + "step": 14907 + }, + { + "epoch": 0.9607527228201328, + "grad_norm": 0.00022912322387547547, + "learning_rate": 4.439670605084139e-07, + "loss": 0.0, + "step": 14908 + }, + { + "epoch": 0.960817168267062, + "grad_norm": 0.0022316612740019702, + "learning_rate": 4.432509846043681e-07, + "loss": 0.0, + "step": 14909 + }, + { + "epoch": 0.9608816137139911, + "grad_norm": 0.004242630686752434, + "learning_rate": 4.425349087003223e-07, + "loss": 0.0, + "step": 14910 + }, + { + "epoch": 0.9609460591609202, + "grad_norm": 0.04213162492549318, + "learning_rate": 4.418188327962764e-07, + "loss": 0.0001, + "step": 14911 + }, + { + "epoch": 0.9610105046078494, + "grad_norm": 0.0008370413518882776, + "learning_rate": 4.411027568922306e-07, + "loss": 0.0, + "step": 14912 + }, + { + "epoch": 0.9610749500547786, + "grad_norm": 0.0009282456515276538, + "learning_rate": 4.403866809881848e-07, + "loss": 0.0, + "step": 14913 + }, + { + "epoch": 0.9611393955017078, + "grad_norm": 0.0004216024907836894, + "learning_rate": 4.3967060508413895e-07, + "loss": 0.0, + "step": 14914 + }, + { + "epoch": 0.961203840948637, + "grad_norm": 0.004501349239021442, + "learning_rate": 4.3895452918009314e-07, + "loss": 0.0, + "step": 14915 + }, + { + "epoch": 0.9612682863955662, + "grad_norm": 0.007865435385491804, + "learning_rate": 4.3823845327604733e-07, + "loss": 0.0001, + "step": 14916 + }, + { + "epoch": 0.9613327318424953, + "grad_norm": 0.0001713778914629011, + "learning_rate": 4.375223773720014e-07, + "loss": 0.0, + "step": 14917 + }, + { + "epoch": 0.9613971772894245, + "grad_norm": 0.1708792157943735, + "learning_rate": 4.368063014679556e-07, + "loss": 0.0007, + "step": 14918 + }, + { + "epoch": 0.9614616227363537, + "grad_norm": 0.000479988725056364, + "learning_rate": 4.360902255639098e-07, + "loss": 0.0, + "step": 14919 + }, + { + "epoch": 0.9615260681832829, + "grad_norm": 0.006377414893212848, + "learning_rate": 4.3537414965986395e-07, + "loss": 0.0, + "step": 14920 + }, + { + "epoch": 0.961590513630212, + "grad_norm": 0.0036946523398398512, + "learning_rate": 4.3465807375581814e-07, + "loss": 0.0, + "step": 14921 + }, + { + "epoch": 0.9616549590771412, + "grad_norm": 0.014419668187101007, + "learning_rate": 4.3394199785177233e-07, + "loss": 0.0, + "step": 14922 + }, + { + "epoch": 0.9617194045240703, + "grad_norm": 0.011962387624208598, + "learning_rate": 4.3322592194772647e-07, + "loss": 0.0001, + "step": 14923 + }, + { + "epoch": 0.9617838499709995, + "grad_norm": 0.012241861601313967, + "learning_rate": 4.3250984604368067e-07, + "loss": 0.0, + "step": 14924 + }, + { + "epoch": 0.9618482954179287, + "grad_norm": 0.0005817396474641864, + "learning_rate": 4.3179377013963486e-07, + "loss": 0.0, + "step": 14925 + }, + { + "epoch": 0.9619127408648579, + "grad_norm": 0.0004082905114981401, + "learning_rate": 4.3107769423558905e-07, + "loss": 0.0, + "step": 14926 + }, + { + "epoch": 0.9619771863117871, + "grad_norm": 0.0023784330583258473, + "learning_rate": 4.3036161833154314e-07, + "loss": 0.0, + "step": 14927 + }, + { + "epoch": 0.9620416317587163, + "grad_norm": 0.002489868312757046, + "learning_rate": 4.2964554242749733e-07, + "loss": 0.0, + "step": 14928 + }, + { + "epoch": 0.9621060772056454, + "grad_norm": 0.003762379342939597, + "learning_rate": 4.289294665234515e-07, + "loss": 0.0, + "step": 14929 + }, + { + "epoch": 0.9621705226525746, + "grad_norm": 0.0017895349142890476, + "learning_rate": 4.2821339061940566e-07, + "loss": 0.0, + "step": 14930 + }, + { + "epoch": 0.9622349680995038, + "grad_norm": 0.0004923169026729873, + "learning_rate": 4.2749731471535986e-07, + "loss": 0.0, + "step": 14931 + }, + { + "epoch": 0.9622994135464329, + "grad_norm": 0.00017590920193244695, + "learning_rate": 4.2678123881131405e-07, + "loss": 0.0, + "step": 14932 + }, + { + "epoch": 0.9623638589933621, + "grad_norm": 0.00048500241951370985, + "learning_rate": 4.260651629072682e-07, + "loss": 0.0, + "step": 14933 + }, + { + "epoch": 0.9624283044402913, + "grad_norm": 0.0010236614349840823, + "learning_rate": 4.253490870032224e-07, + "loss": 0.0, + "step": 14934 + }, + { + "epoch": 0.9624927498872204, + "grad_norm": 0.0016053604449928272, + "learning_rate": 4.246330110991766e-07, + "loss": 0.0, + "step": 14935 + }, + { + "epoch": 0.9625571953341496, + "grad_norm": 0.0012853831294313406, + "learning_rate": 4.239169351951307e-07, + "loss": 0.0, + "step": 14936 + }, + { + "epoch": 0.9626216407810788, + "grad_norm": 0.0011035914182909066, + "learning_rate": 4.232008592910849e-07, + "loss": 0.0, + "step": 14937 + }, + { + "epoch": 0.962686086228008, + "grad_norm": 0.07527333398147694, + "learning_rate": 4.224847833870391e-07, + "loss": 0.0008, + "step": 14938 + }, + { + "epoch": 0.9627505316749372, + "grad_norm": 0.03563464857298093, + "learning_rate": 4.217687074829932e-07, + "loss": 0.0001, + "step": 14939 + }, + { + "epoch": 0.9628149771218664, + "grad_norm": 0.02769180787150801, + "learning_rate": 4.210526315789474e-07, + "loss": 0.0, + "step": 14940 + }, + { + "epoch": 0.9628794225687956, + "grad_norm": 0.0017228653770633396, + "learning_rate": 4.2033655567490157e-07, + "loss": 0.0, + "step": 14941 + }, + { + "epoch": 0.9629438680157247, + "grad_norm": 0.013080418369412865, + "learning_rate": 4.1962047977085577e-07, + "loss": 0.0001, + "step": 14942 + }, + { + "epoch": 0.9630083134626538, + "grad_norm": 0.33033698275444734, + "learning_rate": 4.189044038668099e-07, + "loss": 0.0008, + "step": 14943 + }, + { + "epoch": 0.963072758909583, + "grad_norm": 0.004267734442137806, + "learning_rate": 4.181883279627641e-07, + "loss": 0.0, + "step": 14944 + }, + { + "epoch": 0.9631372043565122, + "grad_norm": 0.01560012273070935, + "learning_rate": 4.174722520587183e-07, + "loss": 0.0, + "step": 14945 + }, + { + "epoch": 0.9632016498034414, + "grad_norm": 0.0043316884787335036, + "learning_rate": 4.1675617615467243e-07, + "loss": 0.0, + "step": 14946 + }, + { + "epoch": 0.9632660952503705, + "grad_norm": 0.00013166375289761843, + "learning_rate": 4.160401002506266e-07, + "loss": 0.0, + "step": 14947 + }, + { + "epoch": 0.9633305406972997, + "grad_norm": 0.0010617943515079224, + "learning_rate": 4.153240243465808e-07, + "loss": 0.0, + "step": 14948 + }, + { + "epoch": 0.9633949861442289, + "grad_norm": 0.06773688748923617, + "learning_rate": 4.146079484425349e-07, + "loss": 0.0001, + "step": 14949 + }, + { + "epoch": 0.9634594315911581, + "grad_norm": 0.03567411390503899, + "learning_rate": 4.138918725384891e-07, + "loss": 0.0004, + "step": 14950 + }, + { + "epoch": 0.9635238770380873, + "grad_norm": 6.775566679261235e-05, + "learning_rate": 4.131757966344433e-07, + "loss": 0.0, + "step": 14951 + }, + { + "epoch": 0.9635883224850165, + "grad_norm": 0.04405645647140258, + "learning_rate": 4.1245972073039743e-07, + "loss": 0.0001, + "step": 14952 + }, + { + "epoch": 0.9636527679319457, + "grad_norm": 0.00023435635260378457, + "learning_rate": 4.117436448263516e-07, + "loss": 0.0, + "step": 14953 + }, + { + "epoch": 0.9637172133788748, + "grad_norm": 0.1547491531137055, + "learning_rate": 4.110275689223058e-07, + "loss": 0.0002, + "step": 14954 + }, + { + "epoch": 0.9637816588258039, + "grad_norm": 0.561563241625908, + "learning_rate": 4.1031149301825995e-07, + "loss": 0.0035, + "step": 14955 + }, + { + "epoch": 0.9638461042727331, + "grad_norm": 0.005156861571168343, + "learning_rate": 4.0959541711421415e-07, + "loss": 0.0001, + "step": 14956 + }, + { + "epoch": 0.9639105497196623, + "grad_norm": 0.19693480029990224, + "learning_rate": 4.0887934121016834e-07, + "loss": 0.0006, + "step": 14957 + }, + { + "epoch": 0.9639749951665915, + "grad_norm": 0.0005532097400886752, + "learning_rate": 4.0816326530612243e-07, + "loss": 0.0, + "step": 14958 + }, + { + "epoch": 0.9640394406135206, + "grad_norm": 0.00238901745451577, + "learning_rate": 4.074471894020766e-07, + "loss": 0.0, + "step": 14959 + }, + { + "epoch": 0.9641038860604498, + "grad_norm": 0.0026651981924149973, + "learning_rate": 4.067311134980308e-07, + "loss": 0.0, + "step": 14960 + }, + { + "epoch": 0.964168331507379, + "grad_norm": 0.003920529480750357, + "learning_rate": 4.06015037593985e-07, + "loss": 0.0, + "step": 14961 + }, + { + "epoch": 0.9642327769543082, + "grad_norm": 0.003843617604241777, + "learning_rate": 4.0529896168993915e-07, + "loss": 0.0, + "step": 14962 + }, + { + "epoch": 0.9642972224012374, + "grad_norm": 0.0025857098032880323, + "learning_rate": 4.0458288578589334e-07, + "loss": 0.0, + "step": 14963 + }, + { + "epoch": 0.9643616678481666, + "grad_norm": 0.018335899444566185, + "learning_rate": 4.0386680988184753e-07, + "loss": 0.0, + "step": 14964 + }, + { + "epoch": 0.9644261132950958, + "grad_norm": 0.0009981560033705578, + "learning_rate": 4.0315073397780167e-07, + "loss": 0.0, + "step": 14965 + }, + { + "epoch": 0.9644905587420248, + "grad_norm": 0.0009387569199021039, + "learning_rate": 4.0243465807375586e-07, + "loss": 0.0, + "step": 14966 + }, + { + "epoch": 0.964555004188954, + "grad_norm": 0.019251289181688305, + "learning_rate": 4.0171858216971006e-07, + "loss": 0.0001, + "step": 14967 + }, + { + "epoch": 0.9646194496358832, + "grad_norm": 0.00010738621659829706, + "learning_rate": 4.010025062656642e-07, + "loss": 0.0, + "step": 14968 + }, + { + "epoch": 0.9646838950828124, + "grad_norm": 0.03203928420734971, + "learning_rate": 4.002864303616184e-07, + "loss": 0.0001, + "step": 14969 + }, + { + "epoch": 0.9647483405297416, + "grad_norm": 0.0003362348993916054, + "learning_rate": 3.995703544575726e-07, + "loss": 0.0, + "step": 14970 + }, + { + "epoch": 0.9648127859766708, + "grad_norm": 0.000265084003407136, + "learning_rate": 3.9885427855352667e-07, + "loss": 0.0, + "step": 14971 + }, + { + "epoch": 0.9648772314235999, + "grad_norm": 0.004495177856111423, + "learning_rate": 3.9813820264948086e-07, + "loss": 0.0, + "step": 14972 + }, + { + "epoch": 0.9649416768705291, + "grad_norm": 0.04050054439674393, + "learning_rate": 3.9742212674543505e-07, + "loss": 0.0001, + "step": 14973 + }, + { + "epoch": 0.9650061223174583, + "grad_norm": 6.170983398313063e-05, + "learning_rate": 3.967060508413892e-07, + "loss": 0.0, + "step": 14974 + }, + { + "epoch": 0.9650705677643875, + "grad_norm": 0.00017663978023309493, + "learning_rate": 3.959899749373434e-07, + "loss": 0.0, + "step": 14975 + }, + { + "epoch": 0.9651350132113167, + "grad_norm": 5.699082884079711e-05, + "learning_rate": 3.952738990332976e-07, + "loss": 0.0, + "step": 14976 + }, + { + "epoch": 0.9651994586582457, + "grad_norm": 0.000602340558573276, + "learning_rate": 3.945578231292517e-07, + "loss": 0.0, + "step": 14977 + }, + { + "epoch": 0.9652639041051749, + "grad_norm": 0.0006447693375527919, + "learning_rate": 3.938417472252059e-07, + "loss": 0.0, + "step": 14978 + }, + { + "epoch": 0.9653283495521041, + "grad_norm": 0.005086575180647127, + "learning_rate": 3.931256713211601e-07, + "loss": 0.0, + "step": 14979 + }, + { + "epoch": 0.9653927949990333, + "grad_norm": 0.18404470009542506, + "learning_rate": 3.924095954171143e-07, + "loss": 0.0021, + "step": 14980 + }, + { + "epoch": 0.9654572404459625, + "grad_norm": 0.0012513134685006979, + "learning_rate": 3.916935195130684e-07, + "loss": 0.0, + "step": 14981 + }, + { + "epoch": 0.9655216858928917, + "grad_norm": 0.0026647520580636358, + "learning_rate": 3.909774436090226e-07, + "loss": 0.0, + "step": 14982 + }, + { + "epoch": 0.9655861313398209, + "grad_norm": 0.00042825462339338795, + "learning_rate": 3.9026136770497677e-07, + "loss": 0.0, + "step": 14983 + }, + { + "epoch": 0.96565057678675, + "grad_norm": 0.07452899937460482, + "learning_rate": 3.895452918009309e-07, + "loss": 0.0001, + "step": 14984 + }, + { + "epoch": 0.9657150222336792, + "grad_norm": 0.0003942303834534447, + "learning_rate": 3.888292158968851e-07, + "loss": 0.0, + "step": 14985 + }, + { + "epoch": 0.9657794676806084, + "grad_norm": 0.000612557581035135, + "learning_rate": 3.881131399928393e-07, + "loss": 0.0, + "step": 14986 + }, + { + "epoch": 0.9658439131275376, + "grad_norm": 0.015846453551998426, + "learning_rate": 3.8739706408879344e-07, + "loss": 0.0, + "step": 14987 + }, + { + "epoch": 0.9659083585744667, + "grad_norm": 0.10003049039179873, + "learning_rate": 3.8668098818474763e-07, + "loss": 0.0012, + "step": 14988 + }, + { + "epoch": 0.9659728040213958, + "grad_norm": 0.00026728753770736686, + "learning_rate": 3.859649122807018e-07, + "loss": 0.0, + "step": 14989 + }, + { + "epoch": 0.966037249468325, + "grad_norm": 0.0002459140433178353, + "learning_rate": 3.852488363766559e-07, + "loss": 0.0, + "step": 14990 + }, + { + "epoch": 0.9661016949152542, + "grad_norm": 0.0005397851917570067, + "learning_rate": 3.845327604726101e-07, + "loss": 0.0, + "step": 14991 + }, + { + "epoch": 0.9661661403621834, + "grad_norm": 0.00021344605552674962, + "learning_rate": 3.838166845685643e-07, + "loss": 0.0, + "step": 14992 + }, + { + "epoch": 0.9662305858091126, + "grad_norm": 0.0004351620778108871, + "learning_rate": 3.8310060866451843e-07, + "loss": 0.0, + "step": 14993 + }, + { + "epoch": 0.9662950312560418, + "grad_norm": 0.000685698942079955, + "learning_rate": 3.8238453276047263e-07, + "loss": 0.0, + "step": 14994 + }, + { + "epoch": 0.966359476702971, + "grad_norm": 0.04826283823719522, + "learning_rate": 3.816684568564268e-07, + "loss": 0.0003, + "step": 14995 + }, + { + "epoch": 0.9664239221499001, + "grad_norm": 0.0009347700493750752, + "learning_rate": 3.80952380952381e-07, + "loss": 0.0, + "step": 14996 + }, + { + "epoch": 0.9664883675968293, + "grad_norm": 6.683883255573541e-05, + "learning_rate": 3.8023630504833515e-07, + "loss": 0.0, + "step": 14997 + }, + { + "epoch": 0.9665528130437585, + "grad_norm": 0.014872162732202817, + "learning_rate": 3.7952022914428935e-07, + "loss": 0.0, + "step": 14998 + }, + { + "epoch": 0.9666172584906876, + "grad_norm": 0.002071540981744842, + "learning_rate": 3.7880415324024354e-07, + "loss": 0.0, + "step": 14999 + }, + { + "epoch": 0.9666817039376168, + "grad_norm": 0.0568428519555771, + "learning_rate": 3.780880773361976e-07, + "loss": 0.0004, + "step": 15000 + }, + { + "epoch": 0.966746149384546, + "grad_norm": 0.008794983717003983, + "learning_rate": 3.773720014321518e-07, + "loss": 0.0, + "step": 15001 + }, + { + "epoch": 0.9668105948314751, + "grad_norm": 0.00263075135347942, + "learning_rate": 3.76655925528106e-07, + "loss": 0.0, + "step": 15002 + }, + { + "epoch": 0.9668750402784043, + "grad_norm": 0.00011607515633291526, + "learning_rate": 3.7593984962406015e-07, + "loss": 0.0, + "step": 15003 + }, + { + "epoch": 0.9669394857253335, + "grad_norm": 0.00035570977344336345, + "learning_rate": 3.7522377372001434e-07, + "loss": 0.0, + "step": 15004 + }, + { + "epoch": 0.9670039311722627, + "grad_norm": 0.0003867004197191581, + "learning_rate": 3.7450769781596854e-07, + "loss": 0.0, + "step": 15005 + }, + { + "epoch": 0.9670683766191919, + "grad_norm": 0.00025640017192041177, + "learning_rate": 3.737916219119227e-07, + "loss": 0.0, + "step": 15006 + }, + { + "epoch": 0.9671328220661211, + "grad_norm": 0.001456672819982795, + "learning_rate": 3.7307554600787687e-07, + "loss": 0.0, + "step": 15007 + }, + { + "epoch": 0.9671972675130502, + "grad_norm": 0.0014484291111371464, + "learning_rate": 3.7235947010383106e-07, + "loss": 0.0, + "step": 15008 + }, + { + "epoch": 0.9672617129599794, + "grad_norm": 0.0015187991078433156, + "learning_rate": 3.716433941997852e-07, + "loss": 0.0, + "step": 15009 + }, + { + "epoch": 0.9673261584069085, + "grad_norm": 0.0004086356212154145, + "learning_rate": 3.709273182957394e-07, + "loss": 0.0, + "step": 15010 + }, + { + "epoch": 0.9673906038538377, + "grad_norm": 0.0012860003776702935, + "learning_rate": 3.702112423916936e-07, + "loss": 0.0, + "step": 15011 + }, + { + "epoch": 0.9674550493007669, + "grad_norm": 0.0003662492167377044, + "learning_rate": 3.694951664876477e-07, + "loss": 0.0, + "step": 15012 + }, + { + "epoch": 0.967519494747696, + "grad_norm": 0.0007289835883782952, + "learning_rate": 3.6877909058360187e-07, + "loss": 0.0, + "step": 15013 + }, + { + "epoch": 0.9675839401946252, + "grad_norm": 0.312234252945699, + "learning_rate": 3.6806301467955606e-07, + "loss": 0.0006, + "step": 15014 + }, + { + "epoch": 0.9676483856415544, + "grad_norm": 0.00048179295287462684, + "learning_rate": 3.6734693877551025e-07, + "loss": 0.0, + "step": 15015 + }, + { + "epoch": 0.9677128310884836, + "grad_norm": 0.001208616655439765, + "learning_rate": 3.666308628714644e-07, + "loss": 0.0, + "step": 15016 + }, + { + "epoch": 0.9677772765354128, + "grad_norm": 0.3449243621045279, + "learning_rate": 3.659147869674186e-07, + "loss": 0.0002, + "step": 15017 + }, + { + "epoch": 0.967841721982342, + "grad_norm": 0.015200695074966152, + "learning_rate": 3.651987110633728e-07, + "loss": 0.0, + "step": 15018 + }, + { + "epoch": 0.9679061674292712, + "grad_norm": 0.0014013163488261668, + "learning_rate": 3.644826351593269e-07, + "loss": 0.0, + "step": 15019 + }, + { + "epoch": 0.9679706128762003, + "grad_norm": 0.0005596241722062707, + "learning_rate": 3.637665592552811e-07, + "loss": 0.0, + "step": 15020 + }, + { + "epoch": 0.9680350583231294, + "grad_norm": 0.0005016758906052871, + "learning_rate": 3.630504833512353e-07, + "loss": 0.0, + "step": 15021 + }, + { + "epoch": 0.9680995037700586, + "grad_norm": 0.0002737052037732511, + "learning_rate": 3.623344074471894e-07, + "loss": 0.0, + "step": 15022 + }, + { + "epoch": 0.9681639492169878, + "grad_norm": 0.004652262607627751, + "learning_rate": 3.616183315431436e-07, + "loss": 0.0, + "step": 15023 + }, + { + "epoch": 0.968228394663917, + "grad_norm": 0.0029658107489418965, + "learning_rate": 3.609022556390978e-07, + "loss": 0.0, + "step": 15024 + }, + { + "epoch": 0.9682928401108462, + "grad_norm": 0.0018419730604352967, + "learning_rate": 3.601861797350519e-07, + "loss": 0.0, + "step": 15025 + }, + { + "epoch": 0.9683572855577753, + "grad_norm": 0.0028526369216188116, + "learning_rate": 3.594701038310061e-07, + "loss": 0.0, + "step": 15026 + }, + { + "epoch": 0.9684217310047045, + "grad_norm": 0.0008088329827487265, + "learning_rate": 3.587540279269603e-07, + "loss": 0.0, + "step": 15027 + }, + { + "epoch": 0.9684861764516337, + "grad_norm": 0.10358512542605385, + "learning_rate": 3.5803795202291444e-07, + "loss": 0.0001, + "step": 15028 + }, + { + "epoch": 0.9685506218985629, + "grad_norm": 0.0004286461885383949, + "learning_rate": 3.5732187611886864e-07, + "loss": 0.0, + "step": 15029 + }, + { + "epoch": 0.9686150673454921, + "grad_norm": 0.0017663911047688466, + "learning_rate": 3.5660580021482283e-07, + "loss": 0.0, + "step": 15030 + }, + { + "epoch": 0.9686795127924213, + "grad_norm": 0.0016889736383674534, + "learning_rate": 3.558897243107769e-07, + "loss": 0.0, + "step": 15031 + }, + { + "epoch": 0.9687439582393504, + "grad_norm": 0.0320417512712144, + "learning_rate": 3.551736484067311e-07, + "loss": 0.0001, + "step": 15032 + }, + { + "epoch": 0.9688084036862795, + "grad_norm": 0.014156902284652106, + "learning_rate": 3.544575725026853e-07, + "loss": 0.0, + "step": 15033 + }, + { + "epoch": 0.9688728491332087, + "grad_norm": 0.0018575665054532513, + "learning_rate": 3.537414965986395e-07, + "loss": 0.0, + "step": 15034 + }, + { + "epoch": 0.9689372945801379, + "grad_norm": 0.3058661502755169, + "learning_rate": 3.5302542069459363e-07, + "loss": 0.0011, + "step": 15035 + }, + { + "epoch": 0.9690017400270671, + "grad_norm": 0.05986802368608069, + "learning_rate": 3.523093447905478e-07, + "loss": 0.0002, + "step": 15036 + }, + { + "epoch": 0.9690661854739963, + "grad_norm": 0.0004834545781512201, + "learning_rate": 3.51593268886502e-07, + "loss": 0.0, + "step": 15037 + }, + { + "epoch": 0.9691306309209254, + "grad_norm": 0.012819738966230294, + "learning_rate": 3.5087719298245616e-07, + "loss": 0.0, + "step": 15038 + }, + { + "epoch": 0.9691950763678546, + "grad_norm": 6.932870897949043e-05, + "learning_rate": 3.5016111707841035e-07, + "loss": 0.0, + "step": 15039 + }, + { + "epoch": 0.9692595218147838, + "grad_norm": 0.03340801142225263, + "learning_rate": 3.4944504117436454e-07, + "loss": 0.0001, + "step": 15040 + }, + { + "epoch": 0.969323967261713, + "grad_norm": 0.0023576116642467257, + "learning_rate": 3.487289652703187e-07, + "loss": 0.0, + "step": 15041 + }, + { + "epoch": 0.9693884127086422, + "grad_norm": 0.001895887451075661, + "learning_rate": 3.480128893662729e-07, + "loss": 0.0, + "step": 15042 + }, + { + "epoch": 0.9694528581555714, + "grad_norm": 0.0004433743816133635, + "learning_rate": 3.4729681346222707e-07, + "loss": 0.0, + "step": 15043 + }, + { + "epoch": 0.9695173036025004, + "grad_norm": 0.19629095086095616, + "learning_rate": 3.4658073755818116e-07, + "loss": 0.0004, + "step": 15044 + }, + { + "epoch": 0.9695817490494296, + "grad_norm": 0.00023726558542802232, + "learning_rate": 3.4586466165413535e-07, + "loss": 0.0, + "step": 15045 + }, + { + "epoch": 0.9696461944963588, + "grad_norm": 0.018993789490638787, + "learning_rate": 3.4514858575008954e-07, + "loss": 0.0, + "step": 15046 + }, + { + "epoch": 0.969710639943288, + "grad_norm": 0.0002520063939503362, + "learning_rate": 3.444325098460437e-07, + "loss": 0.0, + "step": 15047 + }, + { + "epoch": 0.9697750853902172, + "grad_norm": 0.005074428359961007, + "learning_rate": 3.437164339419979e-07, + "loss": 0.0, + "step": 15048 + }, + { + "epoch": 0.9698395308371464, + "grad_norm": 0.0015976258024932811, + "learning_rate": 3.4300035803795207e-07, + "loss": 0.0, + "step": 15049 + }, + { + "epoch": 0.9699039762840755, + "grad_norm": 0.0011570189577347902, + "learning_rate": 3.4228428213390626e-07, + "loss": 0.0, + "step": 15050 + }, + { + "epoch": 0.9699684217310047, + "grad_norm": 0.0467625252062478, + "learning_rate": 3.415682062298604e-07, + "loss": 0.0, + "step": 15051 + }, + { + "epoch": 0.9700328671779339, + "grad_norm": 0.10716094057074699, + "learning_rate": 3.408521303258146e-07, + "loss": 0.0001, + "step": 15052 + }, + { + "epoch": 0.9700973126248631, + "grad_norm": 0.0010008353049710968, + "learning_rate": 3.401360544217688e-07, + "loss": 0.0, + "step": 15053 + }, + { + "epoch": 0.9701617580717923, + "grad_norm": 0.05783591134718608, + "learning_rate": 3.3941997851772287e-07, + "loss": 0.0, + "step": 15054 + }, + { + "epoch": 0.9702262035187214, + "grad_norm": 0.01778828143835162, + "learning_rate": 3.3870390261367707e-07, + "loss": 0.0001, + "step": 15055 + }, + { + "epoch": 0.9702906489656505, + "grad_norm": 0.011812855447108965, + "learning_rate": 3.3798782670963126e-07, + "loss": 0.0, + "step": 15056 + }, + { + "epoch": 0.9703550944125797, + "grad_norm": 0.0007705646428616686, + "learning_rate": 3.372717508055854e-07, + "loss": 0.0, + "step": 15057 + }, + { + "epoch": 0.9704195398595089, + "grad_norm": 0.0002786041604228193, + "learning_rate": 3.365556749015396e-07, + "loss": 0.0, + "step": 15058 + }, + { + "epoch": 0.9704839853064381, + "grad_norm": 0.03147547745520999, + "learning_rate": 3.358395989974938e-07, + "loss": 0.0003, + "step": 15059 + }, + { + "epoch": 0.9705484307533673, + "grad_norm": 0.0025172349651695577, + "learning_rate": 3.351235230934479e-07, + "loss": 0.0, + "step": 15060 + }, + { + "epoch": 0.9706128762002965, + "grad_norm": 0.0004264914551491804, + "learning_rate": 3.344074471894021e-07, + "loss": 0.0, + "step": 15061 + }, + { + "epoch": 0.9706773216472256, + "grad_norm": 0.37015364172979875, + "learning_rate": 3.336913712853563e-07, + "loss": 0.001, + "step": 15062 + }, + { + "epoch": 0.9707417670941548, + "grad_norm": 0.0026231783325515613, + "learning_rate": 3.329752953813104e-07, + "loss": 0.0, + "step": 15063 + }, + { + "epoch": 0.970806212541084, + "grad_norm": 2.1080350788028017, + "learning_rate": 3.322592194772646e-07, + "loss": 0.0038, + "step": 15064 + }, + { + "epoch": 0.9708706579880132, + "grad_norm": 0.001236758915207383, + "learning_rate": 3.315431435732188e-07, + "loss": 0.0, + "step": 15065 + }, + { + "epoch": 0.9709351034349423, + "grad_norm": 0.0008858550028966706, + "learning_rate": 3.308270676691729e-07, + "loss": 0.0, + "step": 15066 + }, + { + "epoch": 0.9709995488818715, + "grad_norm": 0.012957684924999985, + "learning_rate": 3.301109917651271e-07, + "loss": 0.0001, + "step": 15067 + }, + { + "epoch": 0.9710639943288006, + "grad_norm": 0.0035890522764365973, + "learning_rate": 3.293949158610813e-07, + "loss": 0.0, + "step": 15068 + }, + { + "epoch": 0.9711284397757298, + "grad_norm": 0.0075972949912963775, + "learning_rate": 3.286788399570355e-07, + "loss": 0.0001, + "step": 15069 + }, + { + "epoch": 0.971192885222659, + "grad_norm": 0.00017337491251541505, + "learning_rate": 3.2796276405298964e-07, + "loss": 0.0, + "step": 15070 + }, + { + "epoch": 0.9712573306695882, + "grad_norm": 0.00840977936296432, + "learning_rate": 3.2724668814894383e-07, + "loss": 0.0, + "step": 15071 + }, + { + "epoch": 0.9713217761165174, + "grad_norm": 0.02215838745153162, + "learning_rate": 3.2653061224489803e-07, + "loss": 0.0002, + "step": 15072 + }, + { + "epoch": 0.9713862215634466, + "grad_norm": 8.793780417752341, + "learning_rate": 3.258145363408521e-07, + "loss": 0.023, + "step": 15073 + }, + { + "epoch": 0.9714506670103757, + "grad_norm": 0.006577569953616285, + "learning_rate": 3.250984604368063e-07, + "loss": 0.0, + "step": 15074 + }, + { + "epoch": 0.9715151124573049, + "grad_norm": 0.11361466455953624, + "learning_rate": 3.243823845327605e-07, + "loss": 0.0003, + "step": 15075 + }, + { + "epoch": 0.9715795579042341, + "grad_norm": 0.0046748844567726456, + "learning_rate": 3.2366630862871464e-07, + "loss": 0.0, + "step": 15076 + }, + { + "epoch": 0.9716440033511632, + "grad_norm": 0.015705236972287314, + "learning_rate": 3.2295023272466883e-07, + "loss": 0.0001, + "step": 15077 + }, + { + "epoch": 0.9717084487980924, + "grad_norm": 0.27528340031077064, + "learning_rate": 3.22234156820623e-07, + "loss": 0.0022, + "step": 15078 + }, + { + "epoch": 0.9717728942450216, + "grad_norm": 0.016288971732021627, + "learning_rate": 3.2151808091657716e-07, + "loss": 0.0001, + "step": 15079 + }, + { + "epoch": 0.9718373396919507, + "grad_norm": 0.47387322685274225, + "learning_rate": 3.2080200501253136e-07, + "loss": 0.0044, + "step": 15080 + }, + { + "epoch": 0.9719017851388799, + "grad_norm": 0.0011477208401962785, + "learning_rate": 3.2008592910848555e-07, + "loss": 0.0, + "step": 15081 + }, + { + "epoch": 0.9719662305858091, + "grad_norm": 0.0034684748119078452, + "learning_rate": 3.193698532044397e-07, + "loss": 0.0, + "step": 15082 + }, + { + "epoch": 0.9720306760327383, + "grad_norm": 0.004370211072131715, + "learning_rate": 3.186537773003939e-07, + "loss": 0.0, + "step": 15083 + }, + { + "epoch": 0.9720951214796675, + "grad_norm": 0.004952676162059169, + "learning_rate": 3.179377013963481e-07, + "loss": 0.0, + "step": 15084 + }, + { + "epoch": 0.9721595669265967, + "grad_norm": 0.0369411296736311, + "learning_rate": 3.1722162549230216e-07, + "loss": 0.0001, + "step": 15085 + }, + { + "epoch": 0.9722240123735258, + "grad_norm": 0.0003230311543377619, + "learning_rate": 3.1650554958825636e-07, + "loss": 0.0, + "step": 15086 + }, + { + "epoch": 0.972288457820455, + "grad_norm": 0.0008222280461801855, + "learning_rate": 3.1578947368421055e-07, + "loss": 0.0, + "step": 15087 + }, + { + "epoch": 0.9723529032673841, + "grad_norm": 0.00021895085382019427, + "learning_rate": 3.1507339778016474e-07, + "loss": 0.0, + "step": 15088 + }, + { + "epoch": 0.9724173487143133, + "grad_norm": 0.0011354382390777846, + "learning_rate": 3.143573218761189e-07, + "loss": 0.0, + "step": 15089 + }, + { + "epoch": 0.9724817941612425, + "grad_norm": 0.0003584275642686638, + "learning_rate": 3.136412459720731e-07, + "loss": 0.0, + "step": 15090 + }, + { + "epoch": 0.9725462396081717, + "grad_norm": 0.00023074791790306088, + "learning_rate": 3.1292517006802727e-07, + "loss": 0.0, + "step": 15091 + }, + { + "epoch": 0.9726106850551008, + "grad_norm": 0.02096019492573941, + "learning_rate": 3.122090941639814e-07, + "loss": 0.0, + "step": 15092 + }, + { + "epoch": 0.97267513050203, + "grad_norm": 0.0014797492472057203, + "learning_rate": 3.114930182599356e-07, + "loss": 0.0, + "step": 15093 + }, + { + "epoch": 0.9727395759489592, + "grad_norm": 0.0010520218654664925, + "learning_rate": 3.1077694235588974e-07, + "loss": 0.0, + "step": 15094 + }, + { + "epoch": 0.9728040213958884, + "grad_norm": 0.0008259018489257181, + "learning_rate": 3.1006086645184393e-07, + "loss": 0.0, + "step": 15095 + }, + { + "epoch": 0.9728684668428176, + "grad_norm": 0.000960079676407256, + "learning_rate": 3.0934479054779807e-07, + "loss": 0.0, + "step": 15096 + }, + { + "epoch": 0.9729329122897468, + "grad_norm": 0.000389977335042752, + "learning_rate": 3.0862871464375226e-07, + "loss": 0.0, + "step": 15097 + }, + { + "epoch": 0.972997357736676, + "grad_norm": 0.008095213189473769, + "learning_rate": 3.0791263873970646e-07, + "loss": 0.0, + "step": 15098 + }, + { + "epoch": 0.973061803183605, + "grad_norm": 0.0006163576107396637, + "learning_rate": 3.071965628356606e-07, + "loss": 0.0, + "step": 15099 + }, + { + "epoch": 0.9731262486305342, + "grad_norm": 0.03904227537872416, + "learning_rate": 3.064804869316148e-07, + "loss": 0.0002, + "step": 15100 + }, + { + "epoch": 0.9731906940774634, + "grad_norm": 0.09723703527236527, + "learning_rate": 3.0576441102756893e-07, + "loss": 0.0002, + "step": 15101 + }, + { + "epoch": 0.9732551395243926, + "grad_norm": 0.1627526732828032, + "learning_rate": 3.050483351235231e-07, + "loss": 0.0006, + "step": 15102 + }, + { + "epoch": 0.9733195849713218, + "grad_norm": 0.055487841104563056, + "learning_rate": 3.043322592194773e-07, + "loss": 0.0001, + "step": 15103 + }, + { + "epoch": 0.973384030418251, + "grad_norm": 0.00046731365863853907, + "learning_rate": 3.0361618331543146e-07, + "loss": 0.0, + "step": 15104 + }, + { + "epoch": 0.9734484758651801, + "grad_norm": 0.0034985155925200406, + "learning_rate": 3.029001074113856e-07, + "loss": 0.0, + "step": 15105 + }, + { + "epoch": 0.9735129213121093, + "grad_norm": 0.2584352197511904, + "learning_rate": 3.021840315073398e-07, + "loss": 0.0013, + "step": 15106 + }, + { + "epoch": 0.9735773667590385, + "grad_norm": 0.017777880831103005, + "learning_rate": 3.01467955603294e-07, + "loss": 0.0001, + "step": 15107 + }, + { + "epoch": 0.9736418122059677, + "grad_norm": 0.155115485075132, + "learning_rate": 3.007518796992481e-07, + "loss": 0.0006, + "step": 15108 + }, + { + "epoch": 0.9737062576528969, + "grad_norm": 0.003783563882959519, + "learning_rate": 3.000358037952023e-07, + "loss": 0.0, + "step": 15109 + }, + { + "epoch": 0.973770703099826, + "grad_norm": 0.01880250699719194, + "learning_rate": 2.9931972789115645e-07, + "loss": 0.0, + "step": 15110 + }, + { + "epoch": 0.9738351485467551, + "grad_norm": 0.0008284128947969417, + "learning_rate": 2.9860365198711065e-07, + "loss": 0.0, + "step": 15111 + }, + { + "epoch": 0.9738995939936843, + "grad_norm": 0.00040061634608030576, + "learning_rate": 2.9788757608306484e-07, + "loss": 0.0, + "step": 15112 + }, + { + "epoch": 0.9739640394406135, + "grad_norm": 0.0015559103676428346, + "learning_rate": 2.97171500179019e-07, + "loss": 0.0, + "step": 15113 + }, + { + "epoch": 0.9740284848875427, + "grad_norm": 4.241696346927127e-05, + "learning_rate": 2.9645542427497317e-07, + "loss": 0.0, + "step": 15114 + }, + { + "epoch": 0.9740929303344719, + "grad_norm": 0.0075211743685994975, + "learning_rate": 2.9573934837092736e-07, + "loss": 0.0, + "step": 15115 + }, + { + "epoch": 0.974157375781401, + "grad_norm": 0.0007922172603788567, + "learning_rate": 2.950232724668815e-07, + "loss": 0.0, + "step": 15116 + }, + { + "epoch": 0.9742218212283302, + "grad_norm": 8.615947529956862e-05, + "learning_rate": 2.943071965628357e-07, + "loss": 0.0, + "step": 15117 + }, + { + "epoch": 0.9742862666752594, + "grad_norm": 0.0011283102264602943, + "learning_rate": 2.9359112065878984e-07, + "loss": 0.0, + "step": 15118 + }, + { + "epoch": 0.9743507121221886, + "grad_norm": 0.0003551449614521658, + "learning_rate": 2.9287504475474403e-07, + "loss": 0.0, + "step": 15119 + }, + { + "epoch": 0.9744151575691178, + "grad_norm": 0.011122411754986475, + "learning_rate": 2.921589688506982e-07, + "loss": 0.0, + "step": 15120 + }, + { + "epoch": 0.974479603016047, + "grad_norm": 0.002033701591427555, + "learning_rate": 2.9144289294665236e-07, + "loss": 0.0, + "step": 15121 + }, + { + "epoch": 0.974544048462976, + "grad_norm": 0.0004772722821963182, + "learning_rate": 2.9072681704260656e-07, + "loss": 0.0, + "step": 15122 + }, + { + "epoch": 0.9746084939099052, + "grad_norm": 0.014922254632288611, + "learning_rate": 2.900107411385607e-07, + "loss": 0.0, + "step": 15123 + }, + { + "epoch": 0.9746729393568344, + "grad_norm": 0.14024032856409033, + "learning_rate": 2.892946652345149e-07, + "loss": 0.0005, + "step": 15124 + }, + { + "epoch": 0.9747373848037636, + "grad_norm": 0.005984868862872008, + "learning_rate": 2.885785893304691e-07, + "loss": 0.0, + "step": 15125 + }, + { + "epoch": 0.9748018302506928, + "grad_norm": 0.004334931194733932, + "learning_rate": 2.878625134264232e-07, + "loss": 0.0, + "step": 15126 + }, + { + "epoch": 0.974866275697622, + "grad_norm": 0.002894429516225133, + "learning_rate": 2.871464375223774e-07, + "loss": 0.0, + "step": 15127 + }, + { + "epoch": 0.9749307211445511, + "grad_norm": 0.0003413323669872801, + "learning_rate": 2.8643036161833155e-07, + "loss": 0.0, + "step": 15128 + }, + { + "epoch": 0.9749951665914803, + "grad_norm": 0.0010935772408200211, + "learning_rate": 2.8571428571428575e-07, + "loss": 0.0, + "step": 15129 + }, + { + "epoch": 0.9750596120384095, + "grad_norm": 0.11490509704268698, + "learning_rate": 2.8499820981023994e-07, + "loss": 0.0001, + "step": 15130 + }, + { + "epoch": 0.9751240574853387, + "grad_norm": 0.24854126418726763, + "learning_rate": 2.842821339061941e-07, + "loss": 0.0005, + "step": 15131 + }, + { + "epoch": 0.9751885029322679, + "grad_norm": 0.008830847027775436, + "learning_rate": 2.835660580021482e-07, + "loss": 0.0, + "step": 15132 + }, + { + "epoch": 0.975252948379197, + "grad_norm": 0.0008734777265217501, + "learning_rate": 2.828499820981024e-07, + "loss": 0.0, + "step": 15133 + }, + { + "epoch": 0.9753173938261261, + "grad_norm": 0.36864601786661755, + "learning_rate": 2.821339061940566e-07, + "loss": 0.0007, + "step": 15134 + }, + { + "epoch": 0.9753818392730553, + "grad_norm": 0.0022641853155302765, + "learning_rate": 2.8141783029001074e-07, + "loss": 0.0, + "step": 15135 + }, + { + "epoch": 0.9754462847199845, + "grad_norm": 0.06988027629866522, + "learning_rate": 2.8070175438596494e-07, + "loss": 0.0002, + "step": 15136 + }, + { + "epoch": 0.9755107301669137, + "grad_norm": 0.0006696937469466642, + "learning_rate": 2.799856784819191e-07, + "loss": 0.0, + "step": 15137 + }, + { + "epoch": 0.9755751756138429, + "grad_norm": 0.0013796730065024532, + "learning_rate": 2.7926960257787327e-07, + "loss": 0.0, + "step": 15138 + }, + { + "epoch": 0.9756396210607721, + "grad_norm": 0.0004004937699149055, + "learning_rate": 2.7855352667382746e-07, + "loss": 0.0, + "step": 15139 + }, + { + "epoch": 0.9757040665077013, + "grad_norm": 0.005334612036549248, + "learning_rate": 2.778374507697816e-07, + "loss": 0.0, + "step": 15140 + }, + { + "epoch": 0.9757685119546304, + "grad_norm": 0.002374710997573885, + "learning_rate": 2.771213748657358e-07, + "loss": 0.0, + "step": 15141 + }, + { + "epoch": 0.9758329574015596, + "grad_norm": 0.0010283480037241186, + "learning_rate": 2.7640529896168994e-07, + "loss": 0.0, + "step": 15142 + }, + { + "epoch": 0.9758974028484888, + "grad_norm": 0.0018373973709965015, + "learning_rate": 2.7568922305764413e-07, + "loss": 0.0, + "step": 15143 + }, + { + "epoch": 0.9759618482954179, + "grad_norm": 0.00855921588567582, + "learning_rate": 2.749731471535983e-07, + "loss": 0.0, + "step": 15144 + }, + { + "epoch": 0.9760262937423471, + "grad_norm": 0.0020050944102561717, + "learning_rate": 2.7425707124955246e-07, + "loss": 0.0, + "step": 15145 + }, + { + "epoch": 0.9760907391892762, + "grad_norm": 0.022109847137461454, + "learning_rate": 2.7354099534550665e-07, + "loss": 0.0, + "step": 15146 + }, + { + "epoch": 0.9761551846362054, + "grad_norm": 0.0011914119683068853, + "learning_rate": 2.728249194414608e-07, + "loss": 0.0, + "step": 15147 + }, + { + "epoch": 0.9762196300831346, + "grad_norm": 0.0011568151027143419, + "learning_rate": 2.72108843537415e-07, + "loss": 0.0, + "step": 15148 + }, + { + "epoch": 0.9762840755300638, + "grad_norm": 0.0003769420355886379, + "learning_rate": 2.713927676333692e-07, + "loss": 0.0, + "step": 15149 + }, + { + "epoch": 0.976348520976993, + "grad_norm": 0.004740660637754758, + "learning_rate": 2.706766917293233e-07, + "loss": 0.0, + "step": 15150 + }, + { + "epoch": 0.9764129664239222, + "grad_norm": 0.01738213764846972, + "learning_rate": 2.699606158252775e-07, + "loss": 0.0002, + "step": 15151 + }, + { + "epoch": 0.9764774118708514, + "grad_norm": 0.0019549345364176865, + "learning_rate": 2.692445399212317e-07, + "loss": 0.0, + "step": 15152 + }, + { + "epoch": 0.9765418573177805, + "grad_norm": 0.0007493480390459141, + "learning_rate": 2.6852846401718585e-07, + "loss": 0.0, + "step": 15153 + }, + { + "epoch": 0.9766063027647097, + "grad_norm": 0.0026944598274480157, + "learning_rate": 2.6781238811314004e-07, + "loss": 0.0, + "step": 15154 + }, + { + "epoch": 0.9766707482116388, + "grad_norm": 0.011713198623444991, + "learning_rate": 2.670963122090942e-07, + "loss": 0.0, + "step": 15155 + }, + { + "epoch": 0.976735193658568, + "grad_norm": 0.022796453401214445, + "learning_rate": 2.6638023630504837e-07, + "loss": 0.0, + "step": 15156 + }, + { + "epoch": 0.9767996391054972, + "grad_norm": 0.0003065624087929334, + "learning_rate": 2.6566416040100256e-07, + "loss": 0.0, + "step": 15157 + }, + { + "epoch": 0.9768640845524263, + "grad_norm": 0.00849296321003075, + "learning_rate": 2.649480844969567e-07, + "loss": 0.0, + "step": 15158 + }, + { + "epoch": 0.9769285299993555, + "grad_norm": 0.0007528658127822991, + "learning_rate": 2.6423200859291084e-07, + "loss": 0.0, + "step": 15159 + }, + { + "epoch": 0.9769929754462847, + "grad_norm": 0.013537214203229, + "learning_rate": 2.6351593268886504e-07, + "loss": 0.0, + "step": 15160 + }, + { + "epoch": 0.9770574208932139, + "grad_norm": 0.008491264624011879, + "learning_rate": 2.6279985678481923e-07, + "loss": 0.0, + "step": 15161 + }, + { + "epoch": 0.9771218663401431, + "grad_norm": 0.040971711185676433, + "learning_rate": 2.6208378088077337e-07, + "loss": 0.0001, + "step": 15162 + }, + { + "epoch": 0.9771863117870723, + "grad_norm": 0.0021006739403628245, + "learning_rate": 2.6136770497672756e-07, + "loss": 0.0, + "step": 15163 + }, + { + "epoch": 0.9772507572340015, + "grad_norm": 0.0033201136895484383, + "learning_rate": 2.606516290726817e-07, + "loss": 0.0, + "step": 15164 + }, + { + "epoch": 0.9773152026809306, + "grad_norm": 0.0009516754660645897, + "learning_rate": 2.599355531686359e-07, + "loss": 0.0, + "step": 15165 + }, + { + "epoch": 0.9773796481278597, + "grad_norm": 0.0014256401567068185, + "learning_rate": 2.592194772645901e-07, + "loss": 0.0, + "step": 15166 + }, + { + "epoch": 0.9774440935747889, + "grad_norm": 0.007363190876471923, + "learning_rate": 2.5850340136054423e-07, + "loss": 0.0001, + "step": 15167 + }, + { + "epoch": 0.9775085390217181, + "grad_norm": 0.0001785714074357449, + "learning_rate": 2.577873254564984e-07, + "loss": 0.0, + "step": 15168 + }, + { + "epoch": 0.9775729844686473, + "grad_norm": 0.0010834485266816886, + "learning_rate": 2.5707124955245256e-07, + "loss": 0.0, + "step": 15169 + }, + { + "epoch": 0.9776374299155765, + "grad_norm": 0.14266291148414587, + "learning_rate": 2.5635517364840675e-07, + "loss": 0.0011, + "step": 15170 + }, + { + "epoch": 0.9777018753625056, + "grad_norm": 0.10230791742266519, + "learning_rate": 2.5563909774436095e-07, + "loss": 0.0003, + "step": 15171 + }, + { + "epoch": 0.9777663208094348, + "grad_norm": 0.18012416689572205, + "learning_rate": 2.549230218403151e-07, + "loss": 0.0011, + "step": 15172 + }, + { + "epoch": 0.977830766256364, + "grad_norm": 0.018825811985009936, + "learning_rate": 2.542069459362693e-07, + "loss": 0.0, + "step": 15173 + }, + { + "epoch": 0.9778952117032932, + "grad_norm": 0.0003657376167657997, + "learning_rate": 2.534908700322234e-07, + "loss": 0.0, + "step": 15174 + }, + { + "epoch": 0.9779596571502224, + "grad_norm": 0.0016266523017274357, + "learning_rate": 2.527747941281776e-07, + "loss": 0.0, + "step": 15175 + }, + { + "epoch": 0.9780241025971516, + "grad_norm": 0.031643844370067425, + "learning_rate": 2.520587182241318e-07, + "loss": 0.0002, + "step": 15176 + }, + { + "epoch": 0.9780885480440807, + "grad_norm": 0.0001272113728231804, + "learning_rate": 2.5134264232008594e-07, + "loss": 0.0, + "step": 15177 + }, + { + "epoch": 0.9781529934910098, + "grad_norm": 0.051368001198941485, + "learning_rate": 2.506265664160401e-07, + "loss": 0.0001, + "step": 15178 + }, + { + "epoch": 0.978217438937939, + "grad_norm": 0.11006070518329952, + "learning_rate": 2.499104905119943e-07, + "loss": 0.0017, + "step": 15179 + }, + { + "epoch": 0.9782818843848682, + "grad_norm": 0.007755887701504084, + "learning_rate": 2.4919441460794847e-07, + "loss": 0.0, + "step": 15180 + }, + { + "epoch": 0.9783463298317974, + "grad_norm": 0.0017534684004093396, + "learning_rate": 2.4847833870390266e-07, + "loss": 0.0, + "step": 15181 + }, + { + "epoch": 0.9784107752787266, + "grad_norm": 0.07081709706644274, + "learning_rate": 2.477622627998568e-07, + "loss": 0.0001, + "step": 15182 + }, + { + "epoch": 0.9784752207256557, + "grad_norm": 0.0018758709562702654, + "learning_rate": 2.4704618689581094e-07, + "loss": 0.0, + "step": 15183 + }, + { + "epoch": 0.9785396661725849, + "grad_norm": 0.001043274861493202, + "learning_rate": 2.4633011099176513e-07, + "loss": 0.0, + "step": 15184 + }, + { + "epoch": 0.9786041116195141, + "grad_norm": 0.0011120056715451592, + "learning_rate": 2.4561403508771933e-07, + "loss": 0.0, + "step": 15185 + }, + { + "epoch": 0.9786685570664433, + "grad_norm": 0.07570947081657063, + "learning_rate": 2.4489795918367347e-07, + "loss": 0.0002, + "step": 15186 + }, + { + "epoch": 0.9787330025133725, + "grad_norm": 0.0002145057054067127, + "learning_rate": 2.4418188327962766e-07, + "loss": 0.0, + "step": 15187 + }, + { + "epoch": 0.9787974479603017, + "grad_norm": 0.003613192592037386, + "learning_rate": 2.4346580737558185e-07, + "loss": 0.0, + "step": 15188 + }, + { + "epoch": 0.9788618934072307, + "grad_norm": 0.01221347519544524, + "learning_rate": 2.42749731471536e-07, + "loss": 0.0, + "step": 15189 + }, + { + "epoch": 0.9789263388541599, + "grad_norm": 0.0004138912713609249, + "learning_rate": 2.420336555674902e-07, + "loss": 0.0, + "step": 15190 + }, + { + "epoch": 0.9789907843010891, + "grad_norm": 0.013110504602359498, + "learning_rate": 2.413175796634443e-07, + "loss": 0.0, + "step": 15191 + }, + { + "epoch": 0.9790552297480183, + "grad_norm": 0.07918108957232092, + "learning_rate": 2.406015037593985e-07, + "loss": 0.0001, + "step": 15192 + }, + { + "epoch": 0.9791196751949475, + "grad_norm": 0.0009919039297520671, + "learning_rate": 2.398854278553527e-07, + "loss": 0.0, + "step": 15193 + }, + { + "epoch": 0.9791841206418767, + "grad_norm": 0.0015389498764722693, + "learning_rate": 2.3916935195130685e-07, + "loss": 0.0, + "step": 15194 + }, + { + "epoch": 0.9792485660888058, + "grad_norm": 0.0007932537619711912, + "learning_rate": 2.3845327604726104e-07, + "loss": 0.0, + "step": 15195 + }, + { + "epoch": 0.979313011535735, + "grad_norm": 0.3223723494933655, + "learning_rate": 2.377372001432152e-07, + "loss": 0.0007, + "step": 15196 + }, + { + "epoch": 0.9793774569826642, + "grad_norm": 0.0037370869206710114, + "learning_rate": 2.3702112423916935e-07, + "loss": 0.0, + "step": 15197 + }, + { + "epoch": 0.9794419024295934, + "grad_norm": 0.01667958658079532, + "learning_rate": 2.3630504833512354e-07, + "loss": 0.0, + "step": 15198 + }, + { + "epoch": 0.9795063478765226, + "grad_norm": 0.045919618893321044, + "learning_rate": 2.355889724310777e-07, + "loss": 0.0001, + "step": 15199 + }, + { + "epoch": 0.9795707933234516, + "grad_norm": 0.00020583061499714295, + "learning_rate": 2.348728965270319e-07, + "loss": 0.0, + "step": 15200 + }, + { + "epoch": 0.9796352387703808, + "grad_norm": 0.0035573688295585603, + "learning_rate": 2.3415682062298607e-07, + "loss": 0.0, + "step": 15201 + }, + { + "epoch": 0.97969968421731, + "grad_norm": 0.0003429043040178539, + "learning_rate": 2.334407447189402e-07, + "loss": 0.0, + "step": 15202 + }, + { + "epoch": 0.9797641296642392, + "grad_norm": 0.00013085403944323605, + "learning_rate": 2.327246688148944e-07, + "loss": 0.0, + "step": 15203 + }, + { + "epoch": 0.9798285751111684, + "grad_norm": 0.005921728071037252, + "learning_rate": 2.3200859291084857e-07, + "loss": 0.0, + "step": 15204 + }, + { + "epoch": 0.9798930205580976, + "grad_norm": 0.000992523744680652, + "learning_rate": 2.3129251700680273e-07, + "loss": 0.0, + "step": 15205 + }, + { + "epoch": 0.9799574660050268, + "grad_norm": 0.0006133625288807913, + "learning_rate": 2.3057644110275693e-07, + "loss": 0.0, + "step": 15206 + }, + { + "epoch": 0.9800219114519559, + "grad_norm": 0.0008549871418847644, + "learning_rate": 2.2986036519871107e-07, + "loss": 0.0, + "step": 15207 + }, + { + "epoch": 0.9800863568988851, + "grad_norm": 0.00020320387837958342, + "learning_rate": 2.2914428929466526e-07, + "loss": 0.0, + "step": 15208 + }, + { + "epoch": 0.9801508023458143, + "grad_norm": 0.22708419225833618, + "learning_rate": 2.2842821339061943e-07, + "loss": 0.0029, + "step": 15209 + }, + { + "epoch": 0.9802152477927435, + "grad_norm": 0.0004024974515455393, + "learning_rate": 2.277121374865736e-07, + "loss": 0.0, + "step": 15210 + }, + { + "epoch": 0.9802796932396726, + "grad_norm": 0.04274219695899788, + "learning_rate": 2.2699606158252778e-07, + "loss": 0.0001, + "step": 15211 + }, + { + "epoch": 0.9803441386866018, + "grad_norm": 0.034374677117413543, + "learning_rate": 2.2627998567848195e-07, + "loss": 0.0001, + "step": 15212 + }, + { + "epoch": 0.9804085841335309, + "grad_norm": 0.000535652454101118, + "learning_rate": 2.255639097744361e-07, + "loss": 0.0, + "step": 15213 + }, + { + "epoch": 0.9804730295804601, + "grad_norm": 0.14314235455022237, + "learning_rate": 2.2484783387039028e-07, + "loss": 0.0002, + "step": 15214 + }, + { + "epoch": 0.9805374750273893, + "grad_norm": 0.0018527310044761708, + "learning_rate": 2.2413175796634445e-07, + "loss": 0.0, + "step": 15215 + }, + { + "epoch": 0.9806019204743185, + "grad_norm": 0.013785798169173939, + "learning_rate": 2.2341568206229862e-07, + "loss": 0.0, + "step": 15216 + }, + { + "epoch": 0.9806663659212477, + "grad_norm": 0.008295814609581176, + "learning_rate": 2.226996061582528e-07, + "loss": 0.0, + "step": 15217 + }, + { + "epoch": 0.9807308113681769, + "grad_norm": 0.00018404890305874304, + "learning_rate": 2.2198353025420695e-07, + "loss": 0.0, + "step": 15218 + }, + { + "epoch": 0.980795256815106, + "grad_norm": 0.00012482961636409894, + "learning_rate": 2.2126745435016114e-07, + "loss": 0.0, + "step": 15219 + }, + { + "epoch": 0.9808597022620352, + "grad_norm": 0.0049240704985918085, + "learning_rate": 2.205513784461153e-07, + "loss": 0.0, + "step": 15220 + }, + { + "epoch": 0.9809241477089644, + "grad_norm": 0.0031281767796909107, + "learning_rate": 2.1983530254206947e-07, + "loss": 0.0, + "step": 15221 + }, + { + "epoch": 0.9809885931558935, + "grad_norm": 0.0008334030137153192, + "learning_rate": 2.1911922663802367e-07, + "loss": 0.0, + "step": 15222 + }, + { + "epoch": 0.9810530386028227, + "grad_norm": 0.00019850462293857934, + "learning_rate": 2.184031507339778e-07, + "loss": 0.0, + "step": 15223 + }, + { + "epoch": 0.9811174840497519, + "grad_norm": 0.00036782715442014083, + "learning_rate": 2.1768707482993197e-07, + "loss": 0.0, + "step": 15224 + }, + { + "epoch": 0.981181929496681, + "grad_norm": 0.0003710061416936885, + "learning_rate": 2.1697099892588617e-07, + "loss": 0.0, + "step": 15225 + }, + { + "epoch": 0.9812463749436102, + "grad_norm": 0.012523587796355857, + "learning_rate": 2.1625492302184033e-07, + "loss": 0.0, + "step": 15226 + }, + { + "epoch": 0.9813108203905394, + "grad_norm": 0.002729799152419526, + "learning_rate": 2.1553884711779453e-07, + "loss": 0.0, + "step": 15227 + }, + { + "epoch": 0.9813752658374686, + "grad_norm": 0.0001641298882696401, + "learning_rate": 2.1482277121374867e-07, + "loss": 0.0, + "step": 15228 + }, + { + "epoch": 0.9814397112843978, + "grad_norm": 1.309516649618846, + "learning_rate": 2.1410669530970283e-07, + "loss": 0.0006, + "step": 15229 + }, + { + "epoch": 0.981504156731327, + "grad_norm": 0.014743855267792875, + "learning_rate": 2.1339061940565702e-07, + "loss": 0.0, + "step": 15230 + }, + { + "epoch": 0.9815686021782561, + "grad_norm": 0.0023293280917756737, + "learning_rate": 2.126745435016112e-07, + "loss": 0.0, + "step": 15231 + }, + { + "epoch": 0.9816330476251853, + "grad_norm": 9.553283079941483e-05, + "learning_rate": 2.1195846759756536e-07, + "loss": 0.0, + "step": 15232 + }, + { + "epoch": 0.9816974930721144, + "grad_norm": 4.284839848487141e-05, + "learning_rate": 2.1124239169351955e-07, + "loss": 0.0, + "step": 15233 + }, + { + "epoch": 0.9817619385190436, + "grad_norm": 0.0060593230029311156, + "learning_rate": 2.105263157894737e-07, + "loss": 0.0001, + "step": 15234 + }, + { + "epoch": 0.9818263839659728, + "grad_norm": 0.0011964518485110813, + "learning_rate": 2.0981023988542788e-07, + "loss": 0.0, + "step": 15235 + }, + { + "epoch": 0.981890829412902, + "grad_norm": 0.32554900492517685, + "learning_rate": 2.0909416398138205e-07, + "loss": 0.0022, + "step": 15236 + }, + { + "epoch": 0.9819552748598311, + "grad_norm": 0.0006848094652021027, + "learning_rate": 2.0837808807733622e-07, + "loss": 0.0, + "step": 15237 + }, + { + "epoch": 0.9820197203067603, + "grad_norm": 0.001575442477279625, + "learning_rate": 2.076620121732904e-07, + "loss": 0.0, + "step": 15238 + }, + { + "epoch": 0.9820841657536895, + "grad_norm": 0.07122442175884686, + "learning_rate": 2.0694593626924455e-07, + "loss": 0.0003, + "step": 15239 + }, + { + "epoch": 0.9821486112006187, + "grad_norm": 0.047471250288807096, + "learning_rate": 2.0622986036519871e-07, + "loss": 0.0004, + "step": 15240 + }, + { + "epoch": 0.9822130566475479, + "grad_norm": 0.0006905600191781044, + "learning_rate": 2.055137844611529e-07, + "loss": 0.0, + "step": 15241 + }, + { + "epoch": 0.9822775020944771, + "grad_norm": 3.866391618307458, + "learning_rate": 2.0479770855710707e-07, + "loss": 0.0589, + "step": 15242 + }, + { + "epoch": 0.9823419475414062, + "grad_norm": 0.0015586490202091803, + "learning_rate": 2.0408163265306121e-07, + "loss": 0.0, + "step": 15243 + }, + { + "epoch": 0.9824063929883353, + "grad_norm": 0.0002299173824988018, + "learning_rate": 2.033655567490154e-07, + "loss": 0.0, + "step": 15244 + }, + { + "epoch": 0.9824708384352645, + "grad_norm": 0.05903546807777156, + "learning_rate": 2.0264948084496957e-07, + "loss": 0.0003, + "step": 15245 + }, + { + "epoch": 0.9825352838821937, + "grad_norm": 0.05004071891032356, + "learning_rate": 2.0193340494092377e-07, + "loss": 0.0001, + "step": 15246 + }, + { + "epoch": 0.9825997293291229, + "grad_norm": 0.00033025603537803906, + "learning_rate": 2.0121732903687793e-07, + "loss": 0.0, + "step": 15247 + }, + { + "epoch": 0.9826641747760521, + "grad_norm": 0.0006552670394381289, + "learning_rate": 2.005012531328321e-07, + "loss": 0.0, + "step": 15248 + }, + { + "epoch": 0.9827286202229812, + "grad_norm": 0.0016977273912929997, + "learning_rate": 1.997851772287863e-07, + "loss": 0.0, + "step": 15249 + }, + { + "epoch": 0.9827930656699104, + "grad_norm": 0.18565537141618688, + "learning_rate": 1.9906910132474043e-07, + "loss": 0.0005, + "step": 15250 + }, + { + "epoch": 0.9828575111168396, + "grad_norm": 0.07746788176045784, + "learning_rate": 1.983530254206946e-07, + "loss": 0.0017, + "step": 15251 + }, + { + "epoch": 0.9829219565637688, + "grad_norm": 0.001825236225272376, + "learning_rate": 1.976369495166488e-07, + "loss": 0.0, + "step": 15252 + }, + { + "epoch": 0.982986402010698, + "grad_norm": 0.02276023707093468, + "learning_rate": 1.9692087361260296e-07, + "loss": 0.0002, + "step": 15253 + }, + { + "epoch": 0.9830508474576272, + "grad_norm": 0.0002736884330044127, + "learning_rate": 1.9620479770855715e-07, + "loss": 0.0, + "step": 15254 + }, + { + "epoch": 0.9831152929045563, + "grad_norm": 0.003627516108369626, + "learning_rate": 1.954887218045113e-07, + "loss": 0.0, + "step": 15255 + }, + { + "epoch": 0.9831797383514854, + "grad_norm": 0.0022958784688874136, + "learning_rate": 1.9477264590046546e-07, + "loss": 0.0, + "step": 15256 + }, + { + "epoch": 0.9832441837984146, + "grad_norm": 0.00014915189100592606, + "learning_rate": 1.9405656999641965e-07, + "loss": 0.0, + "step": 15257 + }, + { + "epoch": 0.9833086292453438, + "grad_norm": 6.148843509926524e-05, + "learning_rate": 1.9334049409237381e-07, + "loss": 0.0, + "step": 15258 + }, + { + "epoch": 0.983373074692273, + "grad_norm": 0.00010684416463237988, + "learning_rate": 1.9262441818832795e-07, + "loss": 0.0, + "step": 15259 + }, + { + "epoch": 0.9834375201392022, + "grad_norm": 0.8740741054404098, + "learning_rate": 1.9190834228428215e-07, + "loss": 0.0013, + "step": 15260 + }, + { + "epoch": 0.9835019655861313, + "grad_norm": 0.001053662257371922, + "learning_rate": 1.9119226638023631e-07, + "loss": 0.0, + "step": 15261 + }, + { + "epoch": 0.9835664110330605, + "grad_norm": 0.2909590903034731, + "learning_rate": 1.904761904761905e-07, + "loss": 0.0005, + "step": 15262 + }, + { + "epoch": 0.9836308564799897, + "grad_norm": 0.001149833658102217, + "learning_rate": 1.8976011457214467e-07, + "loss": 0.0, + "step": 15263 + }, + { + "epoch": 0.9836953019269189, + "grad_norm": 0.0009560136118597889, + "learning_rate": 1.890440386680988e-07, + "loss": 0.0, + "step": 15264 + }, + { + "epoch": 0.9837597473738481, + "grad_norm": 0.04803480853553378, + "learning_rate": 1.88327962764053e-07, + "loss": 0.0002, + "step": 15265 + }, + { + "epoch": 0.9838241928207773, + "grad_norm": 0.0016538616173160513, + "learning_rate": 1.8761188686000717e-07, + "loss": 0.0, + "step": 15266 + }, + { + "epoch": 0.9838886382677063, + "grad_norm": 0.01374418114037808, + "learning_rate": 1.8689581095596134e-07, + "loss": 0.0001, + "step": 15267 + }, + { + "epoch": 0.9839530837146355, + "grad_norm": 0.19178463067401658, + "learning_rate": 1.8617973505191553e-07, + "loss": 0.0003, + "step": 15268 + }, + { + "epoch": 0.9840175291615647, + "grad_norm": 0.0006956476860202471, + "learning_rate": 1.854636591478697e-07, + "loss": 0.0, + "step": 15269 + }, + { + "epoch": 0.9840819746084939, + "grad_norm": 0.00035165631147443776, + "learning_rate": 1.8474758324382384e-07, + "loss": 0.0, + "step": 15270 + }, + { + "epoch": 0.9841464200554231, + "grad_norm": 0.002953816493226346, + "learning_rate": 1.8403150733977803e-07, + "loss": 0.0, + "step": 15271 + }, + { + "epoch": 0.9842108655023523, + "grad_norm": 0.0003641283605604771, + "learning_rate": 1.833154314357322e-07, + "loss": 0.0, + "step": 15272 + }, + { + "epoch": 0.9842753109492814, + "grad_norm": 0.0033303717674017914, + "learning_rate": 1.825993555316864e-07, + "loss": 0.0, + "step": 15273 + }, + { + "epoch": 0.9843397563962106, + "grad_norm": 0.0006977955450955413, + "learning_rate": 1.8188327962764056e-07, + "loss": 0.0, + "step": 15274 + }, + { + "epoch": 0.9844042018431398, + "grad_norm": 0.006914318477005871, + "learning_rate": 1.811672037235947e-07, + "loss": 0.0, + "step": 15275 + }, + { + "epoch": 0.984468647290069, + "grad_norm": 0.0029855712369581877, + "learning_rate": 1.804511278195489e-07, + "loss": 0.0, + "step": 15276 + }, + { + "epoch": 0.9845330927369982, + "grad_norm": 0.0007799178281663518, + "learning_rate": 1.7973505191550305e-07, + "loss": 0.0, + "step": 15277 + }, + { + "epoch": 0.9845975381839273, + "grad_norm": 0.010909400934901978, + "learning_rate": 1.7901897601145722e-07, + "loss": 0.0, + "step": 15278 + }, + { + "epoch": 0.9846619836308564, + "grad_norm": 0.0044053793582521416, + "learning_rate": 1.7830290010741141e-07, + "loss": 0.0, + "step": 15279 + }, + { + "epoch": 0.9847264290777856, + "grad_norm": 0.9564209529679507, + "learning_rate": 1.7758682420336555e-07, + "loss": 0.0091, + "step": 15280 + }, + { + "epoch": 0.9847908745247148, + "grad_norm": 0.40579912033949256, + "learning_rate": 1.7687074829931975e-07, + "loss": 0.0035, + "step": 15281 + }, + { + "epoch": 0.984855319971644, + "grad_norm": 0.09994860926754734, + "learning_rate": 1.761546723952739e-07, + "loss": 0.0001, + "step": 15282 + }, + { + "epoch": 0.9849197654185732, + "grad_norm": 0.04675263088519276, + "learning_rate": 1.7543859649122808e-07, + "loss": 0.0003, + "step": 15283 + }, + { + "epoch": 0.9849842108655024, + "grad_norm": 0.00011445797154276652, + "learning_rate": 1.7472252058718227e-07, + "loss": 0.0, + "step": 15284 + }, + { + "epoch": 0.9850486563124315, + "grad_norm": 0.012145525836946542, + "learning_rate": 1.7400644468313644e-07, + "loss": 0.0, + "step": 15285 + }, + { + "epoch": 0.9851131017593607, + "grad_norm": 0.0003475919052068295, + "learning_rate": 1.7329036877909058e-07, + "loss": 0.0, + "step": 15286 + }, + { + "epoch": 0.9851775472062899, + "grad_norm": 0.02558688960961057, + "learning_rate": 1.7257429287504477e-07, + "loss": 0.0, + "step": 15287 + }, + { + "epoch": 0.9852419926532191, + "grad_norm": 0.00010669744202209233, + "learning_rate": 1.7185821697099894e-07, + "loss": 0.0, + "step": 15288 + }, + { + "epoch": 0.9853064381001482, + "grad_norm": 0.0006361258570100312, + "learning_rate": 1.7114214106695313e-07, + "loss": 0.0, + "step": 15289 + }, + { + "epoch": 0.9853708835470774, + "grad_norm": 0.046747478735065066, + "learning_rate": 1.704260651629073e-07, + "loss": 0.0001, + "step": 15290 + }, + { + "epoch": 0.9854353289940065, + "grad_norm": 0.00012749033638977258, + "learning_rate": 1.6970998925886144e-07, + "loss": 0.0, + "step": 15291 + }, + { + "epoch": 0.9854997744409357, + "grad_norm": 0.0007303949571343691, + "learning_rate": 1.6899391335481563e-07, + "loss": 0.0, + "step": 15292 + }, + { + "epoch": 0.9855642198878649, + "grad_norm": 0.00033894960956004134, + "learning_rate": 1.682778374507698e-07, + "loss": 0.0, + "step": 15293 + }, + { + "epoch": 0.9856286653347941, + "grad_norm": 0.12599162817477413, + "learning_rate": 1.6756176154672396e-07, + "loss": 0.0014, + "step": 15294 + }, + { + "epoch": 0.9856931107817233, + "grad_norm": 0.0004907227780169364, + "learning_rate": 1.6684568564267816e-07, + "loss": 0.0, + "step": 15295 + }, + { + "epoch": 0.9857575562286525, + "grad_norm": 0.3005311471082716, + "learning_rate": 1.661296097386323e-07, + "loss": 0.0012, + "step": 15296 + }, + { + "epoch": 0.9858220016755816, + "grad_norm": 0.001568842005944896, + "learning_rate": 1.6541353383458646e-07, + "loss": 0.0, + "step": 15297 + }, + { + "epoch": 0.9858864471225108, + "grad_norm": 0.004407247136844806, + "learning_rate": 1.6469745793054065e-07, + "loss": 0.0, + "step": 15298 + }, + { + "epoch": 0.98595089256944, + "grad_norm": 0.0003473983895130904, + "learning_rate": 1.6398138202649482e-07, + "loss": 0.0, + "step": 15299 + }, + { + "epoch": 0.9860153380163691, + "grad_norm": 0.00863692447582535, + "learning_rate": 1.6326530612244901e-07, + "loss": 0.0, + "step": 15300 + }, + { + "epoch": 0.9860797834632983, + "grad_norm": 0.00211103398049022, + "learning_rate": 1.6254923021840315e-07, + "loss": 0.0, + "step": 15301 + }, + { + "epoch": 0.9861442289102275, + "grad_norm": 0.0018230068901888296, + "learning_rate": 1.6183315431435732e-07, + "loss": 0.0, + "step": 15302 + }, + { + "epoch": 0.9862086743571566, + "grad_norm": 0.015832869636882328, + "learning_rate": 1.611170784103115e-07, + "loss": 0.0, + "step": 15303 + }, + { + "epoch": 0.9862731198040858, + "grad_norm": 0.023791212077183488, + "learning_rate": 1.6040100250626568e-07, + "loss": 0.0015, + "step": 15304 + }, + { + "epoch": 0.986337565251015, + "grad_norm": 0.0038339565004358363, + "learning_rate": 1.5968492660221985e-07, + "loss": 0.0, + "step": 15305 + }, + { + "epoch": 0.9864020106979442, + "grad_norm": 0.043639507155779036, + "learning_rate": 1.5896885069817404e-07, + "loss": 0.0001, + "step": 15306 + }, + { + "epoch": 0.9864664561448734, + "grad_norm": 0.006059066329226813, + "learning_rate": 1.5825277479412818e-07, + "loss": 0.0, + "step": 15307 + }, + { + "epoch": 0.9865309015918026, + "grad_norm": 0.0073793358972736945, + "learning_rate": 1.5753669889008237e-07, + "loss": 0.0001, + "step": 15308 + }, + { + "epoch": 0.9865953470387318, + "grad_norm": 0.11897962667166657, + "learning_rate": 1.5682062298603654e-07, + "loss": 0.0004, + "step": 15309 + }, + { + "epoch": 0.9866597924856609, + "grad_norm": 0.0016394853548579568, + "learning_rate": 1.561045470819907e-07, + "loss": 0.0, + "step": 15310 + }, + { + "epoch": 0.98672423793259, + "grad_norm": 0.0023086441209033705, + "learning_rate": 1.5538847117794487e-07, + "loss": 0.0, + "step": 15311 + }, + { + "epoch": 0.9867886833795192, + "grad_norm": 0.000473328226471948, + "learning_rate": 1.5467239527389904e-07, + "loss": 0.0, + "step": 15312 + }, + { + "epoch": 0.9868531288264484, + "grad_norm": 0.003990274313165149, + "learning_rate": 1.5395631936985323e-07, + "loss": 0.0, + "step": 15313 + }, + { + "epoch": 0.9869175742733776, + "grad_norm": 0.17149084682895488, + "learning_rate": 1.532402434658074e-07, + "loss": 0.0006, + "step": 15314 + }, + { + "epoch": 0.9869820197203067, + "grad_norm": 0.009257975158263423, + "learning_rate": 1.5252416756176156e-07, + "loss": 0.0, + "step": 15315 + }, + { + "epoch": 0.9870464651672359, + "grad_norm": 0.014955583267460219, + "learning_rate": 1.5180809165771573e-07, + "loss": 0.0, + "step": 15316 + }, + { + "epoch": 0.9871109106141651, + "grad_norm": 0.10271010438525041, + "learning_rate": 1.510920157536699e-07, + "loss": 0.0001, + "step": 15317 + }, + { + "epoch": 0.9871753560610943, + "grad_norm": 0.001647549115393784, + "learning_rate": 1.5037593984962406e-07, + "loss": 0.0, + "step": 15318 + }, + { + "epoch": 0.9872398015080235, + "grad_norm": 0.00025357547641687695, + "learning_rate": 1.4965986394557823e-07, + "loss": 0.0, + "step": 15319 + }, + { + "epoch": 0.9873042469549527, + "grad_norm": 0.019612992119780556, + "learning_rate": 1.4894378804153242e-07, + "loss": 0.0, + "step": 15320 + }, + { + "epoch": 0.9873686924018819, + "grad_norm": 0.001171164674540023, + "learning_rate": 1.4822771213748659e-07, + "loss": 0.0, + "step": 15321 + }, + { + "epoch": 0.9874331378488109, + "grad_norm": 0.0034558749272922897, + "learning_rate": 1.4751163623344075e-07, + "loss": 0.0, + "step": 15322 + }, + { + "epoch": 0.9874975832957401, + "grad_norm": 0.00045139744625150767, + "learning_rate": 1.4679556032939492e-07, + "loss": 0.0, + "step": 15323 + }, + { + "epoch": 0.9875620287426693, + "grad_norm": 0.0007023775731221126, + "learning_rate": 1.460794844253491e-07, + "loss": 0.0, + "step": 15324 + }, + { + "epoch": 0.9876264741895985, + "grad_norm": 0.0009513016630730917, + "learning_rate": 1.4536340852130328e-07, + "loss": 0.0, + "step": 15325 + }, + { + "epoch": 0.9876909196365277, + "grad_norm": 0.05982552834564737, + "learning_rate": 1.4464733261725744e-07, + "loss": 0.0002, + "step": 15326 + }, + { + "epoch": 0.9877553650834568, + "grad_norm": 0.003501219232904495, + "learning_rate": 1.439312567132116e-07, + "loss": 0.0, + "step": 15327 + }, + { + "epoch": 0.987819810530386, + "grad_norm": 0.007539849747465022, + "learning_rate": 1.4321518080916578e-07, + "loss": 0.0, + "step": 15328 + }, + { + "epoch": 0.9878842559773152, + "grad_norm": 0.0007982695888039108, + "learning_rate": 1.4249910490511997e-07, + "loss": 0.0, + "step": 15329 + }, + { + "epoch": 0.9879487014242444, + "grad_norm": 0.0011078097149875995, + "learning_rate": 1.417830290010741e-07, + "loss": 0.0, + "step": 15330 + }, + { + "epoch": 0.9880131468711736, + "grad_norm": 0.0003443949979325242, + "learning_rate": 1.410669530970283e-07, + "loss": 0.0, + "step": 15331 + }, + { + "epoch": 0.9880775923181028, + "grad_norm": 0.1633003675412003, + "learning_rate": 1.4035087719298247e-07, + "loss": 0.0005, + "step": 15332 + }, + { + "epoch": 0.988142037765032, + "grad_norm": 0.18057904691791732, + "learning_rate": 1.3963480128893664e-07, + "loss": 0.0018, + "step": 15333 + }, + { + "epoch": 0.988206483211961, + "grad_norm": 0.0007753720428181999, + "learning_rate": 1.389187253848908e-07, + "loss": 0.0, + "step": 15334 + }, + { + "epoch": 0.9882709286588902, + "grad_norm": 0.0017123565601338646, + "learning_rate": 1.3820264948084497e-07, + "loss": 0.0, + "step": 15335 + }, + { + "epoch": 0.9883353741058194, + "grad_norm": 0.0344613932183989, + "learning_rate": 1.3748657357679916e-07, + "loss": 0.0, + "step": 15336 + }, + { + "epoch": 0.9883998195527486, + "grad_norm": 0.19325097253192008, + "learning_rate": 1.3677049767275333e-07, + "loss": 0.0002, + "step": 15337 + }, + { + "epoch": 0.9884642649996778, + "grad_norm": 0.0009819431197656918, + "learning_rate": 1.360544217687075e-07, + "loss": 0.0, + "step": 15338 + }, + { + "epoch": 0.988528710446607, + "grad_norm": 0.1516750895755982, + "learning_rate": 1.3533834586466166e-07, + "loss": 0.0023, + "step": 15339 + }, + { + "epoch": 0.9885931558935361, + "grad_norm": 0.001658402824212663, + "learning_rate": 1.3462226996061585e-07, + "loss": 0.0, + "step": 15340 + }, + { + "epoch": 0.9886576013404653, + "grad_norm": 0.0015698162912195304, + "learning_rate": 1.3390619405657002e-07, + "loss": 0.0, + "step": 15341 + }, + { + "epoch": 0.9887220467873945, + "grad_norm": 0.008429947141429642, + "learning_rate": 1.3319011815252419e-07, + "loss": 0.0001, + "step": 15342 + }, + { + "epoch": 0.9887864922343237, + "grad_norm": 0.0024822454325503923, + "learning_rate": 1.3247404224847835e-07, + "loss": 0.0, + "step": 15343 + }, + { + "epoch": 0.9888509376812529, + "grad_norm": 4.2695759973628696e-05, + "learning_rate": 1.3175796634443252e-07, + "loss": 0.0, + "step": 15344 + }, + { + "epoch": 0.988915383128182, + "grad_norm": 0.03248854376485151, + "learning_rate": 1.3104189044038668e-07, + "loss": 0.0001, + "step": 15345 + }, + { + "epoch": 0.9889798285751111, + "grad_norm": 0.009666005164565861, + "learning_rate": 1.3032581453634085e-07, + "loss": 0.0, + "step": 15346 + }, + { + "epoch": 0.9890442740220403, + "grad_norm": 0.012461908937499057, + "learning_rate": 1.2960973863229504e-07, + "loss": 0.0, + "step": 15347 + }, + { + "epoch": 0.9891087194689695, + "grad_norm": 0.009181846757203724, + "learning_rate": 1.288936627282492e-07, + "loss": 0.0, + "step": 15348 + }, + { + "epoch": 0.9891731649158987, + "grad_norm": 0.3086505185798492, + "learning_rate": 1.2817758682420338e-07, + "loss": 0.0005, + "step": 15349 + }, + { + "epoch": 0.9892376103628279, + "grad_norm": 9.000946232606643e-05, + "learning_rate": 1.2746151092015754e-07, + "loss": 0.0, + "step": 15350 + }, + { + "epoch": 0.989302055809757, + "grad_norm": 0.00017109891452988802, + "learning_rate": 1.267454350161117e-07, + "loss": 0.0, + "step": 15351 + }, + { + "epoch": 0.9893665012566862, + "grad_norm": 0.0014436826725710914, + "learning_rate": 1.260293591120659e-07, + "loss": 0.0, + "step": 15352 + }, + { + "epoch": 0.9894309467036154, + "grad_norm": 0.010794710318647652, + "learning_rate": 1.2531328320802004e-07, + "loss": 0.0, + "step": 15353 + }, + { + "epoch": 0.9894953921505446, + "grad_norm": 0.00487863410298011, + "learning_rate": 1.2459720730397423e-07, + "loss": 0.0, + "step": 15354 + }, + { + "epoch": 0.9895598375974738, + "grad_norm": 0.004251305964631465, + "learning_rate": 1.238811313999284e-07, + "loss": 0.0, + "step": 15355 + }, + { + "epoch": 0.9896242830444029, + "grad_norm": 0.0009995787571245016, + "learning_rate": 1.2316505549588257e-07, + "loss": 0.0, + "step": 15356 + }, + { + "epoch": 0.989688728491332, + "grad_norm": 0.0002976666463085584, + "learning_rate": 1.2244897959183673e-07, + "loss": 0.0, + "step": 15357 + }, + { + "epoch": 0.9897531739382612, + "grad_norm": 0.00029859447566665933, + "learning_rate": 1.2173290368779093e-07, + "loss": 0.0, + "step": 15358 + }, + { + "epoch": 0.9898176193851904, + "grad_norm": 0.017260082460229826, + "learning_rate": 1.210168277837451e-07, + "loss": 0.0002, + "step": 15359 + }, + { + "epoch": 0.9898820648321196, + "grad_norm": 0.03121110798224628, + "learning_rate": 1.2030075187969926e-07, + "loss": 0.0001, + "step": 15360 + }, + { + "epoch": 0.9899465102790488, + "grad_norm": 0.0005941675613315212, + "learning_rate": 1.1958467597565343e-07, + "loss": 0.0, + "step": 15361 + }, + { + "epoch": 0.990010955725978, + "grad_norm": 9.114690847879131e-05, + "learning_rate": 1.188686000716076e-07, + "loss": 0.0, + "step": 15362 + }, + { + "epoch": 0.9900754011729072, + "grad_norm": 1.7429828966388234, + "learning_rate": 1.1815252416756177e-07, + "loss": 0.0055, + "step": 15363 + }, + { + "epoch": 0.9901398466198363, + "grad_norm": 0.518626867181079, + "learning_rate": 1.1743644826351595e-07, + "loss": 0.0028, + "step": 15364 + }, + { + "epoch": 0.9902042920667655, + "grad_norm": 0.3350462068176509, + "learning_rate": 1.167203723594701e-07, + "loss": 0.0003, + "step": 15365 + }, + { + "epoch": 0.9902687375136947, + "grad_norm": 0.05405229337421042, + "learning_rate": 1.1600429645542428e-07, + "loss": 0.0001, + "step": 15366 + }, + { + "epoch": 0.9903331829606238, + "grad_norm": 0.06269417714193161, + "learning_rate": 1.1528822055137846e-07, + "loss": 0.0001, + "step": 15367 + }, + { + "epoch": 0.990397628407553, + "grad_norm": 0.00020441138102923534, + "learning_rate": 1.1457214464733263e-07, + "loss": 0.0, + "step": 15368 + }, + { + "epoch": 0.9904620738544822, + "grad_norm": 0.3037918203904865, + "learning_rate": 1.138560687432868e-07, + "loss": 0.0008, + "step": 15369 + }, + { + "epoch": 0.9905265193014113, + "grad_norm": 0.02238230572787554, + "learning_rate": 1.1313999283924098e-07, + "loss": 0.0002, + "step": 15370 + }, + { + "epoch": 0.9905909647483405, + "grad_norm": 0.1309645785006673, + "learning_rate": 1.1242391693519514e-07, + "loss": 0.0003, + "step": 15371 + }, + { + "epoch": 0.9906554101952697, + "grad_norm": 0.001562903964244142, + "learning_rate": 1.1170784103114931e-07, + "loss": 0.0, + "step": 15372 + }, + { + "epoch": 0.9907198556421989, + "grad_norm": 0.45439102627328637, + "learning_rate": 1.1099176512710347e-07, + "loss": 0.0021, + "step": 15373 + }, + { + "epoch": 0.9907843010891281, + "grad_norm": 0.4411138224181444, + "learning_rate": 1.1027568922305765e-07, + "loss": 0.0053, + "step": 15374 + }, + { + "epoch": 0.9908487465360573, + "grad_norm": 0.0006388717337027706, + "learning_rate": 1.0955961331901183e-07, + "loss": 0.0, + "step": 15375 + }, + { + "epoch": 0.9909131919829864, + "grad_norm": 0.08775647044388801, + "learning_rate": 1.0884353741496599e-07, + "loss": 0.0009, + "step": 15376 + }, + { + "epoch": 0.9909776374299156, + "grad_norm": 0.01739325179771781, + "learning_rate": 1.0812746151092017e-07, + "loss": 0.0001, + "step": 15377 + }, + { + "epoch": 0.9910420828768447, + "grad_norm": 0.034971450178768425, + "learning_rate": 1.0741138560687433e-07, + "loss": 0.0001, + "step": 15378 + }, + { + "epoch": 0.9911065283237739, + "grad_norm": 0.21837122514810614, + "learning_rate": 1.0669530970282851e-07, + "loss": 0.0027, + "step": 15379 + }, + { + "epoch": 0.9911709737707031, + "grad_norm": 0.03615418008543626, + "learning_rate": 1.0597923379878268e-07, + "loss": 0.0, + "step": 15380 + }, + { + "epoch": 0.9912354192176323, + "grad_norm": 0.0013153067476646975, + "learning_rate": 1.0526315789473685e-07, + "loss": 0.0, + "step": 15381 + }, + { + "epoch": 0.9912998646645614, + "grad_norm": 0.013645522977196328, + "learning_rate": 1.0454708199069102e-07, + "loss": 0.0001, + "step": 15382 + }, + { + "epoch": 0.9913643101114906, + "grad_norm": 0.018869085279380988, + "learning_rate": 1.038310060866452e-07, + "loss": 0.0, + "step": 15383 + }, + { + "epoch": 0.9914287555584198, + "grad_norm": 0.00017469868199359676, + "learning_rate": 1.0311493018259936e-07, + "loss": 0.0, + "step": 15384 + }, + { + "epoch": 0.991493201005349, + "grad_norm": 0.004191734400330657, + "learning_rate": 1.0239885427855354e-07, + "loss": 0.0, + "step": 15385 + }, + { + "epoch": 0.9915576464522782, + "grad_norm": 0.00111540569401605, + "learning_rate": 1.016827783745077e-07, + "loss": 0.0, + "step": 15386 + }, + { + "epoch": 0.9916220918992074, + "grad_norm": 0.0028105942217102444, + "learning_rate": 1.0096670247046188e-07, + "loss": 0.0, + "step": 15387 + }, + { + "epoch": 0.9916865373461365, + "grad_norm": 0.0014986055018981306, + "learning_rate": 1.0025062656641605e-07, + "loss": 0.0, + "step": 15388 + }, + { + "epoch": 0.9917509827930656, + "grad_norm": 0.0698733557021849, + "learning_rate": 9.953455066237022e-08, + "loss": 0.0001, + "step": 15389 + }, + { + "epoch": 0.9918154282399948, + "grad_norm": 1.3593856109555784, + "learning_rate": 9.88184747583244e-08, + "loss": 0.0078, + "step": 15390 + }, + { + "epoch": 0.991879873686924, + "grad_norm": 1.6501662604056602, + "learning_rate": 9.810239885427857e-08, + "loss": 0.0138, + "step": 15391 + }, + { + "epoch": 0.9919443191338532, + "grad_norm": 0.17455723899428105, + "learning_rate": 9.738632295023273e-08, + "loss": 0.0006, + "step": 15392 + }, + { + "epoch": 0.9920087645807824, + "grad_norm": 0.011819212039522532, + "learning_rate": 9.667024704618691e-08, + "loss": 0.0001, + "step": 15393 + }, + { + "epoch": 0.9920732100277115, + "grad_norm": 0.00014493524179976452, + "learning_rate": 9.595417114214107e-08, + "loss": 0.0, + "step": 15394 + }, + { + "epoch": 0.9921376554746407, + "grad_norm": 0.0006374483992628114, + "learning_rate": 9.523809523809525e-08, + "loss": 0.0, + "step": 15395 + }, + { + "epoch": 0.9922021009215699, + "grad_norm": 0.16560081944202426, + "learning_rate": 9.45220193340494e-08, + "loss": 0.0011, + "step": 15396 + }, + { + "epoch": 0.9922665463684991, + "grad_norm": 0.00026379790258233425, + "learning_rate": 9.380594343000359e-08, + "loss": 0.0, + "step": 15397 + }, + { + "epoch": 0.9923309918154283, + "grad_norm": 0.02047838287760723, + "learning_rate": 9.308986752595777e-08, + "loss": 0.0001, + "step": 15398 + }, + { + "epoch": 0.9923954372623575, + "grad_norm": 0.2446784085500637, + "learning_rate": 9.237379162191192e-08, + "loss": 0.0004, + "step": 15399 + }, + { + "epoch": 0.9924598827092865, + "grad_norm": 0.003227860027021074, + "learning_rate": 9.16577157178661e-08, + "loss": 0.0, + "step": 15400 + }, + { + "epoch": 0.9925243281562157, + "grad_norm": 0.0002485238947423622, + "learning_rate": 9.094163981382028e-08, + "loss": 0.0, + "step": 15401 + }, + { + "epoch": 0.9925887736031449, + "grad_norm": 0.01720941809554806, + "learning_rate": 9.022556390977444e-08, + "loss": 0.0001, + "step": 15402 + }, + { + "epoch": 0.9926532190500741, + "grad_norm": 0.00032100960062109813, + "learning_rate": 8.950948800572861e-08, + "loss": 0.0, + "step": 15403 + }, + { + "epoch": 0.9927176644970033, + "grad_norm": 0.00013967251299634785, + "learning_rate": 8.879341210168278e-08, + "loss": 0.0, + "step": 15404 + }, + { + "epoch": 0.9927821099439325, + "grad_norm": 0.009221607936256614, + "learning_rate": 8.807733619763696e-08, + "loss": 0.0, + "step": 15405 + }, + { + "epoch": 0.9928465553908616, + "grad_norm": 0.02119800257858215, + "learning_rate": 8.736126029359114e-08, + "loss": 0.0015, + "step": 15406 + }, + { + "epoch": 0.9929110008377908, + "grad_norm": 0.006607428148178863, + "learning_rate": 8.664518438954529e-08, + "loss": 0.0001, + "step": 15407 + }, + { + "epoch": 0.99297544628472, + "grad_norm": 0.000993933285930776, + "learning_rate": 8.592910848549947e-08, + "loss": 0.0, + "step": 15408 + }, + { + "epoch": 0.9930398917316492, + "grad_norm": 0.04713009178308316, + "learning_rate": 8.521303258145365e-08, + "loss": 0.0001, + "step": 15409 + }, + { + "epoch": 0.9931043371785784, + "grad_norm": 0.00726139924226023, + "learning_rate": 8.449695667740781e-08, + "loss": 0.0001, + "step": 15410 + }, + { + "epoch": 0.9931687826255076, + "grad_norm": 0.0018708575708756142, + "learning_rate": 8.378088077336198e-08, + "loss": 0.0, + "step": 15411 + }, + { + "epoch": 0.9932332280724366, + "grad_norm": 0.0834809032921711, + "learning_rate": 8.306480486931615e-08, + "loss": 0.0002, + "step": 15412 + }, + { + "epoch": 0.9932976735193658, + "grad_norm": 0.00013640415194037607, + "learning_rate": 8.234872896527033e-08, + "loss": 0.0, + "step": 15413 + }, + { + "epoch": 0.993362118966295, + "grad_norm": 0.014602227139775253, + "learning_rate": 8.163265306122451e-08, + "loss": 0.0, + "step": 15414 + }, + { + "epoch": 0.9934265644132242, + "grad_norm": 0.01868237382937571, + "learning_rate": 8.091657715717866e-08, + "loss": 0.0001, + "step": 15415 + }, + { + "epoch": 0.9934910098601534, + "grad_norm": 0.6517121232399404, + "learning_rate": 8.020050125313284e-08, + "loss": 0.0011, + "step": 15416 + }, + { + "epoch": 0.9935554553070826, + "grad_norm": 0.0210594611327598, + "learning_rate": 7.948442534908702e-08, + "loss": 0.0, + "step": 15417 + }, + { + "epoch": 0.9936199007540117, + "grad_norm": 0.20600616643122802, + "learning_rate": 7.876834944504119e-08, + "loss": 0.0003, + "step": 15418 + }, + { + "epoch": 0.9936843462009409, + "grad_norm": 0.0006812269480493978, + "learning_rate": 7.805227354099535e-08, + "loss": 0.0, + "step": 15419 + }, + { + "epoch": 0.9937487916478701, + "grad_norm": 0.011687333493238744, + "learning_rate": 7.733619763694952e-08, + "loss": 0.0, + "step": 15420 + }, + { + "epoch": 0.9938132370947993, + "grad_norm": 0.00022207912377938065, + "learning_rate": 7.66201217329037e-08, + "loss": 0.0, + "step": 15421 + }, + { + "epoch": 0.9938776825417285, + "grad_norm": 0.006105163470982768, + "learning_rate": 7.590404582885786e-08, + "loss": 0.0, + "step": 15422 + }, + { + "epoch": 0.9939421279886576, + "grad_norm": 0.004071650145522754, + "learning_rate": 7.518796992481203e-08, + "loss": 0.0, + "step": 15423 + }, + { + "epoch": 0.9940065734355867, + "grad_norm": 0.0001294769158907121, + "learning_rate": 7.447189402076621e-08, + "loss": 0.0, + "step": 15424 + }, + { + "epoch": 0.9940710188825159, + "grad_norm": 0.007293988964356481, + "learning_rate": 7.375581811672038e-08, + "loss": 0.0, + "step": 15425 + }, + { + "epoch": 0.9941354643294451, + "grad_norm": 0.5071361732953923, + "learning_rate": 7.303974221267456e-08, + "loss": 0.0024, + "step": 15426 + }, + { + "epoch": 0.9941999097763743, + "grad_norm": 0.004333334066827409, + "learning_rate": 7.232366630862872e-08, + "loss": 0.0, + "step": 15427 + }, + { + "epoch": 0.9942643552233035, + "grad_norm": 0.017363078925581327, + "learning_rate": 7.160759040458289e-08, + "loss": 0.0, + "step": 15428 + }, + { + "epoch": 0.9943288006702327, + "grad_norm": 0.0011615992960153968, + "learning_rate": 7.089151450053705e-08, + "loss": 0.0, + "step": 15429 + }, + { + "epoch": 0.9943932461171618, + "grad_norm": 0.0003038760653883744, + "learning_rate": 7.017543859649123e-08, + "loss": 0.0, + "step": 15430 + }, + { + "epoch": 0.994457691564091, + "grad_norm": 0.000252489032190593, + "learning_rate": 6.94593626924454e-08, + "loss": 0.0, + "step": 15431 + }, + { + "epoch": 0.9945221370110202, + "grad_norm": 0.0002281339806080607, + "learning_rate": 6.874328678839958e-08, + "loss": 0.0, + "step": 15432 + }, + { + "epoch": 0.9945865824579494, + "grad_norm": 0.0021669236006270874, + "learning_rate": 6.802721088435375e-08, + "loss": 0.0, + "step": 15433 + }, + { + "epoch": 0.9946510279048785, + "grad_norm": 0.0035040173439457962, + "learning_rate": 6.731113498030793e-08, + "loss": 0.0, + "step": 15434 + }, + { + "epoch": 0.9947154733518077, + "grad_norm": 4.2521529407209614e-05, + "learning_rate": 6.659505907626209e-08, + "loss": 0.0, + "step": 15435 + }, + { + "epoch": 0.9947799187987368, + "grad_norm": 0.0026709807723655685, + "learning_rate": 6.587898317221626e-08, + "loss": 0.0, + "step": 15436 + }, + { + "epoch": 0.994844364245666, + "grad_norm": 0.010820683146318837, + "learning_rate": 6.516290726817043e-08, + "loss": 0.0, + "step": 15437 + }, + { + "epoch": 0.9949088096925952, + "grad_norm": 0.003205757880112345, + "learning_rate": 6.44468313641246e-08, + "loss": 0.0, + "step": 15438 + }, + { + "epoch": 0.9949732551395244, + "grad_norm": 0.0005007387307297139, + "learning_rate": 6.373075546007877e-08, + "loss": 0.0, + "step": 15439 + }, + { + "epoch": 0.9950377005864536, + "grad_norm": 0.046509660126946864, + "learning_rate": 6.301467955603295e-08, + "loss": 0.0003, + "step": 15440 + }, + { + "epoch": 0.9951021460333828, + "grad_norm": 0.013549052359340931, + "learning_rate": 6.229860365198712e-08, + "loss": 0.0, + "step": 15441 + }, + { + "epoch": 0.995166591480312, + "grad_norm": 0.009939784287012338, + "learning_rate": 6.158252774794128e-08, + "loss": 0.0001, + "step": 15442 + }, + { + "epoch": 0.9952310369272411, + "grad_norm": 0.002741087693011581, + "learning_rate": 6.086645184389546e-08, + "loss": 0.0, + "step": 15443 + }, + { + "epoch": 0.9952954823741703, + "grad_norm": 0.006260263699628107, + "learning_rate": 6.015037593984963e-08, + "loss": 0.0, + "step": 15444 + }, + { + "epoch": 0.9953599278210994, + "grad_norm": 0.00049759231216585, + "learning_rate": 5.94343000358038e-08, + "loss": 0.0, + "step": 15445 + }, + { + "epoch": 0.9954243732680286, + "grad_norm": 0.1380881394314596, + "learning_rate": 5.8718224131757975e-08, + "loss": 0.0003, + "step": 15446 + }, + { + "epoch": 0.9954888187149578, + "grad_norm": 0.006507125813773554, + "learning_rate": 5.800214822771214e-08, + "loss": 0.0, + "step": 15447 + }, + { + "epoch": 0.9955532641618869, + "grad_norm": 0.007913763476941546, + "learning_rate": 5.7286072323666315e-08, + "loss": 0.0, + "step": 15448 + }, + { + "epoch": 0.9956177096088161, + "grad_norm": 0.0058919833109365456, + "learning_rate": 5.656999641962049e-08, + "loss": 0.0, + "step": 15449 + }, + { + "epoch": 0.9956821550557453, + "grad_norm": 0.042774442493267895, + "learning_rate": 5.5853920515574654e-08, + "loss": 0.0002, + "step": 15450 + }, + { + "epoch": 0.9957466005026745, + "grad_norm": 0.0007127180380731889, + "learning_rate": 5.513784461152883e-08, + "loss": 0.0, + "step": 15451 + }, + { + "epoch": 0.9958110459496037, + "grad_norm": 0.1438407771996523, + "learning_rate": 5.4421768707482993e-08, + "loss": 0.0006, + "step": 15452 + }, + { + "epoch": 0.9958754913965329, + "grad_norm": 0.0007669071129745034, + "learning_rate": 5.3705692803437166e-08, + "loss": 0.0, + "step": 15453 + }, + { + "epoch": 0.995939936843462, + "grad_norm": 0.0002747317806881671, + "learning_rate": 5.298961689939134e-08, + "loss": 0.0, + "step": 15454 + }, + { + "epoch": 0.9960043822903912, + "grad_norm": 5.1187894086526105e-05, + "learning_rate": 5.227354099534551e-08, + "loss": 0.0, + "step": 15455 + }, + { + "epoch": 0.9960688277373203, + "grad_norm": 0.19142032104464268, + "learning_rate": 5.155746509129968e-08, + "loss": 0.0006, + "step": 15456 + }, + { + "epoch": 0.9961332731842495, + "grad_norm": 0.6594525034042554, + "learning_rate": 5.084138918725385e-08, + "loss": 0.0018, + "step": 15457 + }, + { + "epoch": 0.9961977186311787, + "grad_norm": 0.0003211975571060089, + "learning_rate": 5.0125313283208025e-08, + "loss": 0.0, + "step": 15458 + }, + { + "epoch": 0.9962621640781079, + "grad_norm": 0.0012403521668988536, + "learning_rate": 4.94092373791622e-08, + "loss": 0.0, + "step": 15459 + }, + { + "epoch": 0.996326609525037, + "grad_norm": 0.038828634005938584, + "learning_rate": 4.8693161475116364e-08, + "loss": 0.0001, + "step": 15460 + }, + { + "epoch": 0.9963910549719662, + "grad_norm": 0.0019163043650427354, + "learning_rate": 4.797708557107054e-08, + "loss": 0.0, + "step": 15461 + }, + { + "epoch": 0.9964555004188954, + "grad_norm": 0.006335604384163084, + "learning_rate": 4.72610096670247e-08, + "loss": 0.0, + "step": 15462 + }, + { + "epoch": 0.9965199458658246, + "grad_norm": 0.0015440385523529667, + "learning_rate": 4.654493376297888e-08, + "loss": 0.0, + "step": 15463 + }, + { + "epoch": 0.9965843913127538, + "grad_norm": 0.000717511672539573, + "learning_rate": 4.582885785893305e-08, + "loss": 0.0, + "step": 15464 + }, + { + "epoch": 0.996648836759683, + "grad_norm": 0.00018937198611685636, + "learning_rate": 4.511278195488722e-08, + "loss": 0.0, + "step": 15465 + }, + { + "epoch": 0.9967132822066122, + "grad_norm": 0.0010951535217054588, + "learning_rate": 4.439670605084139e-08, + "loss": 0.0, + "step": 15466 + }, + { + "epoch": 0.9967777276535412, + "grad_norm": 0.038216985921274704, + "learning_rate": 4.368063014679557e-08, + "loss": 0.0, + "step": 15467 + }, + { + "epoch": 0.9968421731004704, + "grad_norm": 0.001310616487340104, + "learning_rate": 4.2964554242749734e-08, + "loss": 0.0, + "step": 15468 + }, + { + "epoch": 0.9969066185473996, + "grad_norm": 0.006292524679154751, + "learning_rate": 4.224847833870391e-08, + "loss": 0.0001, + "step": 15469 + }, + { + "epoch": 0.9969710639943288, + "grad_norm": 0.000838499974695811, + "learning_rate": 4.1532402434658074e-08, + "loss": 0.0, + "step": 15470 + }, + { + "epoch": 0.997035509441258, + "grad_norm": 0.0010050876167897022, + "learning_rate": 4.0816326530612253e-08, + "loss": 0.0, + "step": 15471 + }, + { + "epoch": 0.9970999548881871, + "grad_norm": 0.005370776582966026, + "learning_rate": 4.010025062656642e-08, + "loss": 0.0, + "step": 15472 + }, + { + "epoch": 0.9971644003351163, + "grad_norm": 0.0003092987419902564, + "learning_rate": 3.938417472252059e-08, + "loss": 0.0, + "step": 15473 + }, + { + "epoch": 0.9972288457820455, + "grad_norm": 0.0013591428298022192, + "learning_rate": 3.866809881847476e-08, + "loss": 0.0, + "step": 15474 + }, + { + "epoch": 0.9972932912289747, + "grad_norm": 0.23138436589381467, + "learning_rate": 3.795202291442893e-08, + "loss": 0.0018, + "step": 15475 + }, + { + "epoch": 0.9973577366759039, + "grad_norm": 0.001873350951168154, + "learning_rate": 3.7235947010383105e-08, + "loss": 0.0, + "step": 15476 + }, + { + "epoch": 0.9974221821228331, + "grad_norm": 0.004598840045965651, + "learning_rate": 3.651987110633728e-08, + "loss": 0.0, + "step": 15477 + }, + { + "epoch": 0.9974866275697621, + "grad_norm": 0.005085088395541839, + "learning_rate": 3.5803795202291444e-08, + "loss": 0.0, + "step": 15478 + }, + { + "epoch": 0.9975510730166913, + "grad_norm": 0.001091900765744506, + "learning_rate": 3.508771929824562e-08, + "loss": 0.0, + "step": 15479 + }, + { + "epoch": 0.9976155184636205, + "grad_norm": 0.0009730691764594346, + "learning_rate": 3.437164339419979e-08, + "loss": 0.0, + "step": 15480 + }, + { + "epoch": 0.9976799639105497, + "grad_norm": 0.0017135795294502306, + "learning_rate": 3.365556749015396e-08, + "loss": 0.0, + "step": 15481 + }, + { + "epoch": 0.9977444093574789, + "grad_norm": 0.01811945222196252, + "learning_rate": 3.293949158610813e-08, + "loss": 0.0002, + "step": 15482 + }, + { + "epoch": 0.9978088548044081, + "grad_norm": 0.009926933709437718, + "learning_rate": 3.22234156820623e-08, + "loss": 0.0, + "step": 15483 + }, + { + "epoch": 0.9978733002513372, + "grad_norm": 0.0017775873375116566, + "learning_rate": 3.1507339778016475e-08, + "loss": 0.0, + "step": 15484 + }, + { + "epoch": 0.9979377456982664, + "grad_norm": 0.0006008194079299575, + "learning_rate": 3.079126387397064e-08, + "loss": 0.0, + "step": 15485 + }, + { + "epoch": 0.9980021911451956, + "grad_norm": 0.00021254665315161218, + "learning_rate": 3.0075187969924815e-08, + "loss": 0.0, + "step": 15486 + }, + { + "epoch": 0.9980666365921248, + "grad_norm": 0.00026559116747325334, + "learning_rate": 2.9359112065878988e-08, + "loss": 0.0, + "step": 15487 + }, + { + "epoch": 0.998131082039054, + "grad_norm": 0.7161213032538332, + "learning_rate": 2.8643036161833157e-08, + "loss": 0.0038, + "step": 15488 + }, + { + "epoch": 0.9981955274859832, + "grad_norm": 0.00048493262049720823, + "learning_rate": 2.7926960257787327e-08, + "loss": 0.0, + "step": 15489 + }, + { + "epoch": 0.9982599729329122, + "grad_norm": 0.012352251282173892, + "learning_rate": 2.7210884353741497e-08, + "loss": 0.0, + "step": 15490 + }, + { + "epoch": 0.9983244183798414, + "grad_norm": 0.00370486564736607, + "learning_rate": 2.649480844969567e-08, + "loss": 0.0, + "step": 15491 + }, + { + "epoch": 0.9983888638267706, + "grad_norm": 0.0036452617386337023, + "learning_rate": 2.577873254564984e-08, + "loss": 0.0, + "step": 15492 + }, + { + "epoch": 0.9984533092736998, + "grad_norm": 0.1567663838989723, + "learning_rate": 2.5062656641604012e-08, + "loss": 0.0002, + "step": 15493 + }, + { + "epoch": 0.998517754720629, + "grad_norm": 0.0005995450102191095, + "learning_rate": 2.4346580737558182e-08, + "loss": 0.0, + "step": 15494 + }, + { + "epoch": 0.9985822001675582, + "grad_norm": 0.000392582037022992, + "learning_rate": 2.363050483351235e-08, + "loss": 0.0, + "step": 15495 + }, + { + "epoch": 0.9986466456144873, + "grad_norm": 0.0009271351805534993, + "learning_rate": 2.2914428929466525e-08, + "loss": 0.0, + "step": 15496 + }, + { + "epoch": 0.9987110910614165, + "grad_norm": 0.00014936378110164797, + "learning_rate": 2.2198353025420694e-08, + "loss": 0.0, + "step": 15497 + }, + { + "epoch": 0.9987755365083457, + "grad_norm": 0.00043512063478467827, + "learning_rate": 2.1482277121374867e-08, + "loss": 0.0, + "step": 15498 + }, + { + "epoch": 0.9988399819552749, + "grad_norm": 0.09968455133142981, + "learning_rate": 2.0766201217329037e-08, + "loss": 0.0003, + "step": 15499 + }, + { + "epoch": 0.9989044274022041, + "grad_norm": 0.00020179967403577038, + "learning_rate": 2.005012531328321e-08, + "loss": 0.0, + "step": 15500 + }, + { + "epoch": 0.9989688728491332, + "grad_norm": 0.04932118147213376, + "learning_rate": 1.933404940923738e-08, + "loss": 0.0001, + "step": 15501 + }, + { + "epoch": 0.9990333182960623, + "grad_norm": 0.021431060225883328, + "learning_rate": 1.8617973505191552e-08, + "loss": 0.0001, + "step": 15502 + }, + { + "epoch": 0.9990977637429915, + "grad_norm": 2.3081529524238576e-05, + "learning_rate": 1.7901897601145722e-08, + "loss": 0.0, + "step": 15503 + }, + { + "epoch": 0.9991622091899207, + "grad_norm": 0.0006019946843029049, + "learning_rate": 1.7185821697099895e-08, + "loss": 0.0, + "step": 15504 + }, + { + "epoch": 0.9992266546368499, + "grad_norm": 0.00010010075275028686, + "learning_rate": 1.6469745793054065e-08, + "loss": 0.0, + "step": 15505 + }, + { + "epoch": 0.9992911000837791, + "grad_norm": 0.004928500843443072, + "learning_rate": 1.5753669889008238e-08, + "loss": 0.0, + "step": 15506 + }, + { + "epoch": 0.9993555455307083, + "grad_norm": 0.0028870445087702625, + "learning_rate": 1.5037593984962407e-08, + "loss": 0.0, + "step": 15507 + }, + { + "epoch": 0.9994199909776375, + "grad_norm": 1.111107454028999, + "learning_rate": 1.4321518080916579e-08, + "loss": 0.0037, + "step": 15508 + }, + { + "epoch": 0.9994844364245666, + "grad_norm": 0.006669760087007271, + "learning_rate": 1.3605442176870748e-08, + "loss": 0.0001, + "step": 15509 + }, + { + "epoch": 0.9995488818714958, + "grad_norm": 0.007184647860438429, + "learning_rate": 1.288936627282492e-08, + "loss": 0.0, + "step": 15510 + }, + { + "epoch": 0.999613327318425, + "grad_norm": 0.000263493157723586, + "learning_rate": 1.2173290368779091e-08, + "loss": 0.0, + "step": 15511 + }, + { + "epoch": 0.9996777727653541, + "grad_norm": 0.019085772562670834, + "learning_rate": 1.1457214464733262e-08, + "loss": 0.0, + "step": 15512 + }, + { + "epoch": 0.9997422182122833, + "grad_norm": 0.12087096979193869, + "learning_rate": 1.0741138560687434e-08, + "loss": 0.0003, + "step": 15513 + }, + { + "epoch": 0.9998066636592124, + "grad_norm": 0.04529828473437381, + "learning_rate": 1.0025062656641605e-08, + "loss": 0.0, + "step": 15514 + }, + { + "epoch": 0.9998711091061416, + "grad_norm": 8.925880633125442e-05, + "learning_rate": 9.308986752595776e-09, + "loss": 0.0, + "step": 15515 + }, + { + "epoch": 0.9999355545530708, + "grad_norm": 0.0003679149024459179, + "learning_rate": 8.592910848549948e-09, + "loss": 0.0, + "step": 15516 + }, + { + "epoch": 1.0, + "grad_norm": 0.2631523948960553, + "learning_rate": 7.876834944504119e-09, + "loss": 0.0003, + "step": 15517 + } + ], + "logging_steps": 1.0, + "max_steps": 15517, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 12, + "trial_name": null, + "trial_params": null +}